<a href="https://colab.research.google.com/github/techie-mayank/Data-Analytics/blob/main/R-Practice/Complex_Customer_Lifetime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
install.packages(c("ggplot2", "dplyr"))

Installing packages into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [3]:
# Load necessary libraries
library(ggplot2)
library(dplyr)

# Load the dataset
data <- read.csv("complex_customer_lifetime.csv")

# Convert dates to Date type
data$First_Purchase_Date <- as.Date(data$First_Purchase_Date, format='%Y-%m-%d')
data$Last_Purchase_Date <- as.Date(data$Last_Purchase_Date, format='%Y-%m-%d')

# Plot 1: Distribution of Customer Lifetime Value (CLV) (Total_Spent)
histogram_clv <- ggplot(data, aes(x=Total_Spent)) +
  geom_histogram(binwidth=500, fill='steelblue', color='black') +
  labs(title="Distribution of Customer Lifetime Value (CLV)", x="Total Spent", y="Count")

# Plot 2: Customer Retention over Time (Tenure)
histogram_tenure <- ggplot(data, aes(x=Tenure)) +
  geom_histogram(binwidth=10, fill='darkgreen', color='black') +
  labs(title="Customer Retention (Tenure Distribution)", x="Tenure (Months)", y="Count")

# Plot 3: CLV vs. Age and Customer Segment
boxplot_clv_age <- ggplot(data, aes(x=as.factor(Customer_Age), y=Total_Spent)) +
  geom_boxplot(fill='lightblue') +
  labs(title="Customer Lifetime Value by Age", x="Customer Age", y="Total Spent") +
  theme(axis.text.x = element_text(angle=90, hjust=1))

boxplot_clv_segment <- ggplot(data, aes(x=Customer_Segment, y=Total_Spent, fill=Customer_Segment)) +
  geom_boxplot() +
  labs(title="Customer Lifetime Value by Segment", x="Customer Segment", y="Total Spent") +
  theme(legend.position="none")

# Plot 4: Trends in Repeat Purchases
scatter_purchases <- ggplot(data, aes(x=First_Purchase_Date, y=Total_Purchases)) +
  geom_point(color='red', alpha=0.6) +
  geom_smooth(method='lm', se=FALSE, color='blue') +
  labs(title="Trends in Repeat Purchases", x="First Purchase Date", y="Total Purchases")

# Plot 5: High-Value vs. Low-Value Customers
boxplot_high_low <- ggplot(data, aes(x=Customer_Segment, y=Total_Spent, fill=Customer_Segment)) +
  geom_boxplot() +
  labs(title="Comparison of High-Value vs Low-Value Customers", x="Customer Segment", y="Total Spent") +
  theme(legend.position="none")

# Save plots
ggsave("histogram_clv.png", histogram_clv)
ggsave("histogram_tenure.png", histogram_tenure)
ggsave("boxplot_clv_age.png", boxplot_clv_age)
ggsave("boxplot_clv_segment.png", boxplot_clv_segment)
ggsave("scatter_purchases.png", scatter_purchases)
ggsave("boxplot_high_low.png", boxplot_high_low)

[1m[22mSaving 7 x 7 in image
[1m[22mSaving 7 x 7 in image
[1m[22mSaving 7 x 7 in image
[1m[22mSaving 7 x 7 in image
[1m[22mSaving 7 x 7 in image
[1m[22m`geom_smooth()` using formula = 'y ~ x'
[1m[22mSaving 7 x 7 in image
