# 🧾 Customer Lifetime Value (CLV) Prediction Project
This notebook estimates customer lifetime value (CLV, referred to as LTV in the code and charts below) from purchase behavior using Random Forest regression.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Load dataset
# pd.read_excel does not accept a `chunksize` argument (that option belongs to
# read_csv), so the workbook is read in one call. The preview is sliced from the
# in-memory frame rather than parsing the file a second time.
DATA_PATH = 'Online Retail.xlsx'
data = pd.read_excel(DATA_PATH)

# Same first-1000-rows preview the rest of the notebook may refer to.
data_preview = data.head(1000)
print("Preview of data:")
print(data_preview.head())
print("\nColumns in the dataset:")
print(data_preview.columns)

print(f"\n✅ Data loaded successfully: {data.shape}")

In [None]:
# Data Cleaning
# One chained pipeline that returns a fresh frame:
#   1. drop rows that have no CustomerID,
#   2. drop rows whose InvoiceNo (compared as text) starts with 'C',
#   3. add the line total, Quantity * UnitPrice, as TotalAmount.
data = (
    data
    .dropna(subset=['CustomerID'])
    .loc[lambda rows: ~rows['InvoiceNo'].astype(str).str.startswith('C')]
    .assign(TotalAmount=lambda rows: rows['Quantity'] * rows['UnitPrice'])
)

print("\nCleaned data sample:")
print(data.head())

In [None]:
# Feature Engineering
# Build one row per customer with Recency / Frequency / Monetary features.

# Recency is measured against the day after the latest invoice in the data,
# so the most recent purchaser gets a recency of at least 1 day.
reference_date = data['InvoiceDate'].max() + pd.Timedelta(days=1)

# Named aggregation ties every output column to its definition, instead of
# relying on the positional order of a column-rename list.
customer_df = (
    data.groupby('CustomerID')
    .agg(
        # Days between the reference date and the customer's last invoice.
        Recency=('InvoiceDate', lambda dates: (reference_date - dates.max()).days),
        # Number of distinct invoices.
        Frequency=('InvoiceNo', 'nunique'),
        # Total spend across all remaining rows.
        Monetary=('TotalAmount', 'sum'),
    )
    .reset_index()
)

# Average order value: total spend divided by number of distinct invoices.
customer_df['AOV'] = customer_df['Monetary'] / customer_df['Frequency']

print("\nCustomer-level summary:")
print(customer_df.head())

In [None]:
# Train Regression Model
# AOV is computed as Monetary / Frequency, so feeding both AOV and Frequency to
# the model lets it reconstruct the target exactly (Monetary = AOV * Frequency).
# That is target leakage: the error metrics would measure how well the forest
# approximates a multiplication, not how predictable customer value is.
# AOV is therefore excluded from the feature set.
# NOTE(review): features and target are still computed over the same time
# window; a forward-looking CLV model would take features from an earlier period
# and the target from a later one.
FEATURE_COLUMNS = ['Recency', 'Frequency']
X = customer_df[FEATURE_COLUMNS]
y = customer_df['Monetary']

# Hold out 20% of customers for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions on the held-out customers, used by the evaluation cell.
y_pred = model.predict(X_test)

In [None]:
# Model Evaluation
# Compare held-out targets with the model's predictions.
# MAE  - mean absolute difference between actual and predicted values.
# RMSE - square root of the mean squared difference.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

performance_report = f"\nModel Performance:\nMAE = {mae:.2f}\nRMSE = {rmse:.2f}"
print(performance_report)

In [None]:
# Predict and Segment Customers
# Score every customer in X. This includes the customers the model was fitted
# on, so those rows receive in-sample predictions.
customer_df['Predicted_LTV'] = model.predict(X)

# Split customers into four equally sized groups by predicted value.
# pd.qcut raises "Bin edges must be unique" when quantile boundaries coincide
# (for example, many customers sharing the same prediction). Binning on a
# first-occurrence rank guarantees distinct edges and yields the same segments
# as binning on the raw values whenever there are no ties.
SEGMENT_LABELS = ['Low', 'Mid-Low', 'Mid-High', 'High']
ltv_rank = customer_df['Predicted_LTV'].rank(method='first')
customer_df['Segment'] = pd.qcut(ltv_rank, q=len(SEGMENT_LABELS), labels=SEGMENT_LABELS)

print("\nPredicted LTV and Segments:")
print(customer_df.head())

In [None]:
# Visualization
# Each seaborn call returns the Axes it drew on; titles and labels are set on
# that Axes object directly.

# Histogram: distribution of predicted LTV across customers (30 bins).
hist_ax = sns.histplot(customer_df['Predicted_LTV'], bins=30)
hist_ax.set_title('Predicted LTV Distribution')
hist_ax.set_xlabel('Predicted LTV')
hist_ax.set_ylabel('Count of Customers')
plt.show()

# Box plot: spread of predicted LTV within each segment.
box_ax = sns.boxplot(x='Segment', y='Predicted_LTV', data=customer_df)
box_ax.set_title('Customer Segments by LTV')
box_ax.set_xlabel('Segment')
box_ax.set_ylabel('Predicted LTV')
plt.show()

In [None]:
# Save Results
# Write the per-customer table to CSV in the working directory, omitting the
# DataFrame index.
output_file = 'final_ltv_predictions.csv'
customer_df.to_csv(output_file, index=False)

print("\n✅ Results saved as final_ltv_predictions.csv")

## 📈 Power BI Dashboard Design Suggestions
1. **Dashboard Title:** *Customer Lifetime Value Insights Dashboard*
2. **Main Pages to Include:**
   - **Overview Page:** Show KPIs like total customers, average LTV, MAE, and RMSE values.
   - **Segmentation Page:** Highlight different LTV segments (Low, Mid-Low, Mid-High, High) using pie or bar charts.
   - **Trend Page:** Visualize LTV trends over time using a line chart.
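   - **Customer Detail Page:** Add an interactive table with slicers for Segment, Country, and Date filters. Country and date fields are not part of `final_ltv_predictions.csv`, so bring them in from the transaction-level data by joining on `CustomerID`.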
3. **Interactive Elements:**
   - Use slicers for filtering by country, date, or LTV segment.
   - Use tooltips to show customer-level details on hover.
   - Add bookmarks for switching between summary and detailed views.
4. **Color Palette:**
   - Low LTV → Red
   - Mid-Low → Orange
   - Mid-High → Yellow
   - High → Green

✅ This Power BI dashboard helps visualize and interpret customer value distribution interactively for business strategy insights.