# 🧠 Customer Lifetime Value (LTV) Prediction
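This notebook estimates customer lifetime value (LTV) from purchase history. For each customer it derives recency (days since the last order), frequency (number of distinct orders), and average order value (AOV), trains an XGBoost regressor on those features to predict the customer's total spend, and then groups customers into four segments by predicted LTV.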

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [None]:

# Colab-specific helper module; this notebook uses it for files.upload() here
# and files.download() when exporting results.
# NOTE(review): this import is only expected to work inside Google Colab — confirm
# before running the notebook in another environment.
from google.colab import files
# The loading cell reads 'customer_orders.csv' by name, so the uploaded file
# presumably has to be called exactly that — verify after uploading.
uploaded = files.upload()  # Upload the downloaded CSV here


In [None]:

# Load the order history and convert `order_date` to datetimes in one chained
# expression so `df` is never observed in a half-parsed state.
df = pd.read_csv('customer_orders.csv').assign(
    order_date=lambda orders: pd.to_datetime(orders['order_date'])
)
df.head()


In [None]:

# Reference date: one day after the most recent order in the data, so the
# customer with the latest order gets a recency of 1 day rather than 0.
NOW = df['order_date'].max() + pd.Timedelta(days=1)

# Build one row per customer:
#   recency   - whole days between NOW and the customer's last order
#   frequency - number of distinct order_id values
#   monetary  - sum of order_amount
#   aov       - mean of order_amount over the customer's rows
# Named aggregation ties each output column to its source explicitly (no
# positional renaming of MultiIndex columns), and recency is derived with a
# vectorised date subtraction instead of a per-group Python lambda.
rfm = (
    df.groupby('customer_id')
    .agg(
        last_order=('order_date', 'max'),
        frequency=('order_id', 'nunique'),
        monetary=('order_amount', 'sum'),
        aov=('order_amount', 'mean'),
    )
    .assign(recency=lambda per_customer: (NOW - per_customer['last_order']).dt.days)
    .reset_index()
    # Drop the helper column and restore the original column order.
    [['customer_id', 'recency', 'frequency', 'monetary', 'aov']]
)
rfm.head()


In [None]:

# Feature matrix and regression target, both taken from the per-customer table.
# NOTE(review): `monetary` is the sum of order_amount and `aov` is its mean over
# the same rows, so if each order is a single row then
# monetary == frequency * aov and the target is fully determined by the
# features. Confirm whether a forward-looking target (spend in a later period)
# is intended.
feature_cols = ['recency', 'frequency', 'aov']
target_col = 'monetary'
X = rfm[feature_cols]
y = rfm[target_col]

# Hold out 20% of customers for evaluation; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a default-configured XGBoost regressor on the training customers, then
# score the held-out customers.
model = XGBRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


In [None]:

# Hold-out error, expressed in the same units as the target.
mae = mean_absolute_error(y_test, y_pred)
# RMSE is computed as sqrt(MSE) rather than via `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where passing it
# raises a TypeError. Taking the square root works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


In [None]:

# Ordered from the lowest to the highest predicted-LTV quartile.
segment_labels = ['Low', 'Medium', 'High', 'Very High']

# Score every row of X with the fitted model.
rfm['predicted_ltv'] = model.predict(X)

try:
    # Preferred path: quartiles of the raw predictions, so customers with the
    # same prediction always land in the same segment.
    rfm['segment'] = pd.qcut(rfm['predicted_ltv'], 4, labels=segment_labels)
except ValueError:
    # pd.qcut raises "Bin edges must be unique" when enough predictions tie
    # that two quartile edges coincide (tree models often emit repeated
    # values). Fall back to binning on a tie-broken rank, which always yields
    # distinct edges when there are enough rows; ties are split by row order.
    ltv_rank = rfm['predicted_ltv'].rank(method='first')
    rfm['segment'] = pd.qcut(ltv_rank, 4, labels=segment_labels)
rfm.head()


In [None]:

# Write the scored customer table to disk (without the row index) and pass the
# same path to the Colab `files` helper for download.
output_csv = 'predicted_ltv_customers.csv'
rfm.to_csv(output_csv, index=False)
files.download(output_csv)


In [None]:

# NOTE(review): consider moving this import into the notebook's import cell so
# all dependencies are visible up front.
import seaborn as sns

# One box per segment showing the spread of predicted LTV within it. The axes
# are created explicitly and handed to seaborn instead of relying on pyplot's
# implicit "current figure".
fig, ax = plt.subplots(figsize=(8, 4))
sns.boxplot(data=rfm, x='segment', y='predicted_ltv', ax=ax)
ax.set_title("Predicted LTV by Segment")
plt.show()



## ✅ Outputs Generated:
- Trained XGBoost model
- LTV predictions for each customer
- MAE and RMSE scores on the 20% hold-out set
- Customer segments based on quartiles of predicted LTV (Low, Medium, High, Very High)
- CSV file: `predicted_ltv_customers.csv`
