In [None]:

import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
import joblib  # for saving and loading the model

# Train an XGBoost regressor on per-customer RFM features derived from
# transactions.csv and persist it to xgboost_ltv_model.pkl.

# Load the raw transaction log.
df = pd.read_csv("transactions.csv")

# Normalise the headers BEFORE any column is accessed by name, so that a
# header such as "Transaction Date" or " amount" resolves to the snake_case
# names used below instead of raising KeyError on the first lookup.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Fail early, with a readable message, when the input lacks a column that the
# feature engineering below depends on.
required_columns = {'customer_id', 'transaction_date', 'amount'}
missing_columns = required_columns.difference(df.columns)
if missing_columns:
    raise KeyError(
        f"transactions.csv is missing required column(s): {sorted(missing_columns)}"
    )

df['transaction_date'] = pd.to_datetime(df['transaction_date'])

# Reference date for recency: one day after the latest transaction in the
# data, so the most recent customer gets a recency of 1 rather than 0.
today = df['transaction_date'].max() + pd.Timedelta(days=1)

# One row per customer. Named aggregation ties every output column to its
# source column and reducer explicitly instead of renaming by position.
rfm = df.groupby('customer_id').agg(
    # days between the reference date and the customer's last transaction
    recency=('transaction_date', lambda dates: (today - dates.max()).days),
    # number of (non-null) transaction dates for the customer
    frequency=('transaction_date', 'count'),
    # mean transaction amount
    aov=('amount', 'mean'),
    # summed transaction amount
    total_spent=('amount', 'sum'),
)
rfm = rfm.reset_index()

# NOTE(review): the target is computed directly from 'frequency' and 'aov',
# which are also model inputs, so the regressor is fitting a known formula
# rather than an independently observed outcome. Kept as-is because it is
# labelled a proxy; replace with observed future spend when available.
rfm['ltv'] = rfm['frequency'] * rfm['aov'] * 3  # simple proxy

X = rfm[['recency', 'frequency', 'aov']]
y = rfm['ltv']

# NOTE(review): X_test / y_test are produced here but never scored in this
# cell; evaluate on them before relying on the saved model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=4, random_state=42)
model.fit(X_train, y_train)

# NOTE(review): persisted via joblib (pickle). Presumably downstream code
# loads it with joblib.load, so the format is left unchanged — confirm before
# switching to XGBoost's own save_model().
joblib.dump(model, "xgboost_ltv_model.pkl")
print("Model trained and saved as xgboost_ltv_model.pkl")
