In [3]:
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Build the modelling table in one pipeline:
#   1. read the raw customer records from disk;
#   2. add DaysSinceLastPurchase, the whole number of days between each
#      parsed LastPurchaseDate and the moment this cell runs (so the value
#      shifts depending on when the script is executed);
#   3. discard the raw date column now that the recency feature replaces it;
#   4. one-hot encode the two categorical columns into Gender_* and
#      ProductCategory_* indicator columns.
df = (
    pd.read_csv('synthetic_customer_data.csv')
    .assign(
        DaysSinceLastPurchase=lambda frame: (
            pd.Timestamp.now() - pd.to_datetime(frame['LastPurchaseDate'])
        ).dt.days
    )
    .drop(columns=['LastPurchaseDate'])
    .pipe(pd.get_dummies, columns=['Gender', 'ProductCategory'])
)

# Define features and targets
# The continuous inputs are listed separately because they are the only
# columns standardized further down. Defining the list here keeps the cell
# runnable on a fresh kernel instead of relying on a leftover variable.
# NOTE(review): assumed to be exactly the five non-encoded inputs; confirm
# against any earlier definition of numeric_features.
numeric_features = ['Age', 'AnnualIncome', 'TransactionHistory', 'PurchaseAmount', 'DaysSinceLastPurchase']
# Indicator columns produced by one-hot encoding Gender and ProductCategory.
encoded_features = [col for col in df.columns if col.startswith(('Gender_', 'ProductCategory_'))]
features = numeric_features + encoded_features
target_return = 'ReturnLikelihood'
target_repurchase = 'RepurchaseLikelihood'

# Split the data
X = df[features]
y_return = df[target_return]
y_repurchase = df[target_repurchase]

# A single call splits the features and both targets with the same row
# partition, so the two models are guaranteed to share identical train/test
# rows rather than depending on two calls happening to use the same seed.
(X_train, X_test,
 y_train_return, y_test_return,
 y_train_repurchase, y_test_repurchase) = train_test_split(
    X, y_return, y_repurchase, test_size=0.2, random_state=42
)

# Work on independent copies: the frames returned by the split are derived
# from X, and assigning into them directly can trigger pandas'
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize numerical features
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])

# Fit one decision tree per target on the shared training features.
# fit() returns the estimator itself, so construction and training are chained.
# The fixed random_state keeps the fitted trees reproducible between runs.
model_return = DecisionTreeClassifier(random_state=42).fit(X_train, y_train_return)
model_repurchase = DecisionTreeClassifier(random_state=42).fit(X_train, y_train_repurchase)

# Return-likelihood model: predict on the held-out rows, then measure the
# fraction of predictions that match the true labels.
y_pred_return = model_return.predict(X_test)
return_accuracy = accuracy_score(y_test_return, y_pred_return)

# Repurchase-likelihood model: same evaluation on the same held-out rows.
y_pred_repurchase = model_repurchase.predict(X_test)
repurchase_accuracy = accuracy_score(y_test_repurchase, y_pred_repurchase)

# Report both held-out accuracies.
print('Return Likelihood Model Accuracy:', return_accuracy)
print('Repurchase Likelihood Model Accuracy:', repurchase_accuracy)

# Save the models using pickle
# Each estimator is written to its own file in the current working directory.
with open('model_return.pkl', 'wb') as f:
    pickle.dump(model_return, f)

with open('model_repurchase.pkl', 'wb') as f:
    pickle.dump(model_repurchase, f)

# Both models were fitted on standardized inputs, so the fitted scaler is
# saved alongside them. Without it, new raw data cannot be transformed the
# same way before calling predict() on a reloaded model.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# NOTE: pickle files must only ever be loaded from trusted sources;
# unpickling can execute arbitrary code.


Return Likelihood Model Accuracy: 0.5
Repurchase Likelihood Model Accuracy: 0.48
