In [None]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the purchase history, drop every row that contains a missing value,
# and derive the per-row spend (quantity * price) as a new feature column.
data = (
    pd.read_csv('data/purchase_history.csv')
    .dropna()
    .assign(total_spent=lambda frame: frame['quantity'] * frame['price'])
)

# Separate the feature matrix from the two prediction targets.
X = data.drop(columns=['customer_return', 'product_repurchase'])
y_return = data['customer_return']
y_repurchase = data['product_repurchase']

# Split the features and BOTH targets in a single call so that the train/test
# rows are aligned across X, y_return and y_repurchase by construction.
# (Previously X was split twice and overwritten; alignment only held because
# both calls used the same random_state. The partition itself is unchanged.)
(
    X_train, X_test,
    y_return_train, y_return_test,
    y_repurchase_train, y_repurchase_test,
) = train_test_split(X, y_return, y_repurchase, test_size=0.2, random_state=42)

# Feature scaling: fit on the training rows only, then apply the same
# transformation to the test rows so no test statistics leak into training.
# NOTE(review): StandardScaler requires numeric input; this assumes every
# remaining column of X is numeric. Confirm against the CSV schema.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
# Train a model for customer return prediction and report its accuracy on the
# held-out test rows.
model_return = RandomForestClassifier(random_state=42)
model_return.fit(X_train, y_return_train)
y_return_pred = model_return.predict(X_test)
print('Customer Return Prediction Accuracy:', accuracy_score(y_return_test, y_return_pred))

# Train a model for product repurchase prediction on the same features and
# report its accuracy on the held-out test rows.
model_repurchase = RandomForestClassifier(random_state=42)
model_repurchase.fit(X_train, y_repurchase_train)
y_repurchase_pred = model_repurchase.predict(X_test)
print('Product Repurchase Prediction Accuracy:', accuracy_score(y_repurchase_test, y_repurchase_pred))

# Save the models together with the fitted scaler.
# joblib.dump does not create missing parent directories, so make sure the
# output directory exists first; otherwise a fresh checkout fails with
# FileNotFoundError after the training work has already been done.
os.makedirs('models', exist_ok=True)
joblib.dump(model_return, 'models/model_return.pkl')
joblib.dump(model_repurchase, 'models/model_repurchase.pkl')
joblib.dump(scaler, 'models/scaler.pkl')