In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [2]:
# Load the data and model
# Per-customer feature table; the first CSV column becomes the index.
customer_features = pd.read_csv(
    '../data/processed/customer_features.csv',
    index_col=0,
)
# Restore the persisted estimator and its scaler, in that order.
# NOTE: joblib artifacts are pickle-based; only load files from a trusted source.
model, scaler = map(joblib.load, (
    '../models/discount_prediction_model.joblib',
    '../models/discount_prediction_scaler.joblib',
))

In [3]:
# Prepare the data
# Feature matrix: four per-customer columns (presumably in the order the scaler
# and model were fitted with -- confirm against the training notebook).
X = customer_features.loc[:, [
    'PurchaseFrequency',
    'TotalSpent',
    'CustomerLifetime',
    'AverageOrderValue',
]]
# Proxy target: assume 10% of total spent as the potential discount.
# NOTE(review): y is a fixed multiple of 'TotalSpent', which is also a column of X,
# so any score computed against y largely reflects that built-in relationship.
y = customer_features['TotalSpent'].mul(0.1)

In [4]:
# Scale the features
# Only transform() is called on the scaler loaded from disk -- it is not re-fit
# here (presumably it was fitted during training; confirm).
X_scaled = scaler.transform(X)

In [5]:
# Make predictions
# One prediction per row of the scaled feature matrix; y_pred is scored against y
# in the evaluation cell.
y_pred = model.predict(X_scaled)

In [6]:
# Evaluate the model
# Score the predictions against the proxy target with both regression metrics.
# NOTE(review): if these customers were also used to fit the model, the numbers
# are in-sample -- confirm against the training notebook.
mse, r2 = (metric(y, y_pred) for metric in (mean_squared_error, r2_score))

In [7]:
# Report both metrics, one per line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"R-squared Score: {r2:.2f}",
    sep="\n",
)

Mean Squared Error: 5688.35
R-squared Score: 0.99


In [8]:
# Feature importance
# Pair every feature column with the model's importance score and rank the
# rows from most to least important.
feature_importance = pd.DataFrame(
    dict(feature=X.columns, importance=model.feature_importances_)
).sort_values(by='importance', ascending=False)

In [9]:
# Heading (preceded by a blank line) followed by the ranked table as plain text.
print("\nFeature Importance:", feature_importance, sep="\n")


Feature Importance:
             feature  importance
1         TotalSpent    0.976449
3  AverageOrderValue    0.012075
2   CustomerLifetime    0.006244
0  PurchaseFrequency    0.005232


In [10]:
# Example predictions for a few customers
# Take the first five rows positionally and keep them as a DataFrame:
#   * the scaler receives the same labelled input it gets for the full matrix
#     in the scaling cell, instead of a bare NumPy row;
#   * a repeated customer ID in the index can no longer turn a single lookup
#     into several rows and break the reshape;
#   * one transform/predict call replaces one call per customer.
sample_features = X.iloc[:5]
sample_predictions = model.predict(scaler.transform(sample_features))

for customer_id, predicted_discount in zip(sample_features.index, sample_predictions):
    # Round first, then add 0.0, so a tiny negative prediction is shown as
    # $0.00 rather than the confusing $-0.00.
    shown_discount = round(float(predicted_discount), 2) + 0.0
    print(f"\nCustomer ID: {customer_id}")
    print(f"Predicted Discount: ${shown_discount:.2f}")


Customer ID: 12346.0
Predicted Discount: $-0.00

Customer ID: 12347.0
Predicted Discount: $431.12

Customer ID: 12348.0
Predicted Discount: $179.74

Customer ID: 12349.0
Predicted Discount: $175.77

Customer ID: 12350.0
Predicted Discount: $33.44


