# Project 5 - Deal / Pack Value Regression

This notebook is self-contained and uses synthetic data.


In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, mean_squared_error, r2_score
import pickle
import matplotlib.pyplot as plt
# Seed NumPy's global RNG so the np.random draws in later cells are repeatable.
np.random.seed(42)
print('Imports ready')


In [None]:

# Synthetic bundles and uplift/value regression
#
# Builds a synthetic item catalogue, samples 800 random 2- or 3-item bundles
# (each with a discount and a popularity score), derives a noisy
# `expected_value` target, and fits a gradient-boosted regressor on
# (price_sum, discount, popularity).
n_items = 120
items = pd.DataFrame({
    'item_id': np.arange(1, n_items + 1),
    # Prices drawn from N(12, 4), clipped to [3, 60] and rounded to 2 decimals.
    'price': np.round(np.random.normal(12, 4, n_items).clip(3, 60), 2),
})

# Discount levels and their sampling probabilities (loop-invariant).
discount_levels = [0, 5, 10, 15, 20]
discount_probs = [0.4, 0.25, 0.2, 0.1, 0.05]

bundles = []
for i in range(800):
    # NOTE: the order of the np.random calls below fixes the generated data for
    # a given seed; reordering them changes every downstream number.
    k = np.random.choice([2, 3])
    its = list(np.random.choice(items['item_id'], size=k, replace=False))
    price_sum = items.loc[items['item_id'].isin(its), 'price'].sum()
    discount = np.random.choice(discount_levels, p=discount_probs)
    popularity = np.random.rand()
    # Target: popularity-weighted price term + discount term + unit Gaussian noise.
    expected_value = 0.03 * popularity * price_sum + 0.02 * discount + np.random.normal(0, 1)
    bundles.append({
        'bundle_id': i + 1,
        'items': its,
        'price_sum': price_sum,
        'discount': discount,
        'popularity': popularity,
        'expected_value': expected_value,
    })
bundles_df = pd.DataFrame(bundles)

X = bundles_df[['price_sum', 'discount', 'popularity']]
y = bundles_df['expected_value']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
gbr = GradientBoostingRegressor(n_estimators=200, random_state=42).fit(X_train, y_train)

# `mean_squared_error(..., squared=False)` was deprecated in scikit-learn 1.4 and
# removed in 1.6; taking the square root of the MSE works on every version.
rmse = float(np.sqrt(mean_squared_error(y_test, gbr.predict(X_test))))
print('Deal RMSE:', rmse)
def score_bundles(df_b, top_k=10, model=None):
    """Rank bundles by predicted value and return the best ``top_k`` rows.

    Parameters
    ----------
    df_b : pandas.DataFrame
        Bundles to score. Must contain the ``price_sum``, ``discount`` and
        ``popularity`` columns. The frame is left unmodified.
    top_k : int, default 10
        Maximum number of highest-scoring rows to return.
    model : object with a ``predict`` method, optional
        Estimator used for scoring. When omitted, the notebook-level ``gbr``
        is used.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns plus ``pred``, sorted by
        ``pred`` in descending order and truncated to ``top_k`` rows.

    Raises
    ------
    KeyError
        If any of the three feature columns is missing from ``df_b``.
    """
    estimator = gbr if model is None else model
    features = df_b[['price_sum', 'discount', 'popularity']]
    # `assign` returns a copy, so the caller's frame does not silently gain a
    # `pred` column as a side effect of scoring.
    scored = df_b.assign(pred=estimator.predict(features))
    return scored.sort_values('pred', ascending=False).head(top_k)
# Show the six highest-scoring bundles. The call is not the last expression of
# the cell, so without an explicit print its result would be discarded.
top_bundles = score_bundles(bundles_df, top_k=6)
print(top_bundles)

# Persist the fitted regressor. Only unpickle this file from a trusted
# location: pickle.load can execute arbitrary code.
with open('project5_deal_gbr.pkl', 'wb') as f:
    pickle.dump(gbr, f)
print('Saved project5_deal_gbr.pkl')
