# Project 1 - Next-Order Reorder Prediction

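This notebook is self-contained and uses synthetic data: it generates synthetic food-delivery orders, trains a random-forest classifier to predict whether an order is followed by a reorder, reports accuracy and ROC AUC on a held-out test split, and saves the fitted pipeline to `project1_reorder.pkl`.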


In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, mean_squared_error, r2_score
import pickle
import matplotlib.pyplot as plt
# Seed NumPy's global (legacy) random state so every np.random.* draw made
# afterwards is reproducible from a fresh kernel run.
SEED = 42
np.random.seed(SEED)
print('Imports ready')


In [None]:

# Synthetic data
# Every column is drawn from NumPy's global random state. The draws are made
# in a fixed order (customer_id -> ... -> hour -> reorder noise), so with a
# fixed seed the generated frame is the same on every run.
n = 5000
cuisines = ['Pizza', 'Burger', 'Sushi', 'Indian', 'Mexican', 'Salad', 'Dessert']

columns = {}
columns['customer_id'] = np.random.randint(1000, 2000, n)
columns['cuisine'] = np.random.choice(cuisines, n)
# Prices are limited to the range [3, 80] before rounding to two decimals.
columns['price'] = np.round(np.random.normal(14, 6, n).clip(3, 80), 2)
# Most orders (60%) carry no discount.
columns['discount'] = np.random.choice([0, 5, 10, 15], n, p=[0.6, 0.2, 0.12, 0.08])
columns['delivery_time'] = np.random.normal(30, 8, n).round().astype(int)
columns['previous_orders'] = np.random.poisson(2, n)
columns['days_since_last'] = np.random.randint(1, 60, n)
columns['hour'] = np.random.randint(8, 23, n)
df = pd.DataFrame(columns)

# Linear score behind the label: previous orders and discounts raise it;
# delivery time, price and days since the last order lower it.
logit = (
    -1.5
    + 0.6 * df['previous_orders']
    - 0.02 * df['delivery_time']
    + 0.03 * df['discount']
    - 0.01 * df['price']
    - 0.02 * df['days_since_last']
)
# Logistic transform turns the score into a probability in (0, 1).
prob = 1 / (1 + np.exp(-logit))

# Bernoulli draw per row: label is 1 when a uniform sample falls below prob.
reordered = np.random.rand(n) < prob
df['reorder'] = reordered.astype(int)
df.head()


In [None]:

# Train test split & pipeline
# Model inputs; 'customer_id' is not used as a feature and 'reorder' is the target.
features = ['cuisine', 'price', 'discount', 'delivery_time', 'previous_orders', 'days_since_last', 'hour']
X = df[features]
y = df['reorder']
cat = ['cuisine']
num = ['price', 'discount', 'delivery_time', 'previous_orders', 'days_since_last', 'hour']

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old name, so passing `sparse=False` raises TypeError on
# current releases. Try the new keyword first and fall back for older versions.
try:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

# One-hot encode the categorical column and standardise the numeric ones.
pre = ColumnTransformer([('cat', encoder, cat), ('num', StandardScaler(), num)])

# Stratified 80/20 split keeps the reorder rate the same in train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Preprocessing lives inside the pipeline, so it is fitted on the training
# split only and travels with the model when the pipeline is saved.
pipe = Pipeline([('pre', pre), ('clf', RandomForestClassifier(n_estimators=150, random_state=42))])
pipe.fit(X_train, y_train)

# Hard labels for accuracy; positive-class probabilities for ROC AUC.
pred = pipe.predict(X_test)
probas = pipe.predict_proba(X_test)[:, 1]
print('Acc:', accuracy_score(y_test, pred))
print('ROC AUC:', roc_auc_score(y_test, probas))


In [None]:

# Save
# Serialise the fitted pipeline (preprocessing + classifier) to a pickle file
# in the current working directory.
model_path = 'project1_reorder.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(pipe, model_file)
print('Saved project1_reorder.pkl')
