In [13]:
import pandas as pd
import joblib
import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,mean_absolute_error,mean_squared_error,mean_squared_log_error,r2_score

from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor

In [4]:
# Load the dataset (df3.csv) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="sample_data/df3.csv")

In [5]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,week,center_id,meal_id,checkout_price,num_orders,region_code,op_area,discount_perc,discount y/n,featured_promotion,new_category_cuisine,new_center_type_city_code
0,0,1,55,1885,136.83,177,56,2.25,151.39,1.0,0.0,3.0,57.0
1,1,2,55,1885,135.83,323,56,2.25,151.4,1.0,0.0,3.0,57.0
2,2,3,55,1885,132.92,96,56,2.25,132.93,1.0,0.0,3.0,57.0
3,3,4,55,1885,135.86,163,56,2.25,133.85,1.0,0.0,3.0,57.0
4,4,5,55,1885,146.5,215,56,2.25,146.51,1.0,0.0,3.0,57.0


In [8]:
# Feature matrix / target split.
# The 'Unnamed: *' columns are row-index copies left behind by earlier
# to_csv/read_csv round-trips (their values mirror the row number in the
# df.head() preview). They carry no real signal and only encode row position,
# so they are dropped together with the target instead of being fed to the models.
index_artifacts = [col for col in df.columns if str(col).startswith('Unnamed')]
X = df.drop(columns=['num_orders', *index_artifacts])
y = df['num_orders']

In [9]:

# Standardise every feature column (StandardScaler: zero mean, unit variance).
# NOTE(review): `scaled_x` is not referenced by any later cell visible here —
# confirm whether it is still needed.
scaler = StandardScaler()
scaled_x = scaler.fit(X).transform(X)


In [12]:
# Ordered hold-out: shuffle=False keeps the file's row order, so the last 20%
# of rows form the test set.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Scale the features that the models actually train on. Previously the split
# used the raw `X`, so the scale-sensitive SVR never saw standardised inputs.
# The scaler is fitted on the training rows only so that no statistics from the
# test rows leak into preprocessing; the frames are rebuilt to keep column names
# and index. Models fitted on `x_train` therefore expect inputs transformed by
# this `scaler` — persist it alongside any saved model.
scaler = StandardScaler().fit(x_train)
x_train = pd.DataFrame(scaler.transform(x_train), columns=x_train.columns, index=x_train.index)
x_test = pd.DataFrame(scaler.transform(x_test), columns=x_test.columns, index=x_test.index)

In [14]:
# SVR: estimator plus the hyper-parameter grid explored for it
# (3 kernels x 3 C values x 3 epsilon values).
svr = SVR()
svr_params = dict(
    kernel=['linear', 'poly', 'rbf'],
    C=[0.1, 1, 10],
    epsilon=[0.1, 0.2, 0.5],
)

# Linear Regression
# `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2; leaving it
# in the grid makes GridSearchCV fail with an invalid-parameter error on current
# releases. Scale the features beforehand (e.g. with StandardScaler) instead.
lr = LinearRegression()
lr_params = {
    'fit_intercept': [True, False],
}

# Decision Tree Regressor
# random_state is pinned because the tree permutes features at every split and
# breaks ties randomly; without a seed, repeated runs can pick different trees
# and the reported metrics are not reproducible.
dt = DecisionTreeRegressor(random_state=42)
dt_params = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Lasso
# `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2; leaving it
# in the grid makes GridSearchCV fail with an invalid-parameter error on current
# releases. Scale the features beforehand (e.g. with StandardScaler) instead.
lasso = Lasso()
lasso_params = {
    'alpha': [0.1, 1, 10],
    'fit_intercept': [True, False],
    'max_iter': [1000, 2000, 3000]
}

# Random Forest Regressor
# random_state is pinned so the bootstrap samples and feature sub-sampling are
# repeatable; otherwise every run of the notebook yields different metrics.
rf = RandomForestRegressor(random_state=42)
rf_params = {
    'n_estimators': [100, 200, 500],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}


In [15]:
# Registry of tuned models, keyed by a short model name.
best_estimators = dict()

In [16]:
# Accumulator for the model-comparison table: one independent list per column,
# appended to once per evaluated model.
data = {column: [] for column in ('Model', 'MAE', 'MSE', 'R2_score', 'parameters')}

### SVR

In [None]:

# Exhaustive 5-fold grid search over the SVR grid. Scoring is negated MSE
# (GridSearchCV maximises its score), hence the sign flip when reporting.
grid_search = GridSearchCV(
    estimator=svr,
    param_grid=svr_params,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

# Keep the winning estimator for evaluation and persistence.
best_estimators['svr'] = grid_search.best_estimator_
print(f'SVR Best Params: {grid_search.best_params_}')
print(f'SVR Best MSE: {-grid_search.best_score_}')

In [None]:
# Evaluate the tuned SVR on the held-out rows.
# scikit-learn metrics take (y_true, y_pred). MAE/MSE happen to be symmetric,
# but the arguments are passed in the documented order so the calls agree with
# r2_score and stay correct if an asymmetric metric is swapped in later.
y_pred = best_estimators['svr'].predict(x_test)
print('mean_absolute_error:',mean_absolute_error(y_test,y_pred))
print('mean_squared_error:',mean_squared_error(y_test,y_pred))
print('r2_score:',r2_score(y_test,y_pred))

In [None]:
# Persist the tuned SVR to disk.
joblib.dump(value=best_estimators['svr'], filename='svr.pkl')

In [None]:
# Append the SVR row to the comparison table.
# Relies on `y_pred` still holding the SVR predictions from the evaluation cell.
# Metrics are called as (y_true, y_pred), matching scikit-learn's signature and
# the r2_score call; MAE/MSE values are unchanged because they are symmetric.
data['Model'].append('SVR')
data['MAE'].append(mean_absolute_error(y_test,y_pred))
data['MSE'].append(mean_squared_error(y_test,y_pred))
data['R2_score'].append(r2_score(y_test,y_pred))
data['parameters'].append(best_estimators['svr'].get_params())
print(data)

In [None]:
# Snapshot the comparison table collected so far (no index column written).
pd.DataFrame(data=data).to_csv(path_or_buf='modelinfo1.csv', index=False)

### Decision Tree

In [None]:
# Timed, exhaustive 5-fold grid search over the decision-tree grid. Scoring is
# negated MSE (GridSearchCV maximises its score), hence the sign flip below.
start = time.time()
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=dt_params,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)
end = time.time()
print(f'Time taken: {end-start}')

# Keep the winning estimator for evaluation and persistence.
best_estimators['decision_tree'] = grid_search.best_estimator_
print(f'decision tree Best Params: {grid_search.best_params_}')
print(f'decision tree Best MSE: {-grid_search.best_score_}')

In [None]:
# Evaluate the tuned decision tree on the held-out rows.
# scikit-learn metrics take (y_true, y_pred). MAE/MSE happen to be symmetric,
# but the arguments are passed in the documented order so the calls agree with
# r2_score and stay correct if an asymmetric metric is swapped in later.
y_pred = best_estimators['decision_tree'].predict(x_test)
print('mean_absolute_error:',mean_absolute_error(y_test,y_pred))
print('mean_squared_error:',mean_squared_error(y_test,y_pred))
print('r2_score:',r2_score(y_test,y_pred))

In [None]:
# Persist the tuned decision tree to disk.
joblib.dump(value=best_estimators['decision_tree'], filename='decision_tree.pkl')

In [None]:
# Append the decision-tree row to the comparison table.
# Relies on `y_pred` still holding the decision-tree predictions from the
# evaluation cell.
# Fix: the column key is 'parameters' (as declared when `data` was created);
# the previous 'parameter' raised KeyError after the other four columns had
# already been appended, leaving the lists with unequal lengths.
# Metrics are called as (y_true, y_pred), matching scikit-learn's signature.
data['Model'].append('decision_tree')
data['MAE'].append(mean_absolute_error(y_test,y_pred))
data['MSE'].append(mean_squared_error(y_test,y_pred))
data['R2_score'].append(r2_score(y_test,y_pred))
data['parameters'].append(best_estimators['decision_tree'].get_params())
print(data)

In [None]:
# Snapshot the comparison table again; `data` accumulates across sections, so
# this file contains every model row appended so far (no index column written).
pd.DataFrame(data=data).to_csv(path_or_buf='modelinfo2.csv', index=False)