In [1]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
df = pd.read_excel("main_pizza_dataset_ansh.xlsx")

In [3]:
df.head()

Unnamed: 0,order_date,time_phase,quantity,total_price,Date,Month_name,Day_name,Weekend
0,2015-01-01,Morning,6,105.25,1,January,Thursday,0
1,2015-01-01,Afternoon,106,1757.35,1,January,Thursday,0
2,2015-01-01,Night,50,851.25,1,January,Thursday,0
3,2015-01-02,Night,82,1358.95,2,January,Friday,0
4,2015-01-02,Morning,5,87.25,2,January,Friday,0


In [4]:
df["Month_name"].unique()

array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype=object)

In [5]:

oe = OrdinalEncoder(categories= [['Monday', 'Tuesday','Wednesday','Thursday', 'Friday', 'Saturday', 'Sunday']])
oe_1 = OrdinalEncoder(categories= [['January', 'February', 'March', 'April', 'May', 'June', 'July','August', 'September', 'October', 'November', 'December']])
oe_2 = OrdinalEncoder(categories= [["Morning","Afternoon","Night"]])

In [6]:
df['Day_name'] = oe.fit_transform(df[['Day_name']])
df['Month_name'] = oe_1.fit_transform(df[['Month_name']])
df["time_phase"] = oe_2.fit_transform(df[["time_phase"]])

In [7]:
df.head()

Unnamed: 0,order_date,time_phase,quantity,total_price,Date,Month_name,Day_name,Weekend
0,2015-01-01,0.0,6,105.25,1,0.0,3.0,0
1,2015-01-01,1.0,106,1757.35,1,0.0,3.0,0
2,2015-01-01,2.0,50,851.25,1,0.0,3.0,0
3,2015-01-02,2.0,82,1358.95,2,0.0,4.0,0
4,2015-01-02,0.0,5,87.25,2,0.0,4.0,0


In [8]:
order = ["order_date","Date","Month_name","Day_name","Weekend","time_phase","quantity","total_price"]
df = df[order]
df

Unnamed: 0,order_date,Date,Month_name,Day_name,Weekend,time_phase,quantity,total_price
0,2015-01-01,1,0.0,3.0,0,0.0,6,105.25
1,2015-01-01,1,0.0,3.0,0,1.0,106,1757.35
2,2015-01-01,1,0.0,3.0,0,2.0,50,851.25
3,2015-01-02,2,0.0,4.0,0,2.0,82,1358.95
4,2015-01-02,2,0.0,4.0,0,0.0,5,87.25
...,...,...,...,...,...,...,...,...
1043,2015-12-30,30,11.0,2.0,0,2.0,16,276.25
1044,2015-12-30,30,11.0,2.0,0,1.0,45,736.15
1045,2015-12-31,31,11.0,3.0,0,0.0,5,77.75
1046,2015-12-31,31,11.0,3.0,0,1.0,97,1566.95


In [9]:
df.rename(columns={'quantity': 'Total_quantity','total_price':'Total_Sale',"order_date":"Full_Date","time_phase":"Time_phase"},inplace=True)
df

Unnamed: 0,Full_Date,Date,Month_name,Day_name,Weekend,Time_phase,Total_quantity,Total_Sale
0,2015-01-01,1,0.0,3.0,0,0.0,6,105.25
1,2015-01-01,1,0.0,3.0,0,1.0,106,1757.35
2,2015-01-01,1,0.0,3.0,0,2.0,50,851.25
3,2015-01-02,2,0.0,4.0,0,2.0,82,1358.95
4,2015-01-02,2,0.0,4.0,0,0.0,5,87.25
...,...,...,...,...,...,...,...,...
1043,2015-12-30,30,11.0,2.0,0,2.0,16,276.25
1044,2015-12-30,30,11.0,2.0,0,1.0,45,736.15
1045,2015-12-31,31,11.0,3.0,0,0.0,5,77.75
1046,2015-12-31,31,11.0,3.0,0,1.0,97,1566.95


In [10]:
#Splitting the data
X = df.drop(columns=["Total_Sale","Full_Date","Total_quantity"])
y = df[["Total_Sale","Total_quantity"]]

In [11]:
X_train, X_test ,y_train,y_test = train_test_split(X,y,test_size=0.1)

## Random Forest

In [12]:
rgr = RandomForestRegressor(n_estimators=100,random_state=42)

In [13]:
rgr.fit(X_train, y_train)

In [14]:
y_pred = rgr.predict(X_test)

In [15]:
# Calculating the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 87.70740476190477


In [16]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)

Mean Squared Error (MSE): 29915.877132926198


In [17]:
r2 = r2_score(y_test, y_pred)
print("R-squared (R2) score:", r2)


R-squared (R2) score: 0.8386195060420618


In [18]:
num_features = X_train.shape[1]
adjusted_r2 = 1 - (1 - r2) * (len(y_test) - 1) / (len(y_test) - num_features - 1)
print("R-squared score:", r2)
print("Adjusted R-squared score:", adjusted_r2)

R-squared score: 0.8386195060420618
Adjusted R-squared score: 0.8304689760441861


In [19]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Define hyperparameters to search
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_regressor = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Getting the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Training a new Random Forest regressor with the best hyperparameters
best_rf_regressor = RandomForestRegressor(random_state=42, **best_params)
best_rf_regressor.fit(X_train, y_train)

# Evaluating the model
y_pred = best_rf_regressor.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared (R2) score:", r2)
adjusted_r2 = 1 - (1 - r2) * (len(y_test) - 1) / (len(y_test) - num_features - 1)
print("Adjusted R-squared score:", adjusted_r2)

Best Hyperparameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
Mean Absolute Error (MAE): 81.44309455543943
Mean Squared Error (MSE): 26361.182162839126
R-squared (R2) score: 0.8582401784806901
Adjusted R-squared score: 0.8510805915352704


In [20]:
X_test.head()

Unnamed: 0,Date,Month_name,Day_name,Weekend,Time_phase
0,1,0.0,3.0,0,0.0
716,2,8.0,2.0,0,0.0
989,11,11.0,4.0,0,2.0
164,25,1.0,2.0,0,1.0
991,12,11.0,5.0,1,1.0


In [21]:
rgr.predict([[ 8 , 5 , 6.0 , 1 , 1.0]])



array([[1297.1975,   79.29  ]])

In [22]:
best_rf_regressor.predict([[ 8 , 5 , 6.0 , 1 , 1.0]])



array([[1272.45046055,   77.3550501 ]])

In [23]:
pd.DataFrame(best_rf_regressor.predict([[ 13 ,5 , 6 , 1 , 1]]),columns=["Expected_Sale","Expected_Quantity"])



Unnamed: 0,Expected_Sale,Expected_Quantity
0,1239.358703,75.081487
