In [1]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [3]:
# Load dataset
df = pd.read_csv("../dataset/restaurant_sales.csv")

# Fail early, with a readable message, if the file lacks a column that the
# encoding / feature-selection cell indexes by name.
missing_columns = {"Day", "Dish", "Quantity_Sold"} - set(df.columns)
assert not missing_columns, f"Dataset is missing expected columns: {sorted(missing_columns)}"

# Show a bounded preview rather than dumping the whole frame into the output.
df.head(10)

Unnamed: 0,Day,Dish,Quantity_Sold
0,Monday,Pasta,50
1,Monday,Burger,30
2,Monday,Salad,20
3,Tuesday,Pasta,55
4,Tuesday,Burger,28
5,Tuesday,Salad,18
6,Wednesday,Pasta,60
7,Wednesday,Burger,35
8,Wednesday,Salad,22
9,Thursday,Pasta,45


In [4]:
# Encode categorical variables
# One encoder per categorical column; each is fitted on the full column and
# kept as its own object because both are persisted alongside the model later.
# NOTE(review): LabelEncoder codes presumably follow sorted label order rather
# than calendar order for "Day" — confirm that is acceptable for the model.
label_encoder_day = LabelEncoder().fit(df["Day"])
label_encoder_dish = LabelEncoder().fit(df["Dish"])

# Store the integer codes next to the original text columns.
df["Day_Encoded"] = label_encoder_day.transform(df["Day"])
df["Dish_Encoded"] = label_encoder_dish.transform(df["Dish"])

# Features and target variable
y = df["Quantity_Sold"]
X = df.loc[:, ["Day_Encoded", "Dish_Encoded"]]


In [5]:
# Split data
# 20% of the rows are held out for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [6]:
# Train RandomForest Model
# A 100-tree forest; the fixed random_state makes the fitted model repeatable.
model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)
# Fit on the training split only; the test split is reserved for evaluation.
model.fit(X=X_train, y=y_train)

In [7]:

# Predictions and evaluation
# Score the held-out rows, then report the root of the mean squared error so
# the figure is in the same units as Quantity_Sold.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"Model RMSE: {rmse:.2f}")

Model RMSE: 13.24


In [8]:

# Save model and encoders
# Create the output directory first: joblib.dump does not create missing
# parent directories, so a fresh checkout without ../backend/models would
# otherwise fail with FileNotFoundError.
MODEL_DIR = Path("../backend/models")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# File name -> fitted object. The encoders are saved together with the model
# so that new inputs can be encoded with the same mapping used in training.
artifacts = {
    "sales_model.pkl": model,
    "label_encoder_day.pkl": label_encoder_day,
    "label_encoder_dish.pkl": label_encoder_dish,
}
for filename, artifact in artifacts.items():
    joblib.dump(artifact, MODEL_DIR / filename)

print("Model and encoders saved successfully.")

Model and encoders saved successfully.
