In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Load the replicated training data, carve out a validation set, and persist
# both halves as CSV so later cells can reload them without re-splitting.
data_path = "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/data/train_replicated_with_dates.csv"

train_data = pd.read_csv(data_path)
print(f"Data loaded with {len(train_data)} rows.")

# Everything except the identifier, the date and the target is used as a feature.
non_feature_columns = ["id", "num_orders", "date"]
X = train_data.drop(columns=non_feature_columns)
y = train_data["num_orders"]

# 80/20 random split; the fixed seed keeps the partition reproducible.
# NOTE(review): the file name suggests rows were replicated - if so, a purely
# random split may place copies of the same row in both sets. Confirm.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split_parts

# Destination CSV -> object to write. Insertion order is the write order.
split_outputs = {
    "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/data/X_train.csv": X_train,
    "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/data/X_val.csv": X_val,
    "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/data/y_train.csv": y_train,
    "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/data/y_val.csv": y_val,
}
for csv_path, split_part in split_outputs.items():
    # index=False: the row index carries no information and would otherwise
    # come back as an extra column when the file is read again.
    split_part.to_csv(csv_path, index=False)

print(f"Train and validation sets saved. Train: {len(X_train)}, Validation: {len(X_val)}.")


Data loaded with 2000000 rows.
Train and validation sets saved. Train: 1600000, Validation: 400000.


In [None]:
import xgboost
from xgboost import XGBRegressor

# Directory holding the CSV splits; kept in one place so the four reads below
# cannot drift apart.
data_dir = "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/data"

# Load prepared datasets.
# squeeze("columns") turns the single-column target frame into a Series and,
# unlike a bare squeeze(), never collapses a one-row file into a scalar.
X_train = pd.read_csv(f"{data_dir}/X_train.csv")
y_train = pd.read_csv(f"{data_dir}/y_train.csv").squeeze("columns")
X_val = pd.read_csv(f"{data_dir}/X_val.csv")
y_val = pd.read_csv(f"{data_dir}/y_val.csv").squeeze("columns")

# Pick the GPU switch that matches the installed XGBoost:
# 2.0 deprecated tree_method="gpu_hist" in favour of tree_method="hist" plus
# device="cuda"; releases before 2.0 do not know the `device` parameter.
xgb_major_version = int(xgboost.__version__.split(".")[0])
if xgb_major_version >= 2:
    gpu_params = {"tree_method": "hist", "device": "cuda"}
else:
    gpu_params = {"tree_method": "gpu_hist"}

# Initialize the model with GPU support
model = XGBRegressor(
    n_estimators=100,          # Number of trees
    max_depth=10,              # Maximum depth of trees
    learning_rate=0.1,         # Learning rate
    subsample=0.8,             # Subsample ratio
    colsample_bytree=0.8,      # Feature sampling ratio
    random_state=42,           # Reproducibility
    **gpu_params               # GPU acceleration (version-dependent spelling)
)

# Train the model
print("Training the model on GPU...")
model.fit(X_train, y_train)
print("Model training completed using GPU.")


KeyboardInterrupt: 

In [None]:
# Score the fitted model on the held-out validation split.
# Relies on `model`, `X_val` and `y_val` from the training cell.
y_val_pred = model.predict(X_val)

# Evaluate the model
mae = mean_absolute_error(y_val, y_val_pred)
# RMSE is taken as the square root of the MSE explicitly: the `squared=False`
# shortcut was deprecated in scikit-learn 1.4 and removed in 1.6, whereas this
# form works on every version.
rmse = mean_squared_error(y_val, y_val_pred) ** 0.5
r2 = r2_score(y_val, y_val_pred)

print("Model Evaluation on Validation Set:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R-squared (R2): {r2:.2f}")


In [None]:
import os

import joblib

# Persist the trained model so it can be reloaded without retraining.
# NOTE(review): the file name says "random_forest", but the model built in
# this notebook is an XGBRegressor. The name is kept unchanged here because
# other code may load this exact path - consider renaming both sides together.
model_path = "D:/sem 5/Mini Project/Prediction-and-Optimization-of-Smart-Dish-Specific-Demand-in-Restaurants-using-Machine-Learning/models/random_forest_model.pkl"

# joblib.dump does not create missing parent directories, so make sure the
# models folder exists before writing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)

print(f"Model saved to {model_path}")
