In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from pathlib import Path
import numpy as np

In [2]:
# Import the dataset
sales_df = pd.read_csv(Path('./Resources/total_food.csv'))
sales_df.head()

Unnamed: 0,RMS,OCC,ADR,FOOD_SALES,BEV_SALES
0,98.0,29.3,204.1,13543.0,9023.0
1,48.0,14.3,230.09,6234.0,5999.0
2,35.0,10.4,226.25,2359.0,1217.0
3,76.0,22.7,252.22,4440.0,3923.0
4,270.0,80.6,301.69,33383.0,34220.0


In [3]:
# Drop the "BEV_SALES" column
sales_df.drop(columns="BEV_SALES", inplace=True)
sales_df.head()

Unnamed: 0,RMS,OCC,ADR,FOOD_SALES
0,98.0,29.3,204.1,13543.0
1,48.0,14.3,230.09,6234.0
2,35.0,10.4,226.25,2359.0
3,76.0,22.7,252.22,4440.0
4,270.0,80.6,301.69,33383.0


In [4]:
# Delete any rows with negative values
sales_df = sales_df[sales_df['FOOD_SALES'] > 0]
sales_df



Unnamed: 0,RMS,OCC,ADR,FOOD_SALES
0,98.0,29.3,204.10,13543.0
1,48.0,14.3,230.09,6234.0
2,35.0,10.4,226.25,2359.0
3,76.0,22.7,252.22,4440.0
4,270.0,80.6,301.69,33383.0
...,...,...,...,...
254,160.0,47.8,192.38,6572.0
255,266.0,79.4,196.89,13415.0
256,313.0,93.4,201.98,18605.0
257,235.0,70.1,219.99,29557.0


In [5]:
# Set up test and training data
X = sales_df[['RMS', 'OCC', 'ADR']]
y = sales_df['FOOD_SALES']

# Use standard scaler to scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)






In [6]:
# Train the model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

In [7]:
# Make predictions on the test set
y_pred = regressor.predict(X_test)


In [8]:
# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 111170850.00729406


In [9]:
# Calculate the root mean squared error
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

Root Mean Squared Error: 10543.758817769594


In [10]:
# Use a random forest regressor to predict the food sales
from sklearn.ensemble import RandomForestRegressor

# Train the model
regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the root mean squared error
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

Mean Squared Error: 146887340.00706917
Root Mean Squared Error: 12119.708742666598


In [11]:
# Use a gradient boosting regressor to predict the food sales
from sklearn.ensemble import GradientBoostingRegressor

# Train the model
regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, loss='squared_error')
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the root mean squared error
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)




Mean Squared Error: 121599757.71587124
Root Mean Squared Error: 11027.228015955381


In [12]:
# Use a support vector machine regressor to predict the food sales
from sklearn.svm import SVR

# Train the model

regressor = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the root mean squared error
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

# Assess the accuracy of the model
regressor.score(X_test, y_test)




Mean Squared Error: 223723874.1085168
Root Mean Squared Error: 14957.40198391809


0.18888408477306307

In [18]:
# Use a neural network regressor to predict the food sales
from sklearn.neural_network import MLPRegressor

# Train the model
regressor = MLPRegressor(hidden_layer_sizes=(100,100,100), activation='relu', solver='adam', max_iter=100000)
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the root mean squared error
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)


Mean Squared Error: 142515333.2458372
Root Mean Squared Error: 11937.978608032317


In [19]:
import hvplot.pandas
import plotly.express as px
# Plot the predicted values against the actual values using hvplot
df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
df.hvplot.scatter(x='Actual', y='Predicted', xlim=(0, 100000), ylim=(0, 100000))


In [20]:
# Use a lasse regressor to predict the food sales
from sklearn.linear_model import Lasso

# Train the model
regressor = Lasso(alpha=0.1, max_iter=1e7)
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the root mean squared error
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

Mean Squared Error: 110246297.99211048
Root Mean Squared Error: 10499.82371243015
