# **Food Security - Model Development**

## **Importing Cleaned Dataset**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Run the data-cleaning notebook first; this assumes the notebook exists at the path below.
%run '/content/drive/MyDrive/Colab Notebooks/name_of_data_cleaning_file.ipynb'

# **Data Visualizations**

In [None]:
dataset.columns

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total served meals per (year, day of week), drawn as one bar-chart row per year.
grouped_data = dataset.groupby(['year', 'day_of_week'])['served_meals'].sum().reset_index()

years = dataset['year'].unique()
num_years = len(years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in years])

for row, year in enumerate(years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['day_of_week'], y=data_year['served_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Served Meals Throughout the Week for Each Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Day of Week", row=num_years, col=1)
fig.update_yaxes(title_text="Served Meals")

fig.show()

# DataScience Fellows in DataLab:
# Eden Alem - Computer Science Major | Pronouns: She/Her
# Orion Gant - Math Major; Minor in Music, German and Women & Gender Studies | Pronouns: She/Her
# Carson Coody - Economics Major; Minor Business and Computer Science | Pronouns: He/Him
# Bryce Comer - Computer Science Major | Pronouns: He/Him



In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total served meals per (year, day of week) for the congregate years only,
# drawn as one bar-chart row per year.
cong_years = [2016, 2017, 2018, 2019, 2022]
grouped_data = dataset[dataset['year'].isin(cong_years)].groupby(['year', 'day_of_week'])['served_meals'].sum().reset_index()

num_years = len(cong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in cong_years])

for row, year in enumerate(cong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['day_of_week'], y=data_year['served_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Served Meals Throughout the Week for Each Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Day of Week", row=num_years, col=1)
fig.update_yaxes(title_text="Served Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total served meals per (year, day of week) for the non-congregate years only,
# drawn as one bar-chart row per year.
noncong_years = [2020, 2021, 2023]
grouped_data = dataset[dataset['year'].isin(noncong_years)].groupby(['year', 'day_of_week'])['served_meals'].sum().reset_index()

num_years = len(noncong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in noncong_years])

for row, year in enumerate(noncong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['day_of_week'], y=data_year['served_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Served Meals Throughout the Week for Each Non Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Day of Week", row=num_years, col=1)
fig.update_yaxes(title_text="Served Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total wasted meals per (year, day of week), drawn as one bar-chart row per year.
grouped_data = dataset.groupby(['year', 'day_of_week'])['wasted_meals'].sum().reset_index()

years = dataset['year'].unique()
num_years = len(years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in years])

for row, year in enumerate(years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['day_of_week'], y=data_year['wasted_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Wasted Meals Throughout the Week for Each Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Day of Week", row=num_years, col=1)
fig.update_yaxes(title_text="Wasted Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total wasted meals per (year, day of week) for the congregate years only,
# drawn as one bar-chart row per year.
cong_years = [2016, 2017, 2018, 2019, 2022]
grouped_data = dataset[dataset['year'].isin(cong_years)].groupby(['year', 'day_of_week'])['wasted_meals'].sum().reset_index()

num_years = len(cong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in cong_years])

for row, year in enumerate(cong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['day_of_week'], y=data_year['wasted_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Wasted Meals Throughout the Week for Each Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Day of Week", row=num_years, col=1)
fig.update_yaxes(title_text="Wasted Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total wasted meals per (year, day of week) for the non-congregate years only,
# drawn as one bar-chart row per year.
noncong_years = [2020, 2021, 2023]
grouped_data = dataset[dataset['year'].isin(noncong_years)].groupby(['year', 'day_of_week'])['wasted_meals'].sum().reset_index()

num_years = len(noncong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in noncong_years])

for row, year in enumerate(noncong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['day_of_week'], y=data_year['wasted_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Wasted Meals Throughout the Week for Each Non Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Day of Week", row=num_years, col=1)
fig.update_yaxes(title_text="Wasted Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total served meals per (year, week number), drawn as one bar-chart row per year.
grouped_data = dataset.groupby(['year', 'week_number'])['served_meals'].sum().reset_index()

years = dataset['year'].unique()
num_years = len(years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in years])

for row, year in enumerate(years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['week_number'], y=data_year['served_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Served Meals Throughout the Weeks for Each Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Week Operation", row=num_years, col=1)
fig.update_yaxes(title_text="Served Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total served meals per (year, week number) for the congregate years only,
# drawn as one bar-chart row per year.
cong_years = [2016, 2017, 2018, 2019, 2022]
grouped_data = dataset[dataset['year'].isin(cong_years)].groupby(['year', 'week_number'])['served_meals'].sum().reset_index()

num_years = len(cong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in cong_years])

for row, year in enumerate(cong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['week_number'], y=data_year['served_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Served Meals Throughout the Weeks for Each Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Week Operation", row=num_years, col=1)
fig.update_yaxes(title_text="Served Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total served meals per (year, week number) for the non-congregate years only,
# drawn as one bar-chart row per year.
noncong_years = [2020, 2021, 2023]
grouped_data = dataset[dataset['year'].isin(noncong_years)].groupby(['year', 'week_number'])['served_meals'].sum().reset_index()

num_years = len(noncong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in noncong_years])

for row, year in enumerate(noncong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['week_number'], y=data_year['served_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Served Meals Throughout the Weeks for Each Non Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Week Operation", row=num_years, col=1)
fig.update_yaxes(title_text="Served Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total wasted meals per (year, week number), drawn as one bar-chart row per year.
grouped_data = dataset.groupby(['year', 'week_number'])['wasted_meals'].sum().reset_index()

years = dataset['year'].unique()
num_years = len(years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in years])

for row, year in enumerate(years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['week_number'], y=data_year['wasted_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Wasted Meals Throughout the Weeks for Each Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Week Operation", row=num_years, col=1)
fig.update_yaxes(title_text="Wasted Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total wasted meals per (year, week number) for the congregate years only,
# drawn as one bar-chart row per year.
cong_years = [2016, 2017, 2018, 2019, 2022]
grouped_data = dataset[dataset['year'].isin(cong_years)].groupby(['year', 'week_number'])['wasted_meals'].sum().reset_index()

num_years = len(cong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in cong_years])

for row, year in enumerate(cong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['week_number'], y=data_year['wasted_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Wasted Meals Throughout the Weeks for Each Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Week Operation", row=num_years, col=1)
fig.update_yaxes(title_text="Wasted Meals")

fig.show()


In [None]:
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay

# Total wasted meals per (year, week number) for the non-congregate years only,
# drawn as one bar-chart row per year.
noncong_years = [2020, 2021, 2023]
grouped_data = dataset[dataset['year'].isin(noncong_years)].groupby(['year', 'week_number'])['wasted_meals'].sum().reset_index()

num_years = len(noncong_years)
fig = make_subplots(rows=num_years, cols=1, shared_xaxes=True, subplot_titles=[str(year) for year in noncong_years])

for row, year in enumerate(noncong_years, start=1):
    data_year = grouped_data[grouped_data['year'] == year]
    fig.add_trace(
        go.Bar(x=data_year['week_number'], y=data_year['wasted_meals'], name=str(year)),
        row=row, col=1
    )

fig.update_layout(
    title="Wasted Meals Throughout the Weeks for Each Non Congregate Year",
    height=1000,
)
# layout.xaxis / layout.yaxis only address the first subplot, so the axis titles
# are set through update_xaxes / update_yaxes: the x title on the bottom row
# (the x axis is shared), the y title on every row.
fig.update_xaxes(title_text="Week Operation", row=num_years, col=1)
fig.update_yaxes(title_text="Wasted Meals")

fig.show()


# **Feature Engineering**

In [None]:
dataset.columns

In [None]:
data = dataset.groupby('region')['served_meals'].sum().reset_index()
data.head()

In [None]:
# Share of all served meals contributed by each region, in percent.
total_meals = dataset['served_meals'].sum()
region_meals = dataset.groupby('region')['served_meals'].sum()
percent_dist = (region_meals / total_meals) * 100

# Print one (region, percentage) tuple per line. The former bare
# `percent_dist.reset_index()` call was dropped: its result was discarded.
for v in percent_dist.items():
  print(v)

In [None]:
# Distinct operating days per (week, region), then summed over the regions of each week.
# The column is selected before aggregating so only `day_of_week` is counted.
days_per_week_region = dataset.groupby(['week_serial_number', 'region'])['day_of_week'].nunique()
df1 = days_per_week_region.reset_index(name='number_of_days_operated')
df2 = df1.groupby('week_serial_number')['number_of_days_operated'].sum()

In [None]:
# Number of distinct regions and distinct sites that appear in each week.
by_week = dataset.groupby(['week_serial_number'])
regions_data = by_week['region'].nunique().reset_index(name='number_of_regions')
sites_data = by_week['site_name'].nunique().reset_index(name='number_of_sites')

In [None]:
temp = pd.merge(dataset, regions_data, on=['week_serial_number'])
df3 = pd.merge(temp, sites_data, on=['week_serial_number'])

In [None]:
# Columns carried forward into feature engineering. The explicit .copy() makes
# df4 an independent frame: it is modified in later cells (fillna, new columns),
# and writing to a bare column selection of df3 triggers SettingWithCopyWarning.
df4 = df3[['ordered_meals',
       'previous_day_meals', 'first_meals', 'second_meals',
       'program_adult_meals', 'nonprogram_adult_meals', 'served_meals',
       'damaged_meals', 'leftover_meals','unserved_people','region', 'available_meals', 'wasted_meals',
       'week_serial_number', 'temperature', 'wind',
       'humidity', 'precipitation', 'number_of_regions',
       'number_of_sites', 'year', 'event', 'size']].copy()

In [None]:
# Missing unserved counts are treated as 0. Assign the result back instead of
# calling fillna(inplace=True) on the column selection: the chained in-place
# form is deprecated and does not update the frame under pandas Copy-on-Write.
df4['unserved_people'] = df4['unserved_people'].fillna(0)

In [None]:
df4['meals_needed'] = df4['served_meals'] + df4['unserved_people']

In [None]:
df4.shape

In [None]:
df4.columns

In [None]:
# Replace the remaining missing values with 0, then one-hot encode the region.
df4.fillna(0, inplace=True)
df6 = pd.get_dummies(data=df4, columns=['region'])

# Per-week totals. The columns are selected with a list: the tuple form
# groupby(...)['a', 'b', 'c'] raises on pandas >= 2.0.
md1 = df6.groupby('week_serial_number')[['meals_needed', 'event', 'size']].agg({'meals_needed': 'sum', 'event': 'sum', 'size': lambda x: x.unique().sum()}).reset_index()

# Collapse the summed event column into a 0/1 flag.
binary_columns = ['event']
for col in binary_columns:
  md1.loc[md1[col] >= 1, col] = 1

# Per-week means of the columns that are averaged rather than summed.
md2 = df6.groupby('week_serial_number')[['number_of_regions', 'number_of_sites', 'year', 'temperature', 'humidity', 'precipitation', 'wind']].mean()

# 1-based position of each week within its year, in row order.
md2['week_order'] = md2.groupby(['year']).cumcount() + 1

# Every remaining column (meal counts and region dummies) is summed per week.
md3 = df6.drop(['meals_needed', 'temperature', 'humidity', 'precipitation', 'wind',
                'number_of_regions', 'number_of_sites', 'year', 'event', 'size'], axis=1)
md4 = md3.groupby('week_serial_number').sum().reset_index()

# Join the three weekly tables on the week key.
temp = pd.merge(md1, md2, on='week_serial_number')
df7 = pd.merge(temp, md4, on='week_serial_number')
df7.head()

In [None]:
df = pd.merge(df7, df2, on=['week_serial_number'])

In [None]:
df.shape

In [None]:
df.head()

In [None]:
# Lagged features: each row gets the value from two rows (weeks) earlier.
# The first two rows have no history and are filled with the mean of the
# lagged column.
lagged_columns = {
    'meals_needed_2weeks_prior': 'meals_needed',
    'meals_available_2weeks_prior': 'available_meals',
    'meals_served_2weeks_prior': 'served_meals',
    'people_unserved_2weeks_prior': 'unserved_people',
    # 'meals_wasted_2weeks_prior': 'wasted_meals',  # left disabled, as before
}
for lagged_name, source_name in lagged_columns.items():
  lagged = df[source_name].shift(2)
  # Assign the filled Series back rather than calling fillna(inplace=True) on
  # df[col]: the chained in-place form is deprecated and does not update the
  # frame under pandas Copy-on-Write.
  df[lagged_name] = lagged.fillna(lagged.mean())

In [None]:
# meals_needed from the previous row that shares the same week_order (the same
# week position in the preceding year present in the data). Rows without such a
# predecessor are filled with the mean of the shifted column. The result is
# assigned back instead of using a chained fillna(inplace=True), which is
# deprecated and does not update the frame under pandas Copy-on-Write.
prev_year_needed = df.groupby('week_order')['meals_needed'].shift()
df['prev_year_meals_needed_this_week'] = prev_year_needed.fillna(prev_year_needed.mean())

# df['prev_year_meals_available_this_week'] = df.groupby('week_order')['available_meals'].shift()
# df['prev_year_meals_available_this_week'].fillna(df.groupby('week_order')['available_meals'].mean(), inplace=True)

# df['prev_year_meals_served_this_week'] = df.groupby('week_order')['served_meals'].shift()
# df['prev_year_meals_served_this_week'].fillna(df.groupby('week_order')['served_meals'].mean(), inplace=True)

# df['prev_year_people_unserved_this_week'] = df.groupby('week_order')['unserved_people'].shift()
# df['prev_year_people_unserved_this_week'].fillna(df.groupby('week_order')['unserved_people'].mean(), inplace=True)

# df['prev_year_meals_wasted_this_week'] = df.groupby('week_order')['wasted_meals'].shift()
# df['prev_year_meals_wasted_this_week'].fillna(df.groupby('week_order')['wasted_meals'].mean(), inplace=True)

In [None]:
# df.tail(10)
# df = df[:59]
# df.shape
df
#weather forecasts 2 weeks ahead
#number of sites
#ordered meals

## **Prepping Data for Prediction**

In [None]:
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
df.head()

In [None]:
# Build the modelling frame by removing the raw meal counts and the other
# columns listed below from the weekly table.
df_subset = df.drop(
    columns=[
        'number_of_sites', 'year', 'ordered_meals', 'previous_day_meals',
        'first_meals', 'second_meals', 'program_adult_meals',
        'nonprogram_adult_meals', 'damaged_meals', 'leftover_meals',
        'available_meals', 'served_meals', 'unserved_people', 'wasted_meals',
    ]
)
df_subset.columns

In [None]:
# df_corr = df_subset[['meals_needed', 'temperature', 'humidity',
#        'precipitation', 'event', 'number_of_regions', 'week_order', 'number_of_days_operated',
#        'meals_needed_2weeks_prior', 'people_unserved_2weeks_prior',
#        'prev_year_meals_needed_this_week']]

In [None]:
# sns.pairplot(df_corr)

In [None]:
# corr = df_corr.corr()
# sns.heatmap(corr )

In [None]:
df_subset.head()

In [None]:
df_subset.columns

In [None]:
df_subset.shape

In [None]:
# Hold out the last 10 rows for testing; train on every row before them.
test_data = df_subset.iloc[-10:]
train_data = df_subset.iloc[:-10]

In [None]:
def model_building(train_data, test_data):
  """Split train and test frames into feature matrices and target vectors.

  The target is the 'meals_needed' column. The features are every other
  column except 'week_serial_number'. The input frames are not modified.

  Returns:
    (X_train, X_test, y_train, y_test)
  """
  target = 'meals_needed'
  non_features = [target, 'week_serial_number']

  X_train, X_test = (frame.drop(non_features, axis=1) for frame in (train_data, test_data))
  y_train, y_test = train_data[target], test_data[target]

  return X_train, X_test, y_train, y_test


X_train, X_test, y_train, y_test = model_building(train_data, test_data)

In [None]:
X_test.shape

## **Prediction using XGBRegression**

In [None]:
# Fit an XGBoost regressor with default settings on the training rows and
# score it on the held-out rows.
xgbr = XGBRegressor()
xgbr.fit(X_train, y_train)

predictions = xgbr.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print("Mean Absolute Error: ", mae)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6; the
# square root of the MSE gives the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print("Root Mean Squared Error (RMSE):", rmse)

# Error relative to the actual value, in percent (undefined where y_test is 0).
mape = np.mean(np.abs((y_test - predictions) / y_test)) * 100

print("Mean Absolute Percentage Error (MAPE):", mape)

In [None]:
# Test features and actual demand side by side with the current predictions;
# error_xgbr is prediction minus actual.
test_data = pd.concat([X_test, y_test], axis=1).assign(predicted_meals=predictions)
test_data['error_xgbr'] = test_data['predicted_meals'].sub(test_data['meals_needed'])
test_data

In [None]:
# Bring back the actual supply figures for the test rows. Column assignment
# aligns on the row index, so select exactly the test rows instead of a
# hard-coded tail(16) (the test set is the last 10 rows).
test_data['available_meals'] = df.loc[test_data.index, 'available_meals']
test_data['served_meals'] = df.loc[test_data.index, 'served_meals']
# Meals that were available but not served.
test_data['human_error'] = test_data['available_meals'] - test_data['served_meals']

test_data

In [None]:
import plotly.graph_objects as go

# Grouped horizontal bars comparing actual demand, predicted demand and the
# meals that were available, one group per test row.
labels = test_data.index.tolist()  # Use row index as labels
values_meals_needed = test_data['meals_needed']
values_predicted_meals = test_data['predicted_meals']
values_available_meals = test_data['available_meals']

series_by_name = [
    ('Meals Needed', values_meals_needed),
    ('Predicted Meals', values_predicted_meals),
    ('Available Meals', values_available_meals),
]
data = [
    go.Bar(name=bar_name, y=labels, x=bar_values, orientation='h', offsetgroup=group)
    for group, (bar_name, bar_values) in enumerate(series_by_name)
]

layout = go.Layout(
    title='Meal Demand',
    xaxis={'title': 'Number of Meals'},
    yaxis={'title': 'Rows'},
    height=1000,
    barmode='group',
)

figure = go.Figure(data=data, layout=layout)
figure.show()


In [None]:
X_test.columns

In [None]:
# Predicted vs. actual demand for the test rows, ordered by actual demand.
# The second line plots meals_needed against itself, i.e. the y = x reference.
# NOTE(review): `test_data1` is reassigned by the congregate train/test split
# further down — confirm the name reuse is harmless.
test_data1=test_data.sort_values('meals_needed', ascending=True)
plt.plot(test_data1['meals_needed'], test_data1['predicted_meals'])
plt.plot(test_data1['meals_needed'], test_data1['meals_needed'])

In [None]:
xgbr.feature_importances_

In [None]:
# Feature importances of the fitted XGBoost model as a two-column frame
# ('index' = feature name, 0 = importance), sorted from most to least important.
feat_importances = pd.DataFrame(xgbr.feature_importances_, index=X_train.columns).reset_index().sort_values(0,ascending=False)
# The result of head(5) is discarded here (it is not the cell's last expression).
feat_importances.head(5)
plt.figure(figsize=(10,10))
# Horizontal bar per feature.
plt.barh(feat_importances['index'], feat_importances[0])

# **Prediction using Gradient Boosting Machines**

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Fit a gradient boosting regressor with default settings on the training rows
# and score it on the held-out rows.
gbm = GradientBoostingRegressor()

gbm.fit(X_train, y_train)

predictions = gbm.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
print("Mean Absolute Error: ", mae)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6; the
# square root of the MSE gives the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print("Root Mean Squared Error (RMSE):", rmse)

# Error relative to the actual value, in percent (undefined where y_test is 0).
mape = np.mean(np.abs((y_test - predictions) / y_test)) * 100

print("Mean Absolute Percentage Error (MAPE):", mape)

In [None]:
# Test features and actual demand side by side with the current predictions.
test_data_gbm = pd.concat([X_test, y_test], axis=1)

test_data_gbm['predicted_meals_gbm'] = predictions
test_data_gbm['error_gbm'] = test_data_gbm['predicted_meals_gbm'] - test_data_gbm['meals_needed']

# Column assignment aligns on the row index, so select exactly the test rows
# instead of a hard-coded tail(16) (the test set is the last 10 rows).
test_data_gbm['available_meals'] = df.loc[test_data_gbm.index, 'available_meals']
test_data_gbm['served_meals'] = df.loc[test_data_gbm.index, 'served_meals']
# Meals that were available but not served.
test_data_gbm['human_error'] = test_data_gbm['available_meals'] - test_data_gbm['served_meals']

test_data_gbm

In [None]:
import plotly.graph_objects as go

# Grouped horizontal bars comparing actual demand, predicted demand and the
# meals that were available, one group per test row.
labels = test_data_gbm.index.tolist()  # Use row index as labels
values_meals_needed = test_data_gbm['meals_needed']
values_predicted_meals = test_data_gbm['predicted_meals_gbm']
values_available_meals = test_data_gbm['available_meals']

series_by_name = [
    ('Meals Needed', values_meals_needed),
    ('Predicted Meals', values_predicted_meals),
    ('Available Meals', values_available_meals),
]
data = [
    go.Bar(name=bar_name, y=labels, x=bar_values, orientation='h', offsetgroup=group)
    for group, (bar_name, bar_values) in enumerate(series_by_name)
]

layout = go.Layout(
    title='Meal Demand',
    xaxis={'title': 'Number of Meals'},
    yaxis={'title': 'Rows'},
    height=1000,
    barmode='group',
)

figure = go.Figure(data=data, layout=layout)
figure.show()


In [None]:
# Predicted vs. actual demand for the test rows, ordered by actual demand.
# The second line plots meals_needed against itself, i.e. the y = x reference.
test_data2=test_data_gbm.sort_values('meals_needed', ascending=True)
plt.plot(test_data2['meals_needed'], test_data2['predicted_meals_gbm'])
plt.plot(test_data2['meals_needed'], test_data2['meals_needed'])

# **Prediction using Random Forest**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest regressor with default settings on the training rows and
# score it on the held-out rows.
rf = RandomForestRegressor()

rf.fit(X_train, y_train)

predictions = rf.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print("Mean Absolute Error: ", mae)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6; the
# square root of the MSE gives the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print("Root Mean Squared Error (RMSE):", rmse)

# Error relative to the actual value, in percent (undefined where y_test is 0).
mape = np.mean(np.abs((y_test - predictions) / y_test)) * 100

print("Mean Absolute Percentage Error (MAPE):", mape)

In [None]:
# Test features and actual demand side by side with the current predictions.
test_data_rf = pd.concat([X_test, y_test], axis=1)

test_data_rf['predicted_meals_rf'] = predictions
test_data_rf['error_rf'] = test_data_rf['predicted_meals_rf'] - test_data_rf['meals_needed']

# Column assignment aligns on the row index, so select exactly the test rows
# instead of a hard-coded tail(16) (the test set is the last 10 rows).
test_data_rf['available_meals'] = df.loc[test_data_rf.index, 'available_meals']
test_data_rf['served_meals'] = df.loc[test_data_rf.index, 'served_meals']
# Meals that were available but not served.
test_data_rf['human_error'] = test_data_rf['available_meals'] - test_data_rf['served_meals']
test_data_rf

In [None]:
import plotly.graph_objects as go

# Grouped horizontal bars comparing actual demand, predicted demand and the
# meals that were available, one group per test row.
labels = test_data_rf.index.tolist()  # Use row index as labels
values_meals_needed = test_data_rf['meals_needed']
values_predicted_meals = test_data_rf['predicted_meals_rf']
values_available_meals = test_data_rf['available_meals']

series_by_name = [
    ('Meals Needed', values_meals_needed),
    ('Predicted Meals', values_predicted_meals),
    ('Available Meals', values_available_meals),
]
data = [
    go.Bar(name=bar_name, y=labels, x=bar_values, orientation='h', offsetgroup=group)
    for group, (bar_name, bar_values) in enumerate(series_by_name)
]

layout = go.Layout(
    title='Meal Demand',
    xaxis={'title': 'Number of Meals'},
    yaxis={'title': 'Rows'},
    height=1000,
    barmode='group',
)

figure = go.Figure(data=data, layout=layout)
figure.show()


In [None]:
# Predicted vs. actual demand for the test rows, ordered by actual demand.
# The second line plots meals_needed against itself, i.e. the y = x reference.
test_data3=test_data_rf.sort_values('meals_needed', ascending=True)
plt.plot(test_data3['meals_needed'], test_data3['predicted_meals_rf'])
plt.plot(test_data3['meals_needed'], test_data3['meals_needed'])

# **Congregate Data Prediction**

In [None]:
# Keep only the congregate years and renumber the rows from 0. drop=True
# discards the old row labels: a plain reset_index() keeps them as an 'index'
# column, which is not in the drop list below and would therefore reach the
# model as a feature.
df_cong = df[df['year'].isin([2016, 2017, 2018, 2019, 2022])].reset_index(drop=True)

# Remove the raw meal counts and the other columns listed below.
df_subset1 = df_cong.drop(['number_of_sites', 'year', 'ordered_meals', 'previous_day_meals', 'first_meals', 'second_meals',
       'program_adult_meals', 'nonprogram_adult_meals','damaged_meals', 'leftover_meals','available_meals',
       'served_meals','unserved_people', 'wasted_meals' ], axis=1)
df_subset1.columns

In [None]:
# Hold out the last 10 congregate rows for testing; train on every row before them.
test_data1 = df_subset1.iloc[-10:]
train_data1 = df_subset1.iloc[:-10]

In [None]:
X_train1, X_test1, y_train1, y_test1 = model_building(train_data1, test_data1)

In [None]:
# Fit an XGBoost regressor with default settings on the congregate training
# rows and score it on the congregate held-out rows.
xgbr1 = XGBRegressor()
xgbr1.fit(X_train1, y_train1)

predictions = xgbr1.predict(X_test1)

mae = mean_absolute_error(y_test1, predictions)
print("Mean Absolute Error: ", mae)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6; the
# square root of the MSE gives the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_test1, predictions))
print("Root Mean Squared Error (RMSE):", rmse)

# Error relative to the actual value, in percent (undefined where y_test1 is 0).
mape = np.mean(np.abs((y_test1 - predictions) / y_test1)) * 100

print("Mean Absolute Percentage Error (MAPE):", mape)

In [None]:
# Congregate test features and actual demand side by side with the current
# predictions; error_cong is prediction minus actual.
test_data_cong = pd.concat([X_test1, y_test1], axis=1).assign(predicted_meals_cong=predictions)
test_data_cong['error_cong'] = test_data_cong['predicted_meals_cong'].sub(test_data_cong['meals_needed'])

# Supply figures for the last 10 congregate rows (aligned on the row index).
last_ten_supply = df_cong[['available_meals', 'served_meals']].tail(10)
test_data_cong['available_meals'] = last_ten_supply['available_meals']
test_data_cong['served_meals'] = last_ten_supply['served_meals']
test_data_cong['human_error'] = test_data_cong['available_meals'].sub(test_data_cong['served_meals'])

test_data_cong

In [None]:
import plotly.graph_objects as go

# Grouped horizontal bars comparing actual demand, predicted demand and the
# meals that were available, one group per congregate test row.
labels = test_data_cong.index.tolist()  # Use row index as labels
values_meals_needed = test_data_cong['meals_needed']
values_predicted_meals = test_data_cong['predicted_meals_cong']
values_available_meals = test_data_cong['available_meals']

series_by_name = [
    ('Meals Needed', values_meals_needed),
    ('Predicted Meals', values_predicted_meals),
    ('Available Meals', values_available_meals),
]
data = [
    go.Bar(name=bar_name, y=labels, x=bar_values, orientation='h', offsetgroup=group)
    for group, (bar_name, bar_values) in enumerate(series_by_name)
]

layout = go.Layout(
    title='Meal Demand',
    xaxis={'title': 'Number of Meals'},
    yaxis={'title': 'Rows'},
    height=1000,
    barmode='group',
)

figure = go.Figure(data=data, layout=layout)
figure.show()


In [None]:
# Predicted vs. actual demand for the congregate test rows, ordered by actual
# demand. The second line plots meals_needed against itself (y = x reference).
test_data4=test_data_cong.sort_values('meals_needed', ascending=True)
plt.plot(test_data4['meals_needed'], test_data4['predicted_meals_cong'])
plt.plot(test_data4['meals_needed'], test_data4['meals_needed'])

# **Non-Congregate Data Prediction**

In [None]:
# Rows of the weekly table for the non-congregate years.
# NOTE(review): only 2020 and 2021 are selected here, while the visualization
# cells list the non-congregate years as [2020, 2021, 2023] — confirm whether
# 2023 is left out on purpose.
df_noncong = df[df['year'].isin([2020, 2021])]

# Remove the raw meal counts and the other columns listed below.
df_subset2 = df_noncong.drop(['number_of_sites', 'year', 'ordered_meals', 'previous_day_meals', 'first_meals', 'second_meals',
       'program_adult_meals', 'nonprogram_adult_meals','damaged_meals', 'leftover_meals','available_meals',
       'served_meals','unserved_people', 'wasted_meals' ], axis=1)
df_subset2.columns

In [None]:
# Hold out the last 10 non-congregate rows for testing; train on every row before them.
test_data2 = df_subset2.iloc[-10:]
train_data2 = df_subset2.iloc[:-10]

In [None]:
X_train2, X_test2, y_train2, y_test2 = model_building(train_data2, test_data2)

In [None]:
# Fit an XGBoost regressor with default settings on the non-congregate training
# rows and score it on the non-congregate held-out rows.
xgbr2 = XGBRegressor()
xgbr2.fit(X_train2, y_train2)

predictions = xgbr2.predict(X_test2)

mae = mean_absolute_error(y_test2, predictions)
print("Mean Absolute Error: ", mae)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6; the
# square root of the MSE gives the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_test2, predictions))
print("Root Mean Squared Error (RMSE):", rmse)

# Error relative to the actual value, in percent (undefined where y_test2 is 0).
mape = np.mean(np.abs((y_test2 - predictions) / y_test2)) * 100

print("Mean Absolute Percentage Error (MAPE):", mape)

In [None]:
# Non-congregate test features and actual demand side by side with the current
# predictions; error_noncong is prediction minus actual.
test_data_noncong = pd.concat([X_test2, y_test2], axis=1)

test_data_noncong['predicted_meals_noncong'] = predictions
test_data_noncong['error_noncong'] = test_data_noncong['predicted_meals_noncong'] - test_data_noncong['meals_needed']

# Supply figures for the last 10 non-congregate rows (aligned on the row index).
test_data_noncong['available_meals'] = df_noncong['available_meals'].tail(10)
test_data_noncong['served_meals'] = df_noncong['served_meals'].tail(10)
# Computed from this frame's own columns; it previously read the congregate
# frame (test_data_cong), a copy-paste slip.
test_data_noncong['human_error'] = test_data_noncong['available_meals'] - test_data_noncong['served_meals']

test_data_noncong

In [None]:
import plotly.graph_objects as go

# Grouped horizontal bars comparing actual demand, predicted demand and the
# meals that were available, one group per non-congregate test row.
labels = test_data_noncong.index.tolist()  # Use row index as labels
values_meals_needed = test_data_noncong['meals_needed']
values_predicted_meals = test_data_noncong['predicted_meals_noncong']
values_available_meals = test_data_noncong['available_meals']

series_by_name = [
    ('Meals Needed', values_meals_needed),
    ('Predicted Meals', values_predicted_meals),
    ('Available Meals', values_available_meals),
]
data = [
    go.Bar(name=bar_name, y=labels, x=bar_values, orientation='h', offsetgroup=group)
    for group, (bar_name, bar_values) in enumerate(series_by_name)
]

layout = go.Layout(
    title='Meal Demand',
    xaxis={'title': 'Number of Meals'},
    yaxis={'title': 'Rows'},
    height=1000,
    barmode='group',
)

figure = go.Figure(data=data, layout=layout)
figure.show()


In [None]:
# Predicted vs. actual demand for the non-congregate test rows, ordered by
# actual demand. The second line plots meals_needed against itself (y = x reference).
test_data5=test_data_noncong.sort_values('meals_needed', ascending=True)
plt.plot(test_data5['meals_needed'], test_data5['predicted_meals_noncong'])
plt.plot(test_data5['meals_needed'], test_data5['meals_needed'])

# **Error Function**

In [None]:
def error_function(error_col, shortfall_weight=7):
  """Return an asymmetric total cost for a collection of prediction errors.

  Each error is truncated toward zero with int(). A negative error costs
  `shortfall_weight` times its magnitude; a non-negative error costs its
  truncated value. In this notebook the errors are "predicted minus needed",
  so negative values are under-predictions.

  Args:
    error_col: iterable of numeric errors.
    shortfall_weight: multiplier applied to negative errors (default 7, the
      value that was previously hard-coded).

  Returns:
    The summed cost (0 for an empty input).
  """
  value = 0
  for error in error_col:
    whole = int(error)  # truncates toward zero, so -0.5 counts as 0
    if whole < 0:
      value += shortfall_weight * -whole
    else:
      value += whole

  return value

value = error_function(test_data['error_xgbr'])
print(value)
value = error_function(test_data_gbm['error_gbm'])
print(value)
value = error_function(test_data_rf['error_rf'])
print(value)

In [None]:
values = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 600, 700]

predicted_meals = list(test_data['predicted_meals'])
meals_needed = list(test_data['meals_needed'])

# For every test row, count how many tolerances in `values` are wide enough for
# the actual demand to fall within predicted +/- tolerance. The tolerances are
# iterated directly: indexing `values` with range(len(predicted_meals)) only
# reached as many tolerances as there are test rows, and would raise IndexError
# with more than 16 rows.
padding = [
    sum(1 for tolerance in values if predicted - tolerance <= needed <= predicted + tolerance)
    for predicted, needed in zip(predicted_meals, meals_needed)
]

print(padding)

# 100 - 10%
# 150 - 20%
# 200 - 30%
# 250 - 30%
# 300 - 50%
# 350 - 50%
# 600 - 60%
# 700 - 80%



# **Integration with the Dashboard**

In [None]:
X_test.columns

In [None]:
from collections import defaultdict
import urllib.request
import csv
import codecs

import sys
import urllib.error

# Download the weather timeline for Sewanee as CSV and collect the columns used
# as model inputs into `d` (column name -> list of string values).
# NOTE(review): the API key is hard-coded in the URL below; consider rotating it
# and loading it from configuration instead of keeping it in the notebook.
try:
    # The context manager closes the HTTP response once the rows are read.
    with urllib.request.urlopen("https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Sewanee?include=fcst%2Cobs%2Chistfcst%2Cstats%2Cdays&key=3QCZSDMUE4G8VWNUXHY7ZZEMN&options=beta&contentType=csv") as ResultBytes:

        # Parse the results as CSV
        CSVText = csv.reader(codecs.iterdecode(ResultBytes, 'utf-8'))

        # Skip the header row
        next(CSVText)

        # Process each row of the CSV data
        # assumes positions 1, 4, 9, 10 and 17 hold date, temperature, humidity,
        # precipitation and wind — TODO confirm against the CSV header
        d = defaultdict(list)
        for row in CSVText:
            d['date'].append(row[1])
            d['temperature'].append(row[4])
            d['humidity'].append(row[9])
            d['precipitation'].append(row[10])
            d['wind'].append(row[17])

except urllib.error.HTTPError as e:
    ErrorInfo = e.read().decode()
    print('Error code:', e.code, ErrorInfo)
    sys.exit()
except urllib.error.URLError as e:
    # A plain URLError (DNS failure, refused connection, ...) has no response
    # body or status code; it only carries a `reason`.
    print('Error:', e.reason)
    sys.exit()

# The date column is not used as a feature.
del(d['date'])
print(d)


In [None]:
# Convert every collected weather value from string to float.
for field in d:
  d[field] = [float(reading) for reading in d[field]]

# Mean of each weather variable over the entries from position 7 onward.
unseen_data = {
    field: np.mean(np.array(d[field][7:]))
    for field in ('temperature', 'humidity', 'wind', 'precipitation')
}

def region_cols(input_list):
  """Return the one-hot region columns for a selection of regions.

  Args:
    input_list: collection of column names of the form 'region_<name>'.

  Returns:
    Dict mapping each known region column to 1 if it is in `input_list`, else 0.
    Names in `input_list` that are not known region columns are ignored.
  """
  regions = [
      'region_Altamont', 'region_Beersheba Springs', 'region_Coalmont',
      'region_Coalmont-Altamont', 'region_Decherd',
      'region_Downtown Winchester', 'region_Gruetli-Palmer', 'region_Midway',
      'region_Monteagle', 'region_Pelham', 'region_Rural Decherd',
      'region_Sewanee', 'region_Sherwood', 'region_Tracy City',
      'region_Winchester',
  ]
  return {column: int(column in input_list) for column in regions}

# Hand-set inputs for the week being predicted.
unseen_data['event'] = 0
unseen_data['week_order'] = 1
unseen_data['number_of_regions'] = 2
unseen_data['number_of_days_operated'] = 2

from datetime import date
last_yr = date.today().year - 1

# meals_needed for the same week_order in the previous calendar year. The
# scalar is taken with .iloc[0]: calling int()/float() on a Series is
# deprecated in pandas and fails unless the Series has exactly one element.
same_week_last_year = df.loc[
    (df['year'] == last_yr) & (df['week_order'] == unseen_data['week_order']),
    'meals_needed',
]
unseen_data['prev_year_meals_needed_this_week'] = int(same_week_last_year.iloc[0])

# The "2 weeks prior" inputs are taken from the most recent week in the data.
latest_week = df.loc[df['week_serial_number'] == df['week_serial_number'].max()].iloc[0]
unseen_data['meals_needed_2weeks_prior'] = float(latest_week['meals_needed'])
unseen_data['meals_available_2weeks_prior'] = float(latest_week['available_meals'])
unseen_data['meals_served_2weeks_prior'] = float(latest_week['served_meals'])
unseen_data['people_unserved_2weeks_prior'] = float(latest_week['unserved_people'])

input_list = ['Altamont', 'Decherd']
input_list1 = ['region_' + i for i in input_list]
region_dict = region_cols(input_list1)

# Total size of the selected regions. Each lookup is reduced to a scalar:
# adding the filtered Series itself turned unseen_data['size'] into a Series,
# and Series with different row labels add up to NaN.
# assumes region_size has one row per region — TODO confirm
unseen_data['size'] = 0

for i in input_list:
  size = region_size.loc[region_size['region'] == i, 'size']
  unseen_data['size'] += size.sum()



In [None]:
test_d = {**unseen_data, **region_dict}
x_test=pd.DataFrame(test_d, index=[0])
x_test

In [None]:
def _weather_bucket(reading, cutoffs):
  """Map a reading to 0-3 by comparing it with the first three cutoffs in order."""
  if reading < cutoffs[0]:
    return 0
  if reading < cutoffs[1]:
    return 1
  if reading < cutoffs[2]:
    return 2
  return 3

# Replace each raw weather value with its bucket, using that variable's thresholds.
for column, cutoffs in (
    ('temperature', threshold_temp),
    ('humidity', threshold_humidity),
    ('wind', threshold_wind),
    ('precipitation', threshold_prec),
):
  x_test[column] = x_test[column].apply(_weather_bucket, args=(cutoffs,))

x_test

In [None]:
x_test=x_test[X_test.columns]
x_test

In [None]:
X_test.info()

In [None]:
xgbr.predict(x_test)

In [None]:
df.columns