# **Importing Code**

In [None]:
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from google.colab import drive

drive.mount('/content/drive')


%run '/content/drive/MyDrive/Colab Notebooks/Data Cleaning - Food Security.ipynb'


In [None]:
meals_dataset.columns

In [None]:
import os
os.listdir('/content/drive/MyDrive/Colab Notebooks')

In [None]:
## Loading the 2023 data and the validation sheet from Google Sheets
SHEET_ID = 'REDACTED'
SHEET_NAME6 = 'REDACTED'
SHEET_NAME7 = 'REDACTED'


def _sheet_csv_url(sheet_id, sheet_name):
    """Return the CSV-export URL for one tab of a Google Sheet.

    The tab name is percent-encoded so that names containing spaces, '&',
    '#', etc. do not corrupt the query string.
    """
    return f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={quote(sheet_name, safe="")}'


url6 = _sheet_csv_url(SHEET_ID, SHEET_NAME6)
url7 = _sheet_csv_url(SHEET_ID, SHEET_NAME7)
# The first column of each sheet is used as the DataFrame index.
validating = pd.read_csv(url6, index_col=0)
data_test = pd.read_csv(url7, index_col=0)

In [None]:
dataset.columns

# **Reading Data**

In [None]:
## Adding column for weeks in operations
week_number_dict_min = dict(dataset.groupby('year')['week_number'].min())

In [None]:
def week(row):
    """Return the 1-based week of operation for ``row`` within its year.

    The first week seen for a given year (looked up in the module-level
    ``week_number_dict_min`` mapping) counts as week 1.

    Args:
        row: Mapping-like record exposing ``'week_number'`` and ``'year'``.

    Returns:
        ``row['week_number']`` minus the year's minimum week number, plus one.
    """
    current_week = row['week_number']
    first_week_of_year = week_number_dict_min[row['year']]
    return current_week - first_week_of_year + 1

dataset['Relative_Week_Number'] = dataset.apply(week, axis=1)

dataset.head()

## **Data Insights**

**Insight 1**

In [None]:
# Per-row share of ordered meals that were NOT served, in percent.
# The previous expression divided the two column totals, which produced a
# single number (and the *served* share at that) copied onto every row, so
# any later groupby on this column was constant.
# Rows with zero ordered meals become NaN instead of inf.
_ordered = dataset['ordered_meals']
dataset['Percentage of Wasted'] = (_ordered - dataset['served_meals']) / _ordered.where(_ordered != 0) * 100

In [None]:
# Weekly mean of served meals and unserved people, with +/- one standard
# deviation drawn as error bars.
# `year2018` is defined here with the same filter the later cell uses, so the
# cell no longer depends on being run out of order.
# NOTE(review): the name says 2018 but the only definition in this notebook
# filters on 2020 — confirm which year is intended.
year2018 = dataset[dataset['year'] == 2020]
# Select the numeric columns *before* aggregating: calling .mean() on every
# column fails on text columns in recent pandas.
_weekly = year2018.groupby('Relative_Week_Number')[['served_meals', 'unserved_people']]
site_meals_served = _weekly.mean().reset_index()
_weekly_std = _weekly.std().reset_index()

plt.figure(figsize=(8,8))
sns.lineplot(data=site_meals_served, x='Relative_Week_Number', y='served_meals', label='People Served')
sns.lineplot(data=site_meals_served, x='Relative_Week_Number', y='unserved_people', label='Unserved People')
plt.xlabel('Week of Operation')
plt.ylabel('Number of People')
plt.title('Number of people Unserved and Served vs Week Number')

# fmt='none' draws only the error bars; without yerr, errorbar() merely
# re-drew the two lines in different colours.
plt.errorbar(x=site_meals_served['Relative_Week_Number'], y=site_meals_served['served_meals'],
             yerr=_weekly_std['served_meals'], fmt='none', ecolor='gray', capsize=3)
plt.errorbar(x=site_meals_served['Relative_Week_Number'], y=site_meals_served['unserved_people'],
             yerr=_weekly_std['unserved_people'], fmt='none', ecolor='gray', capsize=3)

In [None]:
dataset.groupby('Relative_Week_Number')[['served_meals', 'unserved_people']].mean().sort_values(by='unserved_people', ascending=True)

In [None]:
dataset['unserved_people'].sort_values(ascending = False)

In [None]:
# Weekly mean of served meals and unserved people for one year, with +/- one
# standard deviation drawn as error bars.
# NOTE(review): the variable is named `year2018` but the filter selects 2020;
# the name is kept because another cell reads it — confirm the intended year.
year2018 = dataset[dataset['year'] == 2020]
# Select the numeric columns *before* aggregating: calling .mean() on every
# column fails on text columns in recent pandas.
_weekly = year2018.groupby('Relative_Week_Number')[['served_meals', 'unserved_people']]
site_meals_served = _weekly.mean().reset_index()
_weekly_std = _weekly.std().reset_index()

plt.figure(figsize=(8,8))
sns.lineplot(data=site_meals_served, x='Relative_Week_Number', y='served_meals', label='People Served')
sns.lineplot(data=site_meals_served, x='Relative_Week_Number', y='unserved_people', label='Unserved People')
plt.xlabel('Week of Operation')
plt.ylabel('Number of People')
plt.title('Number of people Unserved and Served vs Week Number')

# fmt='none' draws only the error bars; without yerr, errorbar() merely
# re-drew the two lines in different colours.
plt.errorbar(x=site_meals_served['Relative_Week_Number'], y=site_meals_served['served_meals'],
             yerr=_weekly_std['served_meals'], fmt='none', ecolor='gray', capsize=3)
plt.errorbar(x=site_meals_served['Relative_Week_Number'], y=site_meals_served['unserved_people'],
             yerr=_weekly_std['unserved_people'], fmt='none', ecolor='gray', capsize=3)

In [None]:
dataset.columns

**Insight 2**

In [None]:
# Mean wasted percentage per week of operation.
# The column is selected before aggregating so that .mean() is not attempted
# on the frame's non-numeric columns.
site_meals_served = dataset.groupby('Relative_Week_Number')['Percentage of Wasted'].mean().reset_index()
plt.figure(figsize=(8,8))
sns.barplot(data=site_meals_served, x='Relative_Week_Number', y='Percentage of Wasted', label='Wasted')
plt.xlabel('Week of Operation')
plt.ylabel('Percentage of Wasted Meals')
plt.title('Percentage of Wasted Meals per Week Number')

# Per-week standard deviations (not used by the plot above in this cell).
served_meals_error = dataset.groupby('Relative_Week_Number')['served_meals'].std().reset_index()
unserved_people_error = dataset.groupby('Relative_Week_Number')['unserved_people'].std().reset_index()

**Insight 3**

In [None]:
# Mean number of ordered meals per week of operation.
# The column is selected before aggregating so that .mean() is not attempted
# on the frame's non-numeric columns.
site_meals_served = dataset.groupby('Relative_Week_Number')[['ordered_meals']].mean().reset_index()
plt.figure(figsize=(8,8))
sns.lineplot(data=site_meals_served, x='Relative_Week_Number', y='ordered_meals')
plt.xlabel('Week of Operation')
plt.ylabel('Number of Meals Ordered')
plt.title('Week Number vs Number of Meals Ordered')

**Insight 4**

In [None]:
site_meals_melted = pd.melt(dataset, id_vars = ['date', 'day_of_week', 'year', 'region'], value_vars = ['wasted_meals', 'served_meals', 'unserved_people'])

In [None]:
site_meals_melted[site_meals_melted['variable'] == 'unserved_people']

In [None]:
from matplotlib.pyplot import figure

In [None]:
# Grouped bar chart: mean wasted / served / unserved counts per weekday.
day_name = {2:"Mondays", 3:"Tuesdays", 4:"Wednesdays", 5:"Thursdays", 6:"Fridays", 7:"Saturdays", 1:"Sundays"}
# Display label for each melted variable. Binding the label to the variable
# (instead of passing a positional label list to plt.legend) guarantees the
# legend cannot drift out of sync with the bar order.
_variable_labels = {
    'wasted_meals': 'Wasted Meals',
    'served_meals': 'Served Meals',
    'unserved_people': 'Unserved Meals',
}
site_meals_melted['text_days'] = site_meals_melted['day_of_week'].map(day_name)
# Sorting by the numeric weekday fixes the x-axis category order (1 = Sundays first).
site_meals_melted = site_meals_melted.sort_values('day_of_week', ascending=True)

_plot_data = site_meals_melted.assign(series=site_meals_melted['variable'].map(_variable_labels))

plt.figure(figsize=(12,8), dpi=80)
_ax = sns.barplot(data=_plot_data, x='text_days', y='value', hue='series',
                  hue_order=list(_variable_labels.values()), errorbar=None)
_ax.legend(title=None)
plt.xlabel('Weekday')
plt.ylabel('Number of Meals')
plt.title('Weekday vs Number of Meals')

In [None]:
# Treat missing unserved counts as zero. Assign the result back rather than
# calling fillna(inplace=True) on the selected column: the chained in-place
# form is deprecated and does not update the frame under copy-on-write.
dataset['unserved_people'] = dataset['unserved_people'].fillna(0)

In [None]:
dataset['meals_needed'] = dataset['unserved_people'] + dataset['served_meals']

In [None]:
df = dataset[dataset['year'] == 2016]

In [None]:
sns.boxplot(data=df['meals_needed'])

In [None]:
sns.boxplot(data=dataset['meals_needed'])

In [None]:
dataset['meals_needed']

In [None]:
## autocorrelation plot for meals needed
pd.plotting.autocorrelation_plot(dataset['meals_needed'])

In [None]:
# Rows for 2020 and 2021 only. The explicit copy makes meal_counts an
# independent frame, so later column assignments on it neither warn nor
# silently fail to take effect.
meal_counts = dataset[dataset['year'].isin([2020, 2021])].copy()

In [None]:
meal_counts.head()

In [None]:
## Autocorrelation plot of meals needed for the rows in meal_counts
## (filtered to years 2020 and 2021 where meal_counts is built)
pd.plotting.autocorrelation_plot(meal_counts['meals_needed'])

In [None]:
dataset.head()

In [None]:
plt.boxplot(dataset['wind_speed'])

In [None]:
dataset[dataset['wind_speed'] == dataset['wind_speed'].max()]

In [None]:
dataset.columns


In [None]:
# Lag features: each row receives the value found two rows earlier in the
# frame's current row order (the first two rows become NaN).
# NOTE(review): shift() is purely positional and this cell neither sorts nor
# groups the frame — presumably rows are already in date order; confirm.
dataset['meals_needed_shifted'] = dataset['meals_needed'].shift(2)
dataset['wasted_meals_shifted'] = dataset['wasted_meals'].shift(2)

In [None]:
# Distinct regions and distinct sites seen in each serial week of `df`.
_per_week = df.groupby('week_serial_number')
regions_data = _per_week['region'].nunique().reset_index(name='number_of_regions')
sites_data = _per_week['site_name'].nunique().reset_index(name='number_of_sites')

In [None]:
# Distinct operating weekdays per (serial week, region) ...
df1 = (
    dataset.groupby(['week_serial_number', 'region'])['day_of_week']
    .nunique()
    .reset_index(name='number_of_days_operated')
)
# ... then summed over regions to give one total per serial week.
df2 = df1.groupby('week_serial_number')['number_of_days_operated'].sum()

In [None]:
datacorr = dataset[['meals_needed', 'Relative_Week_Number', 'temperature', 'precipitation', 'day_of_week', 'wind_speed', 'meals_needed_shifted', 'week_serial_number', 'wasted_meals_shifted', 'event']]

In [None]:
corr = datacorr.corr()
sns.heatmap(corr)

In [None]:
# Replace missing 'diffmeals' values with 0. Building a new frame with
# assign() avoids the chained in-place fillna on a filtered slice, which is
# deprecated and may leave meal_counts unchanged.
# NOTE(review): no cell in this notebook creates 'diffmeals' — confirm where
# the column comes from; this line raises KeyError if it is absent.
meal_counts = meal_counts.assign(diffmeals=meal_counts['diffmeals'].fillna(0))

In [None]:
meal_counts

In [None]:
# Count the distinct weekdays each region operated in each serial week.
_days_by_week_region = dataset.groupby(['week_serial_number', 'region'])['day_of_week'].nunique()
df1 = _days_by_week_region.reset_index(name='number_of_days_operated')
# Total operating days per serial week across all regions.
df2 = df1.groupby('week_serial_number')['number_of_days_operated'].sum()

In [None]:
dataset['Relative_Week_Number'].unique()

In [None]:
validating.head()

In [None]:
## Graph to show process with data cleaning
# Bar chart of '#Rows' against 'Steps' from the `validating` sheet, with
# custom title and axis captions.
# NOTE(review): x and color use the column 'Steps' while hover_data and
# labels use 'Step' — confirm both columns exist in `validating`.
# NOTE(review): color='Steps' is combined with a one-colour sequence, so
# every bar presumably gets the same colour — confirm that is intended.
fig = px.bar(validating, x='Steps', y='#Rows',
             hover_data=['Step'], color='Steps', color_discrete_sequence=['#009ACD'], labels={'Step':'Our data'}, height=400)
fig.update_layout(title='Our Process with Data Cleaning')
fig.update_xaxes(title_text='Stages')
fig.update_yaxes(title_text='Number of Rows')
fig.show()



In [None]:
import plotly.express as px

# Same data-cleaning bar chart as the cell above it, without per-step
# colouring and with a purple bar colour.
# NOTE(review): x uses 'Steps' while hover_data/labels use 'Step' — confirm
# both columns exist in `validating`.
fig = px.bar(validating, x='Steps', y='#Rows',
             hover_data=['Step'], color_discrete_sequence=['#6922de'], labels={'Step':'Our data'}, height=400)
fig.update_layout(title='Our Process with Data Cleaning')
fig.update_xaxes(title_text='Stages')
fig.update_yaxes(title_text='Number of Rows')
fig.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Bar chart of served meals by week number, with a red marker on every row
# flagged as an event.
# Non-numeric entries are coerced to NaN so the plot cannot fail on stray text.
dataset['week_number'] = pd.to_numeric(dataset['week_number'], errors='coerce')
dataset['served_meals'] = pd.to_numeric(dataset['served_meals'], errors='coerce')
dataset = dataset.sort_values('week_number')
plt.bar(dataset['week_number'], dataset['served_meals'], label='Meals Served')
# The markers previously indexed an undefined `trendline` array. They are now
# placed at the served-meals value of each event row.
# NOTE(review): marker height is an assumption about the original intent.
_event_rows = dataset[dataset['event'] == 1]
plt.plot(_event_rows['week_number'], _event_rows['served_meals'], 'ro', markersize=5, label='Event')
plt.xlabel('Week')
plt.ylabel('Count')
plt.title('Meals Served with Events')
plt.legend()
plt.show()

In [None]:
# Weekly mean of meals needed as bars, overlaid with an OLS trendline fitted
# to the weekly mean of the event flag.
site_meals_served = dataset.groupby('Relative_Week_Number')[['meals_needed', 'event', 'year']].mean().reset_index()
fig = px.bar(site_meals_served, x="Relative_Week_Number", y="meals_needed", color="Relative_Week_Number", barmode="group")

# Plotly Express fits the trendline; the helper figure is never shown, only
# its fitted points are read back (the second trace of the helper figure).
help_fig = px.scatter(site_meals_served, x="Relative_Week_Number", y="event", trendline="ols")
x_trend = help_fig["data"][1]['x']
y_trend = help_fig["data"][1]['y']

# go.Line is deprecated; a Scatter in 'lines' mode is the supported form.
# The trace is named here so that the bar trace keeps its own name.
fig.add_trace(go.Scatter(x=x_trend, y=y_trend, mode='lines', name='Event?'))
fig.update_layout(title='Number of Meals Needed with Events')
fig.update_xaxes(title_text='Week of Operation')
fig.update_yaxes(title_text='Meals Needed')

fig.show(config={'displayModeBar': False})

In [None]:
import plotly.graph_objects as go
import plotly.express as px

# Weekly mean of meals needed and of the event flag, drawn as a bar chart
# coloured by week number.
site_meals_served = dataset.groupby(['Relative_Week_Number'])[['meals_needed', 'event']].mean().reset_index()
fig = px.bar(site_meals_served, x="Relative_Week_Number", y="meals_needed", color='Relative_Week_Number', barmode="group")

# Helper scatter with an OLS trendline; the x/y values of its second trace
# are read back.
# NOTE(review): x_trend / y_trend are never added to `fig` in this cell.
help_fig = px.scatter(site_meals_served, x="Relative_Week_Number", y="event", trendline="ols")
x_trend = help_fig["data"][1]['x']
y_trend = help_fig["data"][1]['y']

fig.show()

In [None]:
dataset.head()

In [None]:
import plotly.express as px

import plotly.express as px
# Sunburst of wasted meals broken down by year, then region, then site;
# each sector shows its label and its share of the parent sector.
_hierarchy = ['year', 'region', 'site_name']
fig = (
    px.sunburst(dataset, path=_hierarchy, values='wasted_meals')
    .update_traces(textinfo='label+percent parent')
    .update_layout(title='The Percent of Meals Wasted')
)

fig.show()

In [None]:
meals_dataset['meals_needed_2'] = meals_dataset['unserved_people'] + meals_dataset['served_meals'] + meals_dataset['wasted_meals']

In [None]:
meals_dataset.groupby('year')['served_meals'].sum()

In [None]:
yearly_ordered = meals_dataset.groupby('year')[['meals_needed_2', 'ordered_meals']].sum().reset_index()

In [None]:
dataset.columns

In [None]:
# Line chart of total meals needed per year. The previous title
# ('Life expectancy in Canada') was left over from an example and did not
# describe this data.
fig = px.line(yearly_ordered, x="year", y="meals_needed_2", title='Meals Needed per Year')
fig.show()

In [None]:
meals_dataset.groupby('year')

In [None]:
# Rows of meals_dataset for 2020. The mask is built from meals_dataset, so it
# must be applied to meals_dataset as well (as the 2021 cell does); applying
# it to data_test mixed the row indexes of two unrelated frames.
meals_2020 = meals_dataset[meals_dataset['year'] == 2020]

In [None]:
meals_2021 = meals_dataset[meals_dataset['year'] == 2021]

In [None]:
data_test.columns

In [None]:
import math

# Meal-count columns scaled by divide_meals_by_5 when no explicit list is given.
_MEAL_COUNT_COLUMNS = (
    'Number of Meals Received / Prepared',
    'Meals available from previous day',
    'Total number of first meals ',
    'Total number second meals',
    'total program adult meals',
    'total non-program adult meals',
    'total number of meals served',
    'Total damaged/incomplete/other non-reimbursable meals',
    'Total leftover meals',
    'Number of additional children requesting meals after all available meals were served:',
)


def divide_meals_by_5(counts, columns=None, divisor=5):
    """Divide meal-count columns of ``counts`` in place, rounding up.

    The earlier implementation computed ``math.ceil(x // 5)``: the floor
    division had already discarded the remainder, so the ceiling never took
    effect, and a NaN cell raised ``ValueError``. Values are now rounded up
    to the next whole meal and missing values stay missing.

    Args:
        counts: DataFrame whose columns are modified in place.
        columns: Column names to scale; defaults to the ten meal-count
            columns this function has always handled.
        divisor: Number to divide by; defaults to 5.

    Returns:
        None. ``counts`` is mutated.

    Raises:
        KeyError: If a requested column is not present in ``counts``.
    """
    if columns is None:
        columns = _MEAL_COUNT_COLUMNS

    for column in columns:
        # -(-x // d) is ceiling division; it is vectorised and leaves NaN as NaN.
        rounded_up = -(-counts[column] // divisor)
        # Keep whole-number counts as integers when nothing is missing.
        if not rounded_up.isna().any():
            rounded_up = rounded_up.astype('int64')
        counts[column] = rounded_up

divide_meals_by_5(data_test)


In [None]:
data_test.head()

In [None]:
data_test = data_test[['Number of Meals Received / Prepared',
       'Meals available from previous day', 'Total number of first meals ',
       'Total number second meals', 'total program adult meals',
       'total non-program adult meals', 'total number of meals served',
       'Total damaged/incomplete/other non-reimbursable meals',
       'Total leftover meals',
       'Number of additional children requesting meals after all available meals were served:']]

In [None]:
meals_dataset.head()

In [None]:
df3 = pd.concat([meals_dataset, data_test], ignore_index=True)

In [None]:
meals_dataset.columns

In [None]:
meals_dataset['available_meals'] = meals_dataset['Number of Meals Received / Prepared'] + meals_dataset['Meals available from previous day']

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Total served meals per year as a filled line chart.
# NOTE(review): this cell reads 'Year' and 'total number of meals served'
# from meals_dataset, while other cells read 'year' / 'served_meals' from it
# — confirm which schema meals_dataset has at this point.
# The column is selected before summing so that text columns are not aggregated.
yearly_meals = meals_dataset.groupby('Year')[['total number of meals served']].sum().reset_index()
# Apply the grid style BEFORE creating the figure so it takes effect on it.
# seaborn's own setter is used because matplotlib's 'seaborn-whitegrid'
# style name has been removed from recent matplotlib releases.
sns.set_style('whitegrid')
# Set the figure size
plt.figure(figsize=(10, 6))
# Define a color palette
colors = ['#6922de', '#ad98cf']
# Plot the line for served meals
plt.plot(yearly_meals['Year'], yearly_meals['total number of meals served'], marker='o', markersize=8, label='Served Meals', color=colors[0], linewidth=2)
# Shade the area under the line
plt.fill_between(yearly_meals['Year'], yearly_meals['total number of meals served'], alpha=0.3, color=colors[1])
# Add a reference line at zero
plt.axhline(0, color='gray', linestyle='--')
# Set the chart title and labels with increased font sizes
plt.title('Served Meals Over Years', fontsize=18)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Number of Meals', fontsize=12)
# Display a legend with proper font size
plt.legend(fontsize=12)
# Rotate the x-axis tick labels for better readability
plt.xticks(rotation=45, fontsize=10)
# Adjust the layout to avoid cutting off labels or tick marks
plt.tight_layout()
# Show the plot. (A stray plt.figure(figsize=(1, 45)) call used to sit here
# and opened a second, empty figure just before show().)
plt.show()

In [None]:
meals_dataset.columns

In [None]:
yearly_meals.head()

In [None]:
dataset[dataset['year'] == 2020]

In [None]:
yearly_dataset = dataset.groupby('year')

In [None]:
meals_dataset.columns

In [None]:
dataset.groupby('year')['ordered_meals'].sum()

In [None]:
dataset.groupby('year')['served_meals'].sum()

In [None]:
# Percentage of ordered meals that were not served, per year.
_yearly_totals = dataset.groupby('year')[['ordered_meals', 'served_meals']].sum()
_ordered_per_year = _yearly_totals['ordered_meals']
_served_per_year = _yearly_totals['served_meals']
site_meals_served = (_ordered_per_year - _served_per_year) / _ordered_per_year * 100

In [None]:
site_meals_served.plot(kind='bar', color='#6922de')
plt.title('Percentage of meals wasted per year')
plt.xlabel('Year')
plt.ylabel('Percentage')
plt.show()

In [None]:
site_meals_served = dataset.groupby('year')['Percentage of Wasted'].mean().reset_index()
plt.figure(figsize=(8,8))
sns.barplot(data=site_meals_served, x='year', y='Percentage of Wasted', label='Wasted', color='#6922de')
plt.xlabel('Year')
plt.ylabel('Percentage of Wasted Meals')
plt.title('Percentage of Wasted Meals per Year')