### Exploratory data analysis
* ingest data & format dates
* quick pivot table of label language types
* visualize z-scaled timedeltas across language types

In [None]:
# package and data import
import numpy as np
import pandas as pd
from datetime import datetime
 
# Load the Brooklyn food-waste CSV; its first column is used as the row index.
df = pd.read_csv(
    '/kaggle/input/brooklyn-food-waste/brooklyn.csv',
    index_col=0,
)

In [None]:
# Convert both date columns to datetimes so they can be subtracted later.
# Values that do not match YYYY-MM-DD are coerced to NaT instead of raising.
df[['date_collected', 'label_date']] = df[['date_collected', 'label_date']].apply(
    lambda column: pd.to_datetime(column, errors='coerce', format='%Y-%m-%d')
)

In [None]:
# Time delta between the date printed on the label and the date the item was
# trashed. A positive value means the item was trashed before its label date.
df['label_collect_difference'] = df['label_date'] - df['date_collected']

# Rows where either date is missing produce NaT; treat those as a zero-day delta.
# The result is assigned back to the column on purpose: calling
# fillna(..., inplace=True) on `df.label_collect_difference` is chained
# assignment, which warns on recent pandas and does not update `df` at all
# once Copy-on-Write is enabled.
df['label_collect_difference'] = df['label_collect_difference'].fillna(
    pd.Timedelta('0 days')
)

# Same delta expressed as a (float) number of days.
df['label_collect_int'] = df['label_collect_difference'] / pd.Timedelta(days=1)
df.head()

In [None]:
# Pivot table per label language: number of items (`counts`) and a
# time-delta summary (`mean_time_diff`).
# NOTE(review): `sort_values().diff().mean()` is the mean gap between
# consecutive sorted deltas, i.e. (max - min) / (n - 1), not the arithmetic
# mean of the deltas. Confirm this is the intended statistic; a plain 'mean'
# may be what the heading describes.
labels = df.groupby('label_language')['label_collect_difference'].agg(
    counts='count',
    mean_time_diff=lambda group: group.sort_values().diff().mean(),
)

# Groups with a single item have no consecutive differences and yield NaT;
# replace those with a zero-day delta. Assign the result back instead of using
# a chained `inplace=True` fillna, which warns on recent pandas and is a no-op
# under Copy-on-Write.
labels['mean_time_diff'] = labels['mean_time_diff'].fillna(pd.Timedelta('0 days'))
labels

In [None]:
# Prepare z-scaled time delta data for visualization.
# Make sure no NaT remains before converting to numbers. The result is
# assigned back to the column because a chained `inplace=True` fillna warns on
# recent pandas and does not modify `labels` under Copy-on-Write.
labels['mean_time_diff'] = labels['mean_time_diff'].fillna(pd.Timedelta('0 days'))

# Express the timedelta as a float number of days ...
labels['mean_time_delta'] = labels['mean_time_diff'] / pd.Timedelta(days=1)

# ... then standardize across label languages: subtract the mean and divide by
# the (sample) standard deviation of the per-language values.
labels['mean_time_delta'] = (
    labels['mean_time_delta'] - labels['mean_time_delta'].mean()
) / labels['mean_time_delta'].std()
labels.head()

In [None]:
# Bar chart of the z-scaled values in `mean_time_delta`, one bar per row of `labels`.
ax = labels['mean_time_delta'].plot(kind='bar', figsize=(10, 7))
ax.set_ylabel('z-scaled mean timedelta (days)');