In [37]:
# Combine and clean data from Interbay Giving Garden
import datetime as dt
from dateutil.parser import parse
import numpy as np
from os import listdir
import pandas as pd

In [39]:
dpath = r'./Clean Data/'
files = listdir('Clean Data')

all_data = pd.DataFrame()

for fname in files :
    fpath = dpath + fname
    try :
        if 'individual' in fname :
            df = pd.read_csv(fpath)
            all_data = pd.concat([all_data, df])
        elif 'giving' in fname :
            df = pd.read_csv(fpath)
            df.insert(2, 'gardener', 'Giving Garden', allow_duplicates = False)
            all_data = pd.concat([all_data, df])
        else :
            print('Skipping', fname)
            break
        print('Loaded', fname)
    except :
        print('Error loading', fname)
        break

all_data.reset_index(drop = True, inplace = True)
all_data['date'] = [dt.date.isoformat(parse(d)) for d in all_data['date']]
all_data['produce'] = [p.lower() for p in all_data['produce']]
all_data['week'] = [dt.date.isocalendar(parse(d))[1] for d in all_data['date']]
all_data['year'] = [dt.date.isocalendar(parse(d))[0] for d in all_data['date']]

Loaded giving_garden_2020.csv
Loaded giving_garden_2013.csv
Loaded giving_garden_2010.csv
Loaded giving_garden_2017.csv
Loaded giving_garden_2012.csv
Loaded individuals_2012.csv
Loaded giving_garden_2018.csv
Loaded individuals_2011.csv
Loaded individuals_2020.csv
Loaded individuals_2013.csv
Loaded giving_garden_2011.csv
Loaded giving_garden_2014.csv
Loaded individuals_2014.csv
Loaded individuals_2010.csv
Loaded giving_garden_2009.csv


In [45]:
# Get list of unique produce names, export to temporary file for offline handling
# Manually cleaned list to create first mapping table
p = sorted(all_data['produce'].unique())
np.savetxt('produce_list.csv', p, delimiter = ', ', fmt ='% s')

In [46]:
# Pull in mapping table created in cell above
mapping = pd.read_csv('produce_mapping.csv', delimiter = ',')

In [47]:
# Create combined produce mapping table
produce = pd.DataFrame(data = {'produce' : p})
produce = produce.join(mapping)
produce.head()

Unnamed: 0,produce,prod_name,generic_name,prod_group
0,apples,apples,apples,fruit
1,"apples, green",green apples,apples,fruit
2,armenian cucumbers,armenian cucumbers,cucumbers,cucumbers
3,artichokes,artichokes,artichokes,artichokes
4,arugula,arugula,arugula,herbs


In [62]:
complete = pd.merge(left = all_data, 
                  right = produce, 
                  on = 'produce')
complete.to_csv('Giving Garden Combined.csv')

In [48]:
rollup = pd.merge(left = all_data[['produce', 'year', 'weight']], 
                  right = produce, 
                  on = 'produce')

Unnamed: 0,produce,year,weight,prod_name,generic_name,prod_group
0,"kale, red russian",2020,9.0,red russian kale,kale,greens
1,"kale, red russian",2020,5.0,red russian kale,kale,greens
2,"kale, red russian",2020,3.25,red russian kale,kale,greens
3,"kale, red russian",2020,9.0,red russian kale,kale,greens
4,"kale, red russian",2020,6.3,red russian kale,kale,greens


In [54]:
rollup[['year', 'prod_group', 'weight']][rollup['year'] == 2020].groupby(['year', 'prod_group']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,weight
year,prod_group,Unnamed: 2_level_1
2020,beets,61.55
2020,broccoli,10.0
2020,carrots,4.0
2020,chard,141.3
2020,cucumbers,195.025
2020,fruit,93.0
2020,green beans,93.1
2020,green onions,1.75
2020,greens,262.35
2020,herbs,35.55


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(week_totals)