In [1]:
# import statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

# settings
# Passing None removes pandas' display limits, so any frame shown in this
# notebook is rendered with all of its rows and all of its columns.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [2]:
# load data
# Read the melted 2022 production table.
prod22 = pd.read_csv('Data/brewery_production_2022_melted.csv')

# Keep only the monthly rows: the 'total_annual' rows shipped with the file are
# discarded here, and the remaining rows are renumbered from zero.
monthly_rows = prod22['month'].ne('total_annual')
prod22 = prod22.loc[monthly_rows].reset_index(drop=True)

In [3]:
# get breweries that opened in 2022
opened_2022 = prod22['year_established'] == 2022
brew22 = prod22.loc[opened_2022, 'brewery'].drop_duplicates().tolist()

# month of 2022 breweries (brewery name -> calendar month number it opened in)
opening_month = {
    'Bizarre Brewing': 10,
    'Byron Street Brewery': 7,
    'Cursed Brewing': 10,
    'Hildegard Ferments and Botanicals': 10,
    'No Drought Brewing Company': 1,
    'Peace of Mind Brewing': 8,
    'Uprise Brewing Company': 8,
    'Vice Beer': 5,
    'Wild Oak Project': 2,
    '45 Degree Brewhouse': 6,
    'High Bluff Brewing': 8,
    'Spokanite Brewing': 6,
    'Top Knot Brewing': 1,
    'Darach Brewing Company': 12,
    'Common Language Brewing Company': 4,
    'Griffin Brewing Pilot House': 3,
    'New Maitland Brewing Llc': 7,
}

# input zeros for months prior to opening
# calendar month number -> month name, as the names appear in prod22['month']
months = dict(enumerate(
    ['January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December'],
    start=1))

# Per row: the calendar number of its month, and the opening month of its
# brewery (NaN for breweries that are not listed in opening_month).
month_number = prod22['month'].map({name: number for number, name in months.items()})
first_month = prod22['brewery'].map(opening_month)

# A missing value strictly before the opening month means "not open yet", so it
# becomes 0. Comparisons against NaN are False, which leaves every brewery
# outside opening_month untouched.
pre_opening_gap = month_number.lt(first_month) & prod22['production'].isna()
prod22['production'] = np.where(pre_opening_gap, 0, prod22['production'])

In [4]:
# check for consecutive nulls
# 1 where a month has no production value, 0 where it has one.
prod22['is_null'] = prod22['production'].isnull().astype(int)

# Every non-null row bumps this counter, so a non-null row and the nulls that
# immediately follow it share one id. Grouping on (brewery, id) and taking a
# running sum of is_null therefore counts how many nulls in a row have been
# seen so far, restarting at each non-null row and at each new brewery.
# NOTE(review): this counts in row order, so it presumably expects each
# brewery's rows to be in calendar order - confirm against the input file.
run_id = prod22['is_null'].eq(0).cumsum()
prod22['consec_nulls'] = prod22.groupby(['brewery', run_id])['is_null'].cumsum()

# Longest run of nulls per brewery, broadcast to every row of that brewery.
# The string 'max' is used instead of the builtin max: pandas 2.1+ emits a
# FutureWarning for builtin callables passed to transform().
prod22['max_consec_nulls'] = prod22.groupby('brewery')['consec_nulls'].transform('max')

In [5]:
# interpolate for breweries with 3 or fewer consecutive nulls
# Candidate fill values are computed for every brewery; which rows actually
# use them is decided below. limit_direction='both' means gaps at the start or
# end of a brewery's series are filled as well as interior ones.
# transform() returns a Series on prod22's own index, so it is assigned
# directly; renumbering it first would only line up while prod22 happens to
# carry a default 0..n-1 index.
prod22['linear_interpolation'] = prod22.groupby('brewery')['production'].transform(
    lambda s: s.interpolate(method='linear', limit_direction='both'))

short_gaps = prod22['max_consec_nulls'] <= 3
was_missing = prod22['production'].isnull()

# Flag the rows that get filled. This has to happen before 'production' is
# overwritten, because afterwards the nulls are gone.
prod22['interpolated'] = np.where(short_gaps & was_missing, 1, 0)

# Breweries whose longest gap is 1-3 months take the interpolated series;
# everyone else keeps their original values (including any nulls).
prod22['production'] = np.round(
    np.where(short_gaps & (prod22['max_consec_nulls'] > 0),
             prod22['linear_interpolation'],
             prod22['production']), 2)

In [6]:
# calculate annual totals
# Descriptive columns that are carried onto each brewery's annual row.
cols = ['brewery','brewery_type','address', 'city','zipcode','county',
        'latitude','longitude','year_established','guild_member',
        'closed_since_2022','untappd_profile_link']

# Breweries whose longest gap was at most 3 months get a summed annual total.
# dropna=False matters: by default groupby discards every group that has a
# missing value in any key column, which would silently leave a brewery with,
# say, no untappd_profile_link without a 'total_annual' row.
annuals1 = (
    prod22[prod22['max_consec_nulls'] <= 3]
    .groupby(cols, dropna=False)['production']
    .sum()
    .reset_index()
)

# Breweries with a longer gap still get an annual row, but with no total.
annuals2 = prod22.loc[prod22['max_consec_nulls'] > 3, cols].drop_duplicates().reset_index(drop=True)
annuals2['production'] = np.nan  # np.NaN was removed in NumPy 2.0

annuals = pd.concat([annuals1, annuals2]).reset_index(drop=True)
annuals['month'] = 'total_annual'

# merge back into prod22
# Columns that annuals does not have (the helper columns and 'interpolated')
# come out as NaN on the appended rows.
prod22 = pd.concat([prod22, annuals])

# set order
# An ordered categorical makes the sort follow the calendar, with the annual
# row last, instead of sorting month names alphabetically.
# Note: this rebinds `months`, which earlier in the notebook is a
# number -> name dict, to a plain list.
months = ['January','February','March','April','May','June','July',
        'August','September','October','November','December','total_annual']
prod22['month'] = pd.Categorical(prod22['month'], months)
prod22 = prod22.sort_values(by=['brewery', 'month']).reset_index(drop=True)

In [7]:
# clean up interpolated
# Annual rows are never interpolated values themselves, so their flag is forced
# to 0; with no missing flags left, the column can be stored as integers.
is_annual_row = prod22['month'] == 'total_annual'
prod22['interpolated'] = prod22['interpolated'].mask(is_annual_row, 0).astype(int)

# drop unnecessary columns (helpers that were only needed for the gap analysis)
cols_to_drop = ['is_null','consec_nulls','max_consec_nulls','linear_interpolation']
prod22 = prod22.drop(columns=cols_to_drop)

In [8]:
# Preview the first rows of the finished frame before it is written out.
prod22.head()

Unnamed: 0,brewery,brewery_type,address,city,zipcode,county,latitude,longitude,year_established,guild_member,closed_since_2022,untappd_profile_link,month,production,interpolated
0,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,January,,0
1,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,February,0.5,0
2,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,March,,0
3,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,April,,0
4,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,May,,0


In [9]:
# save data
# index=False leaves the row index out of the file, so the CSV contains only
# the data columns.
prod22.to_csv('Data/brewery_production_2022_melted_interpolated.csv', index=False)

In [10]:
# make example viz for report and presentation