In [1]:
# import statements
import pandas as pd

# settings
pd.set_option('display.max_rows', None)

In [2]:
# load data
breweries = pd.read_csv('Data/wa_breweries.csv')
prod22 = pd.read_csv('Data/Production/brewery_monthly_production_2022.csv')

In [3]:
prod22.head()

Unnamed: 0,BreweryInWAList,Brewery,4,8,12,2,1,7,6,3,5,11,10,9,total
0,192 brewing company,192 BREWING COMPANY,,0.33,,0.5,,,,,,,,,0.83
1,20 corners brewing company,20 CORNERS BREWING,72.33,209.93,106.5,104.64,,,,147.98,,75.86,,132.3,849.54
2,23rd ave brewery,23RD AVE BREWERY,,,,,,5.0,4.78,,2.0,,14.0,1.0,26.78
3,4 stitch brewing company,4 STITCH BREWING CO.,,3.4,,2.0,,1.46,,1.25,4.72,,,,12.83
4,45 degree brewhouse,45 DEGREE BREWHOUSE,,,31.13,,,,,,,,,7.09,38.22


In [4]:
prod22.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   BreweryInWAList  388 non-null    object 
 1   Brewery          388 non-null    object 
 2   4                292 non-null    float64
 3   8                299 non-null    float64
 4   12               303 non-null    float64
 5   2                311 non-null    float64
 6   1                288 non-null    float64
 7   7                317 non-null    float64
 8   6                300 non-null    float64
 9   3                299 non-null    float64
 10  5                297 non-null    float64
 11  11               308 non-null    float64
 12  10               293 non-null    float64
 13  9                313 non-null    float64
 14  total            388 non-null    float64
dtypes: float64(13), object(2)
memory usage: 45.6+ KB


In [5]:
breweries.head()

Unnamed: 0,brewery,satellite_location,flagship,brewery_type,address,city,zipcode,county,latitude,longitude,year_established,guild_member,production_data_2017,production_data_2022,closed_since_2022,untappd_profile_link
0,192 Brewing Company,Mount Vernon,0,taproom,1405 South 2nd Street,Mount Vernon,98273,Skagit,48.411774,-122.33797,2010,1,1,1,1,https://untappd.com/192BrewingCo
1,192 Brewing Company,,1,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,1,1,0,https://untappd.com/192BrewingCo
2,20 Corners Brewing Company,,1,brewpub,14148 NE 190th St - Suite A,Woodinville,98072,King,47.765067,-122.151015,2016,0,1,1,0,https://untappd.com/20CornersBrewingCompany
3,210 Brewing Company,,1,taproom,3438 Stoluckquamish Lane,Arlington,98223,Snohomish,48.213532,-122.184854,2015,1,1,0,0,https://untappd.com/210Brewing
4,23rd Ave Brewery,,1,micro,2313 S Jackson St,Seattle,98144,King,47.59927,-122.3018,2018,0,0,1,0,https://untappd.com/23rdAveBrewery


In [6]:
# clean data

# rename production columns
prod22 = prod22.rename(columns={
    'BreweryInWAList':'brewery_wa',
    'Brewery':'brewery_prod',
    '1':'January',    '2':'February', '3':'March',     '4':'April',
    '5':'May',        '6':'June',     '7':'July',      '8':'August',
    '9':'September', '10':'October', '11':'November', '12':'December',
    'total':'total_annual'})

# reorder production columns
prod22 = prod22[['brewery_wa','brewery_prod','January','February','March','April','May','June',
         'July','August','September','October','November','December','total_annual']]

# create brewery mapping field
breweries['brewery_map'] = breweries['brewery'].str.lower()

# get primary brewery locations
breweries_primary = breweries[breweries['flagship'] == 1]

In [7]:
prod22.head()

Unnamed: 0,brewery_wa,brewery_prod,January,February,March,April,May,June,July,August,September,October,November,December,total_annual
0,192 brewing company,192 BREWING COMPANY,,0.5,,,,,,0.33,,,,,0.83
1,20 corners brewing company,20 CORNERS BREWING,,104.64,147.98,72.33,,,,209.93,132.3,,75.86,106.5,849.54
2,23rd ave brewery,23RD AVE BREWERY,,,,,2.0,4.78,5.0,,1.0,14.0,,,26.78
3,4 stitch brewing company,4 STITCH BREWING CO.,,2.0,1.25,,4.72,,1.46,3.4,,,,,12.83
4,45 degree brewhouse,45 DEGREE BREWHOUSE,,,,,,,,,7.09,,,31.13,38.22


In [8]:
prod22.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   brewery_wa    388 non-null    object 
 1   brewery_prod  388 non-null    object 
 2   January       288 non-null    float64
 3   February      311 non-null    float64
 4   March         299 non-null    float64
 5   April         292 non-null    float64
 6   May           297 non-null    float64
 7   June          300 non-null    float64
 8   July          317 non-null    float64
 9   August        299 non-null    float64
 10  September     313 non-null    float64
 11  October       293 non-null    float64
 12  November      308 non-null    float64
 13  December      303 non-null    float64
 14  total_annual  388 non-null    float64
dtypes: float64(13), object(2)
memory usage: 45.6+ KB


In [9]:
# melt production data
months = ['January','February','March','April','May','June','July',
        'August','September','October','November','December','total_annual']

prod22_melt = prod22.melt(id_vars=['brewery_wa','brewery_prod'], 
              value_vars=months,
              var_name='month',value_name='production')

# create custom sort
prod22_melt['month'] = pd.Categorical(prod22_melt['month'], months)
prod22_melt = prod22_melt.sort_values(by=['brewery_wa','month']).reset_index()

prod22_melt.head()

Unnamed: 0,index,brewery_wa,brewery_prod,month,production
0,0,192 brewing company,192 BREWING COMPANY,January,
1,388,192 brewing company,192 BREWING COMPANY,February,0.5
2,776,192 brewing company,192 BREWING COMPANY,March,
3,1164,192 brewing company,192 BREWING COMPANY,April,
4,1552,192 brewing company,192 BREWING COMPANY,May,


In [10]:
# merge dataframes
cols1 = ['brewery','brewery_type','address','city','zipcode',
        'county','latitude','longitude','year_established','guild_member',
        'closed_since_2022','untappd_profile_link',
        'January','February','March','April','May', 'June','July','August',
        'September','October','November','December','total_annual']
brewery_production_2022 = breweries_primary.merge(prod22, left_on='brewery_map', right_on='brewery_wa')
brewery_production_2022 = brewery_production_2022[cols1]

cols2 = ['brewery','brewery_type','address','city','zipcode',
        'county','latitude','longitude','year_established','guild_member',
        'closed_since_2022','untappd_profile_link',
        'month','production']
brewery_production_2022_melted = breweries_primary.merge(prod22_melt, left_on='brewery_map', right_on='brewery_wa')
brewery_production_2022_melted = brewery_production_2022_melted[cols2]

In [11]:
brewery_production_2022.head()

Unnamed: 0,brewery,brewery_type,address,city,zipcode,county,latitude,longitude,year_established,guild_member,...,April,May,June,July,August,September,October,November,December,total_annual
0,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,...,,,,,0.33,,,,,0.83
1,20 Corners Brewing Company,brewpub,14148 NE 190th St - Suite A,Woodinville,98072,King,47.765067,-122.151015,2016,0,...,72.33,,,,209.93,132.3,,75.86,106.5,849.54
2,23rd Ave Brewery,micro,2313 S Jackson St,Seattle,98144,King,47.59927,-122.3018,2018,0,...,,2.0,4.78,5.0,,1.0,14.0,,,26.78
3,4 Stitch Brewing Company,brewpub,16709 9th Ave SE,Mill Creek,98012,Snohomish,47.846174,-122.220188,2023,1,...,,4.72,,1.46,3.4,,,,,12.83
4,5 North Brewing Company,taproom,6501 N Cedar Rd,Spokane,99208,Spokane,47.717587,-117.43285,2020,0,...,28.27,24.65,22.47,25.81,23.14,18.49,13.17,16.04,14.83,267.32


In [12]:
brewery_production_2022_melted.head()

Unnamed: 0,brewery,brewery_type,address,city,zipcode,county,latitude,longitude,year_established,guild_member,closed_since_2022,untappd_profile_link,month,production
0,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,January,
1,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,February,0.5
2,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,March,
3,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,April,
4,192 Brewing Company,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.756677,-122.242502,2010,1,0,https://untappd.com/192BrewingCo,May,


In [13]:
# save data
brewery_production_2022.to_csv('Data/brewery_production_2022.csv', index=False)
brewery_production_2022_melted.to_csv('Data/brewery_production_2022_melted.csv', index=False)

In [14]:
# count months with nulls
brewery_production_2022_melted[['brewery','month','production']].groupby(['brewery'])['production'].count()-1

brewery
192 Brewing Company                        2
20 Corners Brewing Company                 7
23rd Ave Brewery                           5
4 Stitch Brewing Company                   5
45 Degree Brewhouse                        2
5 North Brewing Company                   12
5 Rights Brewing Company                   9
54°40' Brewing Company                     8
5th Line Brewing Company                  12
7 Seas Brewing Company                    12
Acorn Brewing                             10
Airways Brewing Company                   12
Ale Spike Camano Island Brewing           12
Anacortes Brewery                         12
Ancient Lakes Brewing Company              9
Another Round Brewing Company             10
Ashtown Brewing Company                   10
Aslan Brewing Company                     12
At Large Brewing                          12
Atomic Ale Brewpub and Eatery              9
Atwood Farm Brewery                        9
Audacity Brewing                          12
Ba

In [18]:
# get months with nulls
brewery_production_2022_melted[['brewery','month','production']].groupby(['month'])['production'].count()

month
January         288
February        311
March           299
April           292
May             297
June            300
July            317
August          299
September       313
October         293
November        308
December        303
total_annual    388
Name: production, dtype: int64