In [1]:
# Import Statements
import pandas as pd
import numpy as np
import difflib

### Brewery Data

In [2]:
# Load Data: brewery directory CSV, read from the project-relative Data folder.
# NOTE(review): the .info() output below reports latitude/longitude as object
# rather than float -- check the file for non-numeric entries before using them.
breweries = pd.read_csv('Data/wa_breweries.csv')

In [3]:
# View Data: preview the first five rows of the brewery directory
breweries.head(5)

Unnamed: 0,brewery,satellite_location,brewery_type,address,city,zipcode,county,latitude,longitude,year_established,guild_member,production_data_2017,production_data_2022,closed_since_2022,untappd_profile_link
0,192 Brewing Company,Mount Vernon,taproom,1405 South 2nd Street,Mount Vernon,98273,Skagit,48.41177416,-122.3379704,2010,1,1,1,1,https://untappd.com/192BrewingCo
1,192 Brewing Company,,brewpub,7324 NE 175th Street Ste F,Kenmore,98028,King,47.7566771,-122.2425016,2010,1,1,1,0,https://untappd.com/192BrewingCo
2,20 Corners Brewing Company,,brewpub,14148 NE 190th St - Suite A,Woodinville,98072,King,47.76506654,-122.1510149,2016,0,1,1,0,https://untappd.com/20CornersBrewingCompany
3,210 Brewing Company,,taproom,3438 Stoluckquamish Lane,Arlington,98223,Snohomish,48.2135319,-122.1848541,2015,1,1,0,0,https://untappd.com/210Brewing
4,23rd Ave Brewery,,micro,2313 S Jackson St,Seattle,98144,King,47.59927,-122.3018,2018,0,0,1,0,https://untappd.com/23rdAveBrewery


In [4]:
# Data Info: column dtypes and non-null counts for the brewery directory
breweries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 453 entries, 0 to 452
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   brewery               453 non-null    object
 1   satellite_location    65 non-null     object
 2   brewery_type          453 non-null    object
 3   address               453 non-null    object
 4   city                  453 non-null    object
 5   zipcode               453 non-null    int64 
 6   county                453 non-null    object
 7   latitude              453 non-null    object
 8   longitude             453 non-null    object
 9   year_established      453 non-null    int64 
 10  guild_member          453 non-null    int64 
 11  production_data_2017  453 non-null    int64 
 12  production_data_2022  453 non-null    int64 
 13  closed_since_2022     453 non-null    int64 
 14  untappd_profile_link  453 non-null    object
dtypes: int64(6), object(9)
memory usage: 53.

### Production Data

In [5]:
# Method for cleaning production data
def clean_prod_df(df, brews_to_remove=None):
    """Standardize a raw monthly production table.

    Renames the ``Brewery`` column to ``brewery`` and the month-number
    columns ``'1'`` .. ``'12'`` to month names, then keeps only those
    thirteen columns, in calendar order.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw production data with a ``Brewery`` column and columns named
        ``'1'`` through ``'12'``.
    brews_to_remove : iterable of str, optional
        Brewery names (spelled as they appear in ``df``) whose rows should
        be dropped, e.g. ``['ANHEUSER-BUSCH', 'MILLERCOORS']``. The default
        ``None`` keeps every row.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If any of the expected columns is missing after renaming.
    """
    month_names = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December']

    # rename columns: 'Brewery' -> 'brewery', '1'..'12' -> month names
    renames = {'Brewery': 'brewery'}
    renames.update({str(number): month
                    for number, month in enumerate(month_names, start=1)})
    df = df.rename(columns=renames)

    # reorder columns (also drops anything that is not brewery/month data)
    df = df[['brewery'] + month_names]

    # remove certain breweries, only when the caller asks for it
    if brews_to_remove is not None:
        df = df[~df['brewery'].isin(list(brews_to_remove))]

    return df

In [6]:
# Method for consistent brewery names
def update_names(df):
    """Normalize brewery name spelling so names compare consistently.

    Applies, in order, to the ``brewery`` column:

    1. remove every period (literal match, not a regex wildcard);
    2. expand a trailing standalone word ``CO`` to ``COMPANY`` -- names
       that merely end in the letters "CO" (e.g. ``TACO``) are left alone;
    3. replace ``&`` with ``AND``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``brewery`` column (upper-case names expected,
        since only an upper-case ``CO`` is expanded).

    Returns
    -------
    pandas.DataFrame
        A new frame with the normalized ``brewery`` column; ``df`` itself
        is not modified, so this is safe to call on a slice of another frame.
    """
    normalized = (
        df['brewery']
        # regex=False: '.' must mean a literal period on every pandas version
        .str.replace('.', '', regex=False)
        # word boundary so only a whole trailing word 'CO' is expanded
        .str.replace(r'\bCO$', 'COMPANY', regex=True)
        # replace '&' with 'AND'
        .str.replace('&', 'AND', regex=False)
    )
    return df.assign(brewery=normalized)

In [7]:
# Method for getting summed annual production
def get_annual(df):
    """Add an ``annual_production`` column holding each row's summed production.

    Sums every column after the first (the first is taken to be the
    brewery name) across the row; missing values are skipped by the sum.
    An existing ``annual_production`` column is excluded from the sum and
    replaced, so calling this twice gives the same totals instead of
    doubling them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column is an identifier and whose remaining
        columns are the values to total.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``annual_production`` added (or refreshed);
        ``df`` itself is not modified.
    """
    value_cols = df.iloc[:, 1:].drop(columns='annual_production', errors='ignore')
    return df.assign(annual_production=value_cols.sum(axis=1))

#### 2022 Production Data

In [8]:
# Load Data: 2022 monthly production CSV, read from the project-relative Data folder
production_2022 = pd.read_csv('Data/Production/brewery_monthly_production_2022.csv')

In [9]:
# Clean Data: rename/reorder the month columns, then normalize brewery name spelling
production_2022 = clean_prod_df(production_2022)
production_2022 = update_names(production_2022)

In [10]:
# Sum Months: add the annual_production column (row-wise total of the month columns)
production_2022 = get_annual(production_2022)

In [11]:
# View Data: preview the first five rows of the cleaned 2022 production table
production_2022.head(5)

Unnamed: 0,brewery,January,February,March,April,May,June,July,August,September,October,November,December,annual_production
0,ANHEUSER-BUSCH,82718.39,86605.2,98521.98,84711.58,99589.33,92468.25,109179.9,121777.66,92403.54,82934.26,70992.27,69647.65,1091550.01
1,MILLERCOORS,72612.34,62743.02,77260.51,81442.02,93336.98,86396.11,84828.92,92874.04,88021.5,64324.32,73882.35,52628.0,930350.11
2,MARK ANTHONY BRANDS,22059.64,24801.43,39612.35,29996.0,42147.28,35631.05,23886.75,24572.94,31030.51,33271.36,19960.46,9499.35,336469.12
3,PABST BREWING,14641.35,12214.09,12939.65,9765.34,31595.86,19194.7,18157.79,17224.35,18875.1,15312.95,12107.57,13345.76,195374.51
4,GEORGETOWN BREWING COMPANY,6409.97,6503.01,8538.23,8672.18,8855.93,9571.99,8722.62,10193.51,9227.2,8985.2,9315.17,7380.14,102375.15


In [12]:
# Data Info: column dtypes and non-null counts for the 2022 production table
production_2022.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 865 entries, 0 to 864
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   brewery            865 non-null    object 
 1   January            510 non-null    float64
 2   February           552 non-null    float64
 3   March              536 non-null    float64
 4   April              531 non-null    float64
 5   May                536 non-null    float64
 6   June               539 non-null    float64
 7   July               543 non-null    float64
 8   August             543 non-null    float64
 9   September          562 non-null    float64
 10  October            528 non-null    float64
 11  November           574 non-null    float64
 12  December           532 non-null    float64
 13  annual_production  865 non-null    float64
dtypes: float64(13), object(1)
memory usage: 94.7+ KB


### Name Match Table

In [13]:
# function for matching names
def match_names(prod_data, brewery_data, cutoff=0.90):
    """Build a lookup table from production-data names to brewery-directory names.

    Both sets of names are lower-cased and stripped of common suffix words
    before comparison; each production name is then paired with its single
    closest directory name according to ``difflib.get_close_matches``.

    Parameters
    ----------
    prod_data : pandas.DataFrame
        Production table with a ``brewery`` column.
    brewery_data : pandas.DataFrame
        Brewery directory with a ``brewery`` column.
    cutoff : float, optional
        Minimum similarity in ``[0, 1]`` for a match to count (default 0.90).

    Returns
    -------
    pandas.DataFrame
        One row per unique production name, with columns
        ``brewery_production`` (the name as it appears in ``prod_data``) and
        ``brewery_name`` (the matched directory name, or missing when nothing
        reaches ``cutoff``). Non-string production names (e.g. NaN) are kept
        and reported as unmatched.
    """
    # brewery names to test from: normalized form -> first original spelling.
    # Built once so each lookup below is a dict access, not a list scan.
    canonical = {}
    for original in brewery_data['brewery'].unique():
        if not isinstance(original, str):
            continue  # a missing directory name cannot be a match target
        key = original.lower().replace(' company', '').replace(' brewery', ' brewing')
        canonical.setdefault(key, original)
    candidates = list(canonical)

    # match to production data names
    prod_name_match = []
    for name in prod_data['brewery'].unique():
        best_match = None
        if isinstance(name, str):
            name_test = (name.lower()
                         .replace(' company', '')
                         .replace(' inc', '')
                         .replace(' brewery', ' brewing')
                         .replace(' -', '')
                         .replace(' taproom', ''))
            # results come back best-first, so n=1 is the closest candidate
            matches = difflib.get_close_matches(name_test, candidates, n=1, cutoff=cutoff)
            if matches:
                best_match = canonical[matches[0]]
        prod_name_match.append((name, best_match))

    return pd.DataFrame(prod_name_match, columns=['brewery_production', 'brewery_name'])

In [14]:
# 2022 production matching: one row per unique production name with its
# closest brewery-directory name (missing where nothing is similar enough)
prod_name_match_22 = match_names(production_2022, breweries)

### Merge Data on Match Table

In [15]:
# Column order for the merged table: production name, matched directory
# name, the twelve months, then the annual total
cols = (
    ['brewery', 'brewery_name']
    + ['January', 'February', 'March', 'April', 'May', 'June',
       'July', 'August', 'September', 'October', 'November', 'December']
    + ['annual_production']
)

In [16]:
# 2022 data: attach each production row's matched directory name, then
# keep only the columns listed in `cols`
breweries_2022 = pd.merge(
    production_2022,
    prod_name_match_22,
    how='inner',
    left_on='brewery',
    right_on='brewery_production',
)[cols]

In [19]:
# Top 50 producers by summed 2022 production, largest first
breweries_2022.sort_values(by='annual_production', ascending=False).head(50)

Unnamed: 0,brewery,brewery_name,January,February,March,April,May,June,July,August,September,October,November,December,annual_production
0,ANHEUSER-BUSCH,,82718.39,86605.2,98521.98,84711.58,99589.33,92468.25,109179.9,121777.66,92403.54,82934.26,70992.27,69647.65,1091550.01
1,MILLERCOORS,,72612.34,62743.02,77260.51,81442.02,93336.98,86396.11,84828.92,92874.04,88021.5,64324.32,73882.35,52628.0,930350.11
2,MARK ANTHONY BRANDS,,22059.64,24801.43,39612.35,29996.0,42147.28,35631.05,23886.75,24572.94,31030.51,33271.36,19960.46,9499.35,336469.12
3,PABST BREWING,,14641.35,12214.09,12939.65,9765.34,31595.86,19194.7,18157.79,17224.35,18875.1,15312.95,12107.57,13345.76,195374.51
4,GEORGETOWN BREWING COMPANY,Georgetown Brewing Company,6409.97,6503.01,8538.23,8672.18,8855.93,9571.99,8722.62,10193.51,9227.2,8985.2,9315.17,7380.14,102375.15
5,THE BOSTON BEER COMPANY,,3056.21,2724.61,3761.62,3357.87,4855.17,5803.01,7263.81,7718.43,6886.15,3802.92,2762.91,3090.52,55083.23
6,NEW BELGIUM,,3567.93,2112.34,3189.83,2980.1,2715.33,3725.36,2884.57,4440.04,4239.42,3538.82,3122.4,3416.08,39932.22
7,DIAGEO BEER COMPANY USA,,4204.22,3413.72,2931.3,2733.89,3776.29,3166.45,3529.43,3981.16,3967.1,2732.01,687.68,4138.71,39261.96
8,DESCHUTES BREWERY,,2784.95,2392.97,3029.71,3785.88,2991.46,1271.15,3561.34,4077.64,3464.07,3915.37,1763.42,2671.91,35709.87
9,HEINEKEN USA INCORPORATED,,6687.8,,8402.11,3097.78,,7593.14,6298.72,,,,,,32079.55


In [20]:
# Production names that found no match in the brewery directory
unmatched_mask = breweries_2022['brewery_name'].isna()
breweries_2022.loc[unmatched_mask, 'brewery'].unique()

array(['ANHEUSER-BUSCH', 'MILLERCOORS', 'MARK ANTHONY BRANDS',
       'PABST BREWING', 'THE BOSTON BEER COMPANY', 'NEW BELGIUM',
       'DIAGEO BEER COMPANY USA', 'DESCHUTES BREWERY',
       'HEINEKEN USA INCORPORATED', 'MPL BRANDS NV INC',
       'MCKENZIE RIVER BREWING', 'SIERRA NEVADA BREWING',
       'SCHOONER EXACT BREWING', 'PHUSION PROJECTS',
       'ALASKAN BREWING COMPANY', 'NINKASI BREWERY',
       'THE ODOM CORPORATION', 'PFRIEM FAMILY BREWERS',
       'THE LAGUNITAS BREWING', 'HIGH FALLS OPERATING COMPANY',
       'FORT GEORGE BREWERY', 'PELICAN PUB AND BREWERY', 'WORTHY BREWING',
       'FOUNDERS BREWING COMPANY', 'ODOM COA LICENSING LLC',
       'GORDON BIERSCH BREWING', 'BAYFRONT BREWERY',
       'STONE BREWING COMPANY', 'FULL SAIL BREWING COMPANY',
       'ECLIPTIC BREWING', 'PIKE BREWING COMPANY AND',
       'BREAKSIDE BREWERY', 'C STEIN', 'ASSOCIATED BREWING COMPANY',
       'FIRESTONE WALKER BREWING', 'MATCHLESS',
       'UNITED BRANDS COMPANY INC', 'BUOY BEER COMPAN

In [None]:
    brews_to_remove = ['ANHEUSER-BUSCH','MILLERCOORS','MARK ANTHONY BRANDS','PABST BREWING',
                       'THE BOSTON BEER COMPANY','NEW BELGIUM','SIERRA NEVADA BREWING','PHUSION PROJECTS',
                      'ALASKAN BREWING COMPANY','THE ODOM CORPORATION','HIGH FALLS OPERATING COMPANY']
    
# oregon breweries
#'NINKASI BREWERY','MCKENZIE RIVER BREWING','PFRIEM FAMILY BREWERS','FORT GEORGE BREWERY','PELICAN PUB AND BREWERY'
# 'WORTHY BREWING'
    
# ca
#'THE LAGUNITAS BREWING'

# mi
#'FOUNDERS BREWING COMPANY'
    
# CHECK ON
# , SCHOONER EXACT BREWING (san juan seltzer?)