In [23]:
import pandas as pd
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import os
import pickle
import io
from googleapiclient.http import MediaIoBaseDownload

In [33]:
# Scopes for the Google Drive API
SCOPES = ['https://www.googleapis.com/auth/drive']

def authenticate_google_drive():
    creds = None
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'path/to/your/credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds)
    return service

drive_service = authenticate_google_drive()

def download_file(file_id, file_name):
    request = drive_service.files().get_media(fileId=file_id)
    fh = io.FileIO(file_name, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print(f"Download {int(status.progress() * 100)}%.")

In [34]:
# dataset from beer advocate
# downloaded from https://www.kaggle.com/datasets/thedevastator/1-5-million-beer-reviews-from-beer-advocate
file_id = '1039e61fo7FSjobH-u9B-xOT62Abw4GLF'
file_name = 'ba_reviews.csv'
download_file(file_id, file_name)

df_ba = pd.read_csv(file_name)

Download 52%.
Download 100%.


In [35]:
df_ba.head(10)

Unnamed: 0,index,brewery_id,brewery_name,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid
0,0,10325,Vecchio Birraio,1234817823,1.5,2.0,2.5,stcules,Hefeweizen,1.5,1.5,Sausa Weizen,5.0,47986
1,1,10325,Vecchio Birraio,1235915097,3.0,2.5,3.0,stcules,English Strong Ale,3.0,3.0,Red Moon,6.2,48213
2,2,10325,Vecchio Birraio,1235916604,3.0,2.5,3.0,stcules,Foreign / Export Stout,3.0,3.0,Black Horse Black Beer,6.5,48215
3,3,10325,Vecchio Birraio,1234725145,3.0,3.0,3.5,stcules,German Pilsener,2.5,3.0,Sausa Pils,5.0,47969
4,4,1075,Caldera Brewing Company,1293735206,4.0,4.5,4.0,johnmichaelsen,American Double / Imperial IPA,4.0,4.5,Cauldron DIPA,7.7,64883
5,5,1075,Caldera Brewing Company,1325524659,3.0,3.5,3.5,oline73,Herbed / Spiced Beer,3.0,3.5,Caldera Ginger Beer,4.7,52159
6,6,1075,Caldera Brewing Company,1318991115,3.5,3.5,3.5,Reidrover,Herbed / Spiced Beer,4.0,4.0,Caldera Ginger Beer,4.7,52159
7,7,1075,Caldera Brewing Company,1306276018,3.0,2.5,3.5,alpinebryant,Herbed / Spiced Beer,2.0,3.5,Caldera Ginger Beer,4.7,52159
8,8,1075,Caldera Brewing Company,1290454503,4.0,3.0,3.5,LordAdmNelson,Herbed / Spiced Beer,3.5,4.0,Caldera Ginger Beer,4.7,52159
9,9,1075,Caldera Brewing Company,1285632924,4.5,3.5,5.0,augustgarage,Herbed / Spiced Beer,4.0,4.0,Caldera Ginger Beer,4.7,52159


In [21]:
# dataset from openbrewerydb
# downloaded from https://github.com/openbrewerydb/openbrewerydb/blob/master/breweries.csv
file_id = '1lWrSxoD7nBXTiCvQ8wjaolUuY8v4_D37'
file_name = 'breweries.csv'
download_file(file_id, file_name)

df_brewery = pd.read_csv(file_name)

Download 100%.
                                     id                     name brewery_type  \
0  5128df48-79fc-4f0f-8b52-d06be54d0cec         (405) Brewing Co        micro   
1  9c5a66c8-cc13-416f-a5d9-0a769c87d318         (512) Brewing Co        micro   
2  34e8c68b-6146-453f-a4b9-1f6cd99a5ada  1 of Us Brewing Company        micro   
3  ef970757-fe42-416f-931d-722451f1f59c     10 Barrel Brewing Co        large   
4  6d14b220-8926-4521-8d19-b98a2d6ec3db     10 Barrel Brewing Co        large   

               address_1 address_2 address_3            city state_province  \
0         1716 Topeka St       NaN       NaN          Norman       Oklahoma   
1  407 Radam Ln Ste F200       NaN       NaN          Austin          Texas   
2    8100 Washington Ave       NaN       NaN  Mount Pleasant      Wisconsin   
3              1501 E St       NaN       NaN       San Diego     California   
4          62970 18th St       NaN       NaN            Bend         Oregon   

  postal_code        co

In [36]:
df_brewery.head(10)

Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,phone,website_url,longitude,latitude
0,5128df48-79fc-4f0f-8b52-d06be54d0cec,(405) Brewing Co,micro,1716 Topeka St,,,Norman,Oklahoma,73069-8224,United States,4058160490,http://www.405brewing.com,-97.468182,35.257389
1,9c5a66c8-cc13-416f-a5d9-0a769c87d318,(512) Brewing Co,micro,407 Radam Ln Ste F200,,,Austin,Texas,78745-1197,United States,5129211545,http://www.512brewing.com,,
2,34e8c68b-6146-453f-a4b9-1f6cd99a5ada,1 of Us Brewing Company,micro,8100 Washington Ave,,,Mount Pleasant,Wisconsin,53406-3920,United States,2624847553,https://www.1ofusbrewing.com,-87.883364,42.720108
3,ef970757-fe42-416f-931d-722451f1f59c,10 Barrel Brewing Co,large,1501 E St,,,San Diego,California,92101-6618,United States,6195782311,http://10barrel.com,-117.129593,32.714813
4,6d14b220-8926-4521-8d19-b98a2d6ec3db,10 Barrel Brewing Co,large,62970 18th St,,,Bend,Oregon,97701-9847,United States,5415851007,http://www.10barrel.com,-121.281706,44.086835
5,e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2,10 Barrel Brewing Co,large,1135 NW Galveston Ave Ste B,,,Bend,Oregon,97703-2465,United States,5415851007,,-121.328802,44.057565
6,e432899b-7f58-455f-9c7b-9a6e2130a1e0,10 Barrel Brewing Co,large,1411 NW Flanders St,,,Portland,Oregon,97209-2620,United States,5032241700,http://www.10barrel.com,-122.685506,45.525979
7,9f1852da-c312-42da-9a31-097bac81c4c0,10 Barrel Brewing Co - Bend Pub,large,62950 NE 18th St,,,Bend,Oregon,97701,United States,5415851007,,-121.280954,44.091211
8,ea4f30c0-bce6-416b-8904-fab4055a7362,10 Barrel Brewing Co - Boise,large,826 W Bannock St,,,Boise,Idaho,83702-5857,United States,2083445870,http://www.10barrel.com,-116.202929,43.618516
9,1988eb86-f0a2-4674-ba04-02454efa0d31,10 Barrel Brewing Co - Denver,large,2620 Walnut St,,,Denver,Colorado,80205-2231,United States,7205738992,,-104.985366,39.759251


In [30]:
# dataset combining beer advocate and beer profile data from kaggle
# downloaded from https://www.kaggle.com/datasets/ruthgn/beer-profile-and-ratings-data-set/data
file_id = '1CmgbvYGtgp0b7wZU8z8FT8tbRCJ6WkYT'
file_name = 'beer-profile-and-ratings.csv'
download_file(file_id, file_name)

df_profile = pd.read_csv(file_name)

Download 100%.


In [31]:
print(df_profile)

                                          Name          Style  \
0                                        Amber        Altbier   
1                                   Double Bag        Altbier   
2                               Long Trail Ale        Altbier   
3                                 Doppelsticke        Altbier   
4                 Sleigh'r Dark Doüble Alt Ale        Altbier   
...                                        ...            ...   
3192                           Winter Shredder  Winter Warmer   
3193                        The First Snow Ale  Winter Warmer   
3194                       Red Nose Winter Ale  Winter Warmer   
3195                      Fish Tale Winterfish  Winter Warmer   
3196  Frosted Frog Christmas Ale - Barrel-Aged  Winter Warmer   

                                               Brewery  \
0                                  Alaskan Brewing Co.   
1                               Long Trail Brewing Co.   
2                               Long Trail Br

In [None]:
# Chosen dataset is the beer advocate dataset as it is a comprehensive dataset of 
# 1.5 million samples from all over the world
# All the datasets can be found here in this drive folder 
# https://drive.google.com/drive/folders/14J0u4AhUwkfKJKYnJzuD6dRW2y9uZqnD?usp=sharing