In [1]:
# Dependencies
import os
import pathlib
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st


In [5]:
# Build the GET request URL used to retrieve brewery data.
# Resulting URL:
#   https://api.openbrewerydb.org/v1/breweries?by_country=United_States&page=1&per_page=200
base_url = 'https://api.openbrewerydb.org/v1/breweries?'
country = 'United_States'
page = '1'

# The query string carries the country filter, the page number and the page size
query_url = f"{base_url}by_country={country}&page={page}&per_page=200"

print(query_url)


https://api.openbrewerydb.org/v1/breweries?by_country=United_States&page=1&per_page=200


In [3]:
# Fetch the brewery records at query_url and pretty-print a small sample.

# The timeout stops the cell from hanging indefinitely on a stalled connection;
# raise_for_status() turns an HTTP error into an exception instead of letting
# an error body be parsed as if it were brewery data.
api_reply = requests.get(query_url, timeout=30)
api_reply.raise_for_status()
response = api_reply.json()

# Print only the first records: dumping the whole payload floods the cell output.
# The isinstance guard keeps the cell working if the payload is not a list.
sample_records = response[:2] if isinstance(response, list) else response
print(json.dumps(sample_records, indent=4, sort_keys=True))

[
    {
        "address_1": "1716 Topeka St",
        "address_2": null,
        "address_3": null,
        "brewery_type": "micro",
        "city": "Norman",
        "country": "United States",
        "id": "5128df48-79fc-4f0f-8b52-d06be54d0cec",
        "latitude": "35.25738891",
        "longitude": "-97.46818222",
        "name": "(405) Brewing Co",
        "phone": "4058160490",
        "postal_code": "73069-8224",
        "state": "Oklahoma",
        "state_province": "Oklahoma",
        "street": "1716 Topeka St",
        "website_url": "http://www.405brewing.com"
    },
    {
        "address_1": "407 Radam Ln Ste F200",
        "address_2": null,
        "address_3": null,
        "brewery_type": "micro",
        "city": "Austin",
        "country": "United States",
        "id": "9c5a66c8-cc13-416f-a5d9-0a769c87d318",
        "latitude": null,
        "longitude": null,
        "name": "(512) Brewing Co",
        "phone": "5129211545",
        "postal_code": "78745-1197",
 

In [4]:
# Turn the parsed JSON response into a DataFrame
breweries_df = pd.DataFrame(data=response)

# Preview the first four rows
breweries_df.head(n=4)

Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,longitude,latitude,phone,website_url,state,street
0,5128df48-79fc-4f0f-8b52-d06be54d0cec,(405) Brewing Co,micro,1716 Topeka St,,,Norman,Oklahoma,73069-8224,United States,-97.46818222,35.25738891,4058160490,http://www.405brewing.com,Oklahoma,1716 Topeka St
1,9c5a66c8-cc13-416f-a5d9-0a769c87d318,(512) Brewing Co,micro,407 Radam Ln Ste F200,,,Austin,Texas,78745-1197,United States,,,5129211545,http://www.512brewing.com,Texas,407 Radam Ln Ste F200
2,ef970757-fe42-416f-931d-722451f1f59c,10 Barrel Brewing Co,large,1501 E St,,,San Diego,California,92101-6618,United States,-117.129593,32.714813,6195782311,http://10barrel.com,California,1501 E St
3,6d14b220-8926-4521-8d19-b98a2d6ec3db,10 Barrel Brewing Co,large,62970 18th St,,,Bend,Oregon,97701-9847,United States,-121.281706,44.08683531,5415851007,http://www.10barrel.com,Oregon,62970 18th St


In [5]:
# Summary statistics for every column that describe() covers by default
breweries_summary = breweries_df.describe()
breweries_summary


Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,longitude,latitude,phone,website_url,state,street
count,50,50,50,47,1,0.0,50,50,50,50,40.0,40.0,45,39,50,47
unique,50,46,6,47,1,0.0,46,27,50,2,40.0,40.0,43,37,27,47
top,5128df48-79fc-4f0f-8b52-d06be54d0cec,10 Barrel Brewing Co,micro,1716 Topeka St,Clonmore,,Bend,Oregon,73069-8224,United States,-97.46818222,35.25738891,5415851007,http://www.10barrel.com,Oregon,1716 Topeka St
freq,1,4,30,1,1,,3,6,1,49,1.0,1.0,3,3,6,1


In [6]:
# Prepare the breweries data for analysis.
# Drop the columns that are not needed: brewery id, address_2, address_3,
# country, phone, website_url and street.
columns_to_drop = ['id', 'address_2', 'address_3', 'country', 'phone', 'website_url', 'street']
clean_breweries_df = breweries_df.drop(columns=columns_to_drop)

# Show a bounded preview rather than asking for thousands of rows:
# a short head() keeps the cell output readable whatever the frame size.
clean_breweries_df.head(10)

Unnamed: 0,name,brewery_type,address_1,city,state_province,postal_code,longitude,latitude,state
0,(405) Brewing Co,micro,1716 Topeka St,Norman,Oklahoma,73069-8224,-97.46818222,35.25738891,Oklahoma
1,(512) Brewing Co,micro,407 Radam Ln Ste F200,Austin,Texas,78745-1197,,,Texas
2,10 Barrel Brewing Co,large,1501 E St,San Diego,California,92101-6618,-117.129593,32.714813,California
3,10 Barrel Brewing Co,large,62970 18th St,Bend,Oregon,97701-9847,-121.281706,44.08683531,Oregon
4,10 Barrel Brewing Co,large,1135 NW Galveston Ave Ste B,Bend,Oregon,97703-2465,-121.3288021,44.0575649,Oregon
5,10 Barrel Brewing Co,large,1411 NW Flanders St,Portland,Oregon,97209-2620,-122.6855056,45.5259786,Oregon
6,10 Barrel Brewing Co - Bend Pub,large,62950 NE 18th St,Bend,Oregon,97701,-121.2809536,44.0912109,Oregon
7,10 Barrel Brewing Co - Boise,large,826 W Bannock St,Boise,Idaho,83702-5857,-116.202929,43.618516,Idaho
8,10 Barrel Brewing Co - Denver,large,2620 Walnut St,Denver,Colorado,80205-2231,-104.9853655,39.7592508,Colorado
9,10 Torr Distilling and Brewing,micro,490 Mill St,Reno,Nevada,89502,-119.7732015,39.5171702,Nevada


In [7]:
# Count missing values in each column.
# TODO(TEAM): We need to decide how to handle the missing values.  Do we drop the rows?
clean_breweries_df.isnull().sum()

name               0
brewery_type       0
address_1          3
city               0
state_province     0
postal_code        0
longitude         10
latitude          10
state              0
dtype: int64

In [8]:
# Write the cleaned DataFrame to a CSV file, leaving out the index column
clean_breweries_df.to_csv(path_or_buf='breweries.csv', index=False)


In [9]:
# Tally postal codes by string length; unusual lengths may point to
# invalid postal codes.
postal_code_lengths = clean_breweries_df['postal_code'].str.len()
postal_code_lengths.value_counts()



10    37
5     12
8      1
Name: postal_code, dtype: int64