# Import Dependencies

In [None]:
from config import api_key
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import datetime
import json

# Use API to get .json

In [None]:
# Request one page of breweries (with location and social-account details)
# from the BreweryDB sandbox API.
endpoint = 'breweries'
page = 1
url = f"https://sandbox-api.brewerydb.com/v2/{endpoint}/?key={api_key}&p={page}&withLocations=Y&withSocialAccounts=Y"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here rather than as a confusing
# lookup failure when the payload is parsed further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
brewery_data = response.json()
#print(json.dumps(brewery_data, indent=4, sort_keys=True))

# Create DataFrame

- Initially, we pull just a few interesting columns for the dataframe, most importantly, the established dates and lat/lon coordinates for each brewery
- We will add distance columns later after doing some math
- Change the Established Date column to numeric in order to use in the scatter plot

In [None]:
# Collect one flat record per requested result index from the API payload.
brewery_dict = []

# NOTE(review): 19 covers result indices 0-18 only; confirm whether the
# 20th result on the page was meant to be included.
MAX_RESULTS = 19

# Last record that was parsed successfully. Starting from None avoids a
# NameError at the append below when the very first result is incomplete.
brewery_info = None

for result in range(0, MAX_RESULTS):
    try:
        brewery = brewery_data['data'][result]
        # Only the first listed location is used for each brewery.
        location = brewery['locations'][0]
        brewery_info = {
            'Brewery Name': brewery['name'],
            'Brewery ID': brewery['id'],
            'Established Date': brewery['established'],
            'Is in business?': brewery['isInBusiness'],
            'Website': brewery['website'],
            'Country': location['country']['isoCode'],
            'City': location['locality'],
            'Latitude': location['latitude'],
            'Longitude': location['longitude'],
            'Primary Location': location['isPrimary'],
            # Placeholders; the distance columns are filled in later.
            'Distance from Chicago (km)': '',
            'Distance from Pottsville (km)': ''
        }
    except (KeyError, IndexError, TypeError):
        # Only lookup failures (missing field, missing result, empty
        # location list) are tolerated; anything else, including a
        # keyboard interrupt, now propagates instead of being swallowed.
        print('id not found')
    # NOTE(review): when a result fails to parse, the previous good record
    # is appended again, so the list keeps one entry per requested index.
    # Those repeats share a 'Brewery ID' and can be removed by
    # de-duplicating on that column.
    if brewery_info is not None:
        brewery_dict.append(brewery_info)

In [None]:
# Build the frame from the collected records, then swap in a numeric version
# of the founding-year column so it can be used as a plot axis.
brewery_df = pd.DataFrame(brewery_dict)
established_numeric = pd.to_numeric(brewery_df['Established Date'])
brewery_df = brewery_df.assign(**{'Established Date': established_numeric})

# Determine Distances from Chicago

- use geopy to determine distances via lat/long data
- Chicago is one of the hot-spots for early American breweries, made possible by the German immigrant community
- Pottsville (Becky's hometown) is home to the oldest brewery in America - Yuengling!
- update the dataframe, clean it and export as a csv

In [None]:
#!pip install geopy

In [None]:
import geopy.distance

# Reference points as (latitude, longitude) pairs.
Chi_coords = (41.8781, -87.6298)
Pottsville_coords = (40.6856, -76.1955)

# Pair up every row's coordinates once. Iterating over the frame's own rows
# (rather than a fixed range of labels) works for any number of rows and any
# index, so the cell no longer fails when the frame is not exactly 19 rows.
brewery_coords = list(zip(brewery_df['Latitude'], brewery_df['Longitude']))

# Assign each distance column in a single statement. Element-wise chained
# assignment (df[col][x] = value) may write to a temporary copy and is
# ineffective under pandas Copy-on-Write; whole-column assignment always
# updates the frame and replaces the '' placeholders with a numeric column.
brewery_df['Distance from Chicago (km)'] = [
    geopy.distance.distance(Chi_coords, coords).km for coords in brewery_coords
]
brewery_df['Distance from Pottsville (km)'] = [
    geopy.distance.distance(Pottsville_coords, coords).km for coords in brewery_coords
]

In [None]:
# Keep only the first row seen for each Brewery ID.
repeated_id = brewery_df.duplicated(subset=['Brewery ID'], keep='first')
brewery_df = brewery_df[~repeated_id]

brewery_df

In [None]:
# Write the cleaned table to disk without the index column.
csv_path = "data/brewery_data.csv"
brewery_df.to_csv(csv_path, index=False, encoding="utf-8")

# Figures

- I expect a greater number of older breweries closer to Chicago, given that some of the first instances of brewing in America occurred here.
- With so few breweries available for free (boo sandbox), the scatter plot looks a little sparse. However, the general trend gives us preliminary data suggesting that there may be a correlation! If I wanted to do more with this, it would be enough to convince me to spend the $20 for full access.

- plot for Pottsville is just for fun

In [None]:
# Chicago: distance on the x-axis, founding year on the y-axis
ax = plt.gca()
ax.scatter(
    brewery_df['Distance from Chicago (km)'],
    brewery_df['Established Date'],
    s=100,
    color="blue",
    edgecolor='black',
    alpha=0.5,
)

# Chart elements
ax.set_title("Distance from Chicago vs. Established Year")
ax.set_xlabel('Distance from Chicago (km)')
ax.set_ylabel('Established Year')
ax.grid(True)

# Save and print
ax.figure.savefig("images/Distance from Chicago vs. Established Year.png")
plt.show()

In [None]:
# Pottsville: distance on the x-axis, founding year on the y-axis
ax = plt.gca()
ax.scatter(
    brewery_df['Distance from Pottsville (km)'],
    brewery_df['Established Date'],
    s=100,
    color="red",
    edgecolor='black',
    alpha=0.5,
)

# Chart elements
ax.set_title("Distance from Pottsville vs. Established Year")
ax.set_xlabel('Distance from Pottsville (km)')
ax.set_ylabel('Established Year')
ax.grid(True)

# Print only; saving this figure is left disabled
#ax.figure.savefig("images/Distance from Pottsville vs. Established Year.png")
plt.show()

In [None]:
# Empty plot: the same data as the Chicago chart, but the markers have no
# fill and no edge, so only the axes, grid and labels are visible.
ax = plt.gca()
ax.scatter(
    brewery_df['Distance from Chicago (km)'],
    brewery_df['Established Date'],
    s=100,
    color="none",
    edgecolor='none',
    alpha=0.5,
)

# Chart elements
ax.set_title("Distance from Chicago vs. Established Year")
ax.set_xlabel('Distance from Chicago (km)')
ax.set_ylabel('Established Year')
ax.grid(True)

# Save and print
ax.figure.savefig("images/Empty plot.png")
plt.show()