In [10]:
import requests
from requests.exceptions import HTTPError
import pandas as pd
import numpy as np

In [4]:
# Endpoint that returns the brewery list as JSON.
url = "https://api.openbrewerydb.org/breweries"

# Request status flag; starts as "N" and is overwritten by the fetch cell
# when the request fails.
error = 'N'

### Reading the Data from API

In [78]:
# Fetch the brewery list from the API and load it into a DataFrame.
#
# Status flag written to `error`:
#   "N" - the request succeeded
#   "Y" - the server answered with an HTTP error status
#   "O" - any other failure (connection problem, timeout, ...)

# Reset the state on every run so a re-executed cell never reports a stale
# failure and never displays a frame left over from a previous run.
error = "N"
errorReason = None
df = pd.DataFrame()

try:
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
except HTTPError as http_err:
    error = "Y"
    errorReason = 'HTTP Error Occured: {}'.format(http_err)
except Exception as err:
    error = "O"
    # Keep the underlying reason instead of discarding it.
    errorReason = 'Connection Error Occured: {}'.format(err)

if error == "N":
    events = r.json()
    df = pd.DataFrame.from_dict(events)
else:
    # Every failure kind skips parsing: an HTTP error response is not the
    # brewery payload, and after a connection failure there is no response.
    print(errorReason)

df.head(5)

Unnamed: 0,id,obdb_id,name,brewery_type,street,address_2,address_3,city,state,county_province,postal_code,country,longitude,latitude,phone,website_url,updated_at,created_at
0,9094,bnaf-llc-austin,"Bnaf, LLC",planning,,,,Austin,Texas,,78727-7602,United States,,,,,2018-07-24T00:00:00.000Z,2018-07-24T00:00:00.000Z
1,9180,boulder-beer-co-boulder,Boulder Beer Co,regional,2880 Wilderness Pl,,,Boulder,Colorado,,80301-5401,United States,-105.2480158,40.026439,,,2018-08-24T00:00:00.000Z,2018-07-24T00:00:00.000Z
2,9754,clermont-brewing-company-clermont,Clermont Brewing Company,planning,,,,Clermont,Florida,,34711-2108,United States,,,,,2018-08-11T00:00:00.000Z,2018-07-24T00:00:00.000Z
3,10186,dimensional-brewing-co-dubuque,Dimensional Brewing Co.,planning,,,,Dubuque,Iowa,,52001,United States,,,,http://www.dimensionalbrewing.com,2018-08-11T00:00:00.000Z,2018-07-24T00:00:00.000Z
4,10217,dixie-brewing-co-inc-new-orleans,Dixie Brewing Co Inc.,contract,6221 S Claiborne Ave Ste 101,,,New Orleans,Louisiana,,70125-4191,United States,,,5048228711.0,,2018-08-11T00:00:00.000Z,2018-07-24T00:00:00.000Z


### 1. Replace the Headers

In [79]:
# Show the headers before and after the rename so the change is visible.
print(df.columns)

# Replace the API's short field names with more descriptive column headers.
df = df.rename(
    columns={
        "name": "brewery_name",
        "street": "street_name",
        "address_2": "addressline_2",
        "address_3": "addressline_3",
        "city": "city_name",
        "state": "state_name",
    }
)

print(df.columns)
df.head(5)

Index(['id', 'obdb_id', 'name', 'brewery_type', 'street', 'address_2',
       'address_3', 'city', 'state', 'county_province', 'postal_code',
       'country', 'longitude', 'latitude', 'phone', 'website_url',
       'updated_at', 'created_at'],
      dtype='object')
Index(['id', 'obdb_id', 'brewery_name', 'brewery_type', 'street_name',
       'addressline_2', 'addressline_3', 'city_name', 'state_name',
       'county_province', 'postal_code', 'country', 'longitude', 'latitude',
       'phone', 'website_url', 'updated_at', 'created_at'],
      dtype='object')


Unnamed: 0,id,obdb_id,brewery_name,brewery_type,street_name,addressline_2,addressline_3,city_name,state_name,county_province,postal_code,country,longitude,latitude,phone,website_url,updated_at,created_at
0,9094,bnaf-llc-austin,"Bnaf, LLC",planning,,,,Austin,Texas,,78727-7602,United States,,,,,2018-07-24T00:00:00.000Z,2018-07-24T00:00:00.000Z
1,9180,boulder-beer-co-boulder,Boulder Beer Co,regional,2880 Wilderness Pl,,,Boulder,Colorado,,80301-5401,United States,-105.2480158,40.026439,,,2018-08-24T00:00:00.000Z,2018-07-24T00:00:00.000Z
2,9754,clermont-brewing-company-clermont,Clermont Brewing Company,planning,,,,Clermont,Florida,,34711-2108,United States,,,,,2018-08-11T00:00:00.000Z,2018-07-24T00:00:00.000Z
3,10186,dimensional-brewing-co-dubuque,Dimensional Brewing Co.,planning,,,,Dubuque,Iowa,,52001,United States,,,,http://www.dimensionalbrewing.com,2018-08-11T00:00:00.000Z,2018-07-24T00:00:00.000Z
4,10217,dixie-brewing-co-inc-new-orleans,Dixie Brewing Co Inc.,contract,6221 S Claiborne Ave Ste 101,,,New Orleans,Louisiana,,70125-4191,United States,,,5048228711.0,,2018-08-11T00:00:00.000Z,2018-07-24T00:00:00.000Z


### 2. Create a data set with required columns

In [80]:
# Keep only the brewery identity and address columns for the checks that follow.
df1 = df[
    [
        'brewery_name',
        'brewery_type',
        'street_name',
        'addressline_2',
        'addressline_3',
        'city_name',
        'state_name',
        'country',
        'postal_code',
    ]
]
df1.head(5)

Unnamed: 0,brewery_name,brewery_type,street_name,addressline_2,addressline_3,city_name,state_name,country,postal_code
0,"Bnaf, LLC",planning,,,,Austin,Texas,United States,78727-7602
1,Boulder Beer Co,regional,2880 Wilderness Pl,,,Boulder,Colorado,United States,80301-5401
2,Clermont Brewing Company,planning,,,,Clermont,Florida,United States,34711-2108
3,Dimensional Brewing Co.,planning,,,,Dubuque,Iowa,United States,52001
4,Dixie Brewing Co Inc.,contract,6221 S Claiborne Ave Ste 101,,,New Orleans,Louisiana,United States,70125-4191


### 3. Find duplicates

In [81]:
# Report whether any postal code value occurs more than once in the subset.
print(
    "Postal Code is duplictaed - {}".format(
        bool(df1["postal_code"].duplicated().any())
    )
)

Postal Code is duplictaed - False


### 4. Find Null values

In [82]:
# Report whether each inspected column holds at least one missing value.
# bool() turns the NumPy boolean into a built-in bool so that %r prints
# True/False; under NumPy 2 the repr of a NumPy boolean is np.True_/np.False_.
print("The column street name contains NaN - %r " % bool(df1.street_name.isnull().values.any()))
print("The column longitude contains NaN - %r " % bool(df.longitude.isnull().values.any()))

The column street name contains NaN - True 
The column longitude contains NaN - True 


### 5. Identify outliers and bad data

In [84]:
# Drop the rows whose longitude is missing or not finite, then report how
# many rows were present before and after the filter.
size_prev = df.shape

# Build new frames instead of assigning a column into `df`: once this cell has
# run, `df` is a filtered selection, and an in-place column assignment on a
# re-run can trigger pandas' SettingWithCopyWarning. astype(float) still raises
# on values that cannot be converted.
df = df.assign(longitude=df['longitude'].astype(float, errors='raise'))

# np.isfinite is False for NaN and +/-inf; copy() makes the result an
# independent frame rather than a selection of the previous one.
df = df[np.isfinite(df['longitude'])].copy()

size_after = df.shape
print("The size of previous data was - {prev[0]} rows and the size of the new one is - {after[0]} rows".
      format(prev=size_prev, after=size_after))

The size of previous data was - 20 rows and the size of the new one is - 3 rows
