### Data Collection - Food Standards Agency

In [None]:
import pandas as pd
import numpy as np
import requests
import random
import time
import re
import os, glob

import seaborn as sns
import matplotlib.pyplot as plt

plt.style.use('ggplot')

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [None]:
# API documentation: https://api.ratings.food.gov.uk/Help

In [None]:
# query API - ratings details
url = 'http://api.ratings.food.gov.uk/ratings'
headers = {'x-api-version': '2'}

response = requests.get(url=url, headers=headers, timeout=10)
# Stop here on a 4xx/5xx reply instead of parsing an error page as data.
response.raise_for_status()

In [None]:
# decode the body of the ratings response from JSON and display the result
data_dict = response.json()
data_dict

In [None]:
# query API - establishment details
url = 'http://api.ratings.food.gov.uk/establishments/basic'
headers = {'x-api-version': '2'}

response = requests.get(url=url, headers=headers, timeout=10)
# Stop here on a 4xx/5xx reply instead of parsing an error page as data.
response.raise_for_status()

In [None]:
# decode the body of the establishments response from JSON and display the result
data_dict = response.json()
data_dict

In [None]:
# query API - local authority details
url = 'http://api.ratings.food.gov.uk/authorities'
headers = {'x-api-version': '2'}

response = requests.get(url=url, headers=headers, timeout=10)
# Stop here on a 4xx/5xx reply instead of parsing an error page as data.
response.raise_for_status()

In [None]:
# decode the body of the authorities response from JSON and display the result;
# the 'authorities' entry of this object is iterated over further down
data_dict = response.json()
data_dict

In [None]:
# list the top-level keys of the decoded authorities response
data_dict.keys()

In [None]:
# create dataframe of local authorities and their IDs

# Output column -> key it is read from in each entry of data_dict['authorities'].
local_auth_fields = {'local_auth_id': 'LocalAuthorityId',
                     'name': 'Name',
                     'region': 'RegionName'}

# A missing key (or an entry that is not a dict at all) yields NaN for that
# cell; anything else that goes wrong now surfaces instead of being hidden by
# a bare `except:`.
local_auth = pd.DataFrame({
    column: [item.get(key, np.nan) if isinstance(item, dict) else np.nan
             for item in data_dict['authorities']]
    for column, key in local_auth_fields.items()
})
local_auth


In [None]:
# show only the local authorities whose region is London
local_auth.loc[local_auth['region'].eq('London')]

In [None]:
# collect the distinct local authority IDs that belong to the London region
london_ids = local_auth.loc[local_auth['region'].eq('London'), 'local_auth_id'].unique()
london_ids

In [None]:
# expand scope - distinct local authority IDs for every region listed below
all_reg_ids = local_auth.loc[
    local_auth['region'].isin(['South East',
                               'East Counties',
                               'North West',
                               'East Midlands',
                               'South West',
                               'West Midlands',
                               'Yorkshire and Humberside',
                               'North East',
                               'Northern Ireland',
                               'Wales']),
    'local_auth_id',
].unique()
all_reg_ids

In [None]:
# query API by local authority ID - test

In [None]:
# set parameters for a single test request against the establishments endpoint
url = 'http://api.ratings.food.gov.uk/establishments'
headers = {'x-api-version': '2'}
# 89 is a hard-coded local authority ID used only for this test
# NOTE(review): which authority 89 refers to is not shown here — confirm against local_auth
params = {'localAuthorityId': 89}

In [None]:
# fetch response data
r = requests.get(url=url, headers=headers, params=params, timeout=10)
# Stop here on a 4xx/5xx reply instead of parsing an error page as data.
r.raise_for_status()

In [None]:
# print the HTTP headers returned with the test response
print(r.headers)

In [None]:
# decode the body of the test response from JSON and display the result
data_dict = r.json()
data_dict

In [None]:
# list the top-level keys of the decoded test response
data_dict.keys()

In [None]:
# define function to extract business details from JSON and convert to dataframe

# Output column -> sequence of keys to follow inside one establishment record.
# The order here is the column order of the resulting dataframe.
_FSA_FIELDS = (
    ('FHRSID', ('FHRSID',)),
    ('BusinessName', ('BusinessName',)),
    ('BusinessType', ('BusinessType',)),
    ('BusinessTypeID', ('BusinessTypeID',)),
    ('AddressLine1', ('AddressLine1',)),
    ('AddressLine2', ('AddressLine2',)),
    ('AddressLine3', ('AddressLine3',)),
    ('AddressLine4', ('AddressLine4',)),
    ('PostCode', ('PostCode',)),
    ('RatingValue', ('RatingValue',)),
    ('RatingKey', ('RatingKey',)),
    ('RatingDate', ('RatingDate',)),
    ('LocalAuthorityCode', ('LocalAuthorityCode',)),
    ('LocalAuthorityName', ('LocalAuthorityName',)),
    ('Hygiene', ('scores', 'Hygiene')),
    ('Structural', ('scores', 'Structural')),
    ('ConfidenceInManagement', ('scores', 'ConfidenceInManagement')),
    ('longitude', ('geocode', 'longitude')),
    ('latitude', ('geocode', 'latitude')),
    ('RightToReply', ('RightToReply',)),
    ('NewRatingPending', ('NewRatingPending',)),
)


def _lookup(record, path):
    """Follow ``path`` through nested containers; return NaN if any step fails.

    Only lookup failures are treated as "value missing": an absent key
    (KeyError), a parent that is None or otherwise not subscriptable
    (TypeError), or an out-of-range index (IndexError). Any other exception
    propagates instead of being silently turned into NaN.
    """
    value = record
    try:
        for key in path:
            value = value[key]
    except (KeyError, TypeError, IndexError):
        return np.nan
    return value


def convert_to_df(data_dict):
    """Convert a decoded establishments response into a dataframe.

    Parameters
    ----------
    data_dict : dict
        Decoded JSON object holding an 'establishments' entry, an iterable of
        establishment records.

    Returns
    -------
    pandas.DataFrame
        One row per establishment and one column per entry of ``_FSA_FIELDS``
        (in that order). A field that cannot be looked up in a record is NaN.

    Raises
    ------
    KeyError
        If ``data_dict`` has no 'establishments' entry.
    """
    # Materialise once: the records are traversed once per output column.
    establishments = list(data_dict['establishments'])

    rest_fsa = {column: [_lookup(item, path) for item in establishments]
                for column, path in _FSA_FIELDS}
    return pd.DataFrame(rest_fsa)


In [None]:
# query API - get data for all relevant local authorities and save to csv
# Writes one file per local authority ID, named FHRS_<id>.csv, into the
# current working directory.

url = 'http://api.ratings.food.gov.uk/establishments'
headers = {'x-api-version': '2'}

for reg_id in all_reg_ids:
    params = {'localAuthorityId': reg_id}
    response = requests.get(url=url, headers=headers, params=params, timeout=10)
    # Abort on a 4xx/5xx reply so an error body is never parsed and saved as
    # if it were establishment data.
    response.raise_for_status()
    data_dict = response.json()
    df = convert_to_df(data_dict)
    # index=True is kept on purpose: it produces the 'Unnamed: 0' column that
    # a later cell drops after the files are read back.
    df.to_csv('FHRS_{}.csv'.format(reg_id), index=True)


In [None]:
# combine csv files

In [None]:
# directory that is searched for the per-authority FHRS_*.csv files
# NOTE(review): absolute, machine-specific path; the download cell writes its CSVs
# relative to the working directory — confirm both point at the same folder
path = '/Users/katjakrempel/Desktop/capstone/fsa_data/'

In [None]:
# glob returns matches in arbitrary order; sorting makes the row order of the
# merged dataframe reproducible between runs.
all_fsa_files = sorted(glob.glob(os.path.join(path, 'FHRS_*.csv')))
# pd.concat fails with an unhelpful message on an empty input, so report the
# real problem (nothing matched) explicitly.
if not all_fsa_files:
    raise FileNotFoundError('No FHRS_*.csv files found in {}'.format(path))
df_from_file = (pd.read_csv(f) for f in all_fsa_files)
df_fsa_merged = pd.concat(df_from_file)
df_fsa_merged

In [None]:
# list the distinct BusinessType values present in the merged data set
df_fsa_merged['BusinessType'].unique()

In [None]:
# exclude irrelevant business types - keep only the three listed below
relevant_business_types = ['Restaurant/Cafe/Canteen',
                           'Takeaway/sandwich shop',
                           'Pub/bar/nightclub']
# .copy() makes `subset` an independent dataframe, so modifying it later does
# not trigger SettingWithCopyWarning or depend on df_fsa_merged.
subset = df_fsa_merged[df_fsa_merged['BusinessType'].isin(relevant_business_types)].copy()

In [None]:
# summarise the columns, non-null counts and dtypes of the filtered subset
subset.info()

In [None]:
# drop 'Unnamed: 0' column (the index that was written into the CSV files)
# Reassigning instead of inplace=True avoids mutating a filtered slice, and
# errors='ignore' lets this cell be re-run after the column is already gone.
subset = subset.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# save the filtered dataframe to 'all_fsa.csv' in the current working directory,
# without writing the index as a column
subset.to_csv('all_fsa.csv', index=False)

In [None]:
# read the combined CSV back into a dataframe
# NOTE(review): absolute, machine-specific path, while the save cell writes 'all_fsa.csv'
# relative to the working directory — confirm both refer to the same file
df = pd.read_csv('/Users/katjakrempel/Desktop/capstone/all_fsa.csv')

In [None]:
# preview the first 10 rows of the reloaded data
df.head(10)

In [None]:
# summarise the columns, non-null counts and dtypes of the reloaded data
df.info()