## Imports

In [1]:
import folium
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import requests
import io
from zipfile import ZipFile
import time
from ipywidgets import widgets
from IPython.display import display
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from pyclustertend import hopkins
import pickle
from plotly import graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

## Foursquare data acquisition function

In [2]:
 #### Requesting data for one specific location on the map.

def explore_area_nearby(client_id, client_secret, access_token, lat, lng, radius = 500, limit = 100, category = None):
    """Query Foursquare's v2 ``venues/explore`` endpoint around one point.

    Parameters
    ----------
    client_id, client_secret, access_token
        Credentials, sent as the ``client_id``, ``client_secret`` and
        ``oauth_token`` query parameters.
    lat, lng
        Coordinates of the point, sent as ``ll=<lat>,<lng>``.
    radius, limit
        Forwarded unchanged as the ``radius`` and ``limit`` query parameters.
    category
        Optional value for the ``categoryId`` query parameter; the parameter
        is omitted entirely when this is ``None``.

    Returns
    -------
    The decoded JSON body of the HTTP response (``resp.json()``).
    """
    version = '20180605' # Foursquare API version
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    # Let requests assemble the query string so every value is URL-encoded,
    # instead of splicing raw values into the URL by hand.
    query = {
        "client_id": client_id,
        "client_secret": client_secret,
        "oauth_token": access_token,
        "v": version,
        "radius": radius,
        "limit": limit,
        "ll": "{},{}".format(lat, lng),
    }
    if category is not None:
        query["categoryId"] = category
    resp = requests.get(endpoint, params = query)
    return resp.json()

#### Helper wrapper function to be used for the function above ("explore_area_nearby")

def get_fs_for_latlng(data, raw = False, category = None, radius = 2000):
    """Fetch Foursquare venues around one coordinate pair.

    Parameters
    ----------
    data
        Indexable pair; ``data[1]`` is passed on as the latitude and
        ``data[0]`` as the longitude (presumably a GeoJSON-style
        ``[lng, lat]`` pair -- confirm against the caller's data).
    raw
        When truthy, return the complete decoded response; otherwise return
        only ``response["groups"][0]["items"]``.
    category
        Optional category id forwarded to ``explore_area_nearby``.
    radius
        Search radius forwarded to ``explore_area_nearby``.

    Returns
    -------
    The full response dict (``raw`` truthy) or the list of venue items.

    Raises
    ------
    KeyError, IndexError
        With ``raw`` falsy, if the response lacks the expected
        ``response -> groups -> [0] -> items`` structure.
    """
    import os  # function-scope import keeps this block self-contained

    lat_lng = data
    # SECURITY NOTE: credentials should not live in source control. The
    # environment variables below take precedence; the literals are kept only
    # as fallbacks so existing callers keep working unchanged.
    client_id = os.environ.get("FOURSQUARE_CLIENT_ID", "OCJNVVQEBW4JMRT1XQWOMUA4D3BSVJ1ABPSFVWIGNQZ4MHKT")
    client_secret = os.environ.get("FOURSQUARE_CLIENT_SECRET", "4XAPAWARYQL5Y0OLDVCABVNFTC5SXUUH4Q0YYVVHSHNJJ44F")
    access_token = os.environ.get("FOURSQUARE_ACCESS_TOKEN", "N2LGNKF1ZJBDYCFOYITCMFEUSMFYEXSZWEOEG2TNH15H1GEC")
    # NOTE(review): the API may cap ``limit`` well below 500 -- confirm
    # against the Foursquare documentation.
    limit = 500
    raw_response = explore_area_nearby(
        client_id,
        client_secret,
        access_token,
        lat_lng[1],
        lat_lng[0],
        radius = radius,
        limit = limit,
        category = category,
    )
    if raw:
        return raw_response

    return raw_response["response"]["groups"][0]["items"]

#### Helper functions to extract relevant fields from the JSON response of FourSquare's API

def get_venue_category(i, raw_response):
    """Return the name of the first category listed for the i-th venue item."""
    venue_items = raw_response["response"]["groups"][0]["items"]
    first_category = venue_items[i]["venue"]["categories"][0]
    return first_category["name"]

def get_venue_category_id(i, raw_response):
    """Return the id of the first category listed for the i-th venue item."""
    venue_items = raw_response["response"]["groups"][0]["items"]
    first_category = venue_items[i]["venue"]["categories"][0]
    return first_category["id"]
    
def get_venue_type(i, raw_response):
    """Return the ``type`` of the first reason attached to the i-th venue item."""
    venue_items = raw_response["response"]["groups"][0]["items"]
    first_reason = venue_items[i]["reasons"]["items"][0]
    return first_reason["type"]

def get_venue_name(i, raw_response):
    """Return the name of the venue held by the i-th item of the response."""
    venue_items = raw_response["response"]["groups"][0]["items"]
    venue = venue_items[i]["venue"]
    return venue["name"]

def get_venue_lat_lng(i, raw_response):
    """Return ``[lat, lng]`` taken from the location of the i-th venue item."""
    venue_items = raw_response["response"]["groups"][0]["items"]
    location = venue_items[i]["venue"]["location"]
    latitude = location["lat"]
    longitude = location["lng"]
    return [latitude, longitude]



#### Function to be used in a loop to extract each selected relevant field with one of the above functions

def get_venue_fields(raw_response, venue_field_extractor_function):
    """Apply an extractor to every venue item of a response.

    The extractor is called as ``extractor(index, raw_response)`` once per
    entry in ``raw_response["response"]["groups"][0]["items"]``, in order, and
    the results are returned as a list.
    """
    item_count = len(raw_response["response"]["groups"][0]["items"])
    return [
        venue_field_extractor_function(position, raw_response)
        for position in range(item_count)
    ]

#### Since FourSquare's free tier only allows 500 requests per hour
#### and rejects further calls with the same API credentials for the next 2 hours,
#### the helper function below automates this waiting process.

def patient_foursquare_calls(dfs, venue_extractor_functions = None, dfs_all_key = "all", category = None, radius = 2000):
    """Pull Foursquare venues for every row of ``dfs[dfs_all_key]``, waiting out failures.

    For each row, the ``coordinates`` value is sent to ``get_fs_for_latlng``
    and every extractor is applied to the response. The extracted fields,
    tagged with the row's ``msoa11cd`` value, are accumulated in
    ``dfs["venue_df"]``. When a call or the extraction fails, the function
    sleeps for a little over two hours and retries the same row.

    Parameters
    ----------
    dfs
        Mapping of DataFrames. ``dfs[dfs_all_key]`` is read row by row via
        the labels ``0 .. len-1`` and must provide the ``msoa11cd`` and
        ``coordinates`` columns. ``dfs["venue_df"]`` is (re)created here.
    venue_extractor_functions
        Optional mapping of column name -> ``extractor(i, raw_response)``.
        Defaults to the venue name, location, type, category and category id
        extractors.
    dfs_all_key
        Key of the source DataFrame inside ``dfs``.
    category, radius
        Forwarded to ``get_fs_for_latlng``.

    Returns
    -------
    list
        One raw response per row, in row order. Only responses that were
        processed successfully are included.
    """
    if venue_extractor_functions is None:
        #### Placing the extractor functions into a dictionary for programming convenience
        venue_extractor_functions = dict(venue_name = get_venue_name,
                                         venue_location = get_venue_lat_lng,
                                         venue_type =  get_venue_type,
                                         venue_category = get_venue_category,
                                         venue_category_id = get_venue_category_id)

    #### Venue dataframe template
    venue_ds_template = dict(venue_name = "", venue_location = "", venue_category = "", venue_category_id = "")

    #### The dataframe to hold the extracted fields, each identified by its MSOA ID
    dfs["venue_df"] = pd.DataFrame([], columns = ["msoa_id", "venue_name", "venue_location", "venue_category", "venue_category_id"])

    #### Due to the limitation of FourSquare's free tier, raw responses are
    #### collected and returned so they can be saved in a serialized format,
    #### should additional data need to be extracted later.
    raw_responses = []
    locations = dfs[dfs_all_key]
    data_length = len(locations.index)

    #### Only failures that a rejected or malformed API response (or the HTTP
    #### call itself) can produce are retried. Programming errors and
    #### KeyboardInterrupt propagate instead of triggering a 2-hour sleep.
    #### NOTE(review): a single malformed venue entry also lands here and will
    #### be retried after the sleep -- confirm whether it should be skipped.
    retryable_errors = (KeyError, IndexError, TypeError, ValueError, requests.exceptions.RequestException)

    i = 0
    #### Main loop to iterate through each MSOA center within the dataframe.
    while i < data_length:
        #### Row lookups stay outside the try block: a missing column or label
        #### is a caller error and must fail fast rather than sleep.
        msoa_id = locations.loc[i, "msoa11cd"]
        coordinates = locations.loc[i, "coordinates"]

        try:
            #### Getting the raw response data for the location
            raw_response = get_fs_for_latlng(coordinates, raw = True, category = category, radius = radius)
            venue_count = len(raw_response["response"]["groups"][0]["items"])
            venue_ds_temp = venue_ds_template.copy()
            for field_name, extractor in venue_extractor_functions.items():
                venue_ds_temp[field_name] = get_venue_fields(raw_response, extractor)
        except retryable_errors as err:
            print("*"*50 + "\nI will go to sleep for 2 hours since have reached the limit of FourSquare's free tier.\n" + "*"*50)
            print("Reason: {!r}".format(err))
            time.sleep(60*60*2+10)
            continue

        #### Record the response only once it has been processed, so that a
        #### retried row is never stored twice.
        raw_responses.append(raw_response)
        venue_ds_temp["msoa_id"] = [msoa_id] * venue_count
        #### DataFrame.append was removed in pandas 2.0; pd.concat is the
        #### equivalent and keeps dfs["venue_df"] current after every row.
        dfs["venue_df"] = pd.concat([dfs["venue_df"], pd.DataFrame(venue_ds_temp)])
        print("\rPulled data for {:d}/{:d}. Last MSOID: {:s}".format(i+1, data_length, msoa_id))
        i += 1
    return raw_responses

## Function call

In [None]:
# Don't run this unless you are ready to be very patient...
#to_be_pickled = patient_foursquare_calls(dfs, category = category_id_dict["Food"])
#pickle.dump(to_be_pickled, open("new_fs_data.pkl", "wb"))