Clearly define a problem or an idea of your choice, where you would need to leverage the Foursquare location data to solve or execute. Remember that data science problems always target an audience and are meant to help a group of stakeholders solve a problem, so make sure that you explicitly describe your audience and why they would care about your problem.

### Introduction/Business Problem 

<b>Problem</b>: 
         A family of 4 people are looking to buy a house in neighborhood.<br>
         Based on a specific average family income, 100k, what kind of neighborhood they can afford to buy a house.<br>
         Bonus: They want a shopping center close to their house and a park for the kids.<br>

### Data

Data needed for this segmentation:
+ Toronto police (https://data.torontopolice.on.ca/datasets) for neighbourhood + average crimes (Assault, Auto Theft, Breaking and Entering, Theft, Homicide)
+ Use of (https://api.foursquare.com) to get the venues for each neighbourhood
+ Toronto Income average per neighborhood (http://map.toronto.ca/wellbeing/)

In [174]:
import requests
import pickle as pkl
import numpy as np
import time
import pandas as pd
import json

In [309]:
# Load Police Crimes averages per neihborhood

with open('crimes.json') as json_file:
    data = json.load(json_file)

In [35]:
features = data['features']

In [48]:
features[0]['attributes']

{'Neighbourhood': 'Yonge-St.Clair',
 'Assault_AVG': 50.8,
 'AutoTheft_AVG': 39.4,
 'BreakandEnter_AVG': 25.6,
 'Robbery_AVG': 18.2,
 'TheftOver_AVG': 4.4,
 'Shape__Length': 5873.27050652613,
 'Homicide_AVG': 'N/A',
 'Population': 3189,
 'Shape__Area': 1161314.75244141}

In [310]:
neighbours = []

# Create a list of dictionaries for each neighbourhood
# Drop the AVG from the name
# Add the Longitude and Latitude

for feat in features:
    atts = {}
    atts['Neighbourhood'] = feat['attributes']['Neighbourhood']
    atts['Assault'] = feat['attributes']['Assault_AVG']
    atts['AutoTheft'] = feat['attributes']['AutoTheft_AVG']
    atts['BreakandEnter'] = feat['attributes']['BreakandEnter_AVG']
    atts['TheftOver'] = feat['attributes']['TheftOver_AVG']
    atts['Shape'] = feat['attributes']['Shape__Length']
    atts['Homicide'] = feat['attributes']['Homicide_AVG']
    atts['Population'] = feat['attributes']['Population']
    atts['Area'] = feat['attributes']['Shape__Area']
    # middle of the neighborhood latitude, longitude
    atts['Longitude'] = feat['geometry']['rings'][0][int(len(features[0]['geometry']['rings'][0])/2)][0]
    atts['Latitude'] = feat['geometry']['rings'][0][int(len(features[0]['geometry']['rings'][0])/2)][1]
    
    neighbours.append(atts)

In [108]:
df_neigh = pd.DataFrame(neighbours)

In [109]:
# replace N/A for averages with 0
df_neigh.replace('N/A', 0, inplace=True)

In [110]:
df_neigh[df_neigh['Homicide'] == 'N/A']['Homicide']

Series([], Name: Homicide, dtype: object)

In [111]:
df_neigh.head()

Unnamed: 0,Neighbourhood,Assault,AutoTheft,BreakandEnter,TheftOver,Shape,Homicide,Population,Area,Longitude,Latitude
0,Yonge-St.Clair,50.8,39.4,25.6,4.4,5873.270507,0.0,3189,1161315.0,-79.402363,43.695495
1,York University Heights,109.6,15.8,49.8,3.8,18504.777616,1.3,36764,13246660.0,-79.50872,43.762429
2,Lansing-Westgate,213.0,25.4,63.6,5.4,11112.109419,0.0,10242,5346186.0,-79.409809,43.757227
3,Yorkdale-Glen Park,129.8,31.4,57.0,9.0,10079.426837,1.2,18233,6038326.0,-79.471533,43.725117
4,Stonegate-Queensway,122.6,22.8,39.4,5.2,11853.189803,1.0,22207,7946202.0,-79.485026,43.641919


In [311]:
# Foursquare API
# Censored the id and secret

client_id = "**********************************"
client_secret = "*******************"
version = "20180602"

# configs for search
limit = 10
radius = 400

In [313]:
def get_categories(lat, long, json, result):
    '''
    get the venues for each neighbourhood
    '''
    
    categories = []

    for item in json['response']['groups'][0]['items']:
        for category in item['venue']['categories']:
            categories.append(category['name'])

    result[(lat,long)] = categories
    return result


def get_all_categories(df):
    '''
    function used to get all the calls for each latitude/longitude
    df - dataframe that contains latitude/longitude
    '''
    i = 0
    result = {}
    for lat, long in zip(df['Latitude'],df['Longitude']):
        
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
                    client_id, 
                    client_secret, 
                    lat, 
                    long, 
                    version, 
                    radius, 
                    limit
                )
        
        json = requests.get(url).json()
        get_categories(lat, long, json, result)
        
        # do not spam the api
        if( i % 10 == 0):
            print('Number of requests done : ', i)
            time.sleep(1)
            
        i += 1
    
    return result

In [163]:
venues = get_all_categories(df_neigh)

Number of requests done :  0
Number of requests done :  10
Number of requests done :  20
Number of requests done :  30
Number of requests done :  40
Number of requests done :  50
Number of requests done :  60
Number of requests done :  70
Number of requests done :  80
Number of requests done :  90
Number of requests done :  100
Number of requests done :  110
Number of requests done :  120
Number of requests done :  130


In [314]:
def get_list_categories(venues_dict):
    '''
    group up the venues dictionary into a list of venues 
    '''
    lst = [item for sublist in venues_dict.values() for item in sublist]
    
    # return unique list
    return np.unique(lst)

In [165]:
venues_result = []
for i in range(len(df_neigh)):
    lst = venues[(df_neigh.iloc[i]['Latitude'],df_neigh.iloc[i]['Longitude'])]
    tmp = ', '.join(lst)
    venues_result.append(tmp)

In [169]:
df_neigh.insert(11, "Venues", venues_result, True) 

In [172]:
df_neigh

Unnamed: 0,Neighbourhood,Assault,AutoTheft,BreakandEnter,TheftOver,Shape,Homicide,Population,Area,Longitude,Latitude,Venues
0,Yonge-St.Clair,50.8,39.4,25.6,4.40,5873.270507,0,3189,1.161315e+06,-79.402363,43.695495,"Park, Trail, Gym / Fitness Center, Convenience..."
1,York University Heights,109.6,15.8,49.8,3.80,18504.777616,1.3,36764,1.324666e+07,-79.508720,43.762429,"Grocery Store, Hockey Field, Theater"
2,Lansing-Westgate,213.0,25.4,63.6,5.40,11112.109419,0,10242,5.346186e+06,-79.409809,43.757227,"Thai Restaurant, Japanese Restaurant, Grocery ..."
3,Yorkdale-Glen Park,129.8,31.4,57.0,9.00,10079.426837,1.2,18233,6.038326e+06,-79.471533,43.725117,"Pet Store, Arcade, Furniture / Home Store, Pla..."
4,Stonegate-Queensway,122.6,22.8,39.4,5.20,11853.189803,1,22207,7.946202e+06,-79.485026,43.641919,
...,...,...,...,...,...,...,...,...,...,...,...,...
135,Pleasant View,264.2,46.8,34.6,7.40,7944.829782,0,19042,2.967493e+06,-79.334668,43.775478,"Chinese Restaurant, Middle Eastern Restaurant,..."
136,Wychwood,76.2,6.2,24.8,3.25,5435.022062,0,7397,1.682111e+06,-79.416399,43.678333,"Dessert Shop, Yoga Studio, Latin American Rest..."
137,Leaside-Bennington,102.0,40.6,25.8,5.80,14836.618542,1,20308,4.784766e+06,-79.360712,43.703480,"Fish & Chips Shop, Coffee Shop, Bike Shop, Gro..."
138,Briar Hill-Belgravia,86.6,17.0,21.0,1.50,6289.707250,1,4572,1.830325e+06,-79.447234,43.704074,"Park, BBQ Joint"


In [176]:
with open('data_venues_crimes.pkl', 'wb') as f:
    pkl.dump(df_neigh, f)

In [276]:
df = pd.read_pickle("data_venues_crimes.pkl")

In [277]:
df.head()

Unnamed: 0,Neighbourhood,Assault,AutoTheft,BreakandEnter,TheftOver,Shape,Homicide,Population,Area,Longitude,Latitude,Venues
0,Yonge-St.Clair,50.8,39.4,25.6,4.4,5873.270507,0.0,3189,1161315.0,-79.402363,43.695495,"Park, Trail, Gym / Fitness Center, Convenience..."
1,York University Heights,109.6,15.8,49.8,3.8,18504.777616,1.3,36764,13246660.0,-79.50872,43.762429,"Grocery Store, Hockey Field, Theater"
2,Lansing-Westgate,213.0,25.4,63.6,5.4,11112.109419,0.0,10242,5346186.0,-79.409809,43.757227,"Thai Restaurant, Japanese Restaurant, Grocery ..."
3,Yorkdale-Glen Park,129.8,31.4,57.0,9.0,10079.426837,1.2,18233,6038326.0,-79.471533,43.725117,"Pet Store, Arcade, Furniture / Home Store, Pla..."
4,Stonegate-Queensway,122.6,22.8,39.4,5.2,11853.189803,1.0,22207,7946202.0,-79.485026,43.641919,


In [315]:
# open the income dataset from wellbeing toronto
df_income = pd.read_csv('wellbeing_toronto.csv')

In [279]:
len(df_income)

140

In [280]:
df_neigh.iloc[41]

Neighbourhood                               West Humber-Clairville
Assault                                                       78.4
AutoTheft                                                     26.6
BreakandEnter                                                 58.8
TheftOver                                                      5.4
Shape                                                      27924.2
Homicide                                                       2.5
Population                                                   36570
Area                                                   3.01453e+07
Longitude                                                 -79.5556
Latitude                                                   43.7075
Venues           Deli / Bodega, Art Gallery, Café, Intersection...
0                                                              NaN
Name: 41, dtype: object

In [286]:
# replace Danforth-East York by Danforth East York to match the crimes data
df.loc[df.index[df['Neighbourhood'] == 'Danforth East York'], 'Neighbourhood'] = 'Danforth-East York'

In [290]:
# replace income 'Dovercourt-Wallace Emerson-Juncti' to 'Dovercourt-Wallace Emerson-Junction
df_income.loc[df_income.index[df_income['Neighbourhood'] == 'Dovercourt-Wallace Emerson-Juncti'], 'Neighbourhood'] = 'Dovercourt-Wallace Emerson-Junction'

In [305]:
df.head()

Unnamed: 0,Neighbourhood,Assault,AutoTheft,BreakandEnter,TheftOver,Shape,Homicide,Population,Area,Longitude,Latitude,Venues
0,Yonge-St.Clair,50.8,39.4,25.6,4.4,5873.270507,0.0,3189,1161315.0,-79.402363,43.695495,"Park, Trail, Gym / Fitness Center, Convenience..."
1,York University Heights,109.6,15.8,49.8,3.8,18504.777616,1.3,36764,13246660.0,-79.50872,43.762429,"Grocery Store, Hockey Field, Theater"
2,Lansing-Westgate,213.0,25.4,63.6,5.4,11112.109419,0.0,10242,5346186.0,-79.409809,43.757227,"Thai Restaurant, Japanese Restaurant, Grocery ..."
3,Yorkdale-Glen Park,129.8,31.4,57.0,9.0,10079.426837,1.2,18233,6038326.0,-79.471533,43.725117,"Pet Store, Arcade, Furniture / Home Store, Pla..."
4,Stonegate-Queensway,122.6,22.8,39.4,5.2,11853.189803,1.0,22207,7946202.0,-79.485026,43.641919,


In [303]:
i = 0
income = []

for nei in df['Neighbourhood']:
    result = df_income.index[df_income['Neighbourhood'].str.contains(nei) == True]
    if(len(result) > 0):
        income.append(df_income.iloc[result[0]]['Average Family Income'])
    else:
        print('Not found neighbourhood :',nei)

In [306]:
# insert the income column into the neighbourhood/crimes/venues data
df.insert(12, "Income", income, True) 

In [307]:
df.head()

Unnamed: 0,Neighbourhood,Assault,AutoTheft,BreakandEnter,TheftOver,Shape,Homicide,Population,Area,Longitude,Latitude,Venues,Income
0,Yonge-St.Clair,50.8,39.4,25.6,4.4,5873.270507,0.0,3189,1161315.0,-79.402363,43.695495,"Park, Trail, Gym / Fitness Center, Convenience...",173751.0
1,York University Heights,109.6,15.8,49.8,3.8,18504.777616,1.3,36764,13246660.0,-79.50872,43.762429,"Grocery Store, Hockey Field, Theater",59770.0
2,Lansing-Westgate,213.0,25.4,63.6,5.4,11112.109419,0.0,10242,5346186.0,-79.409809,43.757227,"Thai Restaurant, Japanese Restaurant, Grocery ...",105947.0
3,Yorkdale-Glen Park,129.8,31.4,57.0,9.0,10079.426837,1.2,18233,6038326.0,-79.471533,43.725117,"Pet Store, Arcade, Furniture / Home Store, Pla...",70539.0
4,Stonegate-Queensway,122.6,22.8,39.4,5.2,11853.189803,1.0,22207,7946202.0,-79.485026,43.641919,,100867.0


In [308]:
with open('data_venues_crimes_income.pkl', 'wb') as f:
    pkl.dump(df, f)