# Import modules.

In [1]:
import numpy as np
import pandas as pd
import json
import requests
from bs4 import BeautifulSoup
import os
import folium
import matplotlib.colors as colors
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from shapely.geometry import shape, Point

# Preparation of data

At first, the following information will be specified:
- the coordinate of the Zurich Hauptbahnhof 
- the number of districts ("Kreis") in Zurich

In [2]:
#Coordinate of Zurich Hauptbahnhof
latitude_ZHB=47.3784
longitude_ZHB=8.5384

#number of districts (kreis)
num_kreis=12

 Foursquare Credentials and Version:

In [3]:
CLIENT_ID = 'PZZDZJQ40QLEMC4OTOWZNFJ5GWAX1RKQ2VWSTJAIJKUFA2PW' # your Foursquare ID
CLIENT_SECRET = 'QSH0EO045TWKZCID2JOPFWNTU4POQS5NUJCG12ZP5114FM0F' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PZZDZJQ40QLEMC4OTOWZNFJ5GWAX1RKQ2VWSTJAIJKUFA2PW
CLIENT_SECRET:QSH0EO045TWKZCID2JOPFWNTU4POQS5NUJCG12ZP5114FM0F


URL will be created. The word "Japanese" will be used for the search query in order to obtain information about Japanese restaurant. 

In [4]:
search_query = 'Japanese'

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 30000 # define radius. This radius should be large enough to cover all districts of Zurich.
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&query={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude_ZHB, 
    longitude_ZHB,
    search_query,
    radius, 
    LIMIT)

results = requests.get(url).json()

In [5]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

A dataframe including the name of a restaurant and the coordinates will be created:

In [6]:
venues = results['response']['groups'][0]['items']
Restaurant = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.neighborhood', 'venue.location.lat', 'venue.location.lng']
Restaurant =Restaurant.loc[:, filtered_columns]

# filter the category for each row
Restaurant['venue.categories'] = Restaurant.apply(get_category_type, axis=1)

# clean columns
Restaurant.columns = [col.split(".")[-1] for col in Restaurant.columns]

Restaurant.head()

Unnamed: 0,name,categories,neighborhood,lat,lng
0,Kokoro,Japanese Restaurant,Kreis 4,47.38068,8.526546
1,Miki Ramen,Japanese Restaurant,,47.375152,8.516653
2,Ikoo,Japanese Restaurant,Kreis 4,47.375044,8.528807
3,Japan-Restaurant Bimi,Japanese Restaurant,,47.363665,8.549325
4,Samurai,Japanese Restaurant,,47.373747,8.527697


In [7]:
num_restaurant=Restaurant.shape[0]
print('{} restaurants were returned by Foursquare.'.format(num_restaurant))

63 restaurants were returned by Foursquare.


The name of the column "neighborhood" will be changed to "Kreis." This column will be used to show which district the restaurant belongs to.

In [8]:
# Change the name of the column
Restaurant = Restaurant.rename(columns={'neighborhood': 'Kreis'})

In [9]:
# Define a function for showing which districts the coordinate (lat and lon) belongs to. 
def belong(geo_json, lat, lon):
    point = Point(lon, lat)
    for feature in geo_json['features']:
        polygon = shape(feature['geometry'])
        if polygon.contains(point):
            return feature['properties']['kname'] 
    return None

The geojson data for showing districts (kreis) in Zurich was obtained here:  
https://data.stadt-zuerich.ch/dataset/5fd779b6-64b3-4320-baa2-40e4de59dfc6/resource/261666ee-2ca0-43cb-927c-0f610e96cea1/download/stadtkreise.json

In [10]:
currentdir=os.getcwd()
datapath=currentdir+'/kreis.geojson'
#https://data.stadt-zuerich.ch/dataset/5fd779b6-64b3-4320-baa2-40e4de59dfc6/resource/261666ee-2ca0-43cb-927c-0f610e96cea1/download/stadtkreise.json
#f=open('//Users//kotarosonoda//Documents//Coursera//Applied Data Science Capstone (IBM)//kreis.geojson')
f=open(datapath)
json_data=json.load(f)

Here, two modifications will be made for the dataframe.
- A district (Kreis) will be assigned to each restaurant based on the coordinate of the restaurant. If the coordinate of the restaurant is out of all districts of Zurich, the corresponding data will be discarded. 
- For some reasons, a few Chinese restaurants are also included in the data frame. They will be omitted from the data frame. 

In [11]:
#Assign district (Kreis)
num_omit=1
for j in np.linspace(0,num_restaurant-1,num_restaurant):
    lat=Restaurant['lat'][j]
    lon=Restaurant['lng'][j]

    Assignment_Kreis = belong(json_data, lat, lon)
#    Restaurant_new['Kreis'][j]=Assignment_Kreis
    
    if Assignment_Kreis==None:
        if num_omit==1:
            omitrows=[j]
            num_omit=2
        else:
            omitrows.append(j)
    else:
        Assignment_Kreis=Assignment_Kreis.strip('Kreis ')

        Restaurant['Kreis'][j]=int(Assignment_Kreis)

    if Restaurant['categories'][j]=='Chinese Restaurant':
        if num_omit==1:
            omitrows=[j]
            num_omit=2
        else:
            omitrows.append(j)
            

# Omit rows with "None" and/or "Chinese restaurant"
omitrows=[int(nn) for nn in omitrows]
Restaurant.drop(omitrows, axis=0,inplace=True)

#sort data based on the Kreis
Restaurant.sort_values('Kreis',inplace=True)

#reset index
Restaurant.reset_index(inplace=True,drop=True)

Restaurant

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,name,categories,Kreis,lat,lng
0,Sora Sushi,Sushi Restaurant,1,47.377623,8.539133
1,Negishi Sushi Bar,Sushi Restaurant,1,47.371517,8.535471
2,Yooji's,Sushi Restaurant,1,47.367173,8.546464
3,O'Tabe,Japanese Restaurant,1,47.368881,8.538315
4,Sushishop,Sushi Restaurant,1,47.372857,8.535131
5,Yooji's sushi deli,Sushi Restaurant,1,47.378033,8.538804
6,Barfüsser,Sushi Restaurant,1,47.373601,8.544591
7,Yooji's,Japanese Restaurant,1,47.376387,8.539659
8,Nippon Sushi Bar,Japanese Restaurant,1,47.374152,8.536871
9,Negishi Sushi Bar,Japanese Restaurant,1,47.370853,8.543192


In [12]:
print('{} restaurants in Zurich were found by Foursquare.'.format(Restaurant.shape[0]))

42 restaurants in Zurich were found by Foursquare.


The number of Japanese restaurants is summarized for each district.

In [13]:
df_kreis_restaurant=pd.DataFrame({ 'Kreis' : pd.Series(range(0,num_kreis+1,1),dtype='int'),
                                  'Number of restaurants' : 0})

for j1 in range(0,num_kreis+1,1):
    if (j1 in Restaurant['Kreis'].value_counts().index)==True:
        df_kreis_restaurant['Number of restaurants'][j1]=Restaurant['Kreis'].value_counts()[j1]

df_kreis_restaurant.drop([0], axis=0, inplace=True)

#    Restaurant['Kreis'].value_counts().index[j]=int(Restaurant['Kreis'].value_counts().index[j])
#    print(a)

df_kreis_restaurant

Unnamed: 0,Kreis,Number of restaurants
1,1,12
2,2,3
3,3,3
4,4,7
5,5,6
6,6,1
7,7,0
8,8,4
9,9,2
10,10,1
