In [6]:
from foursquare_credentials import CLIENT_ID, CLIENT_SECRET, LIMIT, VERSION
import math, requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe
import pandas as pd

To take the size of each region into account, I use a function that calculates the search radius passed to the Foursquare API. The radius depends on the area of the neighbourhood.

In [7]:
def get_circe_radius(area):
    '''Return the radius, in whole meters, of a circle covering ``area``.

    ``area`` is given in square kilometers. The radius follows from
    r = sqrt(area / pi); it is converted from kilometers to meters and
    truncated to an integer.
    '''
    radius_km = math.sqrt(area / math.pi)
    radius_m = radius_km * 1000
    return int(radius_m)

In [8]:
def get_category_type(row):
    '''Extract the name of a venue's first category.

    Parameters
    ----------
    row : mapping or pandas.Series
        Venue record that exposes its category list under either the
        ``'categories'`` key or the ``'venue.categories'`` key.

    Returns
    -------
    str or None
        The ``'name'`` of the first category, or ``None`` when the category
        list is empty or ``None``.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    '''
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback lookup; any other failure
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        categories_list = row['venue.categories']

    # A missing (None) list is treated the same as an empty one.
    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

This function retrieves all venues for each neighbourhood and collects them into a pandas DataFrame.

In [13]:
def get_nearby_venues(names, latitudes, longitudes, areas, radius_factor=0.8, timeout=30):
    '''Collect Foursquare venues around each neighbourhood centre.

    One request is sent to the Foursquare ``venues/explore`` endpoint per
    (name, latitude, longitude, area) tuple. The search radius is the value
    returned by ``get_circe_radius(area)`` scaled by ``radius_factor``.

    Parameters
    ----------
    names, latitudes, longitudes, areas : iterables
        Parallel sequences describing each neighbourhood; they are consumed
        together with ``zip``. ``areas`` is passed to ``get_circe_radius``,
        which expects square kilometers.
    radius_factor : float, optional
        Multiplier applied to the computed radius (default 0.8).
    timeout : float, optional
        Seconds to wait for each HTTP request (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is ``None`` for venues without any category.
        The frame is empty (but still has these columns) when no venue
        is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    '''
    columns = [
        'Neighborhood',
        'Neighborhood Latitude',
        'Neighborhood Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category',
    ]

    rows = []
    for name, lat, lng, area in zip(names, latitudes, longitudes, areas):
        print(name)
        # The radius is sent as whole meters.
        radius = int(get_circe_radius(area) * radius_factor)

        # Let requests build and encode the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': f'{lat},{lng}',
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=timeout,
        )
        # Fail loudly on quota/auth errors instead of hitting a KeyError below.
        response.raise_for_status()
        items = response.json()['response']['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # Venues may have an empty category list; the helper maps
                # that to None instead of raising IndexError.
                get_category_type(venue),
            ))

    # Passing the columns to the constructor keeps an empty result valid.
    return pd.DataFrame(rows, columns=columns)

I load the region information saved in the web-scraping part and use it to query Foursquare. After retrieving all the necessary information, I save it to disk so it can be reused without calling the API again.

In [12]:
# Load the per-postal-code region table for Berlin (semicolon-separated CSV).
berlin_regions = pd.read_csv('berlin_regions.csv', delimiter=';')
berlin_regions.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,City,Administrative Region,Population,Area km²,Population density per km²,Median Age,Neighborhoods,Latitude,Longitude
0,0,10115,Berlin,Berlin,16678,2.421,6888.89,43.0,Mitte,52.533707,13.387224
1,1,10117,Berlin,Berlin,24223,3.321,7293.89,43.0,Mitte,52.518746,13.390193
2,2,10119,Berlin,Berlin,7408,0.857,8644.11,43.0,"Bezirk Pankow, Mitte",52.532666,13.407149
3,3,10178,Berlin,Berlin,14069,1.872,7515.49,43.0,Mitte,52.523474,13.412203
4,4,10179,Berlin,Berlin,15897,2.183,7282.18,43.0,"Luisenstadt, Mitte",52.514591,13.419699


In [18]:
# Query Foursquare once per Berlin row; the postal code is used as the
# neighbourhood label and is printed as each request starts.
berlin_venues = get_nearby_venues(
    names=berlin_regions['Postal Code'],
    latitudes=berlin_regions['Latitude'],
    longitudes=berlin_regions['Longitude'],
    areas=berlin_regions['Area km²']
)

10115
10117
10119
10178
10179
10243
10245
10247
10249
10315
10317
10318
10319
10365
10367
10369
10405
10407
10409
10435
10437
10439
10551
10553
10555
10557
10559
10585
10587
10589
10623
10625
10627
10629
10707
10709
10711
10713
10715
10717
10719
10777
10779
10781
10783
10785
10787
10789
10823
10825
10827
10829
10961
10963
10965
10967
10969
10997
10999
12043
12045
12047
12049
12051
12053
12055
12057
12059
12099
12101
12103
12105
12107
12109
12157
12159
12161
12163
12165
12167
12169
12203
12205
12207
12209
12247
12249
12277
12279
12305
12307
12309
12347
12349
12351
12353
12355
12357
12359
12435
12437
12439
12459
12487
12489
12524
12526
12527
12529
12555
12557
12559
12587
12589
12619
12621
12623
12627
12629
12679
12681
12683
12685
12687
12689
13051
13053
13055
13057
13059
13086
13088
13089
13125
13127
13129
13156
13158
13159
13187
13189
13347
13349
13351
13353
13355
13357
13359
13403
13405
13407
13409
13435
13437
13439
13465
13467
13469
13503
13505
13507
13509
13581
13583
13585
13587
1358

In [19]:
# (rows, columns) of the collected Berlin venue table.
berlin_venues.shape

(5531, 7)

In [22]:
# Report how many distinct venue categories were found for Berlin.
berlin_category_count = len(berlin_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(berlin_category_count))

There are 387 uniques categories.


In [20]:
# Persist the Berlin venues as a semicolon-separated CSV.
# NOTE(review): the DataFrame index is written as well (to_csv default), so a
# plain read_csv will show it as an "Unnamed: 0" column — consider index=False
# after confirming what downstream readers expect.
berlin_venues.to_csv('berlin_venues.csv', sep=';')

In [23]:
# Load the per-postal-code region table for Amsterdam (semicolon-separated CSV).
amsterdam_regions = pd.read_csv('amsterdam_regions.csv', delimiter=';')
amsterdam_regions.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,City,Administrative Region,Population,Area km²,Population density per km²,Median Age,Neighborhoods,Latitude,Longitude
0,0,1011,Amsterdam,North Holland,6606,1.032,6401.16,36.3,"Amsterdam-Centrum, Stadsdeel Centrum",52.371124,4.903752
1,1,1012,Amsterdam,North Holland,7067,1.207,5855.01,36.3,"Amsterdam-Centrum, Centrum, De Wallen, Stadsde...",52.373179,4.89491
2,2,1013,Amsterdam,North Holland,26792,6.3,4252.7,36.3,"Amsterdam-Centrum, Amsterdam-West, Haarlemmerb...",52.387662,4.883396
3,3,1014,Amsterdam,North Holland,15056,2.699,5578.36,36.3,"Amsterdam-West, Bedrijventerrein Sloterdijk, S...",52.393066,4.853503
4,4,1015,Amsterdam,North Holland,5926,0.776,7636.6,36.3,"Amsterdam-Centrum, Grachtengordel-West, Jordaan",52.378205,4.882973


In [24]:
# Query Foursquare once per Amsterdam row; the postal code is used as the
# neighbourhood label and is printed as each request starts.
amsterdam_venues = get_nearby_venues(
    names=amsterdam_regions['Postal Code'],
    latitudes=amsterdam_regions['Latitude'],
    longitudes=amsterdam_regions['Longitude'],
    areas=amsterdam_regions['Area km²']
)

1011
1012
1013
1014
1015
1016
1017
1018
1019
1021
1022
1023
1024
1025
1026
1027
1028
1031
1032
1033
1034
1035
1036
1041
1042
1043
1044
1045
1046
1047
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1071
1072
1073
1074
1075
1076
1077
1078
1079
1081
1082
1083
1086
1087
1091
1092
1093
1094
1095
1096
1097
1098
1101
1102
1103
1104
1105
1106
1107
1108
1109
1114


In [25]:
# (rows, columns) of the collected Amsterdam venue table.
amsterdam_venues.shape

(2966, 7)

In [26]:
# Report how many distinct venue categories were found for Amsterdam.
amsterdam_category_count = len(amsterdam_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(amsterdam_category_count))

There are 304 uniques categories.


In [27]:
# Persist the Amsterdam venues as a semicolon-separated CSV.
# NOTE(review): the DataFrame index is written as well (to_csv default), so a
# plain read_csv will show it as an "Unnamed: 0" column — consider index=False
# after confirming what downstream readers expect.
amsterdam_venues.to_csv('amsterdam_venues.csv', sep=';')