### Data
The data I will be using for this project are listed below:

-Table containing list of London areas  
-London crime rates by borough  
-London unemployment rates by borough  
-Average house prices in London by borough  
-Foursquare API data

I will describe the content of each data, display examples, its sources and how it will be used.

In [1]:
import pandas as pd
import numpy as np
import json
from urllib import request
!pip install bs4
from bs4 import BeautifulSoup
!pip install geocoder
import geocoder
!pip install OSGridConverter
from OSGridConverter import grid2latlong
import requests
import json
from pandas.io.json import json_normalize

Collecting bs4
  Downloading https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f456174139ec089c769f89a94a1a4025fe967691de971f314/bs4-0.0.1.tar.gz
Collecting beautifulsoup4 (from bs4)
[?25l  Downloading https://files.pythonhosted.org/packages/66/25/ff030e2437265616a1e9b25ccc864e0371a0bc3adb7c5a404fd661c6f4f6/beautifulsoup4-4.9.1-py3-none-any.whl (115kB)
[K     |████████████████████████████████| 122kB 23.4MB/s eta 0:00:01
[?25hCollecting soupsieve>1.2 (from beautifulsoup4->bs4)
  Downloading https://files.pythonhosted.org/packages/6f/8f/457f4a5390eeae1cc3aeab89deb7724c965be841ffca6cfca9197482e470/soupsieve-2.0.1-py3-none-any.whl
Building wheels for collected packages: bs4
  Building wheel for bs4 (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/jupyterlab/.cache/pip/wheels/a0/b0/b2/4f80b9456b87abedbc0bf2d52235414c3467d8889be38dd472
Successfully built bs4
Installing collected packages: soupsieve, beautifulsoup4, bs4
Successfully installed beautifulsoup4-4.9.1 bs4-0.0

### 1.Table containing list of London areas
This table will be scraped from the following Wikipedia page: https://en.wikipedia.org/wiki/List_of_areas_of_London  
This will be used as it is a useful table sorting different areas in London by borough.  
Example dataframe that will be used is shown below

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_areas_of_London'
response = request.urlopen(url)
soup = BeautifulSoup(response)

response.close()
table = soup.find('table',attrs={'class':'wikitable sortable'})
table_data = [[cell.text for cell in row("td")]
                         for row in table("tr")]
del table_data[0]

In [3]:
londonarea = pd.DataFrame(table_data)
londonarea.head()

Unnamed: 0,0,1,2,3,4,5
0,Abbey Wood,"Bexley, Greenwich [7]",LONDON,SE2,20,TQ465785\n
1,Acton,"Ealing, Hammersmith and Fulham[8]",LONDON,"W3, W4",20,TQ205805\n
2,Addington,Croydon[8],CROYDON,CR0,20,TQ375645\n
3,Addiscombe,Croydon[8],CROYDON,CR0,20,TQ345665\n
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14",20,TQ478728\n


In [4]:
londonarea

Unnamed: 0,0,1,2,3,4,5
0,Abbey Wood,"Bexley, Greenwich [7]",LONDON,SE2,020,TQ465785\n
1,Acton,"Ealing, Hammersmith and Fulham[8]",LONDON,"W3, W4",020,TQ205805\n
2,Addington,Croydon[8],CROYDON,CR0,020,TQ375645\n
3,Addiscombe,Croydon[8],CROYDON,CR0,020,TQ345665\n
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14",020,TQ478728\n
...,...,...,...,...,...,...
528,Woolwich,Greenwich,LONDON,SE18,020,TQ435795\n
529,Worcester Park,"Sutton, Kingston upon Thames",WORCESTER PARK,KT4,020,TQ225655\n
530,Wormwood Scrubs,Hammersmith and Fulham,LONDON,W12,020,TQ225815\n
531,Yeading,Hillingdon,HAYES,UB4,020,TQ115825\n


In [5]:
londonarea.drop(columns=[2,3,4],inplace=True)
londonarea.columns = ['Location','Borough','OSgridref']
for index,row in londonarea.iterrows():
    try:
        bst = row['Borough'].index('[')
        londonarea.iloc[index,1] = (londonarea.iloc[index,1])[0:bst]
    except:   
        londonarea.iloc[index,1] = (londonarea.iloc[index,1])
        
    ends = len(row['OSgridref']) -1
    londonarea.iloc[index,2] = londonarea.iloc[index,2][0:ends]

londonarea['Borough'] = londonarea['Borough'].replace(['Kensington and ChelseaHammersmith and Fulham'],'Kensington and Chelsea, Hammersmith and Fulham')
londonarea['Borough'] = londonarea['Borough'].replace(['Camden and Islington'],['Camden & Islington'])
londonarea['Borough'] = londonarea['Borough'].replace(['Haringey and Barnet'],['Haringey & Barnet'])

In [6]:
temp = pd.DataFrame(londonarea['Borough'].str.split(', | & ',expand=True).stack())
temp[0].unique()

array(['Bexley', ' Greenwich ', 'Ealing', 'Hammersmith and Fulham',
       'Croydon', 'Redbridge', 'City', 'Westminster', 'Brent', 'Bromley',
       'Islington', 'Havering', 'Barnet', 'Enfield', 'Wandsworth',
       'Southwark', 'Barking and Dagenham', 'Richmond upon Thames',
       'Newham', 'Sutton', 'Lewisham', 'Harrow', 'Camden',
       'Kingston upon Thames', 'Tower Hamlets', 'Greenwich', 'Haringey',
       'Hounslow', 'Lambeth', 'Kensington and Chelsea', 'Waltham Forest',
       'Merton', 'Hillingdon', 'Hackney', 'Dartford'], dtype=object)

In [7]:
temp.reset_index(level=1,drop=True,inplace=True)
temp

Unnamed: 0,0
0,Bexley
0,Greenwich
1,Ealing
1,Hammersmith and Fulham
2,Croydon
...,...
529,Sutton
529,Kingston upon Thames
530,Hammersmith and Fulham
531,Hillingdon


In [8]:
temp[0].unique()
temp.columns= ['Boroughs']

In [9]:
temp['Boroughs'] = temp['Boroughs'].replace([' Greenwich '],['Greenwich'])

In [10]:
print(temp['Boroughs'].unique())
print(np.shape(temp['Boroughs'].unique()))

['Bexley' 'Greenwich' 'Ealing' 'Hammersmith and Fulham' 'Croydon'
 'Redbridge' 'City' 'Westminster' 'Brent' 'Bromley' 'Islington' 'Havering'
 'Barnet' 'Enfield' 'Wandsworth' 'Southwark' 'Barking and Dagenham'
 'Richmond upon Thames' 'Newham' 'Sutton' 'Lewisham' 'Harrow' 'Camden'
 'Kingston upon Thames' 'Tower Hamlets' 'Haringey' 'Hounslow' 'Lambeth'
 'Kensington and Chelsea' 'Waltham Forest' 'Merton' 'Hillingdon' 'Hackney'
 'Dartford']
(34,)


In [11]:
londonarea = londonarea.join(temp)
londonarea

Unnamed: 0,Location,Borough,OSgridref,Boroughs
0,Abbey Wood,"Bexley, Greenwich",TQ465785,Bexley
0,Abbey Wood,"Bexley, Greenwich",TQ465785,Greenwich
1,Acton,"Ealing, Hammersmith and Fulham",TQ205805,Ealing
1,Acton,"Ealing, Hammersmith and Fulham",TQ205805,Hammersmith and Fulham
2,Addington,Croydon,TQ375645,Croydon
...,...,...,...,...
529,Worcester Park,"Sutton, Kingston upon Thames",TQ225655,Sutton
529,Worcester Park,"Sutton, Kingston upon Thames",TQ225655,Kingston upon Thames
530,Wormwood Scrubs,Hammersmith and Fulham,TQ225815,Hammersmith and Fulham
531,Yeading,Hillingdon,TQ115825,Hillingdon


In [12]:
londonarea[londonarea['Borough']=='Kensington and Chelsea, Hammersmith and Fulham']

Unnamed: 0,Location,Borough,OSgridref,Boroughs
68,Brompton,"Kensington and Chelsea, Hammersmith and Fulham",TQ275795,Kensington and Chelsea
68,Brompton,"Kensington and Chelsea, Hammersmith and Fulham",TQ275795,Hammersmith and Fulham


In [13]:
londonarea.drop(columns=['Borough'],inplace=True)
londonarea

Unnamed: 0,Location,OSgridref,Boroughs
0,Abbey Wood,TQ465785,Bexley
0,Abbey Wood,TQ465785,Greenwich
1,Acton,TQ205805,Ealing
1,Acton,TQ205805,Hammersmith and Fulham
2,Addington,TQ375645,Croydon
...,...,...,...
529,Worcester Park,TQ225655,Sutton
529,Worcester Park,TQ225655,Kingston upon Thames
530,Wormwood Scrubs,TQ225815,Hammersmith and Fulham
531,Yeading,TQ115825,Hillingdon


In [14]:
londonarea = londonarea[londonarea.OSgridref != '']
londonarea.reset_index(drop=True,inplace=True)

In [15]:
londonarea

Unnamed: 0,Location,OSgridref,Boroughs
0,Abbey Wood,TQ465785,Bexley
1,Abbey Wood,TQ465785,Greenwich
2,Acton,TQ205805,Ealing
3,Acton,TQ205805,Hammersmith and Fulham
4,Addington,TQ375645,Croydon
...,...,...,...
559,Worcester Park,TQ225655,Sutton
560,Worcester Park,TQ225655,Kingston upon Thames
561,Wormwood Scrubs,TQ225815,Hammersmith and Fulham
562,Yeading,TQ115825,Hillingdon


In [16]:
londonarea = londonarea[londonarea['Boroughs'] != 'Dartford']
londonarea.reset_index(inplace=True,drop=True)

In [17]:
londonarea

Unnamed: 0,Location,OSgridref,Boroughs
0,Abbey Wood,TQ465785,Bexley
1,Abbey Wood,TQ465785,Greenwich
2,Acton,TQ205805,Ealing
3,Acton,TQ205805,Hammersmith and Fulham
4,Addington,TQ375645,Croydon
...,...,...,...
558,Worcester Park,TQ225655,Sutton
559,Worcester Park,TQ225655,Kingston upon Thames
560,Wormwood Scrubs,TQ225815,Hammersmith and Fulham
561,Yeading,TQ115825,Hillingdon


In [18]:

l=grid2latlong('TQ465785')
latitude = l.latitude
longitude = l.longitude


In [19]:
latitude

51.48648353740194

In [20]:
longitude
eg = [l.latitude,l.longitude]
eg

[51.48648353740194, 0.10931788814329141]

In [21]:
from geocoder import geocodefarm
test = geocoder.geocodefarm(eg,method='reverse')
test

<[OK] Geocodefarm - Reverse [82 Rochdale Road, Abbey Wood, SE2 0XB, United Kingdom]>

In [22]:
lat = np.zeros(563)
lng = np.zeros(563)
for index,row in londonarea.iterrows():
    lat_long = None
    gridref = row['OSgridref']
    while(lat_long==None):
        l = None
        l = grid2latlong(gridref)
        lat_long = [l.latitude, l.longitude]
    lat[int(index)] = lat_long[0]
    lng[int(index)] = lat_long[1]

In [23]:
londonarea['latitude'] = lat
londonarea['longitude'] = lng

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [24]:
londonarea
londonarea.to_csv('indepthboroughs.csv')

In [25]:
eg = [londonarea.iloc[4,3],londonarea.iloc[4,4]]
print(eg)
test = geocoder.geocodefarm(eg,method='reverse')
test

[51.362934332501695, -0.025779934554391835]


<[OK] Geocodefarm - Reverse [Gate House Bridle Way, Croydon, CR0 5AH, United Kingdom]>

In [26]:
url = 'https://www.finder.com/uk/london-crime-statistics'
response = request.urlopen(url)
soup = BeautifulSoup(response)
response.close()
#table = soup.findAll('table')
table = soup.find('table',attrs={'class':'luna-table js-tablesorter'})
table_data = [[cell.text for cell in row("td")]
                         for row in table("tr")]
del table_data[0]

In [27]:
table_data

[['Westminster', '73,569'],
 ['Wandsworth', '27,240'],
 ['Waltham Forest', '25,930/td>'],
 ['Tower Hamlets', '35,448'],
 ['Sutton', '14,857'],
 ['Southwark', '38,778'],
 ['Richmond upon Thames', '13,406'],
 ['Redbridge', '25,379'],
 ['Newham', '37,090'],
 ['Merton', '15,162'],
 ['Lewisham', '30,498'],
 ['Lambeth', '36,197'],
 ['Kingston upon Thames', '13,225'],
 ['Kensington and Chelsea', '24,294'],
 ['Islington', '30,376'],
 ['Hounslow', '27,896'],
 ['Hillingdon', '28,583'],
 ['Havering', '19,180'],
 ['Harrow', '17,901'],
 ['Haringey', '32,802'],
 ['Hammersmith and Fulham', '23,359'],
 ['Hackney', '34,482'],
 ['Greenwich', '30,000'],
 ['Enfield', '32,071'],
 ['Ealing', '32,828'],
 ['Croydon', '36,779'],
 ['City', '8,268'],
 ['Camden', '38,520'],
 ['Bromley', '25,792'],
 ['Brent', '31,706'],
 ['Bexley', '18,308'],
 ['Barnet', '31,892'],
 ['Barking and Dagenham', '31,892']]

In [28]:
crimes = pd.DataFrame(table_data)
crimes

Unnamed: 0,0,1
0,Westminster,73569
1,Wandsworth,27240
2,Waltham Forest,"25,930/td>"
3,Tower Hamlets,35448
4,Sutton,14857
5,Southwark,38778
6,Richmond upon Thames,13406
7,Redbridge,25379
8,Newham,37090
9,Merton,15162


In [29]:
crimes.columns=['Boroughs','CrimeOccurence']

In [30]:
londonframe = pd.merge(londonarea, crimes, on=['Boroughs'])

In [31]:
londonframe

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,18308
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,18308
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,18308
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,18308
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,18308
...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,34482
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,34482
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,34482
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,34482


In [32]:
print(londonframe.isnull().any())

Location          False
OSgridref         False
Boroughs          False
latitude          False
longitude         False
CrimeOccurence    False
dtype: bool


In [33]:
londonframe[londonframe['CrimeOccurence']=='']

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence


In [34]:
londonframe[londonframe['latitude']=='']

  res_values = method(rvalues)


Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence


In [35]:
londonframe[londonframe['longitude']=='']

  res_values = method(rvalues)


Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence


In [36]:
np.shape(londonarea['Boroughs'].unique())

(33,)

In [37]:
Filter = londonarea['Boroughs'].unique()

In [38]:
Filter = list(Filter)
Filter

['Bexley',
 'Greenwich',
 'Ealing',
 'Hammersmith and Fulham',
 'Croydon',
 'Redbridge',
 'City',
 'Westminster',
 'Brent',
 'Bromley',
 'Islington',
 'Havering',
 'Barnet',
 'Enfield',
 'Wandsworth',
 'Southwark',
 'Barking and Dagenham',
 'Richmond upon Thames',
 'Newham',
 'Sutton',
 'Lewisham',
 'Harrow',
 'Camden',
 'Kingston upon Thames',
 'Tower Hamlets',
 'Haringey',
 'Hounslow',
 'Lambeth',
 'Kensington and Chelsea',
 'Waltham Forest',
 'Merton',
 'Hillingdon',
 'Hackney']

In [39]:
url = 'https://www.gmblondon.org.uk/news/16-boroughs-in-london-have-employment-rate-below-uk-average.html#:~:text=In%20Sutton%2C%2082.4%25%20of%20the,Richmond%20upon%20Thames%20with%2078.9%25.'
response = request.urlopen(url)
soup = BeautifulSoup(response)
response.close()

In [40]:
table = soup.find('table')
table_data = [[cell.text for cell in row("td")]
                         for row in table("tr")]

In [41]:
employment = pd.DataFrame(table_data)
employment

Unnamed: 0,0,1,2,3
0,\n\t\t\tAnnual Population Survey – April 2016-...,,,
1,,,\n\t\t\tEmployment rate - aged 16-64\n\t\t\t,
2,,,\n\t\t\tnumber\n\t\t\t,\n\t\t\t%\n\t\t\t
3,,\n\t\t\tEngland\n\t\t\t,"\n\t\t\t25,725,800\n\t\t\t",\n\t\t\t74.4\n\t\t\t
4,,\n\t\t\tGreat Britain\n\t\t\t,"\n\t\t\t29,589,700\n\t\t\t",\n\t\t\t74.2\n\t\t\t
5,,\n\t\t\tUnited Kingdom\n\t\t\t,"\n\t\t\t30,395,200\n\t\t\t",\n\t\t\t74.0\n\t\t\t
6,,,,
7,,\n\t\t\tLondon\n\t\t\t,"\n\t\t\t4,428,500\n\t\t\t",\n\t\t\t73.8\n\t\t\t
8,\n\t\t\trank\n\t\t\t,,,
9,\n\t\t\t1\n\t\t\t,\n\t\t\tSutton\n\t\t\t,"\n\t\t\t106,800\n\t\t\t",\n\t\t\t82.4\n\t\t\t


In [42]:
employment.drop(index=[0,1,2,3,4,5,6,7,8],inplace=True)

In [43]:
employment.reset_index(drop=True,inplace=True)
employment.drop(columns=[0],inplace=True)
employment.columns = ['Boroughs','number','percentage']

In [44]:
temp = pd.DataFrame(employment['Boroughs'].str.split('\n\t\t\t',expand=True))
templ = pd.DataFrame(employment['percentage'].str.split('\n\t\t\t',expand=True))

In [45]:
temp = temp[temp.columns[1]]
templ = templ[templ.columns[1]]
employment = pd.DataFrame([temp,templ])

In [46]:
employment

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
1,Sutton,City of London,Lambeth,Wandsworth,Bromley,Richmond upon Thames,Havering,Croydon,Merton,Lewisham,...,Redbridge,Hounslow,Camden,Enfield,Newham,Westminster,Brent,Barking and Dagenham,Tower Hamlets,Kensington and Chelsea
1,82.4,82.1,81.9,80.3,79.0,78.9,78.3,77.8,77.8,77.4,...,71.6,71.5,70.2,70.1,68.7,68.0,67.5,65.8,65.3,64.5


In [47]:
employment = employment.transpose()
employment

Unnamed: 0,1,1.1
0,Sutton,82.4
1,City of London,82.1
2,Lambeth,81.9
3,Wandsworth,80.3
4,Bromley,79.0
5,Richmond upon Thames,78.9
6,Havering,78.3
7,Croydon,77.8
8,Merton,77.8
9,Lewisham,77.4


In [48]:
employment.columns=['Boroughs','employment rate']
employment['Boroughs'] = employment['Boroughs'].replace(['City of London'],['City'])
employment

Unnamed: 0,Boroughs,employment rate
0,Sutton,82.4
1,City,82.1
2,Lambeth,81.9
3,Wandsworth,80.3
4,Bromley,79.0
5,Richmond upon Thames,78.9
6,Havering,78.3
7,Croydon,77.8
8,Merton,77.8
9,Lewisham,77.4


In [49]:
londonframe = pd.merge(londonframe,employment,on=['Boroughs'])

In [50]:
londonframe

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence,employment rate
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,18308,76.0
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,18308,76.0
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,18308,76.0
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,18308,76.0
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,18308,76.0
...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,34482,72.8
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,34482,72.8
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,34482,72.8
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,34482,72.8


In [51]:
population = pd.read_csv('housing-density-borough.csv')
population

Unnamed: 0,Code,Name,Year,Source,Population,Inland_Area _Hectares,Total_Area_Hectares,Population_per_hectare,Square_Kilometres,Population_per_square_kilometre
0,E09000001,City of London,1999,ONS MYE,6581,290.4,314.9,22.7,2.9,2266.2
1,E09000001,City of London,2000,ONS MYE,7014,290.4,314.9,24.2,2.9,2415.3
2,E09000001,City of London,2001,ONS MYE,7359,290.4,314.9,25.3,2.9,2534.1
3,E09000001,City of London,2002,ONS MYE,7280,290.4,314.9,25.1,2.9,2506.9
4,E09000001,City of London,2003,ONS MYE,7115,290.4,314.9,24.5,2.9,2450.1
...,...,...,...,...,...,...,...,...,...,...
1867,E13000002,Outer London,2046,GLA Population Projections,6573194,125423.6,126675.6,52.4,1254.2,5240.8
1868,E13000002,Outer London,2047,GLA Population Projections,6598789,125423.6,126675.6,52.6,1254.2,5261.2
1869,E13000002,Outer London,2048,GLA Population Projections,6622921,125423.6,126675.6,52.8,1254.2,5280.4
1870,E13000002,Outer London,2049,GLA Population Projections,6647527,125423.6,126675.6,53.0,1254.2,5300.1


In [52]:
pop = population.filter(items=['Name','Population','Year'])

In [53]:
pop.columns = ['Boroughs','Population','Year']
pop['Boroughs'] = pop['Boroughs'].replace(['City of London'],['City'])
pop

Unnamed: 0,Boroughs,Population,Year
0,City,6581,1999
1,City,7014,2000
2,City,7359,2001
3,City,7280,2002
4,City,7115,2003
...,...,...,...
1867,Outer London,6573194,2046
1868,Outer London,6598789,2047
1869,Outer London,6622921,2048
1870,Outer London,6647527,2049


In [54]:
pop = pop[pop['Year']==2019].reset_index(drop=True)

In [55]:
pop.drop(index=[33,34,35],inplace=True)

In [56]:
pop

Unnamed: 0,Boroughs,Population,Year
0,City,7953,2019
1,Barking and Dagenham,214858,2019
2,Barnet,402363,2019
3,Bexley,252885,2019
4,Brent,340710,2019
5,Bromley,334292,2019
6,Camden,255526,2019
7,Croydon,396548,2019
8,Ealing,354184,2019
9,Enfield,339480,2019


In [57]:
lf = 0

In [58]:
pop.sort_values('Boroughs',inplace=True)

In [59]:
pop

Unnamed: 0,Boroughs,Population,Year
1,Barking and Dagenham,214858,2019
2,Barnet,402363,2019
3,Bexley,252885,2019
4,Brent,340710,2019
5,Bromley,334292,2019
6,Camden,255526,2019
0,City,7953,2019
7,Croydon,396548,2019
8,Ealing,354184,2019
9,Enfield,339480,2019


In [60]:
lf = pd.merge(londonframe,pop,on=['Boroughs'])

In [61]:
lf

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence,employment rate,Population,Year
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,18308,76.0,252885,2019
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,18308,76.0,252885,2019
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,18308,76.0,252885,2019
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,18308,76.0,252885,2019
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,18308,76.0,252885,2019
...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,34482,72.8,286425,2019
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,34482,72.8,286425,2019
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,34482,72.8,286425,2019
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,34482,72.8,286425,2019


In [62]:
np.shape(lf['Boroughs'].unique())

(33,)

In [63]:
for index,row in lf.iterrows():
    lf.iloc[index,5] = row['CrimeOccurence'].replace(',','').replace('/td>','')
lf

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,CrimeOccurence,employment rate,Population,Year
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,18308,76.0,252885,2019
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,18308,76.0,252885,2019
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,18308,76.0,252885,2019
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,18308,76.0,252885,2019
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,18308,76.0,252885,2019
...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,34482,72.8,286425,2019
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,34482,72.8,286425,2019
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,34482,72.8,286425,2019
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,34482,72.8,286425,2019


In [64]:
lf['CrimeRate'] = (lf['CrimeOccurence'].astype('int64')/lf['Population'])*100

In [65]:
lf.drop(columns=['CrimeOccurence','Population','Year'],inplace=True)

In [66]:
lf.reset_index(drop=True,inplace=True)

In [67]:
lf

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654
...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754


In [68]:
price = pd.read_csv('land-registry-house-prices-borough.csv')

In [69]:
price

Unnamed: 0,Code,Area,Year ending Dec 1995,Year ending Mar 1996,Year ending Jun 1996,Year ending Sep 1996,Year ending Dec 1996,Year ending Mar 1997,Year ending Jun 1997,Year ending Sep 1997,...,Year ending Sep 2015,Year ending Dec 2015,Year ending Mar 2016,Year ending Jun 2016,Year ending Sep 2016,Year ending Dec 2016,Year ending Mar 2017,Year ending Jun 2017,Year ending Sep 2017,Year ending Dec 2017
0,,,,,,,,,,,...,,,,,,,,,,
1,E09000001,City of London,105000.0,112500.0,112375.0,121500.0,124250.0,126503.0,130000.0,130000.0,...,823750.0,800000.0,795000.0,799750.0,800000.0,831400.0,831400.0,812500.0,835000.0,835000.0
2,E09000002,Barking and Dagenham,49000.0,49995.0,50000.0,49995.0,49995.0,49950.0,50000.0,52000.0,...,235000.0,245500.0,255000.0,265000.0,277000.0,290000.0,300000.0,302000.0,302000.0,300000.0
3,E09000003,Barnet,85125.0,85000.0,85000.0,85500.0,87000.0,89500.0,91950.0,95000.0,...,440000.0,450000.0,455000.0,460000.0,470000.0,476500.0,482951.0,500000.0,512000.0,533500.0
4,E09000004,Bexley,62000.0,62000.0,63125.0,63500.0,65000.0,67000.0,68500.0,70000.0,...,271000.0,285000.0,295000.0,310000.0,322000.0,328000.0,330000.0,332000.0,336000.0,342500.0
5,E09000005,Brent,68000.0,69000.0,70000.0,71000.0,73000.0,75000.0,76000.0,80000.0,...,400000.0,415000.0,425000.0,425000.0,450000.0,445000.0,450000.0,465150.0,475000.0,499950.0
6,E09000006,Bromley,76625.0,77000.0,77000.0,79500.0,80000.0,81000.0,82500.0,84500.0,...,365000.0,375000.0,385000.0,399000.0,408000.0,420000.0,425000.0,425000.0,435000.0,440000.0
7,E09000007,Camden,114000.0,112000.0,117000.0,121000.0,127500.0,132950.0,135000.0,140000.0,...,680750.0,705000.0,740000.0,748550.0,737500.0,735000.0,717500.0,750000.0,768750.0,760000.0
8,E09000008,Croydon,60000.0,60000.0,61000.0,63000.0,64500.0,65850.0,67000.0,69000.0,...,295000.0,306000.0,319950.0,325000.0,331000.0,338000.0,345000.0,353595.0,360523.0,365000.0
9,E09000009,Ealing,75000.0,75000.0,76000.0,78000.0,80000.0,81500.0,83500.0,87000.0,...,415000.0,432000.0,445000.0,451500.0,460000.0,465000.0,465050.0,475000.0,488000.0,485000.0


In [70]:
houseprice = price.filter(items=['Area','Year ending Dec 2017'])

In [71]:
houseprice['Area'] = houseprice['Area'].replace(['City of London'],['City'])

In [72]:
houseprice = houseprice[houseprice['Area'].isin(Filter)]

In [73]:
houseprice

Unnamed: 0,Area,Year ending Dec 2017
1,City,835000
2,Barking and Dagenham,300000
3,Barnet,533500
4,Bexley,342500
5,Brent,499950
6,Bromley,440000
7,Camden,760000
8,Croydon,365000
9,Ealing,485000
10,Enfield,402500


In [74]:
houseprice.reset_index(drop=True,inplace=True)

In [75]:
houseprice.columns=['Boroughs','Avg_house_price']
lf = pd.merge(lf,houseprice,on=['Boroughs'])

In [76]:
np.shape(lf['Boroughs'].unique())

(33,)

In [77]:
url = 'https://en.wikipedia.org/wiki/List_of_London_boroughs'
response = request.urlopen(url)
soup = BeautifulSoup(response)
response.close()

In [78]:
table = soup.findAll('table',attrs={'class':'wikitable sortable'})[0]
table_data = [[cell.text for cell in row("td")]
                         for row in table("tr")]
table2= soup.findAll('table',attrs={'class':'wikitable sortable'})[1]
table_data2 = [[cell.text for cell in row("td")]
                         for row in table2("tr")]

In [79]:
del table_data[0]
del table_data2[0]

In [80]:
binfo = pd.DataFrame(table_data)
cinfo = pd.DataFrame(table_data2)
binfo

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Barking and Dagenham [note 1]\n,\n,\n,Barking and Dagenham London Borough Council\n,Labour\n,"Town Hall, 1 Town Square\n",13.93\n,"194,352\n",51°33′39″N 0°09′21″E﻿ / ﻿51.5607°N 0.1557°E﻿ /...,25\n
1,Barnet\n,\n,\n,Barnet London Borough Council\n,Conservative\n,"Barnet House, 2 Bristol Avenue, Colindale\n",33.49\n,"369,088\n",51°37′31″N 0°09′06″W﻿ / ﻿51.6252°N 0.1517°W﻿ /...,31\n
2,Bexley\n,\n,\n,Bexley London Borough Council\n,Conservative\n,"Civic Offices, 2 Watling Street\n",23.38\n,"236,687\n",51°27′18″N 0°09′02″E﻿ / ﻿51.4549°N 0.1505°E﻿ /...,23\n
3,Brent\n,\n,\n,Brent London Borough Council\n,Labour\n,"Brent Civic Centre, Engineers Way\n",16.70\n,"317,264\n",51°33′32″N 0°16′54″W﻿ / ﻿51.5588°N 0.2817°W﻿ /...,12\n
4,Bromley\n,\n,\n,Bromley London Borough Council\n,Conservative\n,"Civic Centre, Stockwell Close\n",57.97\n,"317,899\n",51°24′14″N 0°01′11″E﻿ / ﻿51.4039°N 0.0198°E﻿ /...,20\n
5,Camden\n,Y\n,\n,Camden London Borough Council\n,Labour\n,"Camden Town Hall, Judd Street\n",8.40\n,"229,719\n",51°31′44″N 0°07′32″W﻿ / ﻿51.5290°N 0.1255°W﻿ /...,11\n
6,Croydon\n,\n,\n,Croydon London Borough Council\n,Labour\n,"Bernard Weatherill House, Mint Walk\n",33.41\n,"372,752\n",51°22′17″N 0°05′52″W﻿ / ﻿51.3714°N 0.0977°W﻿ /...,19\n
7,Ealing\n,\n,\n,Ealing London Borough Council\n,Labour\n,"Perceval House, 14-16 Uxbridge Road\n",21.44\n,"342,494\n",51°30′47″N 0°18′32″W﻿ / ﻿51.5130°N 0.3089°W﻿ /...,13\n
8,Enfield\n,\n,\n,Enfield London Borough Council\n,Labour\n,"Civic Centre, Silver Street\n",31.74\n,"320,524\n",51°39′14″N 0°04′48″W﻿ / ﻿51.6538°N 0.0799°W﻿ /...,30\n
9,Greenwich [note 2]\n,Y [note 3]\n,Royal\n,Greenwich London Borough Council\n,Labour\n,"Woolwich Town Hall, Wellington Street\n",18.28\n,"264,008\n",51°29′21″N 0°03′53″E﻿ / ﻿51.4892°N 0.0648°E﻿ /...,22\n


In [81]:
binfo = binfo.filter(items=[0,6])
cinfo = cinfo.filter(items=[0,6])

In [82]:
binfo.columns = ['Boroughs','Area(sqmi)']
cinfo.columns = ['Boroughs','Area(sqmi)']

In [83]:
cinfo

Unnamed: 0,Boroughs,Area(sqmi)
0,City of London\n,1.12\n


In [84]:
binfo = binfo.append(cinfo)
binfo

Unnamed: 0,Boroughs,Area(sqmi)
0,Barking and Dagenham [note 1]\n,13.93\n
1,Barnet\n,33.49\n
2,Bexley\n,23.38\n
3,Brent\n,16.70\n
4,Bromley\n,57.97\n
5,Camden\n,8.40\n
6,Croydon\n,33.41\n
7,Ealing\n,21.44\n
8,Enfield\n,31.74\n
9,Greenwich [note 2]\n,18.28\n


In [85]:
binfo.reset_index(inplace=True,drop=True)

In [86]:
binfo['Boroughs'] = binfo['Boroughs'].str.rstrip('\n')
binfo['Area(sqmi)'] = binfo['Area(sqmi)'].str.rstrip('\n')
binfo

Unnamed: 0,Boroughs,Area(sqmi)
0,Barking and Dagenham [note 1],13.93
1,Barnet,33.49
2,Bexley,23.38
3,Brent,16.7
4,Bromley,57.97
5,Camden,8.4
6,Croydon,33.41
7,Ealing,21.44
8,Enfield,31.74
9,Greenwich [note 2],18.28


In [87]:
binfo['Boroughs'] = binfo['Boroughs'].replace(['Barking and Dagenham [note 1]'],['Barking and Dagenham']).replace(['Greenwich  [note 2]'],['Greenwich']).replace(['Hammersmith and Fulham [note 4]'],['Hammersmith and Fulham'])

In [88]:
binfo['Boroughs'] = binfo['Boroughs'].replace(['City of London'],['City'])
binfo.sort_values(['Boroughs'],inplace=True)
binfo.reset_index(drop=True,inplace=True)

In [89]:
binfo

Unnamed: 0,Boroughs,Area(sqmi)
0,Barking and Dagenham,13.93
1,Barnet,33.49
2,Bexley,23.38
3,Brent,16.7
4,Bromley,57.97
5,Camden,8.4
6,City,1.12
7,Croydon,33.41
8,Ealing,21.44
9,Enfield,31.74


In [90]:
for index,row in binfo.iterrows():
    if(row['Boroughs']!=sorted(Filter)[index]):
        print(len(row['Boroughs']),len(sorted(Filter)[index]))


In [91]:
flf=pd.merge(lf,binfo,on=['Boroughs'])
flf

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate,Avg_house_price,Area(sqmi)
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654,342500,23.38
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654,342500,23.38
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654,342500,23.38
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654,342500,23.38
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654,342500,23.38
...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754,530000,7.36
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754,530000,7.36
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754,530000,7.36
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754,530000,7.36


In [92]:
np.shape(flf['Boroughs'].unique())

(33,)

In [93]:
CLIENT_ID = 'SECRETCLIENT_SECRET = 'SECRETVERSION = '20180605'

In [94]:
radius=500
lat = flf.iloc[0,3]
lng = flf.iloc[0,4]
url =  'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f2ba6fb99578521cc315733'},
 'response': {'headerLocation': 'Abbey Wood',
  'headerFullLocation': 'Abbey Wood, London',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 51.49098354190194,
    'lng': 0.11653100512592866},
   'sw': {'lat': 51.481983532901936, 'lng': 0.10210477116065415}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bf181d61f17ef3bfcf59bc8',
       'name': 'Co-op Food',
       'location': {'address': '103-116 Mcleod Rd',
        'lat': 51.48764979256399,
        'lng': 0.11348962783813477,
        'labeledLatLngs': [{'label': 'display',
          'lat': 51.48764979256399,
          'lng': 0.11348962783813477}],
        'distance': 316,
        'postalCode': 'SE2

In [95]:
venues = results['response']['groups'][0]['items']
name = results['response']['headerLocation']
nearby_venues1 = json_normalize(venues)
nearby_venues1
print(name)
nearby_venues1

Abbey Wood


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.postalCode,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups,venue.location.crossStreet
0,e-0-4bf181d61f17ef3bfcf59bc8-0,0,"[{'summary': 'This spot is popular', 'type': '...",4bf181d61f17ef3bfcf59bc8,Co-op Food,103-116 Mcleod Rd,51.48765,0.11349,"[{'label': 'display', 'lat': 51.48764979256399...",316,SE2 0BS,GB,Abbey Wood,Greater London,United Kingdom,"[103-116 Mcleod Rd, Abbey Wood, Greater London...","[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",0,[],
1,e-0-4bd315449854d13af6cffc4d-1,0,"[{'summary': 'This spot is popular', 'type': '...",4bd315449854d13af6cffc4d,Bostal Gardens,,51.48667,0.110462,"[{'label': 'display', 'lat': 51.4866697860935,...",81,SE2,GB,Eltham,Greater London,United Kingdom,"[Eltham, Greater London, SE2, United Kingdom]","[{'id': '4bf58dd8d48988d1e7941735', 'name': 'P...",0,[],
2,e-0-53346d1f498e612d26c5a65a-2,0,"[{'summary': 'This spot is popular', 'type': '...",53346d1f498e612d26c5a65a,Othalias Security Services U K,Bastion Road,51.483521,0.105464,"[{'label': 'display', 'lat': 51.48352095330697...",424,SE2 0RG,GB,Abbey Wood,Greater London,United Kingdom,"[Bastion Road (Bastion Road), Abbey Wood, Grea...","[{'id': '52f2ab2ebcbc57f1066b8b36', 'name': 'I...",0,[],Bastion Road
3,e-0-58bd05cd3f5a5c1c89dc530f-3,0,"[{'summary': 'This spot is popular', 'type': '...",58bd05cd3f5a5c1c89dc530f,Morley's,311 Plumstead High St,51.48561,0.102389,"[{'label': 'display', 'lat': 51.48561, 'lng': ...",490,SE18 1JX,GB,London,Greater London,United Kingdom,"[311 Plumstead High St, London, Greater London...","[{'id': '4d4ae6fc7a7b7dea34424761', 'name': 'F...",0,[],
4,e-0-4d3f34fd1bd2a1436d51e67c-4,0,"[{'summary': 'This spot is popular', 'type': '...",4d3f34fd1bd2a1436d51e67c,East Ocean,7 Wickham Ln.,51.485279,0.102426,"[{'label': 'display', 'lat': 51.485279, 'lng':...",496,SE2 0XJ,GB,Plumstead,Greater London,United Kingdom,"[7 Wickham Ln. (at Plumstead High St.), Plumst...","[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",0,[],at Plumstead High St.


In [96]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues1 =nearby_venues1.loc[:, filtered_columns]
nearby_venues1['Location'] = name

In [97]:
nearby_venues1

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng,Location
0,Co-op Food,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",51.48765,0.11349,Abbey Wood
1,Bostal Gardens,"[{'id': '4bf58dd8d48988d1e7941735', 'name': 'P...",51.48667,0.110462,Abbey Wood
2,Othalias Security Services U K,"[{'id': '52f2ab2ebcbc57f1066b8b36', 'name': 'I...",51.483521,0.105464,Abbey Wood
3,Morley's,"[{'id': '4d4ae6fc7a7b7dea34424761', 'name': 'F...",51.48561,0.102389,Abbey Wood
4,East Ocean,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",51.485279,0.102426,Abbey Wood


In [98]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [99]:
nearby_venues1['venue.categories'] = nearby_venues1.apply(get_category_type, axis=1)

In [100]:
nearby_venues1

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng,Location
0,Co-op Food,Grocery Store,51.48765,0.11349,Abbey Wood
1,Bostal Gardens,Playground,51.48667,0.110462,Abbey Wood
2,Othalias Security Services U K,IT Services,51.483521,0.105464,Abbey Wood
3,Morley's,Fried Chicken Joint,51.48561,0.102389,Abbey Wood
4,East Ocean,Chinese Restaurant,51.485279,0.102426,Abbey Wood


In [101]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    count = 0
    oops=0
    nearby_venues = pd.DataFrame()
    total_venues = pd.DataFrame()
    for name, laty, lngy in zip(names, latitudes, longitudes):
        
        venues_list = 0
        totalvenues = pd.DataFrame()
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            laty, 
            lngy, 
            radius)
        results = requests.get(url).json()
        print(count,flush=True)
        if (results['response']['totalResults']!=0):
            venuesn = results['response']['groups'][0]['items']
            venues_list = pd.json_normalize(venuesn)
            venues_list['Boroughs'] = name
            totalvenues['Boroughs'] = [name]
            totalvenues['TotalVenues'] = [results['response']['totalResults']]
            filtered_columns = ['Boroughs','venue.name', 'venue.categories']
            nearby_venues = nearby_venues.append([venues_list.loc[:,filtered_columns]])
            total_venues = total_venues.append([totalvenues])
        else:
            oops = oops+1
        count = count+1
        
    nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
    nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
    return(nearby_venues,total_venues,oops)

In [102]:
lfvenues,total_venues,oops=getNearbyVenues(names=flf['Boroughs'],latitudes=flf['latitude'],longitudes=flf['longitude'])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [103]:
lfvenues

Unnamed: 0,Boroughs,name,categories
0,Bexley,Co-op Food,Grocery Store
1,Bexley,Bostal Gardens,Playground
2,Bexley,Othalias Security Services U K,IT Services
3,Bexley,Morley's,Fried Chicken Joint
4,Bexley,East Ocean,Chinese Restaurant
...,...,...,...
25,Hackney,Chumleys,Breakfast Spot
26,Hackney,Itto,Asian Restaurant
27,Hackney,Jan's bar,Bar
28,Hackney,Franco Manca,Pizza Place


In [104]:
print(oops)
total_venues

4


Unnamed: 0,Boroughs,TotalVenues
0,Bexley,5
0,Bexley,4
0,Bexley,7
0,Bexley,3
0,Bexley,17
...,...,...
0,Hackney,14
0,Hackney,9
0,Hackney,44
0,Hackney,8


In [105]:
total_venues.reset_index(drop=True,inplace=True)

In [106]:
london_onehot = pd.get_dummies(lfvenues[['categories']],prefix="",prefix_sep="")
london_onehot['Boroughs'] = lfvenues['Boroughs'] 
fixed_columns = list(london_onehot.columns[:])
fixed_columns.remove('Boroughs')
fixed_columns.insert(0,'Boroughs')
london_onehot = london_onehot[fixed_columns]

In [107]:
print(np.shape(london_onehot['Boroughs'].unique()))
london_onehot

(33,)


Unnamed: 0,Boroughs,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Arepa Restaurant,...,Windmill,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yakitori Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Bexley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bexley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bexley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bexley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bexley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,Hackney,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26,Hackney,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27,Hackney,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28,Hackney,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [108]:
london_grouped = london_onehot.groupby('Boroughs').mean()
london_grouped['Boroughs'] = london_grouped.index
london_grouped = london_grouped[fixed_columns]
london_grouped.reset_index(drop=True,inplace=True)
london_grouped

Unnamed: 0,Boroughs,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Arepa Restaurant,...,Windmill,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yakitori Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Barking and Dagenham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Barnet,0.0,0.002703,0.0,0.0,0.0,0.0,0.002703,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005405,0.0,0.0
2,Bexley,0.0,0.0,0.0,0.0,0.0,0.0,0.009709,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Brent,0.0,0.00369,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00369,0.0,0.0,0.0,0.01107,0.0,0.0
4,Bromley,0.0,0.0,0.0,0.003401,0.0,0.006803,0.003401,0.0,0.0,...,0.0,0.0,0.006803,0.0,0.0,0.0,0.0,0.003401,0.0,0.0
5,Camden,0.0,0.003431,0.001715,0.0,0.0,0.0,0.001715,0.0,0.0,...,0.0,0.005146,0.003431,0.0,0.0,0.0,0.0,0.005146,0.001715,0.010292
6,City,0.0,0.0,0.0,0.0,0.0,0.0,0.006667,0.0,0.0,...,0.0,0.026667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Croydon,0.0,0.0,0.00625,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ealing,0.0,0.00625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00625,0.00625,0.0,0.0,0.0,0.0,0.00625,0.0,0.0
9,Enfield,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.006667,0.0,0.0,0.0,0.0,0.0


In [109]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [110]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Boroughs']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
borough_venues_sorted = pd.DataFrame(columns=columns)
borough_venues_sorted['Boroughs'] = london_grouped['Boroughs']

for ind in np.arange(london_grouped.shape[0]):
    borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(london_grouped.iloc[ind, :], num_top_venues)

borough_venues_sorted.head()

Unnamed: 0,Boroughs,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Barking and Dagenham,Hotel,Supermarket,Grocery Store,Gas Station,Gym
1,Barnet,Coffee Shop,Grocery Store,Café,Pub,Bus Stop
2,Bexley,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
3,Brent,Pub,Coffee Shop,Grocery Store,Fast Food Restaurant,Café
4,Bromley,Pub,Café,Supermarket,Platform,Coffee Shop


In [111]:
total_venues = total_venues.groupby('Boroughs').agg({'TotalVenues':'sum'})

In [112]:
total_venues

Unnamed: 0_level_0,TotalVenues
Boroughs,Unnamed: 1_level_1
Barking and Dagenham,54
Barnet,392
Bexley,206
Brent,290
Bromley,311
Camden,1279
City,397
Croydon,160
Ealing,219
Enfield,182


In [113]:
flf

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate,Avg_house_price,Area(sqmi)
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654,342500,23.38
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654,342500,23.38
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654,342500,23.38
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654,342500,23.38
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654,342500,23.38
...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754,530000,7.36
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754,530000,7.36
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754,530000,7.36
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754,530000,7.36


In [114]:
flf2 = pd.merge(flf,total_venues,on=['Boroughs'])

In [115]:
flf2

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate,Avg_house_price,Area(sqmi),TotalVenues
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654,342500,23.38,206
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654,342500,23.38,206
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654,342500,23.38,206
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654,342500,23.38,206
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654,342500,23.38,206
...,...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754,530000,7.36,627
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754,530000,7.36,627
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754,530000,7.36,627
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754,530000,7.36,627


In [116]:
flf2['Venuespermi'] = flf2['TotalVenues']/pd.to_numeric(flf2['Area(sqmi)'])

In [117]:
flf2

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate,Avg_house_price,Area(sqmi),TotalVenues,Venuespermi
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654,342500,23.38,206,8.810950
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654,342500,23.38,206,8.810950
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654,342500,23.38,206,8.810950
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654,342500,23.38,206,8.810950
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654,342500,23.38,206,8.810950
...,...,...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754,530000,7.36,627,85.190217
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754,530000,7.36,627,85.190217
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754,530000,7.36,627,85.190217
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754,530000,7.36,627,85.190217


In [118]:
flf2.drop(columns=['Area(sqmi)','TotalVenues'],inplace=True)

In [119]:
flf2

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate,Avg_house_price,Venuespermi
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654,342500,8.810950
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654,342500,8.810950
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654,342500,8.810950
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654,342500,8.810950
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654,342500,8.810950
...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754,530000,85.190217
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754,530000,85.190217
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754,530000,85.190217
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754,530000,85.190217


In [120]:
for index,row in borough_venues_sorted.iterrows():
    if(row['Boroughs']!=sorted(Filter)[index]):
        print(len(row['Boroughs']),len(sorted(Filter)[index]))
        
print(borough_venues_sorted['Boroughs'].unique())
print(np.shape(borough_venues_sorted['Boroughs'].unique()))

['Barking and Dagenham' 'Barnet' 'Bexley' 'Brent' 'Bromley' 'Camden'
 'City' 'Croydon' 'Ealing' 'Enfield' 'Greenwich' 'Hackney'
 'Hammersmith and Fulham' 'Haringey' 'Harrow' 'Havering' 'Hillingdon'
 'Hounslow' 'Islington' 'Kensington and Chelsea' 'Kingston upon Thames'
 'Lambeth' 'Lewisham' 'Merton' 'Newham' 'Redbridge' 'Richmond upon Thames'
 'Southwark' 'Sutton' 'Tower Hamlets' 'Waltham Forest' 'Wandsworth'
 'Westminster']
(33,)


In [121]:
flf2 = pd.merge(flf2, borough_venues_sorted, on=['Boroughs'])

In [122]:
flf2

Unnamed: 0,Location,OSgridref,Boroughs,latitude,longitude,employment rate,CrimeRate,Avg_house_price,Venuespermi,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Abbey Wood,TQ465785,Bexley,51.486484,0.109318,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
1,Albany Park,TQ478728,Bexley,51.434929,0.125663,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
2,Barnehurst,TQ505755,Bexley,51.458479,0.165667,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
3,Barnes Cray,TQ525755,Bexley,51.457944,0.194464,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
4,Belvedere,TQ495785,Bexley,51.485699,0.152539,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,South Hackney,TQ355845,Hackney,51.543141,-0.046853,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar
559,Stamford Hill,TQ335875,Hackney,51.570576,-0.074562,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar
560,Stoke Newington,TQ335865,Hackney,51.561590,-0.074942,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar
561,Upper Clapton,TQ345875,Hackney,51.570339,-0.060129,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar


In [123]:
flf2.drop(columns=['latitude','longitude','Location','OSgridref'],inplace=True)

In [124]:
flf2

Unnamed: 0,Boroughs,employment rate,CrimeRate,Avg_house_price,Venuespermi,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bexley,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
1,Bexley,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
2,Bexley,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
3,Bexley,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
4,Bexley,76.0,7.239654,342500,8.810950,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...
558,Hackney,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar
559,Hackney,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar
560,Hackney,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar
561,Hackney,72.8,12.038754,530000,85.190217,Café,Pub,Coffee Shop,Bakery,Bar


In [125]:
flf2.drop_duplicates(inplace=True)

In [126]:
flf2.reset_index(drop=True,inplace=True)

In [127]:
flf2

Unnamed: 0,Boroughs,employment rate,CrimeRate,Avg_house_price,Venuespermi,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bexley,76.0,7.239654,342500,8.81095,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant
1,Greenwich,73.0,10.357328,420000,8.369803,Pub,Bus Stop,Grocery Store,Café,Fast Food Restaurant
2,Ealing,75.3,9.268629,485000,10.214552,Pub,Café,Indian Restaurant,Park,Bus Stop
3,Hammersmith and Fulham,76.2,12.55354,777475,86.255924,Pub,Coffee Shop,Café,Italian Restaurant,Bakery
4,Croydon,77.8,9.274791,365000,4.788985,Park,Coffee Shop,Platform,Café,Grocery Store
5,Redbridge,71.6,8.248237,420000,6.290174,Coffee Shop,Café,Grocery Store,Pub,Pizza Place
6,City,82.1,103.96077,835000,354.464286,Coffee Shop,Pub,Hotel,Gym / Fitness Center,Cocktail Bar
7,Westminster,68.0,28.45875,1025000,177.442702,Coffee Shop,Hotel,Café,Pub,Italian Restaurant
8,Brent,67.5,9.305861,499950,17.365269,Pub,Coffee Shop,Grocery Store,Fast Food Restaurant,Café
9,Bromley,79.0,7.71541,440000,5.364844,Pub,Café,Supermarket,Platform,Coffee Shop


In [128]:
!pip install geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="london_explorer")
location = geolocator.geocode("Bexley, London, United Kingdom")
print(location.address)

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/07/e1/9c72de674d5c2b8fcb0738a5ceeb5424941fefa080bfe4e240d0bacb5a38/geopy-2.0.0-py3-none-any.whl (111kB)
[K     |████████████████████████████████| 112kB 21.3MB/s eta 0:00:01
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-2.0.0
Bexley, London Borough of Bexley, London, Greater London, England, DA5 1LD, United Kingdom


In [129]:
i = 0
blat = np.zeros(33)
blng = np.zeros(33)
for boroughs in flf2['Boroughs']:
    location = geolocator.geocode("{}, London, United Kingdom".format(boroughs))
    print(location.address,flush=True)
    blat[i] = location.latitude
    blng[i] = location.longitude
    i = i + 1

Bexley, London Borough of Bexley, London, Greater London, England, DA5 1LD, United Kingdom
Greenwich, London, Greater London, England, SE10 9HF, United Kingdom
Ealing, London Borough of Ealing, London, Greater London, England, W5 5DG, United Kingdom
Hammersmith, London Borough of Hammersmith and Fulham, London, Greater London, England, W6 9YA, United Kingdom
Croydon, London, Greater London, England, CR0 1SZ, United Kingdom
Redbridge, Eastern Avenue, Aldersbrook, Redbridge, London Borough of Redbridge, London, Greater London, England, IG4 5DQ, United Kingdom
City of London, Greater London, England, EC2V 5AE, United Kingdom
Westminster, Millbank, City of Westminster, London, Greater London, England, SW1P 3JX, United Kingdom
London Borough of Brent, Greater London, England, United Kingdom
Bromley, London, Greater London, England, BR1 1EY, United Kingdom
Islington, London, Greater London, England, N1, United Kingdom
Havering, Lewis Street, Maitland Park, Kentish Town, London Borough of Cam

In [130]:
print(blat)
print(np.shape(blat))
print(blng)
print(np.shape(blng))

[51.4416793  51.4820845  51.5126553  51.4920377  51.3713049  51.5763203
 51.5156177  51.5004439  51.5638258  51.4028046  51.5384287  51.5443687
 51.65309    51.6520851  51.4570271  51.5029222  51.5541171  51.44037225
 51.52999955 51.3575114  51.4624325  51.5967688  51.5423045  51.4096275
 51.5256294  51.58792985 51.4686132  51.5013012  51.4989948  51.59816935
 51.41080285 51.5425193  51.5432402 ]
(33,)
[ 0.150488   -0.0045417  -0.3051952  -0.2236401  -0.101957    0.0454097
 -0.0919983  -0.1265398  -0.27575966  0.0148142  -0.0999051  -0.14430318
 -0.2002261  -0.0810175  -0.1932607  -0.103458    0.15050434 -0.30571955
  0.02931796 -0.17364012 -0.0101331  -0.33727516 -0.1395604  -0.3062621
 -0.0335853  -0.10541011 -0.3613471  -0.117287   -0.1991229  -0.01783667
 -0.18809851 -0.44833493 -0.0493621 ]
(33,)


In [131]:
flf2['Latitude'] = blat
flf2['Longitude'] = blng

In [132]:
flf2

Unnamed: 0,Boroughs,employment rate,CrimeRate,Avg_house_price,Venuespermi,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Latitude,Longitude
0,Bexley,76.0,7.239654,342500,8.81095,Pub,Grocery Store,Coffee Shop,Café,Fast Food Restaurant,51.441679,0.150488
1,Greenwich,73.0,10.357328,420000,8.369803,Pub,Bus Stop,Grocery Store,Café,Fast Food Restaurant,51.482084,-0.004542
2,Ealing,75.3,9.268629,485000,10.214552,Pub,Café,Indian Restaurant,Park,Bus Stop,51.512655,-0.305195
3,Hammersmith and Fulham,76.2,12.55354,777475,86.255924,Pub,Coffee Shop,Café,Italian Restaurant,Bakery,51.492038,-0.22364
4,Croydon,77.8,9.274791,365000,4.788985,Park,Coffee Shop,Platform,Café,Grocery Store,51.371305,-0.101957
5,Redbridge,71.6,8.248237,420000,6.290174,Coffee Shop,Café,Grocery Store,Pub,Pizza Place,51.57632,0.04541
6,City,82.1,103.96077,835000,354.464286,Coffee Shop,Pub,Hotel,Gym / Fitness Center,Cocktail Bar,51.515618,-0.091998
7,Westminster,68.0,28.45875,1025000,177.442702,Coffee Shop,Hotel,Café,Pub,Italian Restaurant,51.500444,-0.12654
8,Brent,67.5,9.305861,499950,17.365269,Pub,Coffee Shop,Grocery Store,Fast Food Restaurant,Café,51.563826,-0.27576
9,Bromley,79.0,7.71541,440000,5.364844,Pub,Café,Supermarket,Platform,Coffee Shop,51.402805,0.014814


In [133]:
flf2.to_csv('london_dataset.csv')

In [134]:
london_grouped.to_csv('london_grouped.csv')