**Import Block:**

In [1]:
!conda install -c conda-forge geopy --yes

import os
import numpy as np
import pandas as pd
import datetime as dt
import json

from geopy.geocoders import Nominatim # address to latitude/longitude convert

import requests
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

!conda install -c conda-forge folium=0.11.0 --yes
import folium 

print('Import complete.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Import complete.


In [2]:
#Land Registry Base = lrb (http://landregistry.data.gov.uk/). We use 2019 year base.
lrb = pd.read_csv("http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-2019.csv")

**Lets understand what we have:**

In [3]:
lrb.sample(3)

Unnamed: 0,{8F1B26BD-60CA-53DB-E053-6C04A8C03649},221950,2019-04-26 00:00,TS17 5FF,D,Y,F,3,Unnamed: 8,CARRAWBURGH ROAD,INGLEBY BARWICK,STOCKTON-ON-TEES,STOCKTON-ON-TEES.1,STOCKTON-ON-TEES.2,A,A.1
655311,{9DBAD222-033B-6EB3-E053-6B04A8C0F257},520000,2019-12-19 00:00,N1 8QR,F,N,L,9,,TERLING WALK,,LONDON,ISLINGTON,GREATER LONDON,A,A
357056,{98C75471-76A5-72E9-E053-6B04A8C042F0},209995,2019-11-01 00:00,MK40 4SS,F,Y,L,44,,DANEGELD AVENUE,GREAT DENHAM,BEDFORD,BEDFORD,BEDFORD,A,A
580168,{85866A65-ABB1-143F-E053-6B04A8C06A15},100000,2019-01-02 00:00,S1 1AD,F,Y,L,"QUEENS HOUSE, 105",APARTMENT 46,QUEEN STREET,,SHEFFIELD,SHEFFIELD,SOUTH YORKSHIRE,B,A


In [4]:
print(lrb.columns)
print(lrb.shape)

Index(['{8F1B26BD-60CA-53DB-E053-6C04A8C03649}', '221950', '2019-04-26 00:00',
       'TS17 5FF', 'D', 'Y', 'F', '3', 'Unnamed: 8', 'CARRAWBURGH ROAD',
       'INGLEBY BARWICK', 'STOCKTON-ON-TEES', 'STOCKTON-ON-TEES.1',
       'STOCKTON-ON-TEES.2', 'A', 'A.1'],
      dtype='object')
(964124, 16)


**Now we need to prepare data for further work with them. And we will start with the name for the columns because now they are missing:**

In [5]:
lrb.columns = ['TUID', 'Price', 'Date_Transfer', 'Postcode', 'Prop_Type', 'Old_New', 'Duration', 'PAON', 'SAON', 'Street', 'Locality', 'Town_City', 'District', 'County', 'PPD_Cat_Type', 'Record_Status']
lrb.sample(3)

Unnamed: 0,TUID,Price,Date_Transfer,Postcode,Prop_Type,Old_New,Duration,PAON,SAON,Street,Locality,Town_City,District,County,PPD_Cat_Type,Record_Status
26259,{919FEC06-27E2-9A90-E053-6C04A8C0A300},310000,2019-08-07 00:00,HA0 2PE,F,N,L,BUTLER COURT,8.0,HARROW ROAD,,WEMBLEY,BRENT,GREATER LONDON,B,A
236053,{87E1551E-D1AE-6405-E053-6C04A8C0B2EE},325000,2019-03-18 00:00,BS5 6BZ,T,N,F,1,,BRITANNIA ROAD,EASTON,BRISTOL,CITY OF BRISTOL,CITY OF BRISTOL,B,A
69532,{965B6D91-469B-95E4-E053-6C04A8C07729},755000,2019-10-04 00:00,BN1 3RT,T,N,F,23,,WEST HILL ROAD,,BRIGHTON,BRIGHTON AND HOVE,BRIGHTON AND HOVE,A,A


In [9]:
# Format date, and sorting

lrb['Date_Transfer'] = lrb['Date_Transfer'].apply(pd.to_datetime)
lrb.sort_values(by=['Date_Transfer'],ascending=[False],inplace=True)
print('Complete')

Complete


In [10]:
#Lets see:
lrb.head(4)

Unnamed: 0,TUID,Price,Date_Transfer,Postcode,Prop_Type,Old_New,Duration,PAON,SAON,Street,Locality,Town_City,District,County,PPD_Cat_Type,Record_Status
924956,{A2479555-56B8-74C7-E053-6B04A8C0887D},294000,2019-12-31,SP11 6ZQ,T,Y,F,50,,CASHMERE DRIVE,,ANDOVER,TEST VALLEY,HAMPSHIRE,A,A
914683,{9FF0D96A-38F5-11ED-E053-6C04A8C06383},600000,2019-12-31,SW18 1UX,F,Y,L,"FLAT 45, 12",,OSIERS ROAD,,LONDON,WANDSWORTH,GREATER LONDON,A,A
932066,{A2479555-3E16-74C7-E053-6B04A8C0887D},228000,2019-12-31,BN22 8JH,T,N,F,20,,BELTRING ROAD,,EASTBOURNE,EASTBOURNE,EAST SUSSEX,A,A
410907,{9DBAD222-8F19-6EB3-E053-6B04A8C0F257},450000,2019-12-31,CH1 2LU,O,N,F,2,,STANLEY PLACE,,CHESTER,CHESHIRE WEST AND CHESTER,CHESHIRE WEST AND CHESTER,B,A


In [142]:
#ldn = London Base from lrb
#streets = list of streets
#st_pr = mean price grouping by street
#alwbl = our budget
ldn = lrb.query("Town_City == 'LONDON'")
streets = ldn['Street'].unique().tolist()
st_pr = ldn.groupby(['Street'])['Price'].mean().reset_index()
st_pr.columns = ['Street', 'Avg_Price']
alwbl = st_pr.query("(Avg_Price >= 2300000) & (Avg_Price <= 2600000)")

In [143]:
alwbl.sample(10)

Unnamed: 0,Street,Avg_Price
3963,DUKES LANE,2350000.0
11275,SOUTH PARADE,2425000.0
6414,HOLLAND PARK ROAD,2350000.0
4926,FOUNTAYNE ROAD,2497500.0
9292,ORMONDE GATE,2350000.0
3349,CROFTDOWN ROAD,2500000.0
495,ARTESIAN ROAD,2462500.0
4127,EATON TERRACE MEWS,2400000.0
2952,COLLINGHAM ROAD,2477000.0
2596,CHEVENING ROAD,2470154.0


In [116]:
from geopy.geocoders import Nominatim
from geopy.distance import vincenty
from sklearn.cluster import KMeans
print('Complete')

Complete


In [117]:
geolocator = Nominatim()

  """Entry point for launching an IPython kernel.


In [145]:
alwbl['city_coord'] = alwbl['Street'].apply(geolocator.geocode).apply(lambda x: (x.latitude, x.longitude))
print('Complete')

Complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [146]:
alwbl.sample(20)

Unnamed: 0,Street,Avg_Price,city_coord
4926,FOUNTAYNE ROAD,2497500.0,"(51.5624123, -0.0671209)"
9866,PORCHESTER TERRACE,2522500.0,"(51.5142019, -0.1850823)"
1834,BRUNSWICK PLACE,2455000.0,"(51.5982243, 0.5915693)"
5333,GLOUCESTER PLACE MEWS,2500000.0,"(51.5173903, -0.1582712)"
1016,BEECHWOOD AVENUE,2500000.0,"(54.85552785, -6.290580690119052)"
7316,LANGHAM STREET,2600000.0,"(53.7909096, -2.2817971)"
10069,QUEEN ANNES GROVE,2360000.0,"(51.4973784, -0.2570863)"
3955,DUDLEY ROAD,2315580.0,"(42.2760158, -71.4285504)"
11743,STOCKWELL GREEN,2573333.0,"(51.4684426, -0.1198062)"
12437,TOTTENHAM COURT ROAD,2507915.0,"(51.5162451, -0.1313998)"


In [147]:
alwbl.shape

(126, 3)

In [148]:
alwbl[['Latitude', 'Longitude']] = alwbl['city_coord'].apply(pd.Series)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [149]:
alwbl.sample(10)

Unnamed: 0,Street,Avg_Price,city_coord,Latitude,Longitude
8629,MORELLA ROAD,2500000.0,"(53.4366755, -2.9433945)",53.436675,-2.943395
3203,COURT LANE GARDENS,2440000.0,"(51.4485002, -0.0801016)",51.4485,-0.080102
8959,NIGHTINGALE SQUARE,2475000.0,"(51.4481983, -0.15650368682206042)",51.448198,-0.156504
2111,CAMPDEN HILL ROAD,2305400.0,"(51.5081106, -0.1996673)",51.508111,-0.199667
3222,COURTNELL STREET,2409552.0,"(51.5162321, -0.1988211)",51.516232,-0.198821
43,ABINGDON ROAD,2591591.0,"(51.8964358, -0.4714466)",51.896436,-0.471447
5835,HANLEY ROAD,2598767.0,"(-34.0322351, 18.4795718)",-34.032235,18.479572
1173,BETTRIDGE ROAD,2400000.0,"(57.0320376, -2.1476985)",57.032038,-2.147699
5525,GREAT TITCHFIELD STREET,2395000.0,"(51.5173741, -0.1400697)",51.517374,-0.14007
8481,MILLBANK,2514444.0,"(51.4926121, -0.1290435)",51.492612,-0.129044


In [150]:
new_alwbl = alwbl.drop(columns=['city_coord'])

In [151]:
new_alwbl.sample(10)

Unnamed: 0,Street,Avg_Price,Latitude,Longitude
10300,REDCLIFFE MEWS,2415894.0,51.48689,-0.186727
3037,CONIGER ROAD,2579833.0,51.471923,-0.198108
1213,BILLING ROAD,2400000.0,44.902046,-69.076333
13175,WEST HILL ROAD,2430000.0,34.410761,-117.641159
9108,OAK HILL PARK MEWS,2400000.0,51.557117,-0.183605
4263,ELLERBY STREET,2450000.0,-31.921783,115.817825
3753,DEVONSHIRE MEWS WEST,2400000.0,51.522231,-0.149168
11336,SOUTHWOOD AVENUE,2520000.0,41.039035,-85.151557
9761,PINE GROVE,2400000.0,39.349339,-123.814182
451,ARGYLE STREET,2300000.0,22.32634,114.188127


In [152]:
new_alwbl.shape

(126, 4)

In [153]:
address = 'London, UK'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('London coordinate is {}, {}.'.format(latitude, longitude))
print('Complete')

London coordinate is 51.5073219, -0.1276474.
Complete


  


**Lets create map:**

In [154]:
ldn_map = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, price, street in zip(new_alwbl['Latitude'], new_alwbl['Longitude'], new_alwbl['Avg_Price'], new_alwbl['Street']):
    label = '{}, {}'.format(street, price)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(ldn_map)  
    
ldn_map

**Lets get foursquare connection:**

In [157]:
CLIENT_ID = 'T3CY1VC1EMLS0HMKLMJNRBU0EVFT001DSRO04YMRC1Z5Z5O0' # Foursquare ID
CLIENT_SECRET = 'AUP4SPZ2KHXJHKPTHDZ2XRCIQQOF3RPFRC2ALMSQSLIDOOEN' # Foursquare Secret
VERSION = '20200615' # Foursquare API version

print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

CLIENT_ID: T3CY1VC1EMLS0HMKLMJNRBU0EVFT001DSRO04YMRC1Z5Z5O0
CLIENT_SECRET:AUP4SPZ2KHXJHKPTHDZ2XRCIQQOF3RPFRC2ALMSQSLIDOOEN


**Now is time for k-means clustering:**

In [158]:
def get_meet_points(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_points = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_points.columns = ['Street', 
                  'Street Latitude', 
                  'Street Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_points)

In [159]:
location_venues = get_meet_points(names=new_alwbl['Street'], latitudes=new_alwbl['Latitude'], longitudes=new_alwbl['Longitude'])

ABINGDON ROAD
ARGYLE STREET
ARTESIAN ROAD
ASHCHURCH GROVE
ASHCHURCH PARK VILLAS
AYNHOE ROAD
BARRETT'S GROVE
BEAR LANE
BEDALE STREET
BEECHWOOD AVENUE
BETTERTON STREET
BETTRIDGE ROAD
BILLING ROAD
BLENHEIM CRESCENT
BOLSOVER STREET
BROMPTON PLACE
BRUNSWICK PLACE
BUCKINGHAM PALACE ROAD
CAMBRIDGE TERRACE
CAMDEN SQUARE
CAMPDEN HILL ROAD
CANONBURY PARK SOUTH
CARLTON GARDENS
CHALCOT SQUARE
CHENISTON GARDENS
CHEPSTOW PLACE
CHEVAL PLACE
CHEVENING ROAD
CHILTERN STREET
CIRCUS ROAD
COLLINGHAM ROAD
CONIGER ROAD
COULTER ROAD
COURT LANE GARDENS
COURTNELL STREET
CRANBROOK ROAD
CRAWFORD MEWS
CRESSWELL GARDENS
CROFTDOWN ROAD
DARTMOUTH PARK AVENUE
DEEPDALE
DEER PARK ROAD
DEVEREUX LANE
DEVONSHIRE MEWS WEST
DOVER PARK DRIVE
DOVER STREET
DOWNSIDE CRESCENT
DUDLEY ROAD
DUKES LANE
EATON TERRACE MEWS
ECCLESTON SQUARE
ELLERBY STREET
ELVASTON PLACE
ESSEX STREET
FENCHURCH STREET
FLORAL STREET
FOUNTAYNE ROAD
FOURNIER STREET
FRISTON STREET
GERTRUDE STREET
GLOUCESTER PLACE MEWS
GORST ROAD
GREAT TITCHFIELD STREET
HALSEY

In [160]:
location_venues

Unnamed: 0,Street,Street Latitude,Street Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,ABINGDON ROAD,51.896436,-0.471447,Costa Coffee,51.892428,-0.470789,Coffee Shop
1,ABINGDON ROAD,51.896436,-0.471447,Luton Town Training Ground,51.899466,-0.471619,Soccer Field
2,ARGYLE STREET,22.326340,114.188127,Lazy Creatures (慵懶生物),22.327093,114.188935,Café
3,ARGYLE STREET,22.326340,114.188127,Rings Coffee,22.328186,114.189370,Coffee Shop
4,ARGYLE STREET,22.326340,114.188127,Kung Wo Dou Bun Chong (公和荳品廠),22.329728,114.187646,Dessert Shop
...,...,...,...,...,...,...,...
3218,WYATT DRIVE,32.757156,-97.447296,Bangkok House,32.758238,-97.451370,Thai Restaurant
3219,WYATT DRIVE,32.757156,-97.447296,Family Dollar,32.756976,-97.451502,Discount Store
3220,WYATT DRIVE,32.757156,-97.447296,Goodwill,32.755659,-97.450636,Thrift / Vintage Store
3221,WYATT DRIVE,32.757156,-97.447296,Arroyo's Mexican Food,32.757486,-97.451379,Mexican Restaurant


In [161]:
location_venues.groupby('Street').count()

Unnamed: 0_level_0,Street Latitude,Street Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Street,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ABINGDON ROAD,2,2,2,2,2,2
ARGYLE STREET,56,56,56,56,56,56
ARTESIAN ROAD,8,8,8,8,8,8
ASHCHURCH GROVE,28,28,28,28,28,28
ASHCHURCH PARK VILLAS,28,28,28,28,28,28
...,...,...,...,...,...,...
WELL ROAD,1,1,1,1,1,1
WEST HILL PARK,4,4,4,4,4,4
WEST SQUARE,14,14,14,14,14,14
WEST TEMPLE SHEEN,5,5,5,5,5,5


In [162]:
print('There are {} uniques categories.'.format(len(location_venues['Venue Category'].unique())))

There are 303 uniques categories.


In [163]:
location_venues.shape

(3223, 7)

In [164]:
venues_onehot = pd.get_dummies(location_venues[['Venue Category']], prefix="", prefix_sep="")

venues_onehot['Street'] = location_venues['Street'] 

fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])

venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head()

Unnamed: 0,Street,Accessories Store,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Video Store,Vietnamese Restaurant,Watch Shop,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio
0,ABINGDON ROAD,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,ABINGDON ROAD,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,ARGYLE STREET,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,ARGYLE STREET,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,ARGYLE STREET,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [165]:
ldn_grp = venues_onehot.groupby('Street').mean().reset_index()
ldn_grp

Unnamed: 0,Street,Accessories Store,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Video Store,Vietnamese Restaurant,Watch Shop,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio
0,ABINGDON ROAD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1,ARGYLE STREET,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,...,0.0,0.017857,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,ARTESIAN ROAD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,ASHCHURCH GROVE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0
4,ASHCHURCH PARK VILLAS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108,WELL ROAD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
109,WEST HILL PARK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
110,WEST SQUARE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
111,WEST TEMPLE SHEEN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [166]:
ldn_grp.shape

(113, 304)

In [167]:
# Lets denote 5 centers

num_top_centres = 5

for hood in ldn_grp['Street']:
    print("----"+hood+"----")
    temp = ldn_grp[ldn_grp['Street'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_centres))
    print('\n')

----ABINGDON ROAD----
                  venue  freq
0           Coffee Shop   0.5
1          Soccer Field   0.5
2     Accessories Store   0.0
3  Outdoor Supply Store   0.0
4           Pastry Shop   0.0


----ARGYLE STREET----
                venue  freq
0     Thai Restaurant  0.16
1        Dessert Shop  0.16
2  Chinese Restaurant  0.11
3         Coffee Shop  0.07
4                Café  0.07


----ARTESIAN ROAD----
                venue  freq
0  Italian Restaurant  0.12
1   Indian Restaurant  0.12
2            Pharmacy  0.12
3      Pilates Studio  0.12
4         Auto Garage  0.12


----ASHCHURCH GROVE----
                      venue  freq
0                       Pub  0.14
1             Grocery Store  0.14
2                      Park  0.07
3                    Bakery  0.07
4  Mediterranean Restaurant  0.07


----ASHCHURCH PARK VILLAS----
                      venue  freq
0             Grocery Store  0.14
1                       Pub  0.11
2                      Park  0.11
3               

In [168]:
# most popular objects nearby:

def get_most_pop(row, num_top_centres):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return(row_categories_sorted.index.values[0:num_top_centres])

In [169]:
num_top_centres = 10

indicators = ['st', 'nd', 'rd']

columns = ['Street']
for ind in np.arange(num_top_centres):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

In [170]:
#create sorted points-df 
meet_points_sorted = pd.DataFrame(columns=columns)
meet_points_sorted['Street'] = ldn_grp['Street']

for ind in np.arange(ldn_grp.shape[0]):
    meet_points_sorted.iloc[ind, 1:] = get_most_pop(ldn_grp.iloc[ind, :], num_top_centres)

In [95]:
meet_points_sorted.sample(10)

Unnamed: 0,Street,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,BEECHWOOD AVENUE,Restaurant,Yoga Studio,Filipino Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field
14,BRUNSWICK PLACE,Grocery Store,Chinese Restaurant,Rental Car Location,Gas Station,Fast Food Restaurant,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farm
51,FOURNIER STREET,Coffee Shop,Café,Hotel,Indian Restaurant,Flea Market,Art Gallery,Cocktail Bar,Cosmetics Shop,Garden,Market
70,LANGHAM STREET,Business Service,Used Auto Dealership,Gastropub,Fast Food Restaurant,Yoga Studio,Field,Exhibit,Fabric Shop,Falafel Restaurant,Farm
105,THE MARLOWES,Grocery Store,Pub,Laser Tag,Discount Store,Thai Restaurant,Sandwich Place,Supermarket,Clothing Store,Bakery,Middle Eastern Restaurant
95,RIDGWAY PLACE,Pub,Bakery,Coffee Shop,Italian Restaurant,Thai Restaurant,Hotel,Gym / Fitness Center,Scenic Lookout,Sushi Restaurant,Supermarket
101,STOCKWELL GREEN,Café,Portuguese Restaurant,Skate Park,Grocery Store,Park,Music Venue,Coffee Shop,Bakery,Restaurant,Supermarket
103,SYDNEY STREET,Italian Restaurant,Men's Store,Gym,Field,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant
100,ST AUBYNS AVENUE,Food Truck,Scenic Lookout,Argentinian Restaurant,Clothing Store,Field,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market
98,SOUTH PARADE,Shopping Mall,Bakery,Construction & Landscaping,Farmers Market,Yoga Studio,Field,Fabric Shop,Falafel Restaurant,Farm,Fast Food Restaurant


In [171]:
meet_points_sorted.shape

(113, 11)

In [172]:
ldn_grp.shape

(113, 304)

In [174]:
ldn_grp = new_alwbl

In [175]:
kclusters = 5

ldn_grp_clust = ldn_grp.drop('Street', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ldn_grp_clust)

kmeans.labels_[:]

array([3, 2, 4, 4, 3, 4, 3, 3, 2, 1, 4, 0, 0, 0, 3, 0, 4, 1, 0, 4, 2, 2,
       1, 2, 2, 3, 3, 4, 3, 3, 4, 3, 0, 4, 0, 3, 1, 1, 1, 0, 1, 4, 2, 0,
       1, 1, 2, 2, 2, 0, 1, 4, 2, 4, 4, 4, 1, 4, 4, 2, 1, 2, 0, 0, 1, 3,
       4, 1, 0, 0, 1, 1, 1, 3, 2, 3, 2, 1, 2, 1, 3, 3, 1, 0, 0, 2, 1, 1,
       3, 4, 3, 2, 0, 0, 2, 0, 2, 2, 2, 0, 1, 2, 4, 2, 1, 3, 0, 1, 0, 2,
       4, 1, 1, 3, 2, 2, 3, 1, 1, 4, 4, 4, 4, 2, 4, 0])

In [176]:
#include Clusters
ldn_grp_clust=new_alwbl
ldn_grp_clust.head()

Unnamed: 0,Street,Avg_Price,Latitude,Longitude
43,ABINGDON ROAD,2591591.0,51.896436,-0.471447
451,ARGYLE STREET,2300000.0,22.32634,114.188127
495,ARTESIAN ROAD,2462500.0,41.725304,-88.205529
532,ASHCHURCH GROVE,2425000.0,51.501121,-0.241411
533,ASHCHURCH PARK VILLAS,2600000.0,51.500051,-0.242173


In [177]:
ldn_grp_clust.shape

(126, 4)

In [178]:
new_alwbl.shape

(126, 4)

In [179]:
ldn_grp_clust.dtypes

Street        object
Avg_Price    float64
Latitude     float64
Longitude    float64
dtype: object

In [181]:
new_alwbl.dtypes

Street        object
Avg_Price    float64
Latitude     float64
Longitude    float64
dtype: object

In [182]:
#clustering labels and merging
ldn_grp_clust['Cluster Labels'] = kmeans.labels_
ldn_grp_clust = ldn_grp_clust.join(meet_points_sorted.set_index('Street'), on='Street')

ldn_grp_clust.head(30) # check the last columns!

Unnamed: 0,Street,Avg_Price,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
43,ABINGDON ROAD,2591591.0,51.896436,-0.471447,3,Soccer Field,Coffee Shop,Yoga Studio,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field
451,ARGYLE STREET,2300000.0,22.32634,114.188127,2,Dessert Shop,Thai Restaurant,Chinese Restaurant,Coffee Shop,Café,Cha Chaan Teng,Hotpot Restaurant,Halal Restaurant,Asian Restaurant,Fast Food Restaurant
495,ARTESIAN ROAD,2462500.0,41.725304,-88.205529,4,Italian Restaurant,Indian Restaurant,Jewelry Store,Pizza Place,Pilates Studio,Sushi Restaurant,Pharmacy,Auto Garage,Fast Food Restaurant,Exhibit
532,ASHCHURCH GROVE,2425000.0,51.501121,-0.241411,4,Pub,Grocery Store,Bakery,Indian Restaurant,Coffee Shop,Park,Mediterranean Restaurant,Ice Cream Shop,Fish & Chips Shop,Café
533,ASHCHURCH PARK VILLAS,2600000.0,51.500051,-0.242173,3,Grocery Store,Pub,Park,Coffee Shop,Indian Restaurant,Mediterranean Restaurant,Bakery,Ice Cream Shop,Fish & Chips Shop,Moroccan Restaurant
675,AYNHOE ROAD,2475000.0,52.018806,-1.305945,4,Pub,Bar,Construction & Landscaping,Park,Food & Drink Shop,Food,Food Court,Flea Market,Fish Market,Fish & Chips Shop
833,BARRETT'S GROVE,2588470.0,51.552173,-0.077329,3,Turkish Restaurant,Cocktail Bar,Café,Coffee Shop,Bar,Yoga Studio,Cuban Restaurant,Pub,Middle Eastern Restaurant,Boutique
941,BEAR LANE,2600000.0,51.751622,-1.254984,3,Coffee Shop,Café,Pub,Sandwich Place,Restaurant,Art Gallery,Thai Restaurant,Bookstore,Bakery,Indian Restaurant
984,BEDALE STREET,2344600.0,54.815387,-1.454895,2,Gym,Auto Garage,Yoga Studio,Event Service,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
1016,BEECHWOOD AVENUE,2500000.0,54.855528,-6.290581,1,Restaurant,Yoga Studio,Filipino Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field


In [183]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(ldn_grp_clust['Latitude'], ldn_grp_clust['Longitude'], ldn_grp_clust['Street'], ldn_grp_clust['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [185]:
ldn_grp_clust.loc[ldn_grp_clust['Cluster Labels'] == 0, ldn_grp_clust.columns[[1] + list(range(5, ldn_grp_clust.shape[1]))]].sample(10)

Unnamed: 0,Avg_Price,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10300,2415894.0,Italian Restaurant,Café,Pub,Middle Eastern Restaurant,Coffee Shop,Garden,Tapas Restaurant,Grocery Store,Gourmet Shop,French Restaurant
1319,2376941.0,,,,,,,,,,
5525,2395000.0,Coffee Shop,French Restaurant,Italian Restaurant,Burger Joint,Cocktail Bar,Hotel,Jewelry Store,Café,Clothing Store,English Restaurant
13729,2400000.0,Discount Store,Thrift / Vintage Store,Thai Restaurant,Mexican Restaurant,Donut Shop,Field,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
10823,2407000.0,Coffee Shop,Bar,Gym,Yoga Studio,Gym / Fitness Center,Pub,Pizza Place,Park,Modern European Restaurant,Japanese Restaurant
3222,2409552.0,Italian Restaurant,Pub,Bakery,Restaurant,Clothing Store,Café,Cocktail Bar,Boutique,Gym / Fitness Center,Juice Bar
9761,2400000.0,,,,,,,,,,
1734,2400000.0,Park,Lounge,Convenience Store,Sandwich Place,Fast Food Restaurant,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farm
9256,2400000.0,Baseball Field,Monument / Landmark,Trail,Park,Food & Drink Shop,Food,Food Court,Flea Market,Fish Market,Fish & Chips Shop
6071,2388000.0,Diner,Filipino Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Yoga Studio


In [186]:
ldn_grp_clust.loc[ldn_grp_clust['Cluster Labels'] == 1, ldn_grp_clust.columns[[1] + list(range(5, ldn_grp_clust.shape[1]))]].sample(10)

Unnamed: 0,Avg_Price,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2209,2500000.0,Italian Restaurant,Dessert Shop,Café,Lebanese Restaurant,Pizza Place,Convenience Store,Park,Coffee Shop,Deli / Bodega,Ice Cream Shop
1862,2513551.0,Hotel,Theater,Palace,Coffee Shop,Historic Site,Sporting Goods Shop,Garden,Sandwich Place,Hotel Bar,Movie Theater
11336,2520000.0,Music Venue,Baseball Field,Photography Studio,Park,Fast Food Restaurant,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farm
6336,2525000.0,Bus Stop,Construction & Landscaping,Grocery Store,Yoga Studio,Field,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
7347,2519336.0,Café,Hotel,Gastropub,Pub,Pizza Place,River,Museum,Spa,Soccer Stadium,Gourmet Shop
1016,2500000.0,Restaurant,Yoga Studio,Filipino Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field
10081,2502500.0,Furniture / Home Store,Electronics Store,Supermarket,Fast Food Restaurant,Yoga Studio,Filipino Restaurant,Fabric Shop,Falafel Restaurant,Farm,Farmers Market
8481,2514444.0,Café,Sandwich Place,Restaurant,Pub,Coffee Shop,Park,Hotel,Grocery Store,Bar,Plaza
6316,2500000.0,,,,,,,,,,
9866,2522500.0,Hotel,Café,Pub,Chinese Restaurant,Coffee Shop,Garden,Hostel,Indian Restaurant,Gym / Fitness Center,Greek Restaurant


In [187]:
ldn_grp_clust.loc[ldn_grp_clust['Cluster Labels'] == 2, ldn_grp_clust.columns[[1] + list(range(5, ldn_grp_clust.shape[1]))]].sample(10)

Unnamed: 0,Avg_Price,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6414,2350000.0,Café,Pub,Coffee Shop,Hotel,Sushi Restaurant,History Museum,Modern European Restaurant,Persian Restaurant,Restaurant,Burger Joint
3955,2315580.0,Deli / Bodega,Italian Restaurant,Wings Joint,Food,Bar,Liquor Store,Diner,Pharmacy,Bakery,Brazilian Restaurant
12008,2348825.0,Italian Restaurant,Men's Store,Gym,Field,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant
2541,2310000.0,Café,Pub,Clothing Store,Bakery,Italian Restaurant,Burger Joint,Gym / Fitness Center,Supermarket,Breakfast Spot,Juice Bar
984,2344600.0,Gym,Auto Garage,Yoga Studio,Event Service,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
11264,2350000.0,Clothing Store,Restaurant,Hotel,Juice Bar,Gym / Fitness Center,Bakery,Italian Restaurant,Pub,Indian Restaurant,Recording Studio
4372,2352000.0,Hotel,Science Museum,Exhibit,Mediterranean Restaurant,Coffee Shop,Café,Garden,Restaurant,Bakery,Pub
11786,2344250.0,Dive Bar,Home Service,Campground,Yoga Studio,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
3963,2350000.0,Pub,Field,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant
9003,2355000.0,Park,Outdoors & Recreation,Arts & Crafts Store,Field,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant


In [200]:
ldn_grp_clust.loc[ldn_grp_clust['Cluster Labels'] == 3, ldn_grp_clust.columns[[1] + list(range(5, ldn_grp_clust.shape[1]))]].sample(10)

Unnamed: 0,Avg_Price,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10121,2550000.0,Construction & Landscaping,Bar,Filipino Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field
8788,2600000.0,Pub,Farm,Field,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant
7316,2600000.0,Business Service,Used Auto Dealership,Gastropub,Fast Food Restaurant,Yoga Studio,Field,Exhibit,Fabric Shop,Falafel Restaurant,Farm
6446,2555000.0,Japanese Restaurant,Coffee Shop,Italian Restaurant,Café,French Restaurant,Wine Bar,Restaurant,Chinese Restaurant,Bar,Beer Bar
5835,2598767.0,Convenience Store,Food & Drink Shop,Nightclub,Yoga Studio,Filipino Restaurant,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
3037,2579833.0,Coffee Shop,Italian Restaurant,Café,Pub,Park,Grocery Store,Athletics & Sports,Mediterranean Restaurant,French Restaurant,Gym / Fitness Center
2713,2600000.0,,,,,,,,,,
2627,2555526.0,Outlet Store,Convenience Store,Grocery Store,Furniture / Home Store,Shopping Plaza,Yoga Studio,Farmers Market,Exhibit,Fabric Shop,Falafel Restaurant
2543,2562230.0,Grocery Store,Coffee Shop,Restaurant,Yoga Studio,Fast Food Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market
12161,2550000.0,Hotel,Pizza Place,Café,Garden,Bakery,Palace,Exhibit,Restaurant,Indian Restaurant,Tram Station


In [202]:
ldn_grp_clust.loc[ldn_grp_clust['Cluster Labels'] == 4, ldn_grp_clust.columns[[1] + list(range(5, ldn_grp_clust.shape[1]))]].head(5)

Unnamed: 0,Avg_Price,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
495,2462500.0,Italian Restaurant,Indian Restaurant,Jewelry Store,Pizza Place,Pilates Studio,Sushi Restaurant,Pharmacy,Auto Garage,Fast Food Restaurant,Exhibit
532,2425000.0,Pub,Grocery Store,Bakery,Indian Restaurant,Coffee Shop,Park,Mediterranean Restaurant,Ice Cream Shop,Fish & Chips Shop,Café
675,2475000.0,Pub,Bar,Construction & Landscaping,Park,Food & Drink Shop,Food,Food Court,Flea Market,Fish Market,Fish & Chips Shop
1172,2425000.0,Theater,Coffee Shop,Italian Restaurant,Cocktail Bar,Bakery,Ice Cream Shop,Hotel,Dessert Shop,Museum,Indian Restaurant
1834,2455000.0,Grocery Store,Chinese Restaurant,Rental Car Location,Gas Station,Fast Food Restaurant,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farm


**Results:**

Thanks to our model, we received information about which real estate in London are the most attractive for potential investors.
Now we can research objects by district to find out the most and least profitable places.


**Conclusion:**

So, according to LonRes Agent Survey Q4 2019, 45% of real estate experts believe that in the next 12 months, residential real estate in the UK expects an increase in the volume of sales transactions by an average of 5% (and this is a big figure for the British market). According to 56% of experts, in the next 12 months, housing prices will increase by 1-5%. As for rent, 70% of experts believe that its price will also increase by 1–5%.
To help investors make a bargain, we grouped London's neighborhoods to recommend locations and the current average price of real estate, where home buyers can make real estate investments. We have recommended profitable establishments according to the amenities and necessary amenities, such as elementary schools, high schools, hospitals and grocery stores.

First, we collected data on London real estate, and relative data on prices paid were obtained from the Land Registry (http://landregistry.data.gov.uk/). In addition, in order to explore and determine recommended locations in different places in accordance with the availability of amenities and fixed assets, we got access to the data through the FourSquare API and organized it as a data frame for visualization. By combining data on London real estate properties and the relative price paid from the HM land registry and data on the objects and necessary objects surrounding such objects through the FourSquare API, we were able to recommend profitable real estate investments.