# Exploring City of Manchester Using Foursquare API

This projects explore the city of Manchester using Foursquare API and identify the most common type of venue in the city and less common venue in the city.
The project aims to put skills such as web scraping, data cleaning, wrangling and accessing data using API.
The result shows that italian restaurant is the most common venues in Manchester followed by bars and coffeeshops.

Data source: wikipedia, GMmapping, Foursquare.

In [166]:
import numpy as np
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


import json 
import geojson
import geopandas as gpd
import geojsonio

from itertools import chain


from shapely.geometry import mapping, shape


from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs

import folium # map rendering library

from IPython.display import Image 
from IPython.core.display import HTML
from bs4 import BeautifulSoup as bs
import lxml

In [167]:
# Load data
wiki = requests.get("https://en.wikipedia.org/wiki/M_postcode_area")
load_data = bs(wiki.text,'html.parser')

In [168]:
latitude= 53.483959 
longitude= -2.244644

In [169]:
# find table from the data loaded.
table = load_data.find('table', class_ = 'wikitable')
table

<table class="wikitable sortable" style="font-size:95%">
<tbody><tr style="white-space:nowrap">
<th>Postcode district
</th>
<th><a href="/wiki/Post_town" title="Post town">Post town</a>
</th>
<th>Coverage
</th>
<th>Local authority area
</th></tr>
<tr>
<th>M1
</th>
<td>MANCHESTER
</td>
<td><a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">Piccadilly</a>, <a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">City Centre</a>, Market Street
</td>
<td><a href="/wiki/Manchester" title="Manchester">Manchester</a>
</td></tr>
<tr>
<th>M2
</th>
<td>MANCHESTER
</td>
<td><a href="/wiki/Deansgate" title="Deansgate">Deansgate</a>, <a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">City Centre</a>
</td>
<td>Manchester
</td></tr>
<tr>
<th>M3<br/><small>(Sectors 1, 2, 3, 4 and 9)</small>
</th>
<td>MANCHESTER
</td>
<td><a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manche

In [170]:
# find rows for the table.
rows = table.find_all('tr')
rows

[<tr style="white-space:nowrap">
 <th>Postcode district
 </th>
 <th><a href="/wiki/Post_town" title="Post town">Post town</a>
 </th>
 <th>Coverage
 </th>
 <th>Local authority area
 </th></tr>, <tr>
 <th>M1
 </th>
 <td>MANCHESTER
 </td>
 <td><a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">Piccadilly</a>, <a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">City Centre</a>, Market Street
 </td>
 <td><a href="/wiki/Manchester" title="Manchester">Manchester</a>
 </td></tr>, <tr>
 <th>M2
 </th>
 <td>MANCHESTER
 </td>
 <td><a href="/wiki/Deansgate" title="Deansgate">Deansgate</a>, <a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">City Centre</a>
 </td>
 <td>Manchester
 </td></tr>, <tr>
 <th>M3<br/><small>(Sectors 1, 2, 3, 4 and 9)</small>
 </th>
 <td>MANCHESTER
 </td>
 <td><a class="mw-redirect" href="/wiki/Manchester_City_Centre" title="Manchester City Centre">City Centre</

In [171]:
# Extract content for each columns from each row
data = []
for row in rows:
    info = row.text.split('\n')[1:-1] # remove empty str (first and last items)
    data.append(info)
    
data[0:10]

[['Postcode district',
  '',
  'Post town',
  '',
  'Coverage',
  '',
  'Local authority area'],
 ['M1',
  '',
  'MANCHESTER',
  '',
  'Piccadilly, City Centre, Market Street',
  '',
  'Manchester'],
 ['M2', '', 'MANCHESTER', '', 'Deansgate, City Centre', '', 'Manchester'],
 ['M3(Sectors 1, 2, 3, 4 and 9)',
  '',
  'MANCHESTER',
  '',
  'City Centre, Deansgate,  Castlefield',
  '',
  'Manchester'],
 ['M3(Sectors 5, 6 and 7)',
  '',
  'SALFORD',
  '',
  'Blackfriars, Greengate, Trinity',
  '',
  'Salford'],
 ['M4',
  '',
  'MANCHESTER',
  '',
  'Ancoats, Northern Quarter, Strangeways',
  '',
  'Manchester'],
 ['M5',
  '',
  'SALFORD',
  '',
  'Ordsall, Seedley, Weaste, University',
  '',
  'Salford'],
 ['M6',
  '',
  'SALFORD',
  '',
  "Pendleton, Irlams o' th' Height, Langworthy, Seedley, Charlestown",
  '',
  'Salford'],
 ['M7',
  '',
  'SALFORD',
  '',
  'Higher Broughton, Cheetwood, Lower Broughton, Kersal',
  '',
  'Salford'],
 ['M8', '', 'MANCHESTER', '', 'Crumpsall, Cheetham Hill

In [172]:
# store information in dataframe.
df = pd.DataFrame(data[1:], columns=data[0])
df

Unnamed: 0,Postcode district,Unnamed: 2,Post town,Unnamed: 4,Coverage,Unnamed: 6,Local authority area
0,M1,,MANCHESTER,,"Piccadilly, City Centre, Market Street",,Manchester
1,M2,,MANCHESTER,,"Deansgate, City Centre",,Manchester
2,"M3(Sectors 1, 2, 3, 4 and 9)",,MANCHESTER,,"City Centre, Deansgate, Castlefield",,Manchester
3,"M3(Sectors 5, 6 and 7)",,SALFORD,,"Blackfriars, Greengate, Trinity",,Salford
4,M4,,MANCHESTER,,"Ancoats, Northern Quarter, Strangeways",,Manchester
5,M5,,SALFORD,,"Ordsall, Seedley, Weaste, University",,Salford
6,M6,,SALFORD,,"Pendleton, Irlams o' th' Height, Langworthy, S...",,Salford
7,M7,,SALFORD,,"Higher Broughton, Cheetwood, Lower Broughton, ...",,Salford
8,M8,,MANCHESTER,,"Crumpsall, Cheetham Hill",,Manchester
9,M9,,MANCHESTER,,"Harpurhey, Blackley",,Manchester


In [173]:
df.columns

Index(['Postcode district', '', 'Post town', '', 'Coverage', '',
       'Local authority area'],
      dtype='object')

In [174]:
df=df[["Postcode district","Post town","Coverage","Local authority area"]]
df

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area
0,M1,MANCHESTER,"Piccadilly, City Centre, Market Street",Manchester
1,M2,MANCHESTER,"Deansgate, City Centre",Manchester
2,"M3(Sectors 1, 2, 3, 4 and 9)",MANCHESTER,"City Centre, Deansgate, Castlefield",Manchester
3,"M3(Sectors 5, 6 and 7)",SALFORD,"Blackfriars, Greengate, Trinity",Salford
4,M4,MANCHESTER,"Ancoats, Northern Quarter, Strangeways",Manchester
5,M5,SALFORD,"Ordsall, Seedley, Weaste, University",Salford
6,M6,SALFORD,"Pendleton, Irlams o' th' Height, Langworthy, S...",Salford
7,M7,SALFORD,"Higher Broughton, Cheetwood, Lower Broughton, ...",Salford
8,M8,MANCHESTER,"Crumpsall, Cheetham Hill",Manchester
9,M9,MANCHESTER,"Harpurhey, Blackley",Manchester


In [175]:
df.dtypes

Postcode district       object
Post town               object
Coverage                object
Local authority area    object
dtype: object

In [176]:
# filtered special postcode (postbox/large users/airport) out

df=df[:43]

df["Postcode district"][2]="M3"
df["Postcode district"][3]="M3"

df

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area
0,M1,MANCHESTER,"Piccadilly, City Centre, Market Street",Manchester
1,M2,MANCHESTER,"Deansgate, City Centre",Manchester
2,M3,MANCHESTER,"City Centre, Deansgate, Castlefield",Manchester
3,M3,SALFORD,"Blackfriars, Greengate, Trinity",Salford
4,M4,MANCHESTER,"Ancoats, Northern Quarter, Strangeways",Manchester
5,M5,SALFORD,"Ordsall, Seedley, Weaste, University",Salford
6,M6,SALFORD,"Pendleton, Irlams o' th' Height, Langworthy, S...",Salford
7,M7,SALFORD,"Higher Broughton, Cheetwood, Lower Broughton, ...",Salford
8,M8,MANCHESTER,"Crumpsall, Cheetham Hill",Manchester
9,M9,MANCHESTER,"Harpurhey, Blackley",Manchester


In [177]:
def chainer(s):
    return list(chain.from_iterable(s.str.split(', ')))


In [178]:
manc_dist= pd.DataFrame({'Area': chainer(df['Coverage'])})
manc_dist=manc_dist.drop_duplicates()
manc_dist

Unnamed: 0,Area
0,Piccadilly
1,City Centre
2,Market Street
3,Deansgate
7,Castlefield
8,Blackfriars
9,Greengate
10,Trinity
11,Ancoats
12,Northern Quarter


In [179]:
manc= gpd.read_file('Data/gmauthorities.geojson')
print(manc.head())

  LGDNAME  AREA LGDCode  OBJECTID                     NAME AREA_CODE  \
0       0   0.0       0         0    Trafford District (B)       MTD   
1       0   0.0       0         0  Manchester District (B)       MTD   
2       0   0.0       0         0       Wigan District (B)       MTD   
3       0   0.0       0         0      Oldham District (B)       MTD   
4       0   0.0       0         0      Bolton District (B)       MTD   

              DESCRIPTIO                FILE_NAME  NUMBER  NUMBER0  \
0  Metropolitan District    TRAFFORD_DISTRICT_(B)      10       55   
1  Metropolitan District  MANCHESTER_DISTRICT_(B)       4       70   
2  Metropolitan District       WIGAN_DISTRICT_(B)      15       60   
3  Metropolitan District      OLDHAM_DISTRICT_(B)       8       48   
4  Metropolitan District      BOLTON_DISTRICT_(B)      13       50   

   POLYGON_ID  UNIT_ID       CODE   HECTARES TYPE_CODE  \
0       56556    18786  E08000009  10604.476        AA   
1       56591    18821  E08000

In [180]:
manc2=pd.DataFrame(manc)
manc2

Unnamed: 0,LGDNAME,AREA,LGDCode,OBJECTID,NAME,AREA_CODE,DESCRIPTIO,FILE_NAME,NUMBER,NUMBER0,POLYGON_ID,UNIT_ID,CODE,HECTARES,TYPE_CODE,DESCRIPT0,TYPE_COD0,DESCRIPT1,X,Y,geometry
0,0,0.0,0,0,Trafford District (B),MTD,Metropolitan District,TRAFFORD_DISTRICT_(B),10,55,56556,18786,E08000009,10604.476,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.427710495 53.3888997755, -2.4277..."
1,0,0.0,0,0,Manchester District (B),MTD,Metropolitan District,MANCHESTER_DISTRICT_(B),4,70,56591,18821,E08000003,11564.826,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.2863437319 53.3758308855, -2.285..."
2,0,0.0,0,0,Wigan District (B),MTD,Metropolitan District,WIGAN_DISTRICT_(B),15,60,56640,18868,E08000010,18817.091,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.6121390164 53.4811225025, -2.612..."
3,0,0.0,0,0,Oldham District (B),MTD,Metropolitan District,OLDHAM_DISTRICT_(B),8,48,56458,18690,E08000004,14234.507,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.0094719608 53.616764598, -2.0092..."
4,0,0.0,0,0,Bolton District (B),MTD,Metropolitan District,BOLTON_DISTRICT_(B),13,50,56602,18832,E08000001,13979.21,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.354390447 53.5262240218, -2.3544..."
5,0,0.0,0,0,Rochdale District (B),MTD,Metropolitan District,ROCHDALE_DISTRICT_(B),4,44,56595,18825,E08000005,15812.803,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.0308878311 53.6292249322, -2.030..."
6,0,0.0,0,0,Salford District (B),MTD,Metropolitan District,SALFORD_DISTRICT_(B),6,44,56575,18805,E08000006,9719.732,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.4677003312 53.437426729, -2.4679..."
7,0,0.0,0,0,Bury District (B),MTD,Metropolitan District,BURY_DISTRICT_(B),9,38,56616,18846,E08000002,9946.009,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.3253293023 53.6554399236, -2.324..."
8,0,0.0,0,0,Stockport District (B),MTD,Metropolitan District,STOCKPORT_DISTRICT_(B),12,50,56527,18757,E08000007,12604.032,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.0928109838 53.4375223883, -2.092..."
9,0,0.0,0,0,Tameside District (B),MTD,Metropolitan District,TAMESIDE_DISTRICT_(B),15,44,56468,18700,E08000008,10315.421,AA,CIVIL ADMINISTRATION AREA,,,,,"(POLYGON ((-2.0928109838 53.4375223883, -2.092..."


## Exploring Map of Manchester

In [181]:
CLIENT_ID = 'QIK1D0ZS3O5Q0LQFW1B55UFZTS1SEJSNEQXZWBOREOKPCBYT' # Foursquare ID
CLIENT_SECRET = '11D3FUSHEDTKLWO1AZNGDTGHX45SSPRISCQGY5KUV2T1MGP5' # your Foursquare Secret
VERSION = '20190605' # Foursquare API version

In [182]:
# get the top 100 venues that are in Parkwoods within a radius of 500 meters.

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=QIK1D0ZS3O5Q0LQFW1B55UFZTS1SEJSNEQXZWBOREOKPCBYT&client_secret=11D3FUSHEDTKLWO1AZNGDTGHX45SSPRISCQGY5KUV2T1MGP5&v=20190605&ll=53.483959,-2.244644&radius=500&limit=100'

In [183]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cf6e600351e3d1286b48a5f'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'City Centre',
  'headerFullLocation': 'City Centre, Manchester',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 126,
  'suggestedBounds': {'ne': {'lat': 53.4884590045, 'lng': -2.2370956949025502},
   'sw': {'lat': 53.479458995499996, 'lng': -2.25219230509745}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ade0e2ff964a520426f21e3',
       'name': 'Royal Exchange Theatre',
       'location': {'address': "St Ann's Square",
        'lat': 53.48255513360243,
        'lng': -2.2447701252502883,
        'labeledLatLngs': [{'label': 'display',
          'la

In [184]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


In [185]:
# clean the json and structure it into a pandas dataframe.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Royal Exchange Theatre,Theater,53.482555,-2.24477
1,Grindsmith Coffee,Coffee Shop,53.485324,-2.246198
2,Pot Kettle Black,Coffee Shop,53.482725,-2.246399
3,Selfridges & Co,Department Store,53.48429,-2.243432
4,The Cosy Club,Bar,53.484744,-2.243265


In [189]:
# This function returns nearby venues retriec from foursquare.

def getNearbyVenues(names, radius=500):
    
    venues_list=[]
    for name in names:
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitude, 
            longitude, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name,  
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Area',  
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [190]:
latitude= 53.483959 
longitude= -2.244644
manc_venues = getNearbyVenues(names=manc_dist['Area'])

Piccadilly
City Centre
Market Street
Deansgate
 Castlefield
Blackfriars
Greengate
Trinity
Ancoats
Northern Quarter
Strangeways
Ordsall
Seedley
Weaste
University
Pendleton
Irlams o' th' Height
Langworthy
Charlestown
Higher Broughton
Cheetwood
Lower Broughton
Kersal
Crumpsall
Cheetham Hill
Harpurhey
Blackley
Clayton
Openshaw
Beswick
Ardwick
Longsight
Chorlton-on-Medlock
Fallowfield
Moss Side
Ladybarn
Rusholme
Hulme
Manchester Science Park
Old Trafford
Firswood
Whalley Range
Trafford Park
The Trafford Centre
Abbey Hey
Gorton
Levenshulme
Burnage
Didsbury
Withington
Chorlton-cum-Hardy
 Barlow Moor
Wythenshawe
Northenden
Sharston Industrial Area
Baguley
Brooklands (Manchester and Trafford)
Roundthorn Industrial Estate
Middleton
Alkrington
Chadderton
Prestwich
 Sedgley Park
 Simister
Radcliffe
Stoneclough
Swinton
Clifton
Pendlebury
Wardley
Agecroft
Worsley
Walkden
Boothstown
Mosley Common
Wardley Industrial Estate
Tyldesley
Astley
Eccles
Monton
Peel Green
Winton
Patricroft
Barton-upon-Irwell


In [191]:
print(manc_venues.shape)
manc_venues.head()

(10900, 5)


Unnamed: 0,Area,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Piccadilly,Royal Exchange Theatre,53.482555,-2.24477,Theater
1,Piccadilly,Grindsmith Coffee,53.485324,-2.246198,Coffee Shop
2,Piccadilly,Pot Kettle Black,53.482725,-2.246399,Coffee Shop
3,Piccadilly,Selfridges & Co,53.48429,-2.243432,Department Store
4,Piccadilly,The Cosy Club,53.484744,-2.243265,Bar


In [192]:
manc_venues.groupby('Area').count()

Unnamed: 0_level_0,Venue,Venue Latitude,Venue Longitude,Venue Category
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Barlow Moor,100,100,100,100
Castlefield,100,100,100,100
Sedgley Park,100,100,100,100
Simister,100,100,100,100
Abbey Hey,100,100,100,100
Agecroft,100,100,100,100
Alkrington,100,100,100,100
Ancoats,100,100,100,100
Ardwick,100,100,100,100
Astley,100,100,100,100


In [193]:
print('There are {} uniques categories.'.format(len(manc_venues['Venue Category'].unique())))

There are 51 uniques categories.


### Analysing each area

In [194]:
# one hot encoding
manc_onehot = pd.get_dummies(manc_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manc_onehot['Area'] = manc_venues['Area'] 

# move neighborhood column to the first column
fixed_columns = [manc_onehot.columns[-1]] + list(manc_onehot.columns[:-1])
manc_onehot = manc_onehot[fixed_columns]

manc_onehot.head()

Unnamed: 0,Area,American Restaurant,Argentinian Restaurant,Asian Restaurant,Bar,Bike Shop,Bookstore,Boutique,Brazilian Restaurant,Burger Joint,Burrito Place,Café,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Department Store,Dessert Shop,Electronics Store,English Restaurant,French Restaurant,Furniture / Home Store,Gift Shop,Hotel,Indian Restaurant,Italian Restaurant,Latin American Restaurant,Library,Market,Mexican Restaurant,Modern European Restaurant,Museum,Pizza Place,Plaza,Portuguese Restaurant,Pub,Sandwich Place,Seafood Restaurant,Shopping Mall,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Stationery Store,Steakhouse,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Vietnamese Restaurant,Wine Bar
0,Piccadilly,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,Piccadilly,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Piccadilly,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Piccadilly,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Piccadilly,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [196]:
# Group rows by area and by taking the mean of the frequency of occurrence of each category.

manc_grouped = manc_onehot.groupby('Area').mean().reset_index()
manc_grouped

Unnamed: 0,Area,American Restaurant,Argentinian Restaurant,Asian Restaurant,Bar,Bike Shop,Bookstore,Boutique,Brazilian Restaurant,Burger Joint,Burrito Place,Café,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Department Store,Dessert Shop,Electronics Store,English Restaurant,French Restaurant,Furniture / Home Store,Gift Shop,Hotel,Indian Restaurant,Italian Restaurant,Latin American Restaurant,Library,Market,Mexican Restaurant,Modern European Restaurant,Museum,Pizza Place,Plaza,Portuguese Restaurant,Pub,Sandwich Place,Seafood Restaurant,Shopping Mall,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Stationery Store,Steakhouse,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Vietnamese Restaurant,Wine Bar
0,Barlow Moor,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
1,Castlefield,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
2,Sedgley Park,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
3,Simister,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
4,Abbey Hey,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
5,Agecroft,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
6,Alkrington,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
7,Ancoats,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
8,Ardwick,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01
9,Astley,0.03,0.01,0.02,0.07,0.01,0.02,0.01,0.01,0.01,0.01,0.03,0.01,0.02,0.03,0.07,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.06,0.04,0.08,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.04,0.03,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01


In [199]:
# Print each neighborhood along with the top 5 most common venues.
num_top_venues = 5

for x in manc_grouped['Area']:
    print("----"+x+"----")
    temp = manc_grouped[manc_grouped['Area'] == x].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Barlow Moor----
                venue  freq
0  Italian Restaurant  0.08
1                 Bar  0.07
2         Coffee Shop  0.07
3               Hotel  0.06
4                 Pub  0.04


---- Castlefield----
                venue  freq
0  Italian Restaurant  0.08
1                 Bar  0.07
2         Coffee Shop  0.07
3               Hotel  0.06
4                 Pub  0.04


---- Sedgley Park----
                venue  freq
0  Italian Restaurant  0.08
1                 Bar  0.07
2         Coffee Shop  0.07
3               Hotel  0.06
4                 Pub  0.04


---- Simister----
                venue  freq
0  Italian Restaurant  0.08
1                 Bar  0.07
2         Coffee Shop  0.07
3               Hotel  0.06
4                 Pub  0.04


----Abbey Hey----
                venue  freq
0  Italian Restaurant  0.08
1                 Bar  0.07
2         Coffee Shop  0.07
3               Hotel  0.06
4                 Pub  0.04


----Agecroft----
                venue  freq
0  It

In [200]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [203]:
# Return the 10 most common venue
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Area']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
areas_venues_sorted = pd.DataFrame(columns=columns)
areas_venues_sorted['Area'] = manc_grouped['Area']

for ind in np.arange(manc_grouped.shape[0]):
    areas_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manc_grouped.iloc[ind, :], num_top_venues)

areas_venues_sorted

Unnamed: 0,Area,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barlow Moor,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
1,Castlefield,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
2,Sedgley Park,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
3,Simister,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
4,Abbey Hey,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
5,Agecroft,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
6,Alkrington,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
7,Ancoats,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
8,Ardwick,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
9,Astley,Italian Restaurant,Bar,Coffee Shop,Hotel,Pub,Indian Restaurant,American Restaurant,Café,Cocktail Bar,Sandwich Place
