## Capstone notebook - identifying tram stops in Manchester as potential sites for a new business ##

In [1]:
import numpy as np 

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!pip install folium
import folium
print('Libraries imported.')

Libraries imported.


### Import tram stop locations and map them ###

In [2]:
# Download the TfGM stop list (one row per stop) straight from the open-data portal
tfgm_stops_url = 'http://odata.tfgm.com/opendata/downloads/TfGMMetroRailStops.csv'
tramstops = pd.read_csv(tfgm_stops_url)
tramstops.head()

Unnamed: 0,NPTREF,SMSREF,GPSLAT,GPSLON,GMGRFE,GMGRFN,RSTNAM,MAINRD,SIDEST,NETTYP,GAZREF,NETWRF
0,9100ALTRNHM,,53.387668,-2.347158,377008,387924,ALTRINCHAM INTERCHANGE,STAMFORD NEW RD,MOSS LANE,R,E0028261,SJR032
1,9100ARDWICK,,53.471206,-2.213369,385934,397183,ARDWICK,DEVONSHIRE ST N,BLIND LANE,R,E0028484,EBR116
2,9100ASHBRYS,,53.471867,-2.194752,387150,397253,ASHBURYS,POTTERY LANE,JARMAIN STREET,R,N0074883,EBR115
3,9100ASHONUL,,53.491279,-2.094327,393839,399399,ASHTON UNDER LYNE,SACKVILLE ST,TURNER LANE,R,E0028492,EHR101
4,9100ATHERTN,,53.529109,-2.47799,368412,403710,ATHERTON,UPTON ROAD,BOLTON ROAD,R,E0028727,WKR069


In [3]:
#clean up data, we only need coordinates and stop name
tramstops = tramstops[['GPSLAT', 'GPSLON', 'RSTNAM']]
tramstops.columns = ['Latitude', 'Longitude', 'Station']
tramstops.head()

Unnamed: 0,Latitude,Longitude,Station
0,53.387668,-2.347158,ALTRINCHAM INTERCHANGE
1,53.471206,-2.213369,ARDWICK
2,53.471867,-2.194752,ASHBURYS
3,53.491279,-2.094327,ASHTON UNDER LYNE
4,53.529109,-2.47799,ATHERTON


### Putting the tram stops on a folium map ###

In [4]:
# Look up the coordinates used to centre every map in this notebook
address = 'Manchester, United Kingdom'

geolocator = Nominatim(user_agent="tl-manchester-neigh")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Manchester are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Manchester are 53.4794892, -2.2451148.


In [5]:
# map tram stops onto folium map as markers
map_manchester = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, station in zip(tramstops['Latitude'], tramstops['Longitude'], tramstops['Station']):
    label = "{}".format(station)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manchester)
    
map_manchester

### Get Foursquare data for nearby venues in relation to the tram stops ###  

In [6]:
# The code was removed by Watson Studio for sharing.

**As tram stops are relatively close together, a limit of up to 100 venues within a radius of 300 metres has been selected**

In [7]:
def getNearbyVenues(names, latitudes, longitudes, LIMIT = 100, radius=300):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Station name and coordinates; iterated together with ``zip``.
    LIMIT : int, default 100
        Value sent as the ``limit`` query parameter.
    radius : int, default 300
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Station',
        'Station Latitude', 'Station Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame has these columns
        even when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the notebook-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``.
    """
    columns = ['Station',
               'Station Latitude',
               'Station Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # a timeout stops one stalled request from hanging the whole loop
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue with an empty category list gets None instead of an IndexError
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # passing the column names here keeps the schema intact when `rows` is empty
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [8]:
# Fetch the venues around every stop (default limit and radius of getNearbyVenues)
manchester_venues = getNearbyVenues(
    names=tramstops['Station'],
    latitudes=tramstops['Latitude'],
    longitudes=tramstops['Longitude'],
)

ALTRINCHAM INTERCHANGE
ARDWICK
ASHBURYS
ASHTON UNDER LYNE
ATHERTON
BURNAGE
BELLE VUE
BLACKROD
BRAMHALL
BOLTON INTERCHANGE
BROADBOTTOM
BREDBURY
BROMLEY CROSS
BRINNINGTON
BRYN
CHASSEN ROAD
CHEADLE HULME
CLIFTON
CASTLETON
DAISY HILL
DAVENPORT
DENTON
ECCLES
EAST DIDSBURY
FLIXTON
FLOWERY FIELD
FAIRFIELD
FARNWORTH
GATHURST
GATLEY
GREENFIELD
GUIDE BRIDGE
GODLEY
GORTON
HAG FOLD
HALE
HATTERSLEY
HAZEL GROVE
HINDLEY
HALL I'TH' WOOD
HEALD GREEN
HUMPHREY PARK
HORWICH PARKWAY
HEATON CHAPEL
HYDE CENTRAL
HYDE NORTH
INCE
IRLAM
KEARSLEY
LITTLEBOROUGH
LOSTOCK
LEVENSHULME
MARPLE
MIDDLEWOOD
MAULDETH ROAD
MILLS HILL
DEANSGATE
MANCHESTER AIRPORT RAIL STATION
MANCHESTER OXFORD ROAD
MANCHESTER PICCADILLY
MANCHESTER UNITED FOOTBALL GROUND
MANCHESTER VICTORIA
MOSSLEY
MOORSIDE
MOSES GATE
MOSTON
NAVIGATION ROAD
NEWTON FOR HYDE
ORRELL
PEMBERTON
PATRICROFT
ROCHDALE
REDDISH NORTH
REDDISH SOUTH
ROSE HILL
ROMILEY
RYDER BROW
STALYBRIDGE
SALFORD CRESCENT
SALFORD CENTRAL
SMITHY BRIDGE
STOCKPORT
SWINTON
TRAFFORD PARK
URMST

In [9]:
# check venues local to tram stations have been returned, and how many types of venue there are
print(manchester_venues.shape)
print('There are {} unique categories of venue.'.format(len(manchester_venues['Venue Category'].unique())))
manchester_venues.head()

(1598, 7)
There are 203 unique categories of venue.


Unnamed: 0,Station,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,ALTRINCHAM INTERCHANGE,53.387668,-2.347158,Pi Altrincham,53.387497,-2.351508,Bar
1,ALTRINCHAM INTERCHANGE,53.387668,-2.347158,Tre Ciccio,53.386496,-2.348426,Italian Restaurant
2,ALTRINCHAM INTERCHANGE,53.387668,-2.347158,Caffè Nero,53.387259,-2.350751,Coffee Shop
3,ALTRINCHAM INTERCHANGE,53.387668,-2.347158,Rhode Island Coffee,53.386762,-2.350638,Coffee Shop
4,ALTRINCHAM INTERCHANGE,53.387668,-2.347158,Nando's,53.38706,-2.348962,Portuguese Restaurant


### One hot encoding and grouping venues by frequency ###

In [10]:
# one hot encoding
manchester_onehot = pd.get_dummies(manchester_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manchester_onehot['Station'] = manchester_venues['Station'] 

# move neighborhood column to the first column
fixed_columns = [manchester_onehot.columns[-1]] + list(manchester_onehot.columns[:-1])
manchester_onehot = manchester_onehot[fixed_columns]

manchester_onehot.head()

Unnamed: 0,Station,Accessories Store,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Betting Shop,Bistro,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Bus Stop,Business Service,Café,Canal,Canal Lock,Candy Store,Caribbean Restaurant,Casino,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cricket Ground,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Doctor's Office,Donut Shop,Eastern European Restaurant,Electronics Store,English Restaurant,Escape Room,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Film Studio,Fish & Chips Shop,Flea Market,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,History Museum,Hobby Shop,Home Service,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Lake,Laser Tag,Latin American Restaurant,Lebanese Restaurant,Light Rail Station,Lingerie Store,Lounge,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Optical Shop,Other Nightlife,Outdoor Supply Store,Paper / Office Supplies Store,Park,Performing Arts Venue,Persian Restaurant,Pet 
Store,Pharmacy,Photography Studio,Physical Therapist,Pie Shop,Pizza Place,Platform,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Rest Area,Restaurant,River,Rock Club,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Souvenir Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tea Room,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Track Stadium,Train Station,Tram Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Warehouse Store,Wine Bar,Women's Store,Yoga Studio
0,ALTRINCHAM INTERCHANGE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,ALTRINCHAM INTERCHANGE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,ALTRINCHAM INTERCHANGE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,ALTRINCHAM INTERCHANGE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,ALTRINCHAM INTERCHANGE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
# Dimensions of the one-hot frame as (rows, columns)
manchester_onehot.shape

(1598, 204)

In [12]:
# group rows by station and group frequency of each category
manchester_grouped = manchester_onehot.groupby('Station').mean().reset_index()
manchester_grouped.head()

Unnamed: 0,Station,Accessories Store,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Betting Shop,Bistro,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Bus Stop,Business Service,Café,Canal,Canal Lock,Candy Store,Caribbean Restaurant,Casino,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cricket Ground,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Doctor's Office,Donut Shop,Eastern European Restaurant,Electronics Store,English Restaurant,Escape Room,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Film Studio,Fish & Chips Shop,Flea Market,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,History Museum,Hobby Shop,Home Service,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Lake,Laser Tag,Latin American Restaurant,Lebanese Restaurant,Light Rail Station,Lingerie Store,Lounge,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Optical Shop,Other Nightlife,Outdoor Supply Store,Paper / Office Supplies Store,Park,Performing Arts Venue,Persian Restaurant,Pet 
Store,Pharmacy,Photography Studio,Physical Therapist,Pie Shop,Pizza Place,Platform,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Rest Area,Restaurant,River,Rock Club,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Souvenir Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tea Room,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Track Stadium,Train Station,Tram Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Warehouse Store,Wine Bar,Women's Store,Yoga Studio
0,ALTRINCHAM INTERCHANGE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.178571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.107143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,ARDWICK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,ASHBURYS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ASHTON UNDER LYNE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0
4,ATHERTON,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Dimensions of the per-station frequency frame as (rows, columns)
manchester_grouped.shape

(186, 204)

**Organising stations to show most common venues near them**

In [14]:

# top 5 venues for each station
num_top_venues = 5

for station in manchester_grouped['Station']:
    print("----"+station+"----")
    temp = manchester_grouped[manchester_grouped['Station'] == station].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----ALTRINCHAM INTERCHANGE----
              venue  freq
0       Coffee Shop  0.18
1  Department Store  0.11
2               Pub  0.07
3    Clothing Store  0.07
4         Bookstore  0.07


----ARDWICK----
               venue  freq
0   Business Service  0.25
1               Café  0.25
2     Sandwich Place  0.25
3      Train Station  0.25
4  Accessories Store  0.00


----ASHBURYS----
                venue  freq
0            Bus Stop  0.67
1   Food & Drink Shop  0.33
2   Accessories Store  0.00
3  Photography Studio  0.00
4     Other Nightlife  0.00


----ASHTON UNDER LYNE----
                  venue  freq
0                   Pub  0.08
1  Gym / Fitness Center  0.08
2           Coffee Shop  0.08
3             Bookstore  0.08
4       Warehouse Store  0.08


----ATHERTON----
                venue  freq
0  Chinese Restaurant  0.25
1                 Gym  0.25
2       Train Station  0.25
3         Pizza Place  0.25
4   Accessories Store  0.00


----Abraham Moss----
                venue  freq


In [15]:
# sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [16]:
# df of top 10 venues for each station
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Station']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe?
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Station'] = manchester_grouped['Station']

for ind in np.arange(manchester_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manchester_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALTRINCHAM INTERCHANGE,Coffee Shop,Department Store,Bookstore,Pub,Clothing Store,Gym Pool,Gym / Fitness Center,Movie Theater,Furniture / Home Store,French Restaurant
1,ARDWICK,Business Service,Café,Train Station,Sandwich Place,Yoga Studio,Farmers Market,French Restaurant,Food Truck,Food Court,Food & Drink Shop
2,ASHBURYS,Bus Stop,Food & Drink Shop,Yoga Studio,Farmers Market,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flea Market,Fish & Chips Shop
3,ASHTON UNDER LYNE,Furniture / Home Store,Coffee Shop,Bookstore,Shopping Mall,Market,Gym / Fitness Center,Sandwich Place,Pet Store,Clothing Store,Warehouse Store
4,ATHERTON,Chinese Restaurant,Train Station,Pizza Place,Gym,Falafel Restaurant,French Restaurant,Food Truck,Food Court,Food & Drink Shop,Flea Market


### Using k-means clusters to cluster similar stations together ###

In [17]:
# set number of clusters to 5
kclusters = 5

manchester_grouped_clustering = manchester_grouped.drop('Station', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manchester_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 3, 2, 2, 3, 2, 2, 2, 1, 2], dtype=int32)

In [19]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

manchester_merged = tramstops

#merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manchester_merged = manchester_merged.join(neighborhoods_venues_sorted.set_index('Station'), on='Station')
#drop NaN clusters and convert remaining clusters to int for mapping
manchester_merged=manchester_merged.dropna()
manchester_merged['Cluster Labels'] = manchester_merged['Cluster Labels'].apply(lambda x: int(x) if x == x else "")

manchester_merged.head()

Unnamed: 0,Latitude,Longitude,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,53.387668,-2.347158,ALTRINCHAM INTERCHANGE,2,Coffee Shop,Department Store,Bookstore,Pub,Clothing Store,Gym Pool,Gym / Fitness Center,Movie Theater,Furniture / Home Store,French Restaurant
1,53.471206,-2.213369,ARDWICK,3,Business Service,Café,Train Station,Sandwich Place,Yoga Studio,Farmers Market,French Restaurant,Food Truck,Food Court,Food & Drink Shop
2,53.471867,-2.194752,ASHBURYS,2,Bus Stop,Food & Drink Shop,Yoga Studio,Farmers Market,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flea Market,Fish & Chips Shop
3,53.491279,-2.094327,ASHTON UNDER LYNE,2,Furniture / Home Store,Coffee Shop,Bookstore,Shopping Mall,Market,Gym / Fitness Center,Sandwich Place,Pet Store,Clothing Store,Warehouse Store
4,53.529109,-2.47799,ATHERTON,3,Chinese Restaurant,Train Station,Pizza Place,Gym,Falafel Restaurant,French Restaurant,Food Truck,Food Court,Food & Drink Shop,Flea Market


**Map the clustered stations**

In [20]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manchester_merged['Latitude'], manchester_merged['Longitude'], manchester_merged['Station'], manchester_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [24]:
# selecting the second cluster
final_result = manchester_merged.loc[manchester_merged['Cluster Labels'] == 2]
final_result.head()

Unnamed: 0,Latitude,Longitude,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,53.387668,-2.347158,ALTRINCHAM INTERCHANGE,2,Coffee Shop,Department Store,Bookstore,Pub,Clothing Store,Gym Pool,Gym / Fitness Center,Movie Theater,Furniture / Home Store,French Restaurant
2,53.471867,-2.194752,ASHBURYS,2,Bus Stop,Food & Drink Shop,Yoga Studio,Farmers Market,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flea Market,Fish & Chips Shop
3,53.491279,-2.094327,ASHTON UNDER LYNE,2,Furniture / Home Store,Coffee Shop,Bookstore,Shopping Mall,Market,Gym / Fitness Center,Sandwich Place,Pet Store,Clothing Store,Warehouse Store
5,53.421964,-2.215229,BURNAGE,2,Record Shop,Bar,Dessert Shop,Rental Car Location,Café,Train Station,Sandwich Place,Grocery Store,Food Court,Food & Drink Shop
6,53.461787,-2.180187,BELLE VUE,2,Supermarket,Bus Stop,Chinese Restaurant,Yoga Studio,Fast Food Restaurant,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Food & Drink Shop


In [25]:
# Dimensions of the selected-cluster frame as (rows, columns)
final_result.shape

(101, 14)

In [26]:
# map tram stops onto folium map as markers
final_map = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, station in zip(final_result['Latitude'], final_result['Longitude'], final_result['Station']):
    label = "{}".format(station)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(final_map)
    
final_map