# IBM APPLIED DATA SCIENCE CAPSTONE

## Opening a new office building for an engineering company in Madrid, Spain.

#### First, we import all the libraries that we will use in this project:

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files


from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans


import folium # map rendering library

from bs4 import BeautifulSoup # library to parse HTML and XML documents

print('Libraries imported.')

Libraries imported.


#### Load the dataframe that contains all information related the districts in Madrid: 

#### Source: https://datos.gob.es/en

In [11]:
districts_df=pd.read_csv('madrid_districs.csv',sep=';',encoding='latin1')

In [26]:
col=['District_id','District','city_id','city','area_km2','population_dens','Latitude','Longitude']

In [27]:
districts_df.columns=col

In [28]:
districts_df

Unnamed: 0,District_id,District,city_id,city,area_km2,population_dens,Latitude,Longitude
0,79601,Centro,796,Madrid,5.21,25340.69,40.415347,-3.707371
1,79602,Arganzuela,796,Madrid,6.52,23306.44,40.402733,-3.695403
2,79603,Retiro,796,Madrid,5.42,21867.53,40.408072,-3.676729
3,79604,Salamanca,796,Madrid,5.36,26830.78,40.43,-3.677778
4,79605,Chamartín,796,Madrid,9.12,15723.25,40.453333,-3.6775
5,79606,Tetuan,796,Madrid,5.37,28664.25,40.460556,-3.7
6,79607,Chamberí,796,Madrid,4.73,29049.26,40.432792,-3.697186
7,79608,Fuencarral-El Pardo,796,Madrid,238.0,1003.0,40.478611,-3.709722
8,79609,Moncloa-Aravaca,796,Madrid,46.47,2515.26,40.435151,-3.718765
9,79610,Latina,796,Madrid,25.47,9183.75,40.402461,-3.741294


#### Get the latitude and longitude of Madrid through Geocode:

In [29]:
address = 'Madrid'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 40.4167047, -3.7035825.


#### So, in order to locate alll the district in the map, we will use the library Folium. 

In [32]:
# create map of Toronto using latitude and longitude values
map_Madrid = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough in zip(districts_df['Latitude'], districts_df['Longitude'], districts_df['District']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        parse_html=False).add_to(map_Madrid)  
    
map_Madrid

#### For next steps, we will need information of venues for each district. In order to get that information we will use the API Foursquare:

In [33]:
CLIENT_ID = 'C5MWSXQPRH3D35CALJ4BQSQGGKFFOLJIRHZYD5TSHKZVTVE3' # your Foursquare ID
CLIENT_SECRET = 'UWKM4GYY1BZQ2GJ4QTM1I0DHLHALYIMGCIFBZE00PU044QSW' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: C5MWSXQPRH3D35CALJ4BQSQGGKFFOLJIRHZYD5TSHKZVTVE3
CLIENT_SECRET:UWKM4GYY1BZQ2GJ4QTM1I0DHLHALYIMGCIFBZE00PU044QSW


#### We select a radius of 500 and limit of 100, and save the info of the name, location, and category name of each venue:

In [34]:
radius = 500
LIMIT = 100

venues = []

for lat, long, borough in zip(districts_df['Latitude'], districts_df['Longitude'], districts_df['District']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            borough,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

#### Finally, we save the data into a DataFrame:

In [40]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['District', 'District_lat', 'District_long', 'VenueName', 'VenueLat', 'VenueLong', 'VenueCat']
venues_df.head()

Unnamed: 0,District,District_lat,District_long,VenueName,VenueLat,VenueLong,VenueCat
0,Centro,40.415347,-3.707371,La Taberna de Mister Pinkleton,40.414536,-3.708108,Other Nightlife
1,Centro,40.415347,-3.707371,The Hat Madrid,40.414343,-3.70712,Hotel
2,Centro,40.415347,-3.707371,Plaza Mayor,40.415527,-3.707506,Plaza
3,Centro,40.415347,-3.707371,Plaza Menor,40.414192,-3.708494,Lounge
4,Centro,40.415347,-3.707371,Bodegas Ricla,40.414266,-3.708077,Wine Bar


#### In order to analyze the information, we create a new DataFrame, Madrid_onehot, in which we use the Pandas tool: get_dummies

In [45]:
# one hot encoding
Madrid_onehot = pd.get_dummies(venues_df[['VenueCat']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe

Madrid_onehot['District'] = venues_df['District'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(Madrid_onehot.columns[-1:]) + list(Madrid_onehot.columns[:-1])
Madrid_onehot = Madrid_onehot[fixed_columns]

print(Madrid_onehot.shape)
Madrid_onehot.head()

(709, 158)


Unnamed: 0,District,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Art Studio,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bakery,Bar,Beer Bar,Beer Garden,Big Box Store,Bistro,Board Shop,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Cafeteria,Café,Camera Store,Candy Store,Chinese Restaurant,Chocolate Shop,Church,Circus,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cuban Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Food,Food & Drink Shop,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,German Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Health & Beauty Service,Health Food Store,Herbs & Spices Store,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Liquor Store,Lounge,Market,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Museum,Music Venue,Nightclub,Noodle House,Opera House,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Paella Restaurant,Park,Pastry Shop,Peruvian Restaurant,Pet Store,Pharmacy,Pie Shop,Pizza Place,Plaza,Polish Restaurant,Pub,Ramen Restaurant,Resort,Restaurant,Road,Salad Place,Salvadoran Restaurant,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Field,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trade School,Train Station,Udon Restaurant,Wine Bar,Wine Shop,Women's Store
0,Centro,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Centro,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Centro,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Centro,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Centro,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


#### Madrid_grouped DataFrame could be used to analyze the mean contribution of each category in each district:

In [66]:
Madrid_grouped = Madrid_onehot.groupby('District').mean().reset_index()
Madrid_grouped

Unnamed: 0,District,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Art Studio,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bakery,Bar,Beer Bar,Beer Garden,Big Box Store,Bistro,Board Shop,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Cafeteria,Café,Camera Store,Candy Store,Chinese Restaurant,Chocolate Shop,Church,Circus,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cuban Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Food,Food & Drink Shop,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,German Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Health & Beauty Service,Health Food Store,Herbs & Spices Store,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Liquor Store,Lounge,Market,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Museum,Music Venue,Nightclub,Noodle House,Opera House,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Paella Restaurant,Park,Pastry Shop,Peruvian Restaurant,Pet Store,Pharmacy,Pie Shop,Pizza Place,Plaza,Polish Restaurant,Pub,Ramen Restaurant,Resort,Restaurant,Road,Salad Place,Salvadoran Restaurant,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Field,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trade School,Train Station,Udon Restaurant,Wine Bar,Wine Shop,Women's Store
0,Arganzuela,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.047059,0.0,0.011765,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.023529,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.011765,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.023529,0.011765,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.058824,0.011765,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.105882,0.0,0.0,0.011765,0.023529,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.047059,0.011765,0.0,0.011765,0.0,0.011765,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0
1,Barajas,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.233333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.033333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0
2,Carabanchel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Centro,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.024691,0.012346,0.0,0.0,0.037037,0.0,0.024691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.037037,0.012346,0.012346,0.0,0.012346,0.0,0.0,0.012346,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.012346,0.012346,0.074074,0.0,0.012346,0.024691,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.024691,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.012346,0.0,0.0,0.0,0.024691,0.0,0.0,0.0,0.0,0.0,0.123457,0.0,0.0,0.0,0.0,0.024691,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.098765,0.0,0.0,0.0,0.0,0.135802,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.024691,0.0,0.0
4,Chamartín,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.056604,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.037736,0.0,0.018868,0.0,0.0,0.056604,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056604,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.018868,0.0,0.0,0.150943,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.132075,0.0,0.0,0.018868,0.018868,0.056604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Chamberí,0.01,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.01,0.02,0.05,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.05,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.03,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.03,0.0,0.02,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.12,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
6,Ciudad_Lineal,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.076923,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Fuencarral-El Pardo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.105263,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.078947,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0
8,Hortaleza,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.095238,0.047619,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Latina,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0


#### As many categories appeard in the previous DataFrame, we will create a new one, with the most important venues per district:

In [67]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [83]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Districts_venues_sorted = pd.DataFrame(columns=columns)
Districts_venues_sorted['District'] = Madrid_grouped['District']


for ind in np.arange(Madrid_grouped.shape[0]):
    Districts_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Madrid_grouped.iloc[ind, :], num_top_venues)

Districts_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Arganzuela,Spanish Restaurant,Restaurant,Grocery Store,Tapas Restaurant,Bakery,Gym / Fitness Center,Sandwich Place,Breakfast Spot,Beer Garden,Brewery
1,Barajas,Hotel,Restaurant,Spanish Restaurant,Tapas Restaurant,Coffee Shop,Japanese Restaurant,Bistro,Mexican Restaurant,Bar,Brewery
2,Carabanchel,Gym / Fitness Center,Bakery,Park,Nightclub,Pizza Place,Plaza,Metro Station,Burger Joint,Soccer Field,Tapas Restaurant
3,Centro,Tapas Restaurant,Plaza,Spanish Restaurant,Hostel,Bistro,Cocktail Bar,Restaurant,Bar,Bookstore,Market
4,Chamartín,Restaurant,Spanish Restaurant,Park,Grocery Store,Bakery,Tapas Restaurant,Gastropub,Coffee Shop,Pizza Place,Japanese Restaurant
5,Chamberí,Spanish Restaurant,Restaurant,Bar,Japanese Restaurant,Brewery,Café,Tapas Restaurant,Mexican Restaurant,Plaza,Italian Restaurant
6,Ciudad_Lineal,Spanish Restaurant,Supermarket,Gastropub,Burger Joint,Restaurant,Argentinian Restaurant,Ice Cream Shop,Gym / Fitness Center,Convenience Store,Café
7,Fuencarral-El Pardo,Clothing Store,Italian Restaurant,Restaurant,Fast Food Restaurant,Bakery,Burger Joint,Tapas Restaurant,Park,Ice Cream Shop,Cosmetics Shop
8,Hortaleza,Breakfast Spot,Pizza Place,Supermarket,Gym,Pharmacy,Restaurant,Cafeteria,Pub,Food,Donut Shop
9,Latina,Pizza Place,Bakery,Asian Restaurant,Falafel Restaurant,Park,Metro Station,Fast Food Restaurant,Grocery Store,Train Station,Deli / Bodega


#### In this point of the analysis, we decide that information according the area of the district and more important, population density, could be key factor por the location decision and we want to be part of the data that the Machine Learning algorithm take into account:

In [98]:
Madrid_grouped_clustering=pd.merge(Madrid_grouped, districts_df,how='left',on='District')
Madrid_grouped_clustering.drop(['District_id','city_id','city','Latitude','Longitude'],1,inplace=True)
Madrid_grouped_clustering.head()
Madrid_grouped_clustering['area_km2']=Madrid_grouped_clustering['area_km2']/(max(Madrid_grouped_clustering['area_km2']))
Madrid_grouped_clustering['population_dens']=Madrid_grouped_clustering['population_dens']/(max(Madrid_grouped_clustering['population_dens']))
Madrid_grouped_clustering

Unnamed: 0,District,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Art Studio,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bakery,Bar,Beer Bar,Beer Garden,Big Box Store,Bistro,Board Shop,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Cafeteria,Café,Camera Store,Candy Store,Chinese Restaurant,Chocolate Shop,Church,Circus,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cuban Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Food,Food & Drink Shop,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,German Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Health & Beauty Service,Health Food Store,Herbs & Spices Store,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Liquor Store,Lounge,Market,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Museum,Music Venue,Nightclub,Noodle House,Opera House,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Paella Restaurant,Park,Pastry Shop,Peruvian Restaurant,Pet Store,Pharmacy,Pie Shop,Pizza Place,Plaza,Polish Restaurant,Pub,Ramen Restaurant,Resort,Restaurant,Road,Salad Place,Salvadoran Restaurant,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Field,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trade School,Train Station,Udon Restaurant,Wine Bar,Wine Shop,Women's Store,area_km2,population_dens
0,Arganzuela,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.047059,0.0,0.011765,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.023529,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.011765,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.023529,0.011765,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.058824,0.011765,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.105882,0.0,0.0,0.011765,0.023529,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.047059,0.011765,0.0,0.011765,0.0,0.011765,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.027395,0.802308
1,Barajas,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.233333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.033333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.183025,0.037043
2,Carabanchel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.059244,0.596121
3,Centro,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.024691,0.012346,0.0,0.0,0.037037,0.0,0.024691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.037037,0.012346,0.012346,0.0,0.012346,0.0,0.0,0.012346,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.012346,0.012346,0.074074,0.0,0.012346,0.024691,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.024691,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.012346,0.0,0.0,0.0,0.024691,0.0,0.0,0.0,0.0,0.0,0.123457,0.0,0.0,0.0,0.0,0.024691,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.098765,0.0,0.0,0.0,0.0,0.135802,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.024691,0.0,0.0,0.021891,0.872335
4,Chamartín,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.056604,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.037736,0.0,0.018868,0.0,0.0,0.056604,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056604,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.018868,0.0,0.0,0.150943,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.132075,0.0,0.0,0.018868,0.018868,0.056604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038319,0.541262
5,Chamberí,0.01,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.01,0.02,0.05,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.05,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.03,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.03,0.0,0.02,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.12,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.019874,1.0
6,Ciudad_Lineal,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.076923,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048403,0.635319
7,Fuencarral-El Pardo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.105263,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.078947,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,1.0,0.034528
8,Hortaleza,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.095238,0.047619,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108697,0.240052
9,Latina,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.107017,0.316144


#### Once we have the DataFrame Madrid_grouped_clustering ready, we define the Machine learning algorithm:

#### in this case, we will use the unsupervised machine learning algorithm K-Means, because the final goal is to cluster all district in Madrid and figure out if there are some other districts with the same characteristics of Charmartin.

In [169]:
### set number of clusters
kclusters = 6

Madrid_grouped_clustering = Madrid_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Madrid_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_ 

array([0, 1, 4, 1, 0, 1, 0, 1, 1, 4, 1, 4, 1, 1, 1, 5, 1, 1, 1, 2, 3])

#### We introduce the labels of the result of the algorithm in two DataFrames: Madrid_merged and districts_df_merged

In [170]:
Madrid_merged=Districts_venues_sorted.copy()
Madrid_merged["Cluster_labels"] = kmeans.labels_
Madrid_merged

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_labels
0,Arganzuela,Spanish Restaurant,Restaurant,Grocery Store,Tapas Restaurant,Bakery,Gym / Fitness Center,Sandwich Place,Breakfast Spot,Beer Garden,Brewery,0
1,Barajas,Hotel,Restaurant,Spanish Restaurant,Tapas Restaurant,Coffee Shop,Japanese Restaurant,Bistro,Mexican Restaurant,Bar,Brewery,1
2,Carabanchel,Gym / Fitness Center,Bakery,Park,Nightclub,Pizza Place,Plaza,Metro Station,Burger Joint,Soccer Field,Tapas Restaurant,4
3,Centro,Tapas Restaurant,Plaza,Spanish Restaurant,Hostel,Bistro,Cocktail Bar,Restaurant,Bar,Bookstore,Market,1
4,Chamartín,Restaurant,Spanish Restaurant,Park,Grocery Store,Bakery,Tapas Restaurant,Gastropub,Coffee Shop,Pizza Place,Japanese Restaurant,0
5,Chamberí,Spanish Restaurant,Restaurant,Bar,Japanese Restaurant,Brewery,Café,Tapas Restaurant,Mexican Restaurant,Plaza,Italian Restaurant,1
6,Ciudad_Lineal,Spanish Restaurant,Supermarket,Gastropub,Burger Joint,Restaurant,Argentinian Restaurant,Ice Cream Shop,Gym / Fitness Center,Convenience Store,Café,0
7,Fuencarral-El Pardo,Clothing Store,Italian Restaurant,Restaurant,Fast Food Restaurant,Bakery,Burger Joint,Tapas Restaurant,Park,Ice Cream Shop,Cosmetics Shop,1
8,Hortaleza,Breakfast Spot,Pizza Place,Supermarket,Gym,Pharmacy,Restaurant,Cafeteria,Pub,Food,Donut Shop,1
9,Latina,Pizza Place,Bakery,Asian Restaurant,Falafel Restaurant,Park,Metro Station,Fast Food Restaurant,Grocery Store,Train Station,Deli / Bodega,4


In [171]:
districts_df_merged=districts_df.copy()
districts_df_merged.sort_values('District',inplace=True)
districts_df_merged["Cluster_labels"] = kmeans.labels_
districts_df_merged.reset_index(drop=True)

Unnamed: 0,District_id,District,city_id,city,area_km2,population_dens,Latitude,Longitude,Cluster_labels
0,79602,Arganzuela,796,Madrid,6.52,23306.44,40.402733,-3.695403,0
1,79621,Barajas,796,Madrid,43.56,1076.06,40.470196,-3.58489,1
2,79611,Carabanchel,796,Madrid,14.1,17316.88,40.383669,-3.727989,4
3,79601,Centro,796,Madrid,5.21,25340.69,40.415347,-3.707371,1
4,79605,Chamartín,796,Madrid,9.12,15723.25,40.453333,-3.6775,0
5,79607,Chamberí,796,Madrid,4.73,29049.26,40.432792,-3.697186,1
6,79615,Ciudad_Lineal,796,Madrid,11.52,18455.56,40.45,-3.65,0
7,79608,Fuencarral-El Pardo,796,Madrid,238.0,1003.0,40.478611,-3.709722,1
8,79616,Hortaleza,796,Madrid,25.87,6973.33,40.469457,-3.640482,1
9,79610,Latina,796,Madrid,25.47,9183.75,40.402461,-3.741294,4


#### Finally, we can visualize all districts in the map, and using different colors for each cluster:

In [172]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.brg(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, bor, cluster in zip(districts_df_merged['Latitude'], districts_df_merged['Longitude'], districts_df_merged['District'], districts_df_merged['Cluster_labels']):
    label = folium.Popup('{} - Cluster {}'.format(bor, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Analysis of Charmartin cluster (label=0)

In [173]:
analysis_df1=Madrid_merged[Madrid_merged.Cluster_labels==0]
analysis_df1

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_labels
0,Arganzuela,Spanish Restaurant,Restaurant,Grocery Store,Tapas Restaurant,Bakery,Gym / Fitness Center,Sandwich Place,Breakfast Spot,Beer Garden,Brewery,0
4,Chamartín,Restaurant,Spanish Restaurant,Park,Grocery Store,Bakery,Tapas Restaurant,Gastropub,Coffee Shop,Pizza Place,Japanese Restaurant,0
6,Ciudad_Lineal,Spanish Restaurant,Supermarket,Gastropub,Burger Joint,Restaurant,Argentinian Restaurant,Ice Cream Shop,Gym / Fitness Center,Convenience Store,Café,0


In [176]:
analysis_df2=districts_df_merged.groupby('Cluster_labels')['population_dens'].mean().reset_index()
analysis_df2

Unnamed: 0,Cluster_labels,population_dens
0,0,19161.75
1,1,14848.466667
2,2,2026.82
3,3,7059.13
4,4,13998.073333
5,5,6934.37


#### In the k-means algorithm the population density is also included and this 3 district are also one of the most clouwded.


In [178]:
analysis_df3=Madrid_merged.sort_values('Cluster_labels')
analysis_df3

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_labels
0,Arganzuela,Spanish Restaurant,Restaurant,Grocery Store,Tapas Restaurant,Bakery,Gym / Fitness Center,Sandwich Place,Breakfast Spot,Beer Garden,Brewery,0
4,Chamartín,Restaurant,Spanish Restaurant,Park,Grocery Store,Bakery,Tapas Restaurant,Gastropub,Coffee Shop,Pizza Place,Japanese Restaurant,0
6,Ciudad_Lineal,Spanish Restaurant,Supermarket,Gastropub,Burger Joint,Restaurant,Argentinian Restaurant,Ice Cream Shop,Gym / Fitness Center,Convenience Store,Café,0
18,Vicalvaro,Pizza Place,Spanish Restaurant,Fast Food Restaurant,Grocery Store,Sandwich Place,Asian Restaurant,Ice Cream Shop,Breakfast Spot,Beer Bar,Café,1
17,Usera,Seafood Restaurant,Theater,Market,Bubble Tea Shop,Spanish Restaurant,Plaza,Chinese Restaurant,Fast Food Restaurant,Asian Restaurant,Noodle House,1
16,Tetuan,Spanish Restaurant,Grocery Store,Supermarket,Chinese Restaurant,Brazilian Restaurant,Brewery,Bookstore,Seafood Restaurant,Breakfast Spot,Resort,1
14,Salamanca,Spanish Restaurant,Restaurant,Mediterranean Restaurant,Seafood Restaurant,Tapas Restaurant,Burger Joint,Mexican Restaurant,Coffee Shop,Lounge,Wine Shop,1
13,Retiro,Spanish Restaurant,Supermarket,Bar,Tapas Restaurant,Museum,Mediterranean Restaurant,Board Shop,Boutique,Food & Drink Shop,Dog Run,1
12,Puente_de_Vallecas,Fast Food Restaurant,Grocery Store,Supermarket,Gym,Hotel,Breakfast Spot,Pub,Café,Camera Store,Market,1
10,Moncloa-Aravaca,Spanish Restaurant,Restaurant,Tapas Restaurant,Pub,Coffee Shop,Ice Cream Shop,Pizza Place,Italian Restaurant,Japanese Restaurant,Mediterranean Restaurant,1


### DISCUSSION

The data of the clusters shows that most of the districts can be included in cluster number 1 (blue) and are places more related to residential areas. 

The second big group is the red one, which include outer districts like Latina, Carabanchel or Moratalaz with similar characteristics. 

Some districts have unique characteristics, such us Villa de Vallecas, San Cristobal and San Blas Canillejas. The three of them are far from the city centre.

And finally, the green cluster, which contains the districts of Chamartin, Ciudad Lineal, and Arganzuela. The three of them are between the city centre and the most outer districts.
