# Cultural Spaces in the city of Buenos Aires - Code

### Libraries

In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
from sklearn.cluster import KMeans

### Getting data

In [2]:
# Data source: https://data.buenosaires.gob.ar/dataset/espacios-culturales
cultural_spaces = pd.read_csv("espacios-culturales.csv")
# Keep only the columns used by the analysis. `.copy()` makes this an independent
# frame, so the translation below never writes into a slice of `cultural_spaces`.
cultural_spaces_clean = cultural_spaces[['FUNCION_PR', "ESTABLECIM", 'LATITUD', 'LONGITUD', 'BARRIO']].copy()

# Spanish venue category (column FUNCION_PR) -> English label.
dic_venue_translations = {"BIBLIOTECA": "Library",
    "LIBRERIA":"Bookshop",
    "ESPACIO ESCENICO":"Theater Space",
    "ESPACIO DE EXHIBICION":"Exhibition Space",
    "CENTRO CULTURAL":"Cultural Center",
    "MONUMENTOS Y LUGARES HISTORICOS":"Monuments and Historical Sites",
    "BAR":"Bar",
    "ESPACIO DE FORMACION":"Comprehensive Training Space",
    "DISQUERIA":"Record Store",
    "CALESITA":"Carousel",
    "ESPACIO FERIAL":"Carnival",
    "SALA DE CINE":"Cinema"}

# Fail loudly on any category missing from the dictionary (a plain dict lookup
# would raise KeyError too) instead of letting `.map` turn it into NaN.
unknown_categories = set(cultural_spaces_clean["FUNCION_PR"].unique()) - set(dic_venue_translations)
if unknown_categories:
    raise KeyError("No translation for FUNCION_PR value(s): {}".format(sorted(map(str, unknown_categories))))

# Translate every row in one vectorized step; this does not depend on the
# number of rows in the file or on the shape of the index.
cultural_spaces_clean["FUNCION_PR"] = cultural_spaces_clean["FUNCION_PR"].map(dic_venue_translations)

# Number of venues of each (translated) type per neighborhood.
venues_each_neighborhood = cultural_spaces_clean.groupby('BARRIO')['FUNCION_PR'].value_counts()
venues_each_neighborhood


BARRIO         FUNCION_PR                  
AGRONOMIA      Cultural Center                 4
               Library                         4
               Bar                             2
               Bookshop                        1
               Exhibition Space                1
                                              ..
VILLA URQUIZA  Theater Space                   3
               Library                         2
               Carousel                        1
               Comprehensive Training Space    1
               Record Store                    1
Name: FUNCION_PR, Length: 383, dtype: int64

In [3]:
# One-hot encode the venue type: one indicator column per FUNCION_PR category.
caba_onehot = pd.get_dummies(cultural_spaces_clean["FUNCION_PR"], prefix="", prefix_sep="")
# Put the neighborhood back as the first column. The values are passed as a 1-D
# array, so rows are matched by position with the one-hot rows.
caba_onehot.insert(loc=0, column="Neighborhood", value=cultural_spaces_clean["BARRIO"].values)

# Mean of the indicators per neighborhood, i.e. the fraction of that
# neighborhood's venues that fall in each category.
caba_grouped = caba_onehot.groupby('Neighborhood').mean().reset_index()
caba_grouped.head(5)

Unnamed: 0,Neighborhood,Bar,Bookshop,Carnival,Carousel,Cinema,Comprehensive Training Space,Cultural Center,Exhibition Space,Library,Monuments and Historical Sites,Record Store,Theater Space
0,AGRONOMIA,0.166667,0.083333,0.0,0.0,0.0,0.0,0.333333,0.083333,0.333333,0.0,0.0,0.0
1,ALMAGRO,0.082803,0.140127,0.0,0.006369,0.006369,0.127389,0.22293,0.031847,0.082803,0.0,0.006369,0.292994
2,BALVANERA,0.059055,0.259843,0.0,0.007874,0.007874,0.066929,0.07874,0.059055,0.240157,0.051181,0.007874,0.161417
3,BARRACAS,0.076923,0.076923,0.0,0.038462,0.0,0.115385,0.192308,0.076923,0.192308,0.153846,0.0,0.076923
4,BELGRANO,0.026549,0.300885,0.017699,0.017699,0.035398,0.079646,0.017699,0.088496,0.292035,0.026549,0.044248,0.053097


In [4]:
barrios_geo = r'barrios.json'  # GeoJSON file passed as `geo_data` to the choropleth layers

# Geocode the city name to get the coordinates the map is centered on.
address = 'Buenos Aires'
geolocator = Nominatim(user_agent="CABA")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; stop here with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError("Nominatim returned no result for address {!r}".format(address))
latitude = location.latitude
longitude = location.longitude
# Plain base map centered on the geocoded location.
caba_map = folium.Map(location=[latitude, longitude], zoom_start=12)

In [5]:
# Choropleth of the 'Bookshop' column of caba_grouped: for each neighborhood,
# the fraction of its cultural spaces that are bookshops.
# folium.Choropleth is the layer class that replaces the deprecated
# Map.choropleth() method; it takes the same arguments.
folium.Choropleth(
    geo_data=barrios_geo,
    data=caba_grouped,
    columns=['Neighborhood', 'Bookshop'],
    key_on='feature.properties.barrio',
    fill_color='RdPu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Share of cultural spaces that are bookshops'
).add_to(caba_map)

# display map
caba_map

In [6]:
num_top_venues = 5


def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` is skipped. The remaining entries are ordered
    by value, largest first, and the index labels of the leading
    ``num_top_venues`` entries are returned.

    Parameters
    ----------
    row : pandas.Series
        A row whose first entry is not a score (it is ignored) and whose
        remaining entries are comparable values keyed by label.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Labels of the top entries, in descending order of value.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

indicators = ['st', 'nd', 'rd']

# Column names: 'Neighborhood', then '1st Most Common Venue',
# '2nd Most Common Venue', ... up to num_top_venues.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # Ranks beyond the listed suffixes all take 'th'; chosen explicitly rather
    # than by catching an IndexError with a bare except.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# One array of top venue labels per neighborhood row of caba_grouped.
top_venues = [
    return_most_common_venues(caba_grouped.iloc[ind, :], num_top_venues)
    for ind in range(caba_grouped.shape[0])
]

# Build the table in one step instead of assigning row by row.
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=caba_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', caba_grouped['Neighborhood'])

neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,AGRONOMIA,Library,Cultural Center,Bar,Exhibition Space,Bookshop
1,ALMAGRO,Theater Space,Cultural Center,Bookshop,Comprehensive Training Space,Library
2,BALVANERA,Bookshop,Library,Theater Space,Cultural Center,Comprehensive Training Space
3,BARRACAS,Library,Cultural Center,Monuments and Historical Sites,Comprehensive Training Space,Theater Space
4,BELGRANO,Bookshop,Library,Exhibition Space,Comprehensive Training Space,Theater Space
5,BOCA,Exhibition Space,Theater Space,Bar,Monuments and Historical Sites,Cultural Center
6,BOEDO,Cultural Center,Bar,Theater Space,Bookshop,Library
7,CABALLITO,Bookshop,Library,Theater Space,Cultural Center,Comprehensive Training Space
8,CHACARITA,Monuments and Historical Sites,Theater Space,Cultural Center,Exhibition Space,Bookshop
9,COGHLAN,Theater Space,Library,Exhibition Space,Bookshop,Bar


In [49]:
import copy

from sklearn.preprocessing import StandardScaler

# Feature matrix: the per-neighborhood category shares without the label column.
# `columns=` is used because the axis can no longer be passed positionally in
# recent pandas.
caba_grouped_clustering = caba_grouped.drop(columns='Neighborhood')
X = caba_grouped_clustering.values
X = np.nan_to_num(X)  # replace NaN/inf with finite numbers before scaling
# Standardize each feature to zero mean and unit variance.
cluster_dataset = StandardScaler().fit_transform(X)

# set number of clusters
kclusters = 3
# run k-means clustering; n_init is pinned so the result does not depend on the
# scikit-learn version's default number of initializations
kmeans = KMeans(init="k-means++", n_clusters=kclusters, n_init=10, random_state=0).fit(cluster_dataset)

# add clustering labels to a copy, leaving neighborhoods_venues_sorted untouched
neighborhoods_venues_clusters = copy.deepcopy(neighborhoods_venues_sorted)
neighborhoods_venues_clusters.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_venues_clusters

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,2,AGRONOMIA,Library,Cultural Center,Bar,Exhibition Space,Bookshop
1,2,ALMAGRO,Theater Space,Cultural Center,Bookshop,Comprehensive Training Space,Library
2,1,BALVANERA,Bookshop,Library,Theater Space,Cultural Center,Comprehensive Training Space
3,2,BARRACAS,Library,Cultural Center,Monuments and Historical Sites,Comprehensive Training Space,Theater Space
4,1,BELGRANO,Bookshop,Library,Exhibition Space,Comprehensive Training Space,Theater Space
5,0,BOCA,Exhibition Space,Theater Space,Bar,Monuments and Historical Sites,Cultural Center
6,2,BOEDO,Cultural Center,Bar,Theater Space,Bookshop,Library
7,1,CABALLITO,Bookshop,Library,Theater Space,Cultural Center,Comprehensive Training Space
8,0,CHACARITA,Monuments and Historical Sites,Theater Space,Cultural Center,Exhibition Space,Bookshop
9,0,COGHLAN,Theater Space,Library,Exhibition Space,Bookshop,Bar


In [53]:
caba_map = folium.Map(location=[latitude, longitude], tiles="cartodbpositron", zoom_start=13)  # alternative tiles: cartodbdark_matter

# Color each barrio by its k-means cluster label.
# Labels start at 0, so the bin edges start at 0 too: edges beginning at 1
# would leave cluster 0 outside the color scale.
n_clusters = int(neighborhoods_venues_clusters['Cluster Labels'].max()) + 1
# NOTE(review): at least 3 bins are kept because the ColorBrewer palettes are
# understood to need a minimum of 3 colors -- confirm against the branca docs.
cluster_bins = list(range(max(n_clusters, 3) + 1))

# folium.Choropleth is the layer class that replaces the deprecated
# Map.choropleth() method; `bins` replaces `threshold_scale`.
folium.Choropleth(
    geo_data=barrios_geo,
    data=neighborhoods_venues_clusters,
    columns=['Neighborhood', 'Cluster Labels'],
    key_on='feature.properties.barrio',
    fill_color='Spectral',  # alternative palette: RdYlBu
    fill_opacity=0.5,
    line_opacity=0.9,
    legend_name='Clusters of Barrios of CABA',
    bins=cluster_bins
).add_to(caba_map)

# display map
caba_map

In [31]:
# (Disabled) Add one circle marker per cultural space to the map. Left commented out because it is very slow.
# for lat, lng, label in zip(cultural_spaces_clean['LATITUD'], cultural_spaces_clean['LONGITUD'], cultural_spaces_clean['ESTABLECIM']):
#     label = folium.Popup(label, parse_html=True)
#     folium.CircleMarker(
#         [lat, lng],
#         radius=5,
#         popup=label,
#         color='blue',
#         fill=True,
#         fill_color='#3186cc',
#         fill_opacity=0.7,
#         parse_html=False).add_to(caba_map)  
    
# caba_map