In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import plotly.graph_objects as go
import seaborn as sns
import folium
import branca
from plot_init import setup_plotting
setup_plotting()

In [None]:
import numpy as np
from numpy.linalg import norm as norm
from scipy.optimize import linear_sum_assignment
import pandas as pd

import os, sys
from tqdm import trange, tqdm

In [None]:
# spectral clustering
from numpy import unique
from numpy import where
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering, KMeans, AgglomerativeClustering

Add OSM POIs Ontology

In [None]:
sys.path.insert(0, '01.osm')
from tags_custom import ontology

Ontologize data using POIs Ontology scheme

In [None]:
def _ontology_leaves(node, path=(), max_depth=4):
    """Yield ``(path, leaf)`` pairs for every leaf of a nested ontology dict.

    ``path`` is the tuple of keys leading to ``leaf``. Descent stops at
    ``max_depth`` keys: whatever sits at that level is reported as a leaf even
    if it is itself a dict.
    """
    for key, child in node.items():
        child_path = path + (key,)
        if isinstance(child, dict) and len(child_path) < max_depth:
            yield from _ontology_leaves(child, child_path, max_depth)
        else:
            yield child_path, child


def ontologize_pois(df):
    """Aggregate raw POI columns of ``df`` into the categories of ``ontology``.

    Every leaf of the module-level ``ontology`` (walked up to four levels deep)
    becomes one output column named by its colon-joined key path, e.g.
    ``'level_I:level_II:level_III'``. The value of a column is the row-wise sum
    of the ``df`` columns selected by that leaf.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample, one column per raw POI tag.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df`` with one column per ontology category, in ontology
        order. Categories whose source columns cannot be selected from ``df``
        are kept as all-NaN columns so the column layout does not depend on
        the data.
    """
    leaves = list(_ontology_leaves(ontology))
    categories = [':'.join(path) for path, _ in leaves]

    sums = {}
    for category, (_, source_columns) in zip(categories, leaves):
        try:
            sums[category] = df[source_columns].sum(axis=1)
        except KeyError:
            # Best effort: a category whose tags are missing from this
            # extract stays empty (NaN) instead of aborting the whole run.
            continue

    # Build the frame once (avoids column-by-column insertion) and restore the
    # full, ordered category layout, NaN-filling the skipped categories.
    return pd.DataFrame(sums, index=df.index).reindex(columns=categories)

Read OSM POIs data

In [None]:
def read_pois(ontologize=False, pois_directory=None, bbox_pois=None):
    """Read the per-bbox POI tables stored as ``pois_<bbox>.csv`` files.

    Parameters
    ----------
    ontologize : bool
        When true, each table is passed through ``ontologize_pois``.
    pois_directory : str or os.PathLike
        Directory holding the ``pois_<bbox>.csv`` files.
    bbox_pois : list or int
        Bbox identifiers to load. A list is used as given. A single int is
        loaded alone if it is listed in the module-level ``_BBOXES``,
        otherwise every bbox in ``_BBOXES`` is loaded.

    Returns
    -------
    dict
        Maps each bbox identifier to its DataFrame (NaN replaced by 0.0 and the
        ``'Unnamed: 0'`` / ``'index'`` bookkeeping columns removed if present).

    Raises
    ------
    TypeError
        If ``bbox_pois`` is neither a list nor an int, or ``pois_directory``
        is missing.
    """
    if isinstance(bbox_pois, list):
        bboxes = bbox_pois
    elif isinstance(bbox_pois, int):
        bboxes = [bbox_pois] if bbox_pois in _BBOXES else _BBOXES
    else:
        # Previously this fell through and crashed later with an
        # UnboundLocalError; fail early with an actionable message instead.
        raise TypeError(
            'bbox_pois must be a list of bbox identifiers or a single int, '
            'got {!r}'.format(bbox_pois))

    if pois_directory is None:
        raise TypeError('pois_directory must be the path of the POIs directory')

    data_pois = {}
    with tqdm(bboxes) as bbox_it:
        for bbox in bbox_it:
            bbox_it.set_description('Reading POIS ({})                 '.format(bbox))

            csv_path = os.path.join(pois_directory, 'pois_' + str(bbox) + '.csv')
            pois = (
                pd.read_csv(csv_path)
                .fillna(0.0)
                # Index columns left over from earlier exports; tolerate files
                # that were written without them.
                .drop(columns=['Unnamed: 0', 'index'], errors='ignore')
            )

            data_pois[bbox] = ontologize_pois(pois) if ontologize else pois

    return data_pois

# Barcelona Accidents

## POIs 10

In [None]:
# Load the ontologized POI table of the Barcelona 2021 accident data set for
# bbox identifier 10 (the unit of the bbox size is not shown in this notebook
# -- presumably metres; TODO confirm).
_POIS_directory    = 'data/barcelona_2021/pois'
_BBox_POIs = [10]
_Use_Ontology = True

# Result is a dict keyed by bbox identifier, here {10: DataFrame}.
data_pois = read_pois(ontologize=_Use_Ontology, pois_directory=_POIS_directory, bbox_pois=_BBox_POIs)

Filter to circulation spaces

In [None]:
# Keep only the 'circulation_spaces:*' categories. `ontologize_pois` leaves
# categories without matching source columns as NaN, so zero-fill them first
# (as the random-sampling section of this notebook does) to keep the matrix
# usable by the clustering models.
data = data_pois[10]
data_circulation_spaces_10 = data.filter(regex='circulation_spaces:').fillna(0)
# Binarise: any positive count becomes 1 (presence / absence).
data_circulation_spaces_10[data_circulation_spaces_10 > 0] = 1

In [None]:
# Display names for the circulation-space columns, used as x tick labels by
# the bar chart and heatmaps below. The list has 15 entries; their order is
# assumed to match the column order of the 'circulation_spaces:*' frame --
# TODO confirm against the ontology. The trailing spaces are part of the
# labels.
labels_circulation_spaces=[
    'Cycleways            ',
    'Cycleways SharedBus  ',
    'Cycleways SharedLane ',
    'Footways             ',
    'Crossings (Footways)    ',
    'Motorway       ',
    'Primary        ',
    'Secondary      ',
    'Tertiary       ',
    'Street         ',
    'Other          ',
    'Bridge         ',
    'Public Transportation',
    'Rail                 ',
    'Subway          ']

In [None]:
# The frame is binary, so a column sum is the number of rows (accidents)
# whose surroundings contain that circulation-space type. Compute it once and
# reuse it for both the bar heights and the bar annotations.
circulation_totals_10 = data_circulation_spaces_10.sum()

# One colour per bar, in label order: 3 + 2 + 7 + 1 + 2 = 15 bars.
bar_colors = (
    ['lightgreen'] * 3
    + ['deepskyblue'] * 2
    + ['wheat'] * 7
    + ['tomato']
    + ['thistle'] * 2
)

fig = go.Figure(data=[
    go.Bar(
        name="a",
        x=labels_circulation_spaces,
        y=circulation_totals_10,
        marker_color=bar_colors,
        textposition='auto',
        text=circulation_totals_10,
    )
])

fig.update_layout(
    title_text='Accident & Circulation Spaces',  # title of plot
    xaxis_title_text='Infrastructure Types',     # xaxis label
    yaxis_title_text='# of Accidents',           # yaxis label
    bargap=0.05,       # gap between bars of adjacent location coordinates
    bargroupgap=0.05,  # gap between bars of the same location coordinates
    font_family="Liberation Serif",
)
fig.show()

### Clustering

#### Spectral clustering

In [None]:
# Number of clusters, passed as `n_clusters` to clustering models below.
K = 10

In [None]:
# Feature matrix: one row per sample, one binary column per circulation space.
X = data_circulation_spaces_10.to_numpy()

# Fix the seed: spectral clustering is stochastic and later cells refer to
# specific cluster ids, so the labels must be reproducible across runs.
model = SpectralClustering(n_clusters=K, random_state=0)

yhat_10 = model.fit_predict(X)
clusters = unique(yhat_10)

In [None]:
# Mean feature vector of every spectral cluster: one row per cluster, one
# column per circulation-space type.
avg_spectral_clusters_10 = np.array(
    [X[yhat_10 == label].mean(axis=0) for label in clusters]
)

plt.figure(figsize=(20, 10))
heatmap_ax = sns.heatmap(
    avg_spectral_clusters_10,
    annot=True,
    fmt='.3f',
    xticklabels=labels_circulation_spaces,
    vmax=2,
)
# Keep a handle on the Figure so the next cell can save it.
figure = heatmap_ax.get_figure()

In [None]:
# Make sure the output directory exists so saving works on a fresh checkout.
os.makedirs("images", exist_ok=True)
figure.savefig("images/clustering_sp_barcelona_10.png")

Plot accidents and their clusters

In [None]:
# Accident records; the map cells below read their 'Latitude' and 'Longitude'
# columns and select rows with the cluster labels, so the row order is assumed
# to match the POI tables -- TODO confirm.
_Accidents_file    = 'data/barcelona_2021/cycling_safety_barcelona_2021.csv'
accidents = pd.read_csv(_Accidents_file)

In [None]:
# Number of samples assigned to each spectral cluster (row i = i-th cluster).
df = pd.DataFrame([int((yhat_10 == cluster_i).sum()) for cluster_i in clusters])

fig = go.Figure(data=[
    go.Bar(
        name='aaa',
        x=df.index,
        y=df[0],
        marker_color='crimson',
        textposition='auto',
        text=df[0],
    )
])
fig.update_layout(
    barmode='stack',
    title_text='[Spectral Clustering] Samples per cluster',
    xaxis=dict(
        title='Cluster',
        tickfont_size=14,
        showticklabels=True,
        tickmode='linear',
    ),
    yaxis=dict(
        # `titlefont` is deprecated and rejected by newer plotly releases;
        # `title.font` is the supported spelling.
        title=dict(text='# of Samples', font=dict(size=16)),
        tickfont_size=14,
    ),
    font_family="Arial",
)

fig.show()

In [None]:
# One colour per cluster id. These values are emitted as the circles'
# `color` / `fill_color`, so they must be valid CSS colour names: the former
# 'lightred' and 'darkpurple' entries were not, and are replaced by 'salmon'
# and 'indigo'.
marker_colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'salmon', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'indigo', 'white', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']

# Map centred on the mean accident location.
m = folium.Map(location=[accidents['Latitude'].mean(),
                         accidents['Longitude'].mean()],
               zoom_start=12, tiles='OpenStreetMap')

for cluster_i in clusters:
    # Select the accidents of this cluster once instead of once per column.
    members = accidents[yhat_10 == cluster_i]
    cluster_color = marker_colors[cluster_i]

    for accident_index, lat, lon in zip(members.index,
                                        members['Latitude'],
                                        members['Longitude']):
        folium.Circle(location=[lat, lon],
                      radius=25,
                      fill_color=cluster_color,
                      color=cluster_color,
                      tooltip=str(accident_index),
                      popup='{}, {}'.format(lat, lon),
                      ).add_to(m)

m

#### k-Means

In [None]:
# Seed the initialisation and pin `n_init` so the k-means labels are
# reproducible and do not depend on the library's version-specific default.
kmeans = KMeans(n_clusters=K, n_init=10, random_state=0)
yhat = kmeans.fit_predict(X)
clusters = unique(yhat)

In [None]:
# Mean feature vector of every k-means cluster (rows: clusters, columns:
# circulation-space types).
avg_kmean_clusters = np.array(
    [X[yhat == label].mean(axis=0) for label in clusters]
)

plt.figure(figsize=(20, 10))
sns.heatmap(
    avg_kmean_clusters,
    annot=True,
    fmt='.3f',
    xticklabels=labels_circulation_spaces,
    vmax=1,
);

In [None]:
# Number of samples assigned to each k-means cluster (row i = i-th cluster).
df = pd.DataFrame([int((yhat == cluster_i).sum()) for cluster_i in clusters])

fig = go.Figure(data=[
    go.Bar(
        name='aaa',
        x=df.index,
        y=df[0],
        marker_color='crimson',
        textposition='auto',
        text=df[0],
    )
])
fig.update_layout(
    barmode='stack',
    title_text='[k-Means] Samples per cluster',
    xaxis=dict(
        title='Cluster',
        tickfont_size=14,
        showticklabels=True,
        tickmode='linear',
    ),
    yaxis=dict(
        # `titlefont` is deprecated and rejected by newer plotly releases;
        # `title.font` is the supported spelling.
        title=dict(text='# of Samples', font=dict(size=16)),
        tickfont_size=14,
    ),
    font_family="Arial",
)

fig.show()

#### Hierarchical Clustering

In [None]:
# Ward linkage only supports (and defaults to) the Euclidean metric, so the
# metric is left implicit: the `affinity` keyword was removed from recent
# scikit-learn releases. Uses the shared cluster count K like the other models.
Agg_hc = AgglomerativeClustering(n_clusters=K, linkage='ward')
yhat = Agg_hc.fit_predict(X)
clusters = unique(yhat)

In [None]:
# Mean feature vector of every agglomerative cluster (rows: clusters,
# columns: circulation-space types).
avg_hierarchical_clusters = np.array(
    [X[yhat == label].mean(axis=0) for label in clusters]
)

plt.figure(figsize=(20, 10))
sns.heatmap(
    avg_hierarchical_clusters,
    annot=True,
    fmt='.3f',
    xticklabels=labels_circulation_spaces,
    vmax=1,
);

In [None]:
# Number of samples assigned to each agglomerative cluster (row i = i-th
# cluster).
df = pd.DataFrame([int((yhat == cluster_i).sum()) for cluster_i in clusters])

fig = go.Figure(data=[
    go.Bar(
        name='aaa',
        x=df.index,
        y=df[0],
        marker_color='crimson',
        textposition='auto',
        text=df[0],
    )
])
fig.update_layout(
    barmode='stack',
    title_text='[Hierarchical Clustering] Samples per cluster',
    xaxis=dict(
        title='Cluster',
        tickfont_size=14,
        showticklabels=True,
        tickmode='linear',
    ),
    yaxis=dict(
        # `titlefont` is deprecated and rejected by newer plotly releases;
        # `title.font` is the supported spelling.
        title=dict(text='# of Samples', font=dict(size=16)),
        tickfont_size=14,
    ),
    font_family="Arial",
)

fig.show()

#### Spectral Clustering vs. k-Means

In [None]:
# Side-by-side comparison of the cluster means found by spectral clustering
# and by k-means on the same matrix. Uses the spectral means computed above
# (`avg_spectral_clusters_10`) and the circulation-space labels, which match
# the columns of both matrices.
fig, (ax_spectral, ax_kmeans) = plt.subplots(1, 2, figsize=(20, 10))

sns.heatmap(avg_spectral_clusters_10, annot=True, fmt='.3f',
            xticklabels=labels_circulation_spaces, vmax=1, ax=ax_spectral)
ax_spectral.set_title('Spectral clustering')

sns.heatmap(avg_kmean_clusters, annot=True, fmt='.3f',
            xticklabels=labels_circulation_spaces, vmax=1, ax=ax_kmeans)
ax_kmeans.set_title('k-Means');




## POIs 25

In [None]:
# Same accident data set as above, now for bbox identifier 25. Note that this
# rebinds `data_pois`, so the bbox-10 table is no longer reachable through it.
_POIS_directory = 'data/barcelona_2021/pois'
_BBox_POIs = [25]
_Use_Ontology = True

data_pois = read_pois(ontologize=_Use_Ontology, pois_directory=_POIS_directory, bbox_pois=_BBox_POIs)

In [None]:
# Keep only the 'circulation_spaces:*' categories. `ontologize_pois` leaves
# categories without matching source columns as NaN, so zero-fill them first
# (as the random-sampling section of this notebook does) to keep the matrix
# usable by the clustering models.
data = data_pois[25]
df_circulation_spaces_25 = data.filter(regex='circulation_spaces:').fillna(0)
# Binarise: any positive count becomes 1 (presence / absence).
df_circulation_spaces_25[df_circulation_spaces_25 > 0] = 1

In [None]:
# Ten descriptive names, one per cluster/component when K = 10. The name
# suggests they were derived from an NNMF run that is not part of this
# notebook -- TODO confirm. They do NOT correspond to the 15 circulation-space
# columns.
nnmf_labels = ['Crossings',
               'Subway',
               'Rail',
               'Tertiary & Footways',
               'Primary & PT',
               'Street',
               'Secondary',
               'Motorway',
               'Footways',
               'CyclingSharedLane & Footways'
              ]

#### Spectral clustering

In [None]:
# Feature matrix: one row per sample, one binary column per circulation space.
X = df_circulation_spaces_25.to_numpy()

# Fix the seed: spectral clustering is stochastic and later cells refer to
# specific cluster ids, so the labels must be reproducible across runs.
model = SpectralClustering(n_clusters=K, random_state=0)

yhat_25 = model.fit_predict(X)
clusters = unique(yhat_25)

In [None]:
# Mean feature vector of every spectral cluster of the bbox-25 accident data
# (rows: clusters, columns: circulation-space types).
mean_clusters_25 = np.array(
    [X[yhat_25 == label].mean(axis=0) for label in clusters]
)

plt.figure(figsize=(20, 10))
theilsu_fig = sns.heatmap(
    mean_clusters_25,
    annot=True,
    fmt='.3f',
    xticklabels=labels_circulation_spaces,
    vmax=2.,
)
plt.xticks(rotation=45)
# Keep a handle on the Figure so the next cell can save it.
figure = theilsu_fig.get_figure()

In [None]:
# Make sure the output directory exists so saving works on a fresh checkout.
os.makedirs("images", exist_ok=True)
figure.savefig("images/clustering_sp_barcelona_25.png")

In [None]:
# Number of samples assigned to each spectral cluster (row i = i-th cluster).
df = pd.DataFrame([int((yhat_25 == cluster_i).sum()) for cluster_i in clusters])

fig = go.Figure(data=[
    go.Bar(
        name='aaa',
        x=df.index,
        y=df[0],
        marker_color='crimson',
        textposition='auto',
        text=df[0],
    )
])
fig.update_layout(
    barmode='stack',
    title_text='[Spectral Clustering] Samples per cluster',
    xaxis=dict(
        title='Cluster',
        tickfont_size=14,
        showticklabels=True,
        tickmode='linear',
    ),
    yaxis=dict(
        # `titlefont` is deprecated and rejected by newer plotly releases;
        # `title.font` is the supported spelling.
        title=dict(text='# of Samples', font=dict(size=16)),
        tickfont_size=14,
    ),
    font_family="Arial",
)

fig.show()

Mapping of Accident Environment Types

In [None]:
# One colour per cluster id. These values are emitted as the circles'
# `color` / `fill_color`, so they must be valid CSS colour names: the former
# 'lightred' and 'darkpurple' entries were not, and are replaced by 'salmon'
# and 'indigo'.
marker_colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'salmon', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'indigo', 'white', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']

# Map centred on the mean accident location.
m = folium.Map(location=[accidents['Latitude'].mean(),
                         accidents['Longitude'].mean()],
               zoom_start=12, tiles='OpenStreetMap')

for cluster_i in clusters:
    # Select the accidents of this cluster once instead of once per column.
    members = accidents[yhat_25 == cluster_i]
    cluster_color = marker_colors[cluster_i]

    for accident_index, lat, lon in zip(members.index,
                                        members['Latitude'],
                                        members['Longitude']):
        folium.Circle(location=[lat, lon],
                      radius=20,
                      fill_color=cluster_color,
                      color=cluster_color,
                      tooltip='Cluster: '+str(cluster_i)+'\nIndex:'+str(accident_index),
                      popup='{}, {}'.format(lat, lon),
                      ).add_to(m)

m

# Barcelona City (Random Sampling)

In [None]:
# POI tables of the 'barcelona_rs' data set (the random-sampling data set of
# this section's heading), loaded for bbox identifiers 10 and 25. Rebinds
# `data_pois`.
_POIS_directory    = 'data/barcelona_rs/pois'
_BBox_POIs = [10, 25]
_Use_Ontology = True

data_pois = read_pois(ontologize=_Use_Ontology, pois_directory=_POIS_directory, bbox_pois=_BBox_POIs)

Filter to circulation spaces

## POIs 25

In [None]:
# Preview the raw circulation-space columns of the random-sampling bbox-25
# table. (`data_circulation_spaces_RS25` itself is only created in the next
# cell, so referencing it here failed on a fresh kernel.)
data_pois[25].filter(regex='circulation_spaces:').head()

In [None]:
# Circulation-space categories of the random-sampling bbox-25 table, with
# missing categories zero-filled and every positive count collapsed to 1.
data = data_pois[25]
rs25_circulation_counts = data.filter(regex='circulation_spaces:').fillna(0)
data_circulation_spaces_RS25 = rs25_circulation_counts.mask(rs25_circulation_counts > 0, 1)

### Clustering

#### Spectral clustering

In [None]:
# Feature matrix: one row per sample, one binary column per circulation space.
X = data_circulation_spaces_RS25.to_numpy()

# Fix the seed: spectral clustering is stochastic and later cells refer to
# specific cluster ids, so the labels must be reproducible across runs.
model = SpectralClustering(n_clusters=K, random_state=0)

yhat_RS25 = model.fit_predict(X)
clusters = unique(yhat_RS25)

In [None]:
# Mean feature vector of every spectral cluster of the random-sampling data
# (rows: clusters, columns: circulation-space types).
avg_spectral_clusters_rs25 = np.array(
    [X[yhat_RS25 == label].mean(axis=0) for label in clusters]
)

plt.figure(figsize=(20, 10))
theilsu_fig = sns.heatmap(
    avg_spectral_clusters_rs25,
    annot=True,
    fmt='.3f',
    xticklabels=labels_circulation_spaces,
    vmax=2,
)
plt.xticks(rotation=45)
# Keep a handle on the Figure so the next cell can save it.
figure = theilsu_fig.get_figure()

In [None]:
# Make sure the output directory exists so saving works on a fresh checkout.
os.makedirs("images", exist_ok=True)
figure.savefig("images/clustering_sp_barcelonaRS_25.png")

# Comparing Accident Environment Types to city Environment Types

## Cost matrix

In [None]:
# Cost matrix for the assignment problem: C[i, j] is the Euclidean distance
# between accident cluster mean i and city cluster mean j. Broadcasting
# (n, 1, d) against (1, m, d) yields every pairwise difference at once, so the
# matrix follows the actual number of clusters instead of a hard-coded 10.
accident_means = np.asarray(mean_clusters_25, dtype=float)
city_means = np.asarray(avg_spectral_clusters_rs25, dtype=float)

C = norm(accident_means[:, None, :] - city_means[None, :, :], axis=2)

### Find the match between AET & CET (using the Hungarian method)

In [None]:
# Minimum-total-cost one-to-one matching: row row_ind[k] of C (an accident
# cluster) is paired with column col_ind[k] (a city cluster).
row_ind, col_ind = linear_sum_assignment(C)

In [None]:
# AET = accident environment types (rows of C), CET = city environment types
# (columns of C); entries at the same position are matched to each other.
print('AET:', row_ind)
print('CET:', col_ind)

In [None]:
# Accident cluster means (left) next to the city cluster means reordered by
# the assignment (right), so that equal row positions show matched clusters.
fig, axes = plt.subplots(1, 2, figsize=(20, 6), sharey=True)
ax_accidents, ax_city = axes

fig.suptitle('Horizontally stacked subplots')

matched_city_means = avg_spectral_clusters_rs25[col_ind, :]

sns.heatmap(
    mean_clusters_25,
    annot=True,
    fmt='.2f',
    xticklabels=labels_circulation_spaces,
    vmax=1.,
    ax=ax_accidents,
)
ax_accidents.set_title('Accident Environment Types')

sns.heatmap(
    matched_city_means,
    annot=True,
    fmt='.2f',
    xticklabels=labels_circulation_spaces,
    vmax=1.,
    ax=ax_city,
)
ax_city.set_title('City Environment Types')


If we sort by cost

In [None]:
# Cost of each matched (accident cluster, city cluster) pair, and the order
# that ranks the pairs from best (cheapest) to worst match.
matched_costs = C[row_ind, col_ind]
sorted_cost = np.argsort(matched_costs)
print('Sorted Cost:', np.sort(matched_costs))
print(sorted_cost)

In [None]:
# Accident cluster means reordered by ascending matching cost. `sorted_cost`
# holds positions within the matched pairs; using it as a row index assumes
# row_ind is 0..n-1 in order -- TODO confirm.
POIS25 = mean_clusters_25[sorted_cost, :]
# City cluster means, left in their original order.
RS25   = avg_spectral_clusters_rs25

In [None]:
# Cost matrix for the re-sorted accident clusters: C[i, j] is the Euclidean
# distance between POIS25 row i and RS25 row j. Broadcasting (n, 1, d) against
# (1, m, d) yields every pairwise difference at once, so the matrix follows
# the actual number of clusters instead of a hard-coded 10.
accident_means = np.asarray(POIS25, dtype=float)
city_means = np.asarray(RS25, dtype=float)

C = norm(accident_means[:, None, :] - city_means[None, :, :], axis=2)

In [None]:
# Redo the minimum-cost matching on the cost matrix of the re-sorted clusters.
row_ind, col_ind = linear_sum_assignment(C)

In [None]:
# AET = accident environment types (rows of C), CET = city environment types
# (columns of C); entries at the same position are matched to each other.
print('AET:', row_ind)
print('CET:', col_ind)

In [None]:
# Cost-sorted accident cluster means (left) next to their matched city
# cluster means (right); equal row positions show matched clusters.
fig, axes = plt.subplots(1, 2, figsize=(20, 6), sharey=True)
ax_accidents, ax_city = axes
sns.set(font_scale = .8)

fig.suptitle('Horizontally stacked subplots')

# Options shared by both panels.
heatmap_style = dict(
    annot=True,
    fmt='.2f',
    xticklabels=labels_circulation_spaces,
    vmax=1.,
    cmap='viridis',
)
row_numbers = list(range(10))

sns.heatmap(POIS25, ax=ax_accidents, **heatmap_style)
ax_accidents.set_title('Accident Environment Types')
ax_accidents.set_xticklabels(labels_circulation_spaces, fontsize=14)
ax_accidents.set_yticklabels(row_numbers, fontsize=14)

sns.heatmap(RS25[col_ind, :], ax=ax_city, **heatmap_style)
ax_city.set_title('City Environment Types')
ax_city.set_xticklabels(labels_circulation_spaces, fontsize=14)
ax_city.set_yticklabels(row_numbers, fontsize=14)

# For the ICSC abstract

In [None]:
# Hard-coded 10 x 15 matrices plotted by the figures below: `aa` goes on the
# left panel and `bb` on the right one. Rows are presumably cluster means of
# the accident and city environment types from one earlier run -- TODO
# confirm their provenance. Columns follow the circulation-space labels.
aa = np.array([
    [0.00, 0.08, 1.00, 1.00, 0.95, 0.15, 0.12, 0.04, 0.27, 0.99, 0.00, 0.00, 0.06, 0.04, 0.17],
    [0.00, 0.00, 0.01, 1.00, 0.00, 0.00, 0.08, 0.01, 0.35, 0.00, 0.01, 0.00, 0.06, 0.06, 0.09],
    [0.01, 0.00, 0.00, 0.99, 1.00, 0.00, 0.06, 0.13, 0.00, 0.95, 0.00, 0.00, 0.02, 0.10, 0.00],
    [0.00, 0.00, 0.00, 1.00, 0.79, 0.13, 1.00, 0.10, 0.10, 0.09, 0.01, 0.00, 0.09, 0.52, 0.46],
    [0.00, 0.00, 0.01, 1.00, 0.75, 0.00, 0.06, 1.00, 0.19, 0.00, 0.01, 0.00, 0.08, 0.09, 0.00],
    [0.00, 0.00, 0.01, 1.00, 1.00, 0.01, 0.04, 0.03, 1.00, 0.43, 0.00, 0.00, 0.07, 0.15, 0.09],
    [0.10, 0.00, 0.02, 0.98, 0.64, 1.00, 0.12, 0.27, 0.26, 0.32, 0.00, 0.00, 0.09, 0.03, 0.25],
    [0.00, 0.00, 0.00, 0.00, 0.00, 0.08, 0.01, 0.01, 0.01, 0.00, 0.00, 0.00, 0.00, 0.02, 0.02],
    [0.01, 0.00, 0.07, 0.89, 0.00, 0.07, 0.03, 0.03, 0.02, 1.00, 0.00, 0.00, 0.02, 0.05, 0.07],
    [0.00, 0.04, 0.01, 1.00, 0.88, 0.02, 0.03, 0.48, 0.25, 0.65, 0.00, 0.00, 0.09, 0.09, 1.00],
])

bb = np.array([
    [0.00, 0.00, 1.00, 0.99, 0.75, 0.09, 0.05, 0.03, 0.11, 1.00, 0.00, 0.00, 0.03, 0.01, 0.04],
    [0.00, 0.00, 0.00, 0.98, 0.00, 0.00, 0.06, 0.13, 0.00, 0.00, 0.02, 0.00, 0.02, 0.03, 0.01],
    [0.00, 0.00, 0.00, 0.92, 0.58, 0.01, 0.04, 0.06, 0.01, 0.98, 0.01, 0.00, 0.03, 0.04, 0.05],
    [0.00, 0.00, 0.02, 1.00, 0.77, 0.28, 0.99, 0.08, 0.07, 0.29, 0.00, 0.00, 0.14, 0.34, 0.91],
    [0.00, 0.00, 0.00, 1.00, 0.84, 0.18, 0.05, 1.00, 0.21, 0.15, 0.00, 0.00, 0.09, 0.14, 0.49],
    [0.00, 0.00, 0.02, 0.98, 0.50, 0.00, 0.06, 0.02, 1.00, 0.00, 0.01, 0.00, 0.12, 0.13, 0.12],
    [0.00, 0.00, 0.01, 0.99, 0.49, 1.00, 0.04, 0.09, 1.00, 0.00, 0.01, 0.00, 0.07, 0.06, 0.13],
    [0.00, 0.00, 0.00, 0.00, 0.00, 1.00, 0.01, 0.06, 0.02, 0.01, 0.01, 0.00, 0.00, 0.15, 0.01],
    [0.00, 0.00, 0.01, 1.00, 0.44, 1.00, 0.07, 0.11, 0.03, 0.54, 0.01, 0.00, 0.04, 0.05, 0.04],
    [0.01, 0.03, 0.04, 1.00, 0.99, 0.18, 0.04, 0.05, 1.00, 1.00, 0.00, 0.00, 0.07, 0.10, 0.24],
])

In [None]:
# Shorter display names for the same 15 circulation-space columns, used by
# the figures below. NOTE: this rebinds `labels_circulation_spaces`, so
# re-running earlier plotting cells afterwards will pick up these labels.
labels_circulation_spaces=[
    'Cycleways',
    'PT Shr. Cycleways ',
    'Shr. Cycleways',
    'Footways',
    'Crossings',
    'Motorway',
    'Primary',
    'Secondary',
    'Tertiary',
    'Street',
    'Other',
    'Bridge',
    'PT',
    'Rail',
    'Subway']

In [None]:
from matplotlib.transforms import ScaledTranslation

# Two heatmaps side by side (`aa` left, `bb` right) sharing one colour bar.
fig, axes = plt.subplots(1, 2, figsize=(20, 8), sharey=True, dpi=300)
sns.set(font_scale = .8)

fig.suptitle('Horizontally stacked subplots')

# Dedicated axes for the shared colour bar, placed right of both panels.
cbar_ax = fig.add_axes([.91, .15, .03, .7])

# Each matrix is drawn exactly once (the right panel used to be drawn twice,
# stacking a second mesh and a second set of annotations on the same axes).
sns.heatmap(aa, annot=True, fmt='.2f', xticklabels=labels_circulation_spaces, vmax=1.,
            ax=axes[0], cmap='viridis', cbar=False)
sns.heatmap(bb, annot=True, fmt='.2f', xticklabels=labels_circulation_spaces, vmax=1.,
            ax=axes[1], cmap='viridis', cbar_ax=cbar_ax)

for ax in axes.flat:
    ax.set_xticklabels(labels_circulation_spaces, fontsize=20, rotation=60, ha='right')
    ax.set_yticklabels(list(range(10)), fontsize=20)

# Nudge the rotated, right-aligned x tick labels horizontally so they sit
# under their columns.
dx, dy = 35, 0
offset = ScaledTranslation(dx / fig.dpi, dy / fig.dpi, fig.dpi_scale_trans)

# apply offset to all xticklabels
for ax in axes.flat:
    for label in ax.xaxis.get_majorticklabels():
        label.set_transform(label.get_transform() + offset)

In [None]:
# Stand-alone, high-resolution heatmap of `aa` with bold cell annotations
# and rows numbered from 1.
fig, axes = plt.subplots(1, 1, figsize=(15, 8), sharey=True, dpi=300)

annotation_style = {'fontsize': 16, 'fontweight': 'bold'}
sns.heatmap(
    aa,
    annot=True,
    fmt='.2f',
    xticklabels=labels_circulation_spaces,
    vmax=1.,
    cmap='viridis',
    cbar=False,
    cbar_ax=None,
    annot_kws=annotation_style,
    ax=axes,
)
axes.set_xticklabels(labels_circulation_spaces, fontsize=20, rotation=60, ha='right');
axes.set_yticklabels(list(range(1, 11)), fontsize=20);

In [None]:
# Stand-alone, high-resolution heatmap of `bb` with bold cell annotations
# and rows numbered from 1.
fig, axes = plt.subplots(1, 1, figsize=(15, 8), sharey=True, dpi=300)

annotation_style = {'fontsize': 16, 'fontweight': 'bold'}
sns.heatmap(
    bb,
    annot=True,
    fmt='.2f',
    xticklabels=labels_circulation_spaces,
    vmax=1.,
    cmap='viridis',
    cbar=False,
    cbar_ax=None,
    annot_kws=annotation_style,
    ax=axes,
)
axes.set_xticklabels(labels_circulation_spaces, fontsize=20, rotation=60, ha='right');
axes.set_yticklabels(list(range(1, 11)), fontsize=20);

# Accidents

In [None]:
import matplotlib.colors as clrs

In [None]:
# Cluster colours: one hex colour per cluster id (0-9), sampled from
# matplotlib's 'Spectral' colormap at hand-picked positions.
# `plt.get_cmap` is used because `plt.cm.get_cmap` was removed from recent
# matplotlib releases.
colormap = plt.get_cmap('Spectral')
spectral_positions = [0.95, 0.9, 0.8, 0.5, 0.75, 0.1, 0.25, 0.85, 0.3, 0]
marker_colors = [colors.rgb2hex(colormap(position)) for position in spectral_positions]

# Static map (zooming and dragging disabled), centred slightly off the mean
# accident location.
m = folium.Map(location=[accidents['Latitude'].mean()+0.06,
                         accidents['Longitude'].mean()+0.02],
               zoom_start=13,
               tiles='CartoDB positron',
               zoom_control=False,
               scrollWheelZoom=False,
               dragging=False,
               height=600,
              )

# Clusters are added in this order, so later ones are drawn on top.
for cluster_i in [3,4,5,6,7,8,9,0,1,2]:
    # Select the accidents of this cluster once instead of once per column.
    members = accidents[yhat_25 == cluster_i]
    cluster_color = marker_colors[cluster_i]

    for accident_index, lat, lon in zip(members.index,
                                        members['Latitude'],
                                        members['Longitude']):
        folium.Circle(location=[lat, lon],
                      radius=15,
                      fill_color=cluster_color,
                      color     =cluster_color,
                      tooltip='Cluster: '+str(cluster_i)+'\nIndex:'+str(accident_index),
                      popup='{}, {}'.format(lat, lon),
                      ).add_to(m)

# Add Legend: a fixed-position list of coloured diamonds, one per class
# (classes are numbered from 1, i.e. cluster id + 1), followed by a
# semi-transparent white panel.
legend_html = '''
{% macro html(this, kwargs) %}
<div style="
    position: fixed; 
    top: 50px;
    right: 50px;
    width: 250px;
    height: 200px;
    z-index:9999;
    font-size:14px;
    ">
'''
for cluster_i in range(10):
    legend_html += '''
        <p><a style="color:{};font-size:100%;margin-right:20px;">&diams;</a>Class {}</p>
    '''.format(marker_colors[cluster_i], cluster_i+1)
legend_html += '''
</div>
<div style="
    position: fixed; 
    top: 50px;
    right: 170px;
    width: 150px;
    height: 300px; 
    z-index:9998;
    font-size:14px;
    background-color: #ffffff;

    opacity: 0.8;
    ">
</div>
{% endmacro %}
'''
legend = branca.element.MacroElement()
legend._template = branca.element.Template(legend_html)

m.get_root().add_child(legend)
m