In [51]:
import pandas as pd
import numpy as np
from difflib import SequenceMatcher
import json
import folium
from sklearn.cluster import KMeans, DBSCAN

In [52]:
# Read votation data
df = pd.read_pickle("data/data.pkl")

In [53]:
df.head()

Unnamed: 0,Commune,Votation,Electeurs inscrits,Bulletins rentrés,Participation en %,Bulletins valables,Oui,Non,Oui en %,District,Canton,Pays
0,Aeugst am Albis,29.11.1998 Initiative Droleg,1070.0,487.0,45.5,478.0,167.0,311.0,34.9,Affoltern,Zürich,Suisse
1,Aeugst am Albis,14.06.2015 Initiative sur les bourses d'études,1380.0,706.0,51.2,695.0,186.0,509.0,26.8,Affoltern,Zürich,Suisse
2,Aeugst am Albis,25.09.2016 Loi fédérale sur le renseignement,1400.0,670.0,47.9,659.0,417.0,242.0,63.3,Affoltern,Zürich,Suisse
3,Aeugst am Albis,03.03.1991 Encouragement des transports publics,835.0,321.0,38.4,312.0,128.0,184.0,41.0,Affoltern,Zürich,Suisse
4,Aeugst am Albis,12.02.2017 Réforme de l'imposition des entrepr...,1395.0,759.0,54.4,750.0,318.0,432.0,42.4,Affoltern,Zürich,Suisse


In [54]:
commune_list = list(set(df['Commune'].values))
commune_dict = { val : idx for idx , val in enumerate(commune_list)   }

votation_list = list(set(df['Votation'].values))
votation_dict = { val : idx for idx , val in enumerate(votation_list)   }

X = np.ones((len(commune_list) , len(votation_list) ) , dtype=float)

In [55]:
for x in df [['Commune','Votation','Oui en %']].fillna(50).values :
    X [commune_dict[x[0]]][votation_dict[x[1]]] = x[2]

In [56]:
X

array([[ 66.7,  23.1,  66.2, ...,  55.1,  29.2,  49.6],
       [ 71.1,  26.5,  56.7, ...,  54.9,  22. ,  52. ],
       [ 81.9,  24.6,  77. , ...,  36.9,  19.2,  53.3],
       ..., 
       [ 74.3,  27.6,  72.3, ...,  53.9,  24.7,  54.8],
       [ 52. ,   5.3,  69.5, ...,  23.2,  15.9,  32.9],
       [ 67.2,  19.6,  73.6, ...,  38.2,   6.3,  46.6]])

In [57]:
switzerland_coord = [46.765213, 8.252444]
town_geo_path = r'data/switzerland_borders/admin_level_8.geojson'
geo_json_data = json.load(open(town_geo_path, encoding="utf8"))

In [58]:
color_list = ['#ff0000' , '#00ff00' , '#0000ff' , '#ffff00' , '#ff00ff' , '#00ffff'  , '#000000']


In [59]:
def style_function(feature):
    #print(feature['name'])
    language = languages_series.get(feature['name'], None)
    return {
        'fillOpacity': 0.5,
        'weight': 0,
        'fillColor':  color_language(language)
    }

def draw_map_kmeans (n_clusters) :
    kmeans_res = KMeans(n_clusters=n_clusters, random_state=0).fit(X)
    groups = kmeans_res.labels_
    
    centers = kmeans_res.cluster_centers_
    print( [ [ np.linalg.norm(x-y) for y in centers] for x in centers])
    
    to_map = pd.DataFrame({'Commune' : commune_list , 'Group' : groups})
    
    map1 = folium.Map(location=switzerland_coord, zoom_start=8)
    map1.choropleth(geo_data = geo_json_data, \
                                    data = to_map, \
                                    columns = ['Commune', 'Group'], \
                                    key_on = 'feature.name', \
                                    fill_color = 'RdYlGn', \
                                     fill_opacity = 0.7, \
                                    line_opacity = 0.2, \
                                    legend_name = 'group')
    
    return map1

In [60]:
for i in range (2,6) :  
    draw_map_kmeans(i).save('data/map_ml/kmeans'+str(i)+'.html')

[[0.0, 208.67401237835256], [208.67401237835256, 0.0]]
[[0.0, 144.47986356124335, 196.43007419689584], [144.47986356124335, 0.0, 247.33874679188321], [196.43007419689584, 247.33874679188321, 0.0]]
[[0.0, 221.59382027546033, 236.36037086236308, 193.87074264647023], [221.59382027546033, 0.0, 247.64281574780983, 207.85672978130719], [236.36037086236308, 247.64281574780983, 0.0, 141.34595602506744], [193.87074264647023, 207.85672978130719, 141.34595602506744, 0.0]]
[[0.0, 275.89090092058177, 215.67520469036879, 242.72212200864686, 141.06638785088913], [275.89090092058177, 0.0, 140.64525255314484, 237.04607787057083, 222.98561992569211], [215.67520469036879, 140.64525255314484, 0.0, 194.22622625708948, 221.63966288565692], [242.72212200864686, 237.04607787057083, 194.22622625708948, 0.0, 214.700838379114], [141.06638785088913, 222.98561992569211, 221.63966288565692, 214.700838379114, 0.0]]


In [62]:
def draw_map_DBSCAN (X) : 
    min_samples = 20

    X_array = [ np.array(x_) for x_ in X]
    range_X = range(len(X))
    Xmeans = np.mean([ np.mean(\
                            np.sort([np.linalg.norm(X_array[x]-X_array[y]) \
                             for x in range_X  if x!=y])[:(min_samples-1)] \
                           )\
                   for y in range_X ] )
    groups =  DBSCAN(eps=Xmeans, min_samples=min_samples).fit(X).labels_

    
  
    
    to_map = pd.DataFrame({'Commune' : commune_list , 'Group' : groups})
    
    map1 = folium.Map(location=switzerland_coord, zoom_start=8)
    map1.choropleth(geo_data = geo_json_data, \
                                    data = to_map, \
                                    columns = ['Commune', 'Group'], \
                                    key_on = 'feature.name', \
                                    fill_color = 'RdYlGn', \
                                     fill_opacity = 0.7, \
                                    line_opacity = 0.2, \
                                    legend_name = 'group')
    
    return map1

In [63]:
draw_map_DBSCAN (X).save('data/map_ml/DBSCAN.html')