In [None]:
import pandas as pd
import numpy as np
from difflib import SequenceMatcher
import json
import folium
from sklearn.cluster import KMeans, DBSCAN

In [None]:
# Load the vote results DataFrame from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle("data/data.pkl")

In [None]:
# Preview the first rows to check which columns were loaded.
df.head()

In [None]:
# Index communes and votes so that every (commune, vote) pair maps to one matrix cell.
# The unique values are sorted because iterating a bare set of strings follows hash
# order, which changes between interpreter runs and would make the row/column
# order (and everything derived from it) non-reproducible.
commune_list = sorted(set(df['Commune'].values))
print(len(commune_list))
commune_dict = {val: idx for idx, val in enumerate(commune_list)}

votation_list = sorted(set(df['Votation'].values))
votation_dict = {val: idx for idx, val in enumerate(votation_list)}

# Feature matrix: one row per commune, one column per vote.
# Cells start at 50 so that (commune, vote) pairs absent from df get the same
# value as the one used to fill missing results, instead of a spurious 1.
X = np.full((len(commune_list), len(votation_list)), 50.0, dtype=float)

In [None]:
# Copy every result into its (commune, vote) cell; missing values are replaced by 50 first.
for commune, votation, yes_pct in df[['Commune', 'Votation', 'Oui en %']].fillna(50).values:
    X[commune_dict[commune], votation_dict[votation]] = yes_pct

In [None]:
# Display the filled commune x vote matrix.
X

In [None]:
def get_good_date(x):
    """Return the leading dotted date of ``x`` with its components reversed.

    Only the text before the first space is used; it is split on '.', the
    parts are reversed and joined with '.' again
    (e.g. '25.09.2016 title' -> '2016.09.25').
    """
    leading_token = x.split(' ')[0]
    parts = leading_token.split('.')
    return '.'.join(reversed(parts))


def compare_date(date1, date2):
    """Compare two dot-separated dates component by component.

    Components are compared as integers from left to right, over the
    components of ``date1``.

    Returns -1 if date1 < date2, 1 if date1 > date2 and 0 if they are equal.
    """
    left_parts = date1.split('.')
    right_parts = date2.split('.')
    for position, raw_left in enumerate(left_parts):
        left_value = int(raw_left)
        right_value = int(right_parts[position])
        if left_value != right_value:
            # The first differing component decides the order.
            return -1 if left_value < right_value else 1
    return 0



# Add a 'Date' column holding the reversed leading date of each 'Votation' label.
df['Date'] = df['Votation'].apply(get_good_date)
df.head()

In [None]:
# Keep only the rows whose 'Date' compares strictly greater than '2000.00.00'.
test = df.loc[df['Date'].map(lambda d: compare_date('2000.00.00', d) < 0)]
test

In [None]:
# Coordinates passed as the `location` of every folium map drawn below.
switzerland_coord = [46.765213, 8.252444]
town_geo_path = r'data/switzerland_borders/municipalities.geojson'
# Read the GeoJSON through a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open.
with open(town_geo_path, encoding="utf8") as geo_file:
    geo_json_data = json.load(geo_file)

In [None]:
# Hex colour palette: red, green, blue, yellow, magenta, cyan, black.
# NOTE(review): not referenced anywhere in the visible notebook; confirm it is still needed.
color_list = ['#ff0000' , '#00ff00' , '#0000ff' , '#ffff00' , '#ff00ff' , '#00ffff'  , '#000000']


In [None]:
def style_function(feature):
    """Build a style dict (fill opacity, border weight, fill colour) for one feature.

    The fill colour is computed from the value looked up for ``feature['name']``;
    the opacity and weight are fixed.

    NOTE(review): ``languages_series`` and ``color_language`` are not defined in
    the visible part of this notebook and nothing visible calls this function.
    Confirm where they come from, or whether this helper is a leftover.
    """
    language = languages_series.get(feature['name'], None)
    return {
        'fillOpacity': 0.5,
        'weight': 0,
        'fillColor':  color_language(language)
    }

def draw_map_kmeans(n_clusters, X):
    """Cluster the rows of X with k-means and draw the clusters on a map.

    Parameters
    ----------
    n_clusters : int
        Number of k-means clusters.
    X : array-like
        Feature matrix passed to ``KMeans.fit``; its rows must be in the same
        order as the global ``commune_list``.

    Returns
    -------
    folium.Map
        Map centred on ``switzerland_coord`` with a choropleth layer that
        colours each commune by its cluster label.

    Side effect: prints the pairwise Euclidean distances between cluster centres.
    """
    # n_init is pinned to 10 (the long-standing scikit-learn default) so the
    # result does not change with releases whose default is 'auto'.
    kmeans_res = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(X)
    groups = kmeans_res.labels_

    centers = kmeans_res.cluster_centers_
    print([[np.linalg.norm(x - y) for y in centers] for x in centers])

    to_map = pd.DataFrame({'Commune': commune_list, 'Group': groups})

    map1 = folium.Map(location=switzerland_coord, zoom_start=8)
    layer_options = dict(
        geo_data=geo_json_data,
        data=to_map,
        columns=['Commune', 'Group'],
        key_on='feature.name',
        fill_color='RdYlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='group',
    )
    # Map.choropleth was deprecated and later removed from folium; use the
    # folium.Choropleth layer when it exists and fall back on older releases.
    if hasattr(folium, 'Choropleth'):
        folium.Choropleth(**layer_options).add_to(map1)
    else:
        map1.choropleth(**layer_options)

    return map1

In [None]:
# Draw and save one k-means map for each cluster count from 2 to 5.
for n_clusters in range(2, 6):
    kmeans_map = draw_map_kmeans(n_clusters, X)
    kmeans_map.save('data/map_ml/kmeans' + str(n_clusters) + '.html')

In [None]:
def draw_map_DBSCAN(X, min_samples=20):
    """Cluster the rows of X with DBSCAN and draw the clusters on a map.

    ``eps`` is derived from the data: for every row, the distances to its
    ``min_samples - 1`` nearest other rows are averaged, and ``eps`` is the
    mean of those per-row averages.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``DBSCAN.fit``; its rows must be in the same
        order as the global ``commune_list``.
    min_samples : int, default 20
        DBSCAN ``min_samples``; also sets how many neighbours feed the eps
        heuristic.

    Returns
    -------
    folium.Map
        Map centred on ``switzerland_coord`` with a choropleth layer that
        colours each commune by its DBSCAN label (scikit-learn labels noise
        points -1).
    """
    points = np.asarray(X)
    n_points = len(points)

    # Pairwise distances are computed row by row; this is quadratic in the
    # number of rows, exactly like the heuristic it implements.
    neighbour_means = []
    for y in range(n_points):
        distances = np.sort([np.linalg.norm(points[x] - points[y])
                             for x in range(n_points) if x != y])
        neighbour_means.append(np.mean(distances[:(min_samples - 1)]))
    eps = np.mean(neighbour_means)

    groups = DBSCAN(eps=eps, min_samples=min_samples).fit(X).labels_

    to_map = pd.DataFrame({'Commune': commune_list, 'Group': groups})

    map1 = folium.Map(location=switzerland_coord, zoom_start=8)
    layer_options = dict(
        geo_data=geo_json_data,
        data=to_map,
        columns=['Commune', 'Group'],
        key_on='feature.name',
        fill_color='RdYlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='group',
    )
    # Map.choropleth was deprecated and later removed from folium; use the
    # folium.Choropleth layer when it exists and fall back on older releases.
    if hasattr(folium, 'Choropleth'):
        folium.Choropleth(**layer_options).add_to(map1)
    else:
        map1.choropleth(**layer_options)

    return map1

In [None]:
# Cluster the full matrix with DBSCAN and write the resulting map to disk.
dbscan_map = draw_map_DBSCAN(X)
dbscan_map.save('data/map_ml/DBSCAN.html')

In [None]:
import networkx as nx

# Build a complete directed graph over the rows of X: every pair (i, j) gets an
# edge in both directions whose capacity is 1 minus the Euclidean distance
# between the two rows after dividing them by 100.
G = nx.DiGraph()

# Pair of row indices with the largest `dist` seen so far, and that value.
# Starting from -inf guarantees the first pair is always recorded.
min_max = [0, 0]
max_value = -np.inf

X_len = len(X)
for i in range(X_len):
    for j in range(i + 1, X_len):
        # NOTE(review): dist becomes negative once the scaled distance exceeds 1;
        # confirm negative capacities are acceptable for the min-cut that follows.
        dist = 1 - np.linalg.norm(X[i] / 100 - X[j] / 100)
        G.add_edge(i, j, capacity=dist)
        G.add_edge(j, i, capacity=dist)
        if dist > max_value:
            # Update the running maximum too; without this the comparison is
            # always against the initial value and the last pair wins.
            max_value = dist
            min_max = [i, j]
# NOTE(review): since dist is a similarity, min_max is the most similar pair;
# confirm this (rather than the most dissimilar pair) is the intended source/sink.
G

In [None]:
# Minimum cut between the two nodes stored in min_max (source first, sink second).
cut_value, partition = nx.minimum_cut(G, *min_max)
# The partition is a pair of node sets, unpacked here as reachable / non_reachable.
reachable, non_reachable = partition

In [None]:
# Show the cut value returned by nx.minimum_cut.
cut_value

In [None]:
# Number of nodes in the first set of the partition.
len(reachable)


In [None]:
# Small hand-built flow network used to try out nx.minimum_cut.
# Note that this rebinds G, cut_value, partition, reachable and non_reachable.
G = nx.DiGraph()
G.add_weighted_edges_from(
    [
        ('x', 'a', 3.0),
        ('x', 'b', 1.0),
        ('a', 'c', 3.0),
        ('b', 'c', 5.0),
        ('b', 'd', 4.0),
        ('d', 'e', 2.0),
        ('c', 'y', 2.0),
        ('e', 'y', 3.0),
    ],
    weight='capacity',
)

cut_value, partition = nx.minimum_cut(G, 'x', 'y')
reachable, non_reachable = partition
reachable

Machine learning by theme: cluster the communes separately for each vote theme

In [None]:
# Load the theme DataFrame from a local pickle; it is joined to df on 'Votation' further down.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
data_t = pd.read_pickle('data/data_theme.pkl')
data_t.head()

In [None]:
# Preview df again before merging it with the theme data.
df.head()

In [None]:
# Attach the theme columns to every vote result by joining on the 'Votation' label.
data_theme = pd.merge(df, data_t, on='Votation')
data_theme.head()

In [None]:
# For every theme, rebuild the commune x vote matrix from that theme's votes
# only and save a 2-cluster k-means map. data_bg = data by group.
theme_columns = ['Thématique', 'Votation', 'Commune', 'Oui en %']
for theme, data_bg in data_theme[theme_columns].groupby('Thématique'):
    # Sorted so the column order is reproducible between runs (a bare set of
    # strings iterates in hash order, which changes per interpreter run).
    votation_list_t = sorted(set(data_bg['Votation'].values))
    votation_dict_t = {val: idx for idx, val in enumerate(votation_list_t)}

    # Cells start at 50 so (commune, vote) pairs with no row in this theme get
    # the same value as the one used to fill missing results, not a spurious 1.
    Xt = np.full((len(commune_list), len(votation_list_t)), 50.0, dtype=float)

    for x in data_bg[['Commune', 'Votation', 'Oui en %']].fillna(50).values:
        Xt[commune_dict[x[0]]][votation_dict_t[x[1]]] = x[2]
    draw_map_kmeans(2, Xt).save('data/maps_theme_ml/kmeans_' + theme + '.html')
    