In [3]:
from ipywidgets import interact

In [4]:
%%capture
!mkdir -p data
!wget https://results.cik.bg/pi2021/export.zip -O data/izbori.zip
!unzip -o data/izbori.zip  -d data/ 

In [5]:
import pandas as pd
import seaborn as sns
from matplotlib import cm
from sklearn.cluster import KMeans
import numpy as np
import scipy
pd.set_option('display.max_columns', None)
pd.set_option('precision', 2)


In [6]:
parties = pd.read_csv("data/cik_parties_04.04.2021.txt", sep=";", names=["id", "party"]).set_index("id")

In [63]:
section_data = pd.read_csv('data/sections_04.04.2021.txt', sep=';', 
                           names=["section_id", "admin_id", "admin_name", "EKATTE", "city", "is_mobile", "is_ship", "is_machine"],
                          dtype={"section_id": object, "admin_id": object})

In [64]:
section_votes = []
def get_votes(l, parties):
    entries = l.split(";")
    section_id = entries[0]
    adm_id = entries[1]
    votes = [int(v) if v!='' else 0 for v in entries[2:]]
    res = {
        'section_id': section_id,
        'admin_id': adm_id
    }
    total_votes = 0
    for i in range(0, len(votes), 4):
        party_id = votes[i]
        if party_id > 30:
            continue
        #party_name = parties.loc[party_id]['party']
        party_votes = votes[i+1]
        res[party_id] = party_votes
        total_votes += party_votes
    res['total_votes'] = total_votes
    return res

section_votes = []
for line in open("data/votes_04.04.2021.txt"):
    section_votes.append(get_votes(line.strip(), parties))
    
section_votes = pd.DataFrame(section_votes)

In [65]:
normalized_votes = section_votes[['section_id', 'admin_id']].copy()
for col in range(1, 31):
    normalized_votes[col] = section_votes[col]/section_votes['total_votes']

In [66]:
actual_votes = pd.Series([0.0364, 0.0015, 0.0009, 0.1501, 0.0245, 0.0046, 0.0049, 0.0029, 0.1051, 0.0012, 0.0945, 0.0041, 0.0011, 0.0295, 0.0003, 0.0011, 0.0007, 0.0472, 0.0026, 0.0053, 0.0131, 0.0007, 0.0005, 0.0237, 0.0017, 0.0011, 0.001, 0.2618, 0.1766, 0.0011
], index=[i for i in range(1, 31)])


In [67]:
cols = [i for i in range(1,31)]

In [68]:
kmeans = KMeans(7, random_state=42)
kmeans.fit(normalized_votes[cols]);

In [69]:
pc = {
    4: cm.get_cmap('Reds'),
    9: cm.get_cmap('Purples'),
    28: cm.get_cmap('Blues'),
    29: cm.get_cmap('BuPu'),
    18: cm.get_cmap('Greens'),
    11: cm.get_cmap('Blues')
}
def to_hex(rgba):
    def pad(x):
        if len(x) == 1:
            return "0"+x
        return x
    return "".join([pad(hex(x)[2:]) for x in rgba[:3]])

def party_colors(v):
    bgdf = v.copy()
    bgdf[cols] = 'background-color: white'
    for p, colormap in pc.items():
        bgdf[p] =  [f'background-color: #{to_hex(c)}' for c in colormap(v[p], bytes=True)]
    return bgdf

In [70]:
distances = scipy.spatial.distance_matrix(normalized_votes[cols], kmeans.cluster_centers_)

In [71]:
distances.shape

(12941, 7)

In [72]:
sections_extra = pd.merge(normalized_votes, section_data, on=['section_id'])

In [73]:
sections_extra

Unnamed: 0,section_id,admin_id_x,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,admin_id_y,admin_name,EKATTE,city,is_mobile,is_ship,is_machine
0,010100001,1,1.75e-02,0.00e+00,0.00e+00,0.19,9.98e-03,2.49e-03,0.00e+00,0.00e+00,0.00e+00,0.00e+00,0.06,9.98e-03,0.00e+00,0.02,0.00e+00,0.00e+00,0.00e+00,0.03,0.00e+00,2.49e-03,4.99e-02,0.00e+00,2.49e-03,8.98e-02,2.49e-03,0.00e+00,0.00e+00,0.29,0.20,0.00e+00,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
1,010100002,1,2.93e-02,5.33e-03,0.00e+00,0.13,1.33e-02,0.00e+00,0.00e+00,2.67e-03,0.00e+00,0.00e+00,0.11,5.33e-03,2.67e-03,0.02,2.67e-03,0.00e+00,0.00e+00,0.06,0.00e+00,8.00e-03,6.40e-02,0.00e+00,0.00e+00,6.67e-02,0.00e+00,0.00e+00,0.00e+00,0.33,0.15,2.67e-03,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
2,010100003,1,2.68e-02,0.00e+00,2.98e-03,0.14,5.95e-03,2.98e-03,5.95e-03,2.98e-03,2.98e-03,0.00e+00,0.05,0.00e+00,0.00e+00,0.02,0.00e+00,0.00e+00,0.00e+00,0.04,0.00e+00,5.95e-03,3.27e-02,0.00e+00,2.98e-03,5.06e-02,0.00e+00,2.98e-03,0.00e+00,0.40,0.20,0.00e+00,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
3,010100004,1,2.22e-02,0.00e+00,0.00e+00,0.18,4.93e-03,4.93e-03,0.00e+00,0.00e+00,0.00e+00,2.46e-03,0.08,4.93e-03,2.46e-03,0.03,2.46e-03,0.00e+00,0.00e+00,0.03,4.93e-03,1.23e-02,3.45e-02,2.46e-03,0.00e+00,8.62e-02,2.46e-03,0.00e+00,2.46e-03,0.37,0.11,0.00e+00,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
4,010100005,1,3.35e-02,0.00e+00,0.00e+00,0.15,1.68e-02,2.79e-03,0.00e+00,0.00e+00,2.79e-03,0.00e+00,0.10,5.59e-03,2.79e-03,0.03,0.00e+00,0.00e+00,0.00e+00,0.03,0.00e+00,2.79e-03,6.70e-02,0.00e+00,0.00e+00,1.12e-01,0.00e+00,0.00e+00,2.79e-03,0.33,0.11,0.00e+00,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12936,326700461,32,1.03e-02,1.47e-03,0.00e+00,0.05,6.45e-02,1.47e-03,0.00e+00,2.93e-03,1.47e-03,0.00e+00,0.42,8.80e-03,0.00e+00,0.06,1.47e-03,1.47e-03,2.93e-03,0.04,1.47e-03,0.00e+00,4.40e-03,0.00e+00,0.00e+00,5.87e-03,2.93e-03,1.47e-03,0.00e+00,0.08,0.24,2.93e-03,32,32. Извън страната,100418,"Швейцария, Цюрих",0,0,0
12937,326800462,32,7.09e-03,0.00e+00,0.00e+00,0.06,1.52e-01,0.00e+00,3.55e-03,0.00e+00,2.84e-02,0.00e+00,0.25,7.09e-03,0.00e+00,0.05,0.00e+00,3.55e-03,0.00e+00,0.05,3.55e-03,0.00e+00,1.06e-02,0.00e+00,0.00e+00,7.09e-03,3.55e-03,0.00e+00,0.00e+00,0.06,0.30,0.00e+00,32,32. Извън страната,100419,"Швеция, Гьотеборг",0,0,0
12938,326800463,32,4.88e-03,0.00e+00,0.00e+00,0.06,7.80e-02,4.88e-03,4.88e-03,4.88e-03,2.93e-02,0.00e+00,0.39,9.76e-03,0.00e+00,0.04,0.00e+00,0.00e+00,0.00e+00,0.06,4.88e-03,0.00e+00,0.00e+00,0.00e+00,0.00e+00,9.76e-03,4.88e-03,0.00e+00,0.00e+00,0.06,0.22,4.88e-03,32,32. Извън страната,100420,"Швеция, Малмьо",0,0,0
12939,326800464,32,1.55e-02,1.93e-03,0.00e+00,0.09,6.77e-02,3.87e-03,1.16e-02,1.35e-02,6.19e-02,1.93e-03,0.24,3.87e-03,0.00e+00,0.05,7.74e-03,0.00e+00,0.00e+00,0.05,5.80e-03,5.80e-03,3.87e-03,1.93e-03,0.00e+00,3.87e-03,3.87e-03,1.93e-03,0.00e+00,0.11,0.23,1.93e-03,32,32. Извън страната,100421,"Швеция, Стокхолм",0,0,0


In [114]:
sections_extra['url'] = sections_extra.apply(lambda x: f'<A href="https://results.cik.bg/pi2021/pdf/64/{x["section_id"][:2]}/{x["section_id"]}.pdf">{x["section_id"]} </A>', axis=1)

In [115]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [121]:
def display_cluster(cluster_id, n=20):
    cluster_distances = distances[:, cluster_id]
    sorted_sections = cluster_distances.argsort()
    
    cluster_center = pd.DataFrame({f"cluster {cluster_id}": pd.Series(kmeans.cluster_centers_[cluster_id], index=cols)}).transpose()
    
    print("Cluster Center:")
    display(cluster_center.round(2).style.apply(party_colors, axis=None))
    
    top20 = sorted_sections[:n]
    bottom20 = sorted_sections[-n:]
    
    
    print(f"Closest {n} sections:")
    display(sections_extra.iloc[top20][['url', 'city','admin_name']+cols].round(2).style.apply(party_colors, axis=None))
    
    print(f"Furthest {n} sections:")
    display(sections_extra.iloc[bottom20][['url', 'city', 'admin_name']+cols].round(2).style.apply(party_colors, axis=None))
    return cluster_id


In [122]:
from ipywidgets import IntSlider

In [123]:
section_data

Unnamed: 0,section_id,admin_id,admin_name,EKATTE,city,is_mobile,is_ship,is_machine
0,010100001,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
1,010100002,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
2,010100003,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
3,010100004,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
4,010100005,1,01. БЛАГОЕВГРАД,2676,гр.Банско,0,0,1
...,...,...,...,...,...,...,...,...
12936,326700461,32,32. Извън страната,100418,"Швейцария, Цюрих",0,0,0
12937,326800462,32,32. Извън страната,100419,"Швеция, Гьотеборг",0,0,0
12938,326800463,32,32. Извън страната,100420,"Швеция, Малмьо",0,0,0
12939,326800464,32,32. Извън страната,100421,"Швеция, Стокхолм",0,0,0


In [124]:
normalized_votes

Unnamed: 0,section_id,admin_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
0,010100001,1,1.75e-02,0.00e+00,0.00e+00,0.19,9.98e-03,2.49e-03,0.00e+00,0.00e+00,0.00e+00,0.00e+00,0.06,9.98e-03,0.00e+00,0.02,0.00e+00,0.00e+00,0.00e+00,0.03,0.00e+00,2.49e-03,4.99e-02,0.00e+00,2.49e-03,8.98e-02,2.49e-03,0.00e+00,0.00e+00,0.29,0.20,0.00e+00
1,010100002,1,2.93e-02,5.33e-03,0.00e+00,0.13,1.33e-02,0.00e+00,0.00e+00,2.67e-03,0.00e+00,0.00e+00,0.11,5.33e-03,2.67e-03,0.02,2.67e-03,0.00e+00,0.00e+00,0.06,0.00e+00,8.00e-03,6.40e-02,0.00e+00,0.00e+00,6.67e-02,0.00e+00,0.00e+00,0.00e+00,0.33,0.15,2.67e-03
2,010100003,1,2.68e-02,0.00e+00,2.98e-03,0.14,5.95e-03,2.98e-03,5.95e-03,2.98e-03,2.98e-03,0.00e+00,0.05,0.00e+00,0.00e+00,0.02,0.00e+00,0.00e+00,0.00e+00,0.04,0.00e+00,5.95e-03,3.27e-02,0.00e+00,2.98e-03,5.06e-02,0.00e+00,2.98e-03,0.00e+00,0.40,0.20,0.00e+00
3,010100004,1,2.22e-02,0.00e+00,0.00e+00,0.18,4.93e-03,4.93e-03,0.00e+00,0.00e+00,0.00e+00,2.46e-03,0.08,4.93e-03,2.46e-03,0.03,2.46e-03,0.00e+00,0.00e+00,0.03,4.93e-03,1.23e-02,3.45e-02,2.46e-03,0.00e+00,8.62e-02,2.46e-03,0.00e+00,2.46e-03,0.37,0.11,0.00e+00
4,010100005,1,3.35e-02,0.00e+00,0.00e+00,0.15,1.68e-02,2.79e-03,0.00e+00,0.00e+00,2.79e-03,0.00e+00,0.10,5.59e-03,2.79e-03,0.03,0.00e+00,0.00e+00,0.00e+00,0.03,0.00e+00,2.79e-03,6.70e-02,0.00e+00,0.00e+00,1.12e-01,0.00e+00,0.00e+00,2.79e-03,0.33,0.11,0.00e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12936,326700461,32,1.03e-02,1.47e-03,0.00e+00,0.05,6.45e-02,1.47e-03,0.00e+00,2.93e-03,1.47e-03,0.00e+00,0.42,8.80e-03,0.00e+00,0.06,1.47e-03,1.47e-03,2.93e-03,0.04,1.47e-03,0.00e+00,4.40e-03,0.00e+00,0.00e+00,5.87e-03,2.93e-03,1.47e-03,0.00e+00,0.08,0.24,2.93e-03
12937,326800462,32,7.09e-03,0.00e+00,0.00e+00,0.06,1.52e-01,0.00e+00,3.55e-03,0.00e+00,2.84e-02,0.00e+00,0.25,7.09e-03,0.00e+00,0.05,0.00e+00,3.55e-03,0.00e+00,0.05,3.55e-03,0.00e+00,1.06e-02,0.00e+00,0.00e+00,7.09e-03,3.55e-03,0.00e+00,0.00e+00,0.06,0.30,0.00e+00
12938,326800463,32,4.88e-03,0.00e+00,0.00e+00,0.06,7.80e-02,4.88e-03,4.88e-03,4.88e-03,2.93e-02,0.00e+00,0.39,9.76e-03,0.00e+00,0.04,0.00e+00,0.00e+00,0.00e+00,0.06,4.88e-03,0.00e+00,0.00e+00,0.00e+00,0.00e+00,9.76e-03,4.88e-03,0.00e+00,0.00e+00,0.06,0.22,4.88e-03
12939,326800464,32,1.55e-02,1.93e-03,0.00e+00,0.09,6.77e-02,3.87e-03,1.16e-02,1.35e-02,6.19e-02,1.93e-03,0.24,3.87e-03,0.00e+00,0.05,7.74e-03,0.00e+00,0.00e+00,0.05,5.80e-03,5.80e-03,3.87e-03,1.93e-03,0.00e+00,3.87e-03,3.87e-03,1.93e-03,0.00e+00,0.11,0.23,1.93e-03


In [125]:
interact(display_cluster, cluster_id=[0,1], n=IntSlider(20, 1, 200));

interactive(children=(Dropdown(description='cluster_id', options=(0, 1), value=0), IntSlider(value=20, descrip…

In [18]:
def f(x):
    return x