In [1]:
from ipywidgets import interact

In [2]:
%%capture
# Download the export archive from results.cik.bg and unpack it under data/.
# The %%capture magic above swallows this cell's output, so a failed download
# is not reported here -- check that data/ is populated before continuing.
# -p: do not fail when data/ already exists.
!mkdir -p data
!wget https://results.cik.bg/pi2021/export.zip -O data/izbori.zip
# -o: overwrite previously extracted files without prompting.
!unzip -o data/izbori.zip  -d data/ 

In [17]:
from __future__ import print_function

import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from matplotlib import cm
from scipy.spatial import distance_matrix
from sklearn.cluster import KMeans
# Show every column when a wide frame is displayed.
pd.set_option('display.max_columns', None)
# Use the fully-qualified option name: the bare 'precision' shortcut is
# deprecated/ambiguous on newer pandas releases, where more than one option
# key contains the word "precision".
pd.set_option('display.precision', 2)


In [4]:
# Two-column, semicolon-separated, header-less file: the first column becomes
# the index ("id"), the second is kept as "party".
parties = (
    pd.read_csv(
        "data/cik_parties_04.04.2021.txt",
        sep=";",
        names=["id", "party"],
    )
    .set_index("id")
)

In [5]:
# Column names for the header-less, semicolon-separated sections file.
SECTION_COLUMNS = [
    "section_id", "admin_id", "admin_name", "EKATTE",
    "city", "is_mobile", "is_ship", "is_machine",
]

# The two identifier columns are read with dtype=object so they are kept as
# text instead of being parsed as numbers.
section_data = pd.read_csv(
    'data/sections_04.04.2021.txt',
    sep=';',
    names=SECTION_COLUMNS,
    dtype={"section_id": object, "admin_id": object},
)

In [6]:
section_votes = []


def get_votes(l, parties):
    """Parse one semicolon-separated line of the votes file into a flat record.

    The first two fields are the section id and the admin id (kept as
    strings). Every following field is converted to ``int`` (an empty field
    counts as 0) and the numbers are read in groups of four: the first number
    of a group is the party id and the second is that party's vote count.
    The last two numbers of each group are not used here (presumably a
    breakdown of the count -- confirm against the export format).
    Groups whose party id is greater than 30 are skipped.

    Parameters
    ----------
    l : str
        One already-stripped line of the votes file.
    parties : object
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        ``{'section_id': ..., 'admin_id': ..., <party id>: <votes>, ...,
        'total_votes': <sum of the recorded party votes>}``.
    """
    fields = l.split(";")
    numbers = [0 if field == '' else int(field) for field in fields[2:]]

    record = {'section_id': fields[0], 'admin_id': fields[1]}
    running_total = 0

    position = 0
    while position < len(numbers):
        party_id = numbers[position]
        if party_id <= 30:
            ballots = numbers[position + 1]
            record[party_id] = ballots
            running_total += ballots
        position += 4  # jump to the first number of the next group

    record['total_votes'] = running_total
    return record

# Parse the votes file into one record per line and build the frame in a
# single step. The `with` block closes the file handle deterministically
# (the bare `open()` in a for-loop left it to the garbage collector), and
# blank lines are skipped because get_votes() cannot parse them.
with open("data/votes_04.04.2021.txt") as votes_file:
    vote_records = [
        get_votes(line.strip(), parties)
        for line in votes_file
        if line.strip()
    ]

# One row per section; party-id columns plus 'total_votes'.
section_votes = pd.DataFrame(vote_records)

In [7]:
# Turn the raw counts of parties 1..30 into per-section shares by dividing
# each row by that section's 'total_votes', then put the identifier columns
# back in front.
party_columns = list(range(1, 31))
vote_shares = section_votes[party_columns].div(section_votes['total_votes'], axis=0)
normalized_votes = pd.concat(
    [section_votes[['section_id', 'admin_id']], vote_shares],
    axis=1,
)

In [8]:
# One hard-coded fraction per party id 1..30 (presumably the official overall
# result shares -- confirm the source of these figures).
actual_votes = pd.Series(
    [
        0.0364, 0.0015, 0.0009, 0.1501, 0.0245, 0.0046,
        0.0049, 0.0029, 0.1051, 0.0012, 0.0945, 0.0041,
        0.0011, 0.0295, 0.0003, 0.0011, 0.0007, 0.0472,
        0.0026, 0.0053, 0.0131, 0.0007, 0.0005, 0.0237,
        0.0017, 0.0011, 0.001, 0.2618, 0.1766, 0.0011,
    ],
    index=list(range(1, 31)),
)


In [9]:
# Column labels of the per-party share columns: party ids 1 through 30.
cols = list(range(1, 31))

In [10]:
# Cluster the sections by their party-share profile.
# n_init is pinned to 10 (the historical scikit-learn default) so the result
# does not change on releases where the default became 'auto'; random_state
# keeps the run reproducible.
kmeans = KMeans(n_clusters=7, n_init=10, random_state=42)
kmeans.fit(normalized_votes[cols]);

In [11]:
# Party id -> matplotlib colormap used to shade that party's column.
# The colormaps are taken as attributes of `matplotlib.cm` instead of via
# `cm.get_cmap(...)`, which is deprecated and removed in recent matplotlib
# releases. Parties 28 and 11 share 'Blues', exactly as before.
pc = {
    4: cm.Reds,
    9: cm.Purples,
    28: cm.Blues,
    29: cm.BuPu,
    18: cm.Greens,
    11: cm.Blues,
}
def to_hex(rgba):
    """Return the first three channels of ``rgba`` as a lowercase hex string.

    Each channel is written with at least two hex digits (zero-padded), so
    byte-valued channels give the usual six-character ``rrggbb`` form. Any
    component after the third (e.g. alpha) is ignored.
    """
    red_green_blue = rgba[:3]
    return "".join("%02x" % channel for channel in red_green_blue)

def party_colors(v):
    """Build the CSS frame for ``Styler.apply(party_colors, axis=None)``.

    Parameters
    ----------
    v : pandas.DataFrame
        The frame being styled; it must contain every column listed in
        ``cols``.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``v``. Party columns get a white
        background, columns listed in ``pc`` are shaded by passing their
        values through the party's colormap, and every other column gets an
        empty style.
    """
    # Start from empty styles rather than a copy of the data: copying left
    # the raw values of non-party columns (city names, links, ...) in the
    # style frame, and Styler then tries to interpret them as CSS.
    bgdf = pd.DataFrame('', index=v.index, columns=v.columns)
    bgdf[cols] = 'background-color: white'
    for p, colormap in pc.items():
        # bytes=True makes the colormap return integer channels for to_hex().
        bgdf[p] = [f'background-color: #{to_hex(c)}' for c in colormap(v[p], bytes=True)]
    return bgdf

In [12]:
# Distance from every section's share vector (rows) to every cluster centre
# (columns). distance_matrix is imported explicitly at the top of the
# notebook: reaching it through a bare `import scipy` only works when some
# other package has already loaded the scipy.spatial submodule.
distances = distance_matrix(normalized_votes[cols], kmeans.cluster_centers_)

In [15]:
# Attach the section metadata to the share rows.
# `distances` is computed from normalized_votes and later used to index
# sections_extra by row position, so the merge must keep exactly one output
# row per input row, in the same order:
#   how='left'              -> sections without metadata are kept, not dropped
#   validate='many_to_one'  -> fail loudly if section_data repeats a section_id
#                              (which would duplicate rows and shift positions)
sections_extra = pd.merge(
    normalized_votes,
    section_data,
    on=['section_id'],
    how='left',
    validate='many_to_one',
)

In [16]:
def _protocol_link(section_id):
    """Return the HTML anchor for a section's PDF on results.cik.bg.

    The first two characters of the id select the sub-directory; the link
    text is the id itself.
    """
    return f'<A href="https://results.cik.bg/pi2021/pdf/64/{section_id[:2]}/{section_id}.pdf">{section_id} </A>'


# One link per row, derived from that row's section id.
sections_extra['url'] = sections_extra['section_id'].map(_protocol_link)

In [18]:
def display_cluster(cluster_id, n=20):
    """Show a cluster's centre and the sections nearest to / furthest from it.

    Parameters
    ----------
    cluster_id : int
        Column of ``distances`` (and row of ``kmeans.cluster_centers_``) to
        inspect.
    n : int, default 20
        Number of sections to list at each end of the distance ranking.

    Returns
    -------
    int
        ``cluster_id``, unchanged.
    """
    # Row positions ordered from the closest section to the furthest one.
    ranked = distances[:, cluster_id].argsort()
    closest = ranked[:n]
    # Explicit start index instead of ranked[-n:], which returns *every* row
    # when n == 0.
    furthest = ranked[max(len(ranked) - n, 0):]

    shown_columns = ['url', 'city', 'admin_name'] + cols

    def show_sections(positions):
        # Positions come from `distances` but index `sections_extra`, so both
        # must share the same row order.
        display(sections_extra.iloc[positions][shown_columns].round(2).style.apply(party_colors, axis=None))

    cluster_center = pd.DataFrame(
        {f"cluster {cluster_id}": pd.Series(kmeans.cluster_centers_[cluster_id], index=cols)}
    ).transpose()

    print("Cluster Center:")
    display(cluster_center.round(2).style.apply(party_colors, axis=None))

    print(f"Closest {n} sections:")
    show_sections(closest)

    print(f"Furthest {n} sections:")
    show_sections(furthest)
    return cluster_id


In [19]:
from ipywidgets import IntSlider

In [22]:
# Offer exactly the clusters the fitted model has (instead of a hard-coded 7
# that can drift from the KMeans configuration) and name the slider bounds.
interact(display_cluster, cluster_id=list(range(kmeans.n_clusters)), n=IntSlider(value=20, min=1, max=200));

interactive(children=(Dropdown(description='cluster_id', options=(0, 1, 2, 3, 4, 5, 6), value=0), IntSlider(va…