# Setup

## Get general data & imports

In [1]:
import matplotlib.pyplot as plt
import matplotlib
import re
import pandas as pd
import unicodedata

# Get general data

# Read the CSV, then keep only the rows whose "Cargo" is DIPUTADOS NACIONALES
df_raw = pd.read_csv("MESAS_ESCRUTADAS_Cierre.csv", sep=",")
is_diputados_row = df_raw["Cargo"] == "DIPUTADOS NACIONALES"
df_diputados = df_raw.loc[is_diputados_row]

# Clean district names
def stripAccents(s):
    """Return `s` with its combining accent marks removed.

    The string is decomposed (NFD) so that accented letters become a base
    letter followed by a combining mark; characters in Unicode category
    'Mn' are then dropped.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# Build a raw -> clean district-name lookup once: spaces become underscores
# and accents are stripped, e.g. "Entre Ríos" --> "Entre_Rios"
districts_raw = df_diputados["Distrito"].unique()
district_names = {
    district: stripAccents(district.replace(" ", "_"))
    for district in districts_raw
}
# dict.fromkeys keeps first-seen order and drops any repeated clean name
districts = list(dict.fromkeys(district_names.values()))

# Rename inside the "Distrito" column only. A frame-wide .replace() per
# district would also rewrite equal values in every other column and would
# rescan the whole frame once per district.
df_diputados = df_diputados.assign(
    Distrito=df_diputados["Distrito"].map(district_names)
)

# Remove rows without a party (NaN in "Agrupacion")
df = df_diputados[~df_diputados["Agrupacion"].isnull()]

# Sum votes per (district, party) in a single pass; sort=False keeps groups in
# order of first appearance in the data
votes_by_district_party = (
    df.groupby(["Distrito", "Agrupacion"], sort=False)["votos"].sum()
)

# Nested dict {district: {party: votes}}; every district gets an entry even
# when it has no party rows
country_dict = {district: {} for district in districts}
for (district, party), n_votes in votes_by_district_party.items():
    country_dict[district][party] = n_votes

# Parties as rows, districts as columns; a party absent from a district gets 0
df_votes_tot = pd.DataFrame(country_dict)
df_votes_tot = df_votes_tot.fillna(0)

# Get general data
parties = df["Agrupacion"].unique().tolist()
n_votes_total = df["votos"].sum()

## Functions

In [13]:
# Create mapping to [0, 254]
def createDiscreteLinearMapping(input_min, input_max, output_min, output_max):
    """Build a function mapping [input_min, input_max] linearly onto integers.

    Parameters
    ----------
    input_min, input_max : number
        Bounds of the input range.
    output_min, output_max : number
        Values the input bounds are mapped to.

    Returns
    -------
    callable
        ``f(x) -> int``. ``f(input_min)`` is ``int(output_min)`` and
        ``f(input_max)`` is ``int(output_max)``; values in between are
        interpolated linearly and truncated with ``int()``. When
        ``input_min == input_max`` every input maps to ``int(output_min)``.
    """
    input_span = input_max - input_min
    if input_span == 0:
        # Degenerate range: the slope is undefined, so avoid the division by
        # zero and send everything to the low end of the output range.
        return lambda x: int(output_min)

    k = (output_max - output_min) / input_span
    m = output_max - input_max * k

    def mapping(x):
        # Pin the endpoints exactly: x*k + m can land a hair below the target
        # because of float round-off, and int() would then truncate it to the
        # neighbouring bucket.
        if x == input_max:
            return int(output_max)
        if x == input_min:
            return int(output_min)
        return int(x * k + m)

    return mapping

# Append 'linear' column to dataframe
def appendLinearValues(df, column_name):
    """Return a copy of `df` with an extra "linear" column.

    The "linear" column holds the values of `column_name` passed through a
    mapping built from that column's minimum and maximum onto 0..254.
    The input frame is left untouched.
    """
    column = df[column_name]
    to_linear = createDiscreteLinearMapping(column.min(), column.max(), 0, 254)

    df_result = df.copy()
    df_result["linear"] = column.apply(to_linear)
    return df_result


# Create dictionary with min, mid and max values of column
def createMinMaxDict(df, column_name):
    """Summarise a column as its minimum, midpoint and maximum.

    Returns a dict with keys "val_min", "val_mid" and "val_max", where the
    midpoint is the mean of the minimum and the maximum.
    """
    column = df[column_name]
    lowest, highest = column.min(), column.max()
    return {
        "val_min": lowest,
        "val_mid": (highest + lowest)/2,
        "val_max": highest,
    }


# Create a list of colors
def createColorList(color_map_name):
    """Return the colours of a matplotlib colormap as hex strings.

    The colormap is requested with 255 entries; each entry is converted with
    rgb2hex and the leading "#" is dropped, so items look like "rrggbb".
    """
    cmap = plt.get_cmap(color_map_name, 255)
    return [
        matplotlib.colors.rgb2hex(cmap(position))[1:]  # [1:] strips the "#"
        for position in range(cmap.N)
    ]

# Append color column to dataframe
def appendHexColors(df, column_name, color_map_name):
    """Return a copy of `df` with an extra "color_hex" column.

    Each value in `column_name` is converted to int and used as an index
    into the colour list built for `color_map_name`.
    """
    palette = createColorList(color_map_name)
    hex_colors = df[column_name].apply(lambda position: palette[int(position)])

    df_result = df.copy()
    df_result["color_hex"] = hex_colors
    return df_result



# Create svg gradient
def createSvgGradient(color_map_name = 'RdPu'):
    """Build the `<stop>` elements of an SVG gradient for a colormap.

    The colormap is requested with 255 entries. Stops are emitted from the
    last colour to the first, with offsets running from 0% (last colour) to
    100% (first colour).

    Parameters
    ----------
    color_map_name : str
        Name of the matplotlib colormap.

    Returns
    -------
    str
        The `<stop .../>` lines joined by newlines, wrapped in a leading and
        trailing newline plus indentation.
    """
    cmap = plt.get_cmap(color_map_name, 255)
    # Divide by the last index, not the colour count, so the offsets span the
    # full 0%..100% range; max() guards a one-colour map against dividing by 0.
    last_index = max(cmap.N - 1, 1)

    text_list = []
    for i in reversed(range(cmap.N)):
        percentage = 100 - i / last_index * 100
        # rgb2hex accepts rgb or rgba
        hex_color = matplotlib.colors.rgb2hex(cmap(i))
        text_list.append(f"""<stop offset="{percentage}%" stop-color="{hex_color}"/>""")

    gradient_text = """
    {}
    """.format("\n".join(text_list))

    return gradient_text

# Create new svg from template file
def createSvgProvinces(df, dict_min_max, color_map_name, output_file):
    """Fill the province SVG template and write the result to disk.

    Reads "images/argentina_provinces_template.svg", substitutes its
    placeholders and writes "images/<output_file>.svg".

    Parameters
    ----------
    df : pandas.DataFrame
        One row per district; the index label is the placeholder name and
        the "color_hex" column is the text inserted for "{<label>}".
    dict_min_max : dict
        Each "{<key>}" placeholder is replaced by the value multiplied by
        100 and formatted with no decimals.
    color_map_name : str
        Colormap used to build the text inserted for "{gradient}".
    output_file : str
        Output file name without directory or ".svg" extension.
    """
    # Read the template fully and close it before doing any work on the text
    with open("images/argentina_provinces_template.svg", 'r') as f_template:
        text = f_template.read()

    # Placeholders are literal text, so plain str.replace is used: no regex
    # escaping of names or replacement strings is needed.

    # Fill provinces
    for district, row in df.iterrows():
        text = text.replace(f"{{{district}}}", row.color_hex)

    # Insert min, mid and max values in bar
    for key, value in dict_min_max.items():
        text = text.replace(f"{{{key}}}", f"{value*100:.0f}")

    # Fix color bar colors
    text = text.replace("{gradient}", createSvgGradient(color_map_name))

    with open(f'images/{output_file}.svg', 'w') as f_output:
        f_output.write(text)

# Calculations

## Political dominance

### Create dataframe for dominance

In [47]:
# Create data frame for dominance: for every district, the winning party and
# its lead over the runner-up as a share of all party votes in that district
dict_dominance = {}
dict_winner = {}

for district in districts:
    votes_district = df_votes_tot[district]  # votes per party in this district

    votes_district_tot = votes_district.sum()
    votes_district_winner = votes_district.max()
    name_winner = votes_district.idxmax()

    # The Series is indexed by party name, so the second-largest value must be
    # taken by position with .iloc; plain [1] on a label index is deprecated.
    top_two = votes_district.nlargest(2)
    votes_district_runner_up = top_two.iloc[1] if len(top_two) > 1 else 0

    dominance = (votes_district_winner-votes_district_runner_up)/votes_district_tot

    dict_dominance[district] = dominance
    dict_winner[district] = name_winner

df_dominance = pd.DataFrame({"winner": dict_winner, "dominance":dict_dominance})
df_winner = pd.DataFrame({"winner":dict_winner})

### Get color values and create svg

In [48]:
color_map_name = "RdPu"

# Add the "linear" column first, then the "color_hex" column derived from it
df_dominance = appendHexColors(
    appendLinearValues(df_dominance, "dominance"),
    "linear",
    color_map_name,
)

# Collect the legend values and write the coloured map to images/test.svg
dict_min_max = createMinMaxDict(df_dominance, "dominance")
createSvgProvinces(df_dominance, dict_min_max, color_map_name, "test")

In [69]:
# Display the DIPUTADOS NACIONALES rows after the district names were cleaned
df_diputados

Unnamed: 0,Agrupacion,Cargo,Codigo,Distrito,Establecimiento,Fecha,IdCargo,IdCircuito,IdDistrito,IdSeccion,Mesa,Seccion,electores,envio,idAgrupacion,idAgrupacionInt,tipoVoto,votos
62,,DIPUTADOS NACIONALES,6796,Cordoba,ESC.NAC.DE COMERCIO,14-11-2021 18:30,3,00112,4,8,04933X,Juárez Celman,346,1,,,blancos,1
63,,DIPUTADOS NACIONALES,6796,Cordoba,ESC.NAC.DE COMERCIO,14-11-2021 18:30,3,00112,4,8,04933X,Juárez Celman,346,1,,,nulos,4
64,,DIPUTADOS NACIONALES,6796,Cordoba,ESC.NAC.DE COMERCIO,14-11-2021 18:30,3,00112,4,8,04933X,Juárez Celman,346,1,,,recurridos,0
65,,DIPUTADOS NACIONALES,6796,Cordoba,ESC.NAC.DE COMERCIO,14-11-2021 18:30,3,00112,4,8,04933X,Juárez Celman,346,1,,,comando,0
66,,DIPUTADOS NACIONALES,6796,Cordoba,ESC.NAC.DE COMERCIO,14-11-2021 18:30,3,00112,4,8,04933X,Juárez Celman,346,1,,,impugnados,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2665125,,DIPUTADOS NACIONALES,78,Formosa,JARDIN DE INTANTES FATIMA,15-11-2021 02:25,3,0008A,9,1,00561X,Formosa,334,96,,,comando,0
2665126,,DIPUTADOS NACIONALES,78,Formosa,JARDIN DE INTANTES FATIMA,15-11-2021 02:25,3,0008A,9,1,00561X,Formosa,334,96,,,impugnados,0
2665127,JUNTOS POR FORMOSA LIBRE,DIPUTADOS NACIONALES,78,Formosa,JARDIN DE INTANTES FATIMA,15-11-2021 02:25,3,0008A,9,1,00561X,Formosa,334,96,502.0,146.0,positivo,133
2665128,FRENTE DE TODOS,DIPUTADOS NACIONALES,78,Formosa,JARDIN DE INTANTES FATIMA,15-11-2021 02:25,3,0008A,9,1,00561X,Formosa,334,96,501.0,145.0,positivo,112


## Amount of blank votes

In [9]:
# Share of blank votes per district: blank / (positive + blank)
is_positive = df_diputados["tipoVoto"] == "positivo"
is_blank = df_diputados["tipoVoto"] == "blancos"

dict_blank = {}
for district in districts:
    in_district = df_diputados["Distrito"] == district

    positive_total = df_diputados.loc[in_district & is_positive, "votos"].sum()
    blank_total = df_diputados.loc[in_district & is_blank, "votos"].sum()

    dict_blank[district] = blank_total/(positive_total + blank_total)

df_blank = pd.DataFrame({"blank":dict_blank})

In [14]:
color_map_name = "RdPu"

# Add the "linear" column first, then the "color_hex" column derived from it
df_blank = appendHexColors(
    appendLinearValues(df_blank, "blank"),
    "linear",
    color_map_name,
)

# Collect the legend values and write the coloured map to images/blank.svg
dict_min_max = createMinMaxDict(df_blank, "blank")
createSvgProvinces(df_blank, dict_min_max, color_map_name, "blank")

## Which party won in each district

In [2]:
# Party with the most votes in each district (idxmax works column by column)
winners_by_district = df_votes_tot.idxmax()
for district in districts:
    print(f"{district}: {winners_by_district[district]}")

Cordoba: JUNTOS POR EL CAMBIO
Corrientes: ECO + VAMOS CORRIENTES
Salta: FRENTE DE TODOS
Santa_Cruz: CAMBIA SANTA CRUZ
Chaco: FRENTE DE TODOS
Entre_Rios: JUNTOS POR ENTRE RÍOS
Misiones: FRENTE JUNTOS POR EL CAMBIO
Santa_Fe: JUNTOS POR EL CAMBIO
Ciudad_Autonoma_de_Buenos_Aires: JUNTOS POR EL CAMBIO
San_Juan: FRENTE DE TODOS - TODOS SAN JUAN
Tucuman: FRENTE DE TODOS
Buenos_Aires: JUNTOS
La_Pampa: JUNTOS POR EL CAMBIO
Neuquen: MOVIMIENTO POPULAR NEUQUINO
Rio_Negro: JUNTOS SOMOS RIO NEGRO
Tierra_del_Fuego,_Antartida_e_Islas_del_Atlantico_Sur: FRENTE DE TODOS
Jujuy: CAMBIA JUJUY
Mendoza: CAMBIA MENDOZA
Chubut: JUNTOS POR EL CAMBIO CHUBUT
La_Rioja: FRENTE DE TODOS
San_Luis: UNIDOS POR SAN LUIS
Catamarca: FRENTE DE TODOS
Santiago_del_Estero: FRENTE CIVICO POR SANTIAGO
Formosa: FRENTE DE TODOS
