This notebook is used to clean, organize and unify all of the electoral data for Catalunya.

# 1. Libraries

In [3]:
import pandas as pd
import os
import re

cwd = os.getcwd()

# 2. Importing the data

Unlike in the Catalonia case, here, our data is unified in one file that includes participation, turnout, and voting outcomes. The only issue we encounter is that in 2023 the data publication format changed for both general and regional elections, so we have to change that.
* For the regional elections, it's just the column names that are different and, also, there are two additional columns ('COD_MUNIPROVI','COD_MESA'), and 'NUM_ELEC' is not a column, so we drop and add these columns, respectively, and we rename the entire column list so it matches the column names in the other years' dataframes, for the concatenation.

* For the general elections, the data is presented completely differently: the votes to each party are presented as columns, so each CUSEC (the most granular territorial section for which votes are counted) only represents one row. In the previous years, each CUSEC was repeated for as many rows as parties in that district (1 CUSEC instance = 1 district) had been received votes. To fix this, we melt the 2023 dataframe on all the columns that we want to mantain (those common to the other datasets). We then continue with the process normally.

In [48]:
# Define the base directory where the subfolders are located
base_path = os.path.join(cwd, 'data/1_election_outcomes/comunitat_valenciana_results')

# Compile regular expressions to match the two file types
# The regex captures the year (one or more digits) and the single digit
pattern_reg = re.compile(r"resultados-elecciones-autonomicas-(\d)-(\d{4}).csv$")
pattern_gen = re.compile(r"resultados-elecciones-generales-(\d)-(\d{4}).csv$")

# Dictionaries to store the DataFrames
dfs_reg = {}
dfs_gen = {}

# Walk through the folder structure
for root, dirs, files in os.walk(base_path):
    for file in files:
        file_path = os.path.join(root, file)
        # Match to either regional or state election file name pattern
        match_reg = pattern_reg.match(file)
        match_gen = pattern_gen.match(file)

        
        
        if match_reg:
            # Save DataFrame in the dictionary with filename as key (or any other unique key)
            digit, year = match_reg.group(1), match_reg.group(2)
            if year == '2023': 
                df = pd.read_csv(file_path, delimiter=',', encoding='utf-8', encoding_errors='ignore')
                df.columns = ['ANYO', 'COD_PROV', 'PROVINCIA','COD_MUNICIPIO', 'COD_MUNIPROVI', 
                              'MUNICIPIO', 'COD_COMARCA', 'COMARCA', 'COD_MESA', 'DISTRITO', 'SECCION', 
                              'MESA', 'CENSO', 'VOTANTES', 'NULOS', 'BLANCOS', 'VOTOS', 
                              'CANDIDATO_COD', 'CANDIDATO_SIGLAS','CANDIDATO_DESC']
                df['NUM_ELEC'] = digit
                df.drop(labels=['COD_MUNIPROVI','COD_MESA'], axis=1, inplace=True)
            else:
                df = pd.read_csv(file_path, delimiter=';', encoding='utf-8', encoding_errors='ignore')
            dfs_reg[file] = df

        elif match_gen:
            digit, year = match_gen.group(1), match_gen.group(2)
            if year == '2023':
                df_wide = pd.read_csv(file_path, delimiter=';', encoding='utf-8', encoding_errors='ignore')
                df_wide = df_wide.iloc[:,0:23]
                df_wide['ANYO'] = year
                df_wide['ANYO'] = df_wide['ANYO'].astype(int)
                df_wide['NUM_ELEC'] = digit
                df_long = df_wide.melt(id_vars=['ANYO', 'NUM_ELEC', 'COD_PROV', 'PROVINCIA', 'MUNICIPIO', 'DISTRITO',
                                                'MESA','CANDIDATURAS', 'CENSO', 'VOTANTES', 
                                               'VALIDOS', 'BLANCOS','ABSTENCIONES', 'NULOS'], 
                           var_name='CANDIDATO_SIGLAS', 
                           value_name='VOTOS')
                df = df_long
            elif year != '2023':
                df = pd.read_csv(file_path, delimiter=';', encoding='utf-8', encoding_errors='ignore')
            # Save DataFrame in the dictionary with filename as key
            dfs_gen[file] = df

# Print keys to check if everything is okay
print(dfs_reg.keys())
print(dfs_gen.keys())

# Create data frames with the results for all elections
df_reg = pd.concat(dfs_reg, ignore_index=True)
df_gen = pd.concat(dfs_gen, ignore_index=True)

dict_keys(['resultados-elecciones-autonomicas-1-1987.csv', 'resultados-elecciones-autonomicas-1-1991.csv', 'resultados-elecciones-autonomicas-1-1995.csv', 'resultados-elecciones-autonomicas-1-1999.csv', 'resultados-elecciones-autonomicas-1-2003.csv', 'resultados-elecciones-autonomicas-1-2007.csv', 'resultados-elecciones-autonomicas-1-2011.csv', 'resultados-elecciones-autonomicas-1-2015.csv', 'resultados-elecciones-autonomicas-1-2019.csv', 'resultados-elecciones-autonomicas-1-2023.csv'])
dict_keys(['resultados-elecciones-generales-1-1986.csv', 'resultados-elecciones-generales-1-1989.csv', 'resultados-elecciones-generales-1-1993.csv', 'resultados-elecciones-generales-1-1996.csv', 'resultados-elecciones-generales-1-2000.csv', 'resultados-elecciones-generales-1-2004.csv', 'resultados-elecciones-generales-1-2008.csv', 'resultados-elecciones-generales-1-2011.csv', 'resultados-elecciones-generales-1-2015.csv', 'resultados-elecciones-generales-1-2016.csv', 'resultados-elecciones-generales-1-20

In [5]:
df_reg.head()

Unnamed: 0,ANYO,NUM_ELEC,COD_PROV,PROVINCIA,COD_COMARCA,COMARCA,COD_MUNICIPIO,MUNICIPIO,DISTRITO,SECCION,...,CENSO,VOTANTES,VALIDOS,BLANCOS,ABSTENCIONES,NULOS,CANDIDATO_COD,CANDIDATO_SIGLAS,CANDIDATO_DESC,VOTOS
0,1987,1,3.0,Alicante,30.0,La Marina Alta,3001,"Atzúbia, l'",1,1,...,443.0,376,376.0,2,67.0,0,1,PSOE,Partido Socialista Obrero Español,20
1,1987,1,3.0,Alicante,30.0,La Marina Alta,3001,"Atzúbia, l'",1,1,...,443.0,376,376.0,2,67.0,0,6,PTE-UC,Partido de los Trabajadores de España-Unidad C...,2
2,1987,1,3.0,Alicante,30.0,La Marina Alta,3001,"Atzúbia, l'",1,1,...,443.0,376,376.0,2,67.0,0,7,LV,Los Verdes,8
3,1987,1,3.0,Alicante,30.0,La Marina Alta,3001,"Atzúbia, l'",1,1,...,443.0,376,376.0,2,67.0,0,8,PDP-CV,Partido Democráta Popular-Centristas Valencianos,1
4,1987,1,3.0,Alicante,30.0,La Marina Alta,3001,"Atzúbia, l'",1,1,...,443.0,376,376.0,2,67.0,0,9,CEV,Coalición Electoral Valenciana,0


In [37]:
df_gen.head()

Unnamed: 0,ANYO,NUM_ELEC,COD_PROV,PROVINCIA,COD_COMARCA,COMARCA,COD_MUNICIPIO,MUNICIPIO,DISTRITO,SECCION,...,CENSO,VOTANTES,VALIDOS,BLANCOS,ABSTENCIONES,NULOS,CANDIDATO_COD,CANDIDATO_SIGLAS,CANDIDATO_DESC,VOTOS
0,1986,1,3.0,Alicante,30.0,La Marina Alta,3001.0,"Atzúbia, l'",1.0,1.0,...,438.0,364.0,355.0,1.0,74.0,9.0,16.0,CP,Coalición Popular (AP-PDP-PL),209.0
1,1986,1,3.0,Alicante,30.0,La Marina Alta,3001.0,"Atzúbia, l'",1.0,1.0,...,438.0,364.0,355.0,1.0,74.0,9.0,27.0,FE-JONS,Falange Española de las JONS,1.0
2,1986,1,3.0,Alicante,30.0,La Marina Alta,3001.0,"Atzúbia, l'",1.0,1.0,...,438.0,364.0,355.0,1.0,74.0,9.0,30.0,IU,Coalición Izquierda Unida,3.0
3,1986,1,3.0,Alicante,30.0,La Marina Alta,3001.0,"Atzúbia, l'",1.0,1.0,...,438.0,364.0,355.0,1.0,74.0,9.0,36.0,LV,Los Verdes,0.0
4,1986,1,3.0,Alicante,30.0,La Marina Alta,3001.0,"Atzúbia, l'",1.0,1.0,...,438.0,364.0,355.0,1.0,74.0,9.0,37.0,MUC,Mesa para la Unidad de los Comunistas,0.0


## Cleaning + creating CUSEC

We now clean the dataframes for uniformity with the Catalunya results by applying these steps:

1. name columns with same names as the Catalonia final dataframe

2. drop columns that are not relevant
    * 'Comarca' columns do not add any value, because they are terrritorial divisions that do not add any new value (it's redundant because if we have the municipality, we do not need to know the *comarca*)
    * 'Mesa' is the "table" in which voters turn in their ballot, this division is too granular and does not add any value to our analysis as it is a "random" division within a territory. 

3. We compute the **turnout rate** and the **blank ballot rate**. Recall the meaning of these variables:
    * **turnout rate**: proportion of people who cast a valid vote out of the total census
    * **blank ballot rate**: proportion of blank votes out of the total participants (turnout, not census)

In [50]:
new_column_names = ['year_election', 'election_number', 'code_province', 'province_name', 'COD_COMARCA', 'COMARCA',
                    'code_municipality',  'municipality_name', 'code_district', 'code_section', 'MESA', 'votes_parties', 
                    'census', 'turnout', 'valid_votes', 'blank_ballots', 'abstention', 'null_votes', 
                    'candidate_code', 'candidate_siglas', 'candidate_fullname', 'candidate_votes']

unnecessary_columns = ['COD_COMARCA', 'COMARCA', 'MESA']

for df in df_reg, df_gen:

    # UNIFORMLY NAMING COLUMNS + DROPPING UNNECESSARY ONES
    df.columns = new_column_names
    df.drop(labels=unnecessary_columns, axis=1, inplace=True)

    # CREATING RATE COLUMNS
    df['turnout_rate'] = df.turnout / df.census
    df['blanc_ballot_rate'] = df.blank_ballots / df.turnout

    # PROPERLY FORMATTING EACH COMPONENT OF THE CODE
    code_2_columns = ['code_province', 'code_district']
    code_3_columns = ['code_municipality', 'code_section']
    df[code_2_columns] = df[code_2_columns].fillna(0).astype(int).astype(str).apply(lambda x: x.str.zfill(2))
    df[code_3_columns] = df[code_3_columns].fillna(0).astype(int).astype(str).apply(lambda x: x.str.zfill(3))

    # CREATING CUSEC CODE
    df['CUSEC'] = df.code_province + df.code_municipality + df.code_district + df.code_section


In [10]:
df_reg.head()

Unnamed: 0,year_election,election_number,code_province,province_name,code_municipality,municipality_name,code_district,code_section,votes_parties,census,...,blank_ballots,abstention,null_votes,candidate_code,candidate_siglas,candidate_fullname,candidate_votes,turnout_rate,blanc_ballot_rate,CUSEC
0,1987,1,3,Alicante,3001,"Atzúbia, l'",1,1,374.0,443.0,...,2,67.0,0,1,PSOE,Partido Socialista Obrero Español,20,0.848758,0.005319,3300101001
1,1987,1,3,Alicante,3001,"Atzúbia, l'",1,1,374.0,443.0,...,2,67.0,0,6,PTE-UC,Partido de los Trabajadores de España-Unidad C...,2,0.848758,0.005319,3300101001
2,1987,1,3,Alicante,3001,"Atzúbia, l'",1,1,374.0,443.0,...,2,67.0,0,7,LV,Los Verdes,8,0.848758,0.005319,3300101001
3,1987,1,3,Alicante,3001,"Atzúbia, l'",1,1,374.0,443.0,...,2,67.0,0,8,PDP-CV,Partido Democráta Popular-Centristas Valencianos,1,0.848758,0.005319,3300101001
4,1987,1,3,Alicante,3001,"Atzúbia, l'",1,1,374.0,443.0,...,2,67.0,0,9,CEV,Coalición Electoral Valenciana,0,0.848758,0.005319,3300101001


# 3. Creating Indictators

## Creating left-right index of political parties

### General elections

First, we compute the proportion of votes that each row represents, out of the total amount of votes in the corresponding election.

In [46]:
df_check = df_gen.groupby(["year_election", 'election_number'])["candidate_votes"].sum()
print(df_check[df_check == 0])  # Look for years with no votes

Series([], Name: candidate_votes, dtype: float64)


In [52]:
# Aggregate total votes per party per year and number of election
df_agg_gen = df_gen.groupby(["year_election", "election_number", "candidate_siglas", "candidate_fullname"], as_index=False)["candidate_votes"].sum()

# Compute total votes per year and election number
total_votes_per_year = df_gen.groupby(["year_election", 'election_number'])["candidate_votes"].sum().reset_index()
total_votes_per_year = total_votes_per_year.rename(columns={"candidate_votes": "total_votes_year"})

# Merge total votes per year into the aggregated DataFrame
df_agg_gen = df_agg_gen.merge(total_votes_per_year, on=["year_election", "election_number"], how='outer')

# Compute vote proportion for each party
df_agg_gen["vote_proportion"] = df_agg_gen["candidate_votes"] / df_agg_gen["total_votes_year"]

df_agg_gen.sample(5)

  key_col = Index(lvals).where(~mask_left, rvals)


Unnamed: 0,year_election,election_number,candidate_siglas,candidate_fullname,candidate_votes,total_votes_year,vote_proportion
247,2016,1.0,SOMVAL,Som Valencians,6612.0,2569820.0,0.002573
119,2000,1.0,UC-CDS,Unión Centrista-Centro Democrático y Social,913.0,2402237.0,0.00038
207,2011,1.0,PSOE,Partido Socialista Obrero Español,696050.0,2575002.0,0.27031
164,2008,1.0,EUPV-IR,Esquerra Unida del País Valencià-Izquierda Rep...,74015.0,2705206.0,0.02736
26,1989,1.0,MV,Movimiento Verde,1369.0,2110743.0,0.000649


We filter by a treshold of 0.5% of total votes the parties that we are going to consider for our analysis.

In [53]:
print('Parties before filtering:', df_agg_gen['candidate_siglas'].nunique())
df_agg_gen_filtered = df_agg_gen[df_agg_gen['vote_proportion'] >= 0.005]

print('Parties after filtering:',df_agg_gen_filtered['candidate_siglas'].nunique())

Parties before filtering: 146
Parties after filtering: 41


In [54]:
unique_parties_gen = df_agg_gen_filtered[['candidate_siglas', 'candidate_fullname']].drop_duplicates()

print('----- UNIQUE PARTIES IN SPANISH GENERAL ELECTIONS (w/ more than 0.5% votes) FROM 1986 TO 2023 -----')
for index, row in unique_parties_gen.iterrows():
    print(f"{row['candidate_siglas']}: {row['candidate_fullname']}")

----- UNIQUE PARTIES IN SPANISH GENERAL ELECTIONS (w/ more than 0.5% votes) FROM 1986 TO 2023 -----
CDS: Centro Democrático y Social
CP: Coalición Popular (AP-PDP-PL)
IU: Coalición Izquierda Unida
LV: Los Verdes
MUC: Mesa para la Unidad de los Comunistas
PRD: Partido Reformista Democrático
PSOE: Partido Socialista Obrero Español
UPV: Unitat del Poble Valencià
UV: Unió Valenciana
IU-EU: Izquierda Unida-Esquerra Unida
LV-LV: Los Verdes-Lista Verde
LVE: Los Verdes Ecologistas
PP: Partido Popular
PTE-UC: Partido de los Trabajadores de España-Unidad Comunista
RUIZ-MATEOS: Agrupación Ruiz-Mateos
EU-PV: Esquerra Unida del Pais Valencià
LV: Los Verdes/Els Verds
PP: Partido Popular/Partit Popular
EUPV: Esquerra Unida del País Valencià
EV: Els Verds-Los Verdes
UPV-BN: Unitat del Poble Valencià-Bloc Nacionalista
BLOC-VERDS: Bloc Nacionalista Valencià-Els Verds-Valencians pel Canvi
LVE: Los Verdes-Ecopacifistas
PSOE-Prog.: Partido Socialista Obrero Español
PSOE-Prog.: Partido Socialista Obrero Esp

Finally, we create the indices using the same criteria: **-3** being extreme left and **3** being extreme right.
We also include an *independentist* index because, while it is not a generalized movement, some parties do support ideas of independence of the whole Països Catalans region or very strong Valencian nationalism, and we believe this can be an important factor in voting behaviour and, thus, a confounder.

In [55]:
political_index_gen = {
    'LV': -2 , 'LV-LV': -2, 'LVE': -2, 'EV': -2, 'LV-E': -2, 'LV-LV': -2, # Green Party - quite left, not extreme
    'PODEMOS-COMPROMÍS': -2,'PODEMOS-COMPROMÍS-EUPV': -2, 'COMPROMÍS-Q': -2, # Podemos/Compromís - left
    'MÉS COMPROMÓS': -2, 'PODEMOS-EUPV': -2, 
    'VOX': 3,
    'BLOC-VERDS': -2, 'BLOC-EV': -2, 'BLOC-IDPV-EV-EE': -2, 'UPV-BN': -2, # Bloc Nacionalista - quite left, not extreme
    'PSOE-Prog.': -1, 'PSOE': -1, # PSOE - left but quite more moderate than the above
    'ERPV': -2, 'EUPV-IR': -2, # Esquerra Republicana - left
    "C's": 2,  'Cs': 2, 'PP': 2, 'RUIZ-MATEOS': 2, 'CP': 2, 'UV': 2,# parties similar/adjacent to PP - right, not extreme
    'CDS': 1, 'PRD': 1, 'UPyD': 1, # moderate alternatives to the right parties above
    'P.CANNABIS': -3, 'MUC': -3, 'UPV': -3, 'PTE-UC': -3,# extreme left
    'IU': -3, 'IU-EU': -3, 'EUPV-EV': -3, 'EUPV-LV': -3, 'EU-PV': -3, 'EUPV': -3,  'EUPV-UPeC': -3, 'ENTESA': -3, # Esquerra Unida / Izquierda Unida - extreme left / Marxist
    'PACMA': -2,
}
nationalist_index_gen = {
    'LV': 0 , 'LV-LV': 0, 'LVE': 0, 'EV': 0, 'LV-E': 0, 'LV-LV': 0, # Green Party
    'PODEMOS-COMPROMÍS': 0, 'PODEMOS-COMPROMÍS-EUPV': 0, 'COMPROMÍS-Q': 0, # Podemos/Compromís
    'MÉS COMPROMÓS': 0, 'PODEMOS-EUPV': 0, 
    'VOX': 0,
    'BLOC-VERDS': 1, 'BLOC-EV': 1, 'BLOC-IDPV-EV-EE': 1, 'UPV-BN': 1, # Bloc Nacionalista - nationalist
    'PSOE-Prog.': 0, 'PSOE': 0, # PSOE - moderate left
    'ERPV': 1, 'EUPV-IR': 0, # Esquerra Republicana del País Valencià - pro-independence
    "C's": 0, 'Cs': 0, 'PP': 0, 'RUIZ-MATEOS': 0, 'CP': 0, 'UV': 0,  # Right-wing, not nationalist
    'CDS': 0, 'PRD': 0, 'UPyD': 0, # Moderate right parties
    'P.CANNABIS': 0, 'MUC': 0, 'UPV': 1, 'PTE-UC': 0, # UPV is nationalist
    'IU': 0, 'IU-EU': 0, 'EUPV-EV': 0, 'EUPV-LV': 0, 'EU-PV': 0, 'EUPV': 0, 'EUPV-UPeC': 0, 'ENTESA': 0, # IU - leftist, but not nationalist
    'PACMA': 0, # PACMA is not nationalist
}

df_agg_gen_filtered['party_index'] = df_agg_gen_filtered['candidate_siglas'].str.strip().map(political_index_gen)
df_agg_gen_filtered['indep_bool'] = df_agg_gen_filtered['candidate_siglas'].str.strip().map(nationalist_index_gen)

df_agg_gen_filtered.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_agg_gen_filtered['party_index'] = df_agg_gen_filtered['candidate_siglas'].str.strip().map(political_index_gen)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_agg_gen_filtered['indep_bool'] = df_agg_gen_filtered['candidate_siglas'].str.strip().map(nationalist_index_gen)


Unnamed: 0,year_election,election_number,candidate_siglas,candidate_fullname,candidate_votes,total_votes_year,vote_proportion,party_index,indep_bool
0,1986,1.0,CDS,Centro Democrático y Social,182549.0,2073585.0,0.088035,1.0,0.0
1,1986,1.0,CP,Coalición Popular (AP-PDP-PL),599438.0,2073585.0,0.289083,2.0,0.0
4,1986,1.0,IU,Coalición Izquierda Unida,98380.0,2073585.0,0.047444,-3.0,0.0
5,1986,1.0,LV,Los Verdes,11780.0,2073585.0,0.005681,-2.0,0.0
6,1986,1.0,MUC,Mesa para la Unidad de los Comunistas,30054.0,2073585.0,0.014494,-3.0,0.0


### Regional elections

We repeat the same process as above

In [56]:
# Aggregate total votes per party per year and number of election
df_agg_reg = df_reg.groupby(["year_election", "election_number", "candidate_siglas", "candidate_fullname"], as_index=False)["candidate_votes"].sum()

# Compute total votes per year and election number
total_votes_per_year = df_reg.groupby(["year_election", 'election_number'])["candidate_votes"].sum().reset_index()
total_votes_per_year = total_votes_per_year.rename(columns={"candidate_votes": "total_votes_year"})

# Merge total votes per year into the aggregated DataFrame
df_agg_reg = df_agg_reg.merge(total_votes_per_year, on=["year_election", "election_number"])

# Compute vote proportion for each party
df_agg_reg["vote_proportion"] = df_agg_reg["candidate_votes"] / df_agg_reg["total_votes_year"]

df_agg_reg.sample(10)

Unnamed: 0,year_election,election_number,candidate_siglas,candidate_fullname,candidate_votes,total_votes_year,vote_proportion
151,2015,1,JUNTS,Los Verdes Ecopacifistas-Junts,271,2437261,0.000111
28,1991,1,UPV,Unitat del Poble Valencià,73580,1983209,0.037101
202,2023,1,PUM+J,POR UN MUNDO MAS JUSTO,1373,2435951,0.000564
132,2011,1,UxV,Units x València,3203,2387232,0.001342
29,1991,1,UV,Unión Valenciana,207561,1983209,0.104659
162,2015,1,PSOE,Partido Socialista Obrero Español,508448,2437261,0.208615
143,2015,1,EUPV-VERDS-ERPV-AS:AC,Esquerra Unida País Valencià-Los Verdes-Esquer...,35255,2437261,0.014465
183,2019,1,Poble Democràtic,Poble Democràtic PODEM,2922,2630123,0.001111
91,2007,1,PCPE,Partido Comunista de los Pueblos de España,3702,2398106,0.001544
167,2015,1,UPyD,Unión Progreso y Democracia,28687,2437261,0.01177


In [16]:
print('Parties before filtering:', df_agg_reg['candidate_siglas'].nunique())
df_agg_reg_filtered = df_agg_reg[df_agg_reg['vote_proportion'] >= 0.005]

print('Parties after filtering:',df_agg_reg_filtered['candidate_siglas'].nunique())

Parties before filtering: 132
Parties after filtering: 44


In [32]:
unique_parties_reg = df_agg_reg_filtered[['candidate_siglas', 'candidate_fullname']].drop_duplicates()

print('----- UNIQUE PARTIES IN GENERALITAT VALENCIANA ELECTIONS (w/ more than 0.5% votes) FROM 1986 TO 2023 -----')
for index, row in unique_parties_reg.iterrows():
    print(f"{row['candidate_siglas']}: {row['candidate_fullname']}")

----- UNIQUE PARTIES IN GENERALITAT VALENCIANA ELECTIONS (w/ more than 0.5% votes) FROM 1986 TO 2023 -----
CDS: Centro Democrático y Social
CEV: Coalición Electoral Valenciana
FAP: Federación de Partidos de Alianza Popular
IU-UPV: Coalición Izquierda Unida-Unitat del Poble Valencià
LV: Los Verdes
PDP-CV: Partido Democráta Popular-Centristas Valencianos
PSOE: Partido Socialista Obrero Español
PTE-UC: Partido de los Trabajadores de España-Unidad Comunista
UV: Unión Valenciana
EUPV: Esquerra Unida del País Valencià
PP: Partido Popular
PSPV-PSOE: Partido Socialista Obrero Español
UPV: Unitat del Poble Valencià
EU-EV: Esquerra Unida-Els Verds
UPV-BN: Unitat del Poble Valencià-Bloc Nacionalista
UV-FICVA-CCV: Unión Valenciana-Independents-Centristes
BNV-EV: Bloc Nacionalista Valencià-Els Verds
PSOE-P: Coalición Electoral PSOE-Progresistas
UV: Unió Valenciana
BLOC-EV: Bloc Nacionalista Valencià-Esquerra Verda
ENTESA: Esquerra Unida+Els Verds/Los Verdes+Esquerra Valenciana
L'ENTESA: Esquerra Un

In [18]:
political_index_reg = {
    'LV': -2 , 'VERDES': -2, 'EU-EV': -2, 'VERDS': -2, 'EUPV-VERDS-ERPV-AS:AC': -2, 'EUPV-EV-ERPV-AS:AC': -2, # Green Party - quite left, not extreme
    'COMPROMÍS PV': -2,  'UNIDES PODEM-EUPV': -2, 'COMPROMS': -2, 'COALICIÓ COMPROMÍS': -2, 
    'PODEMOS/PODEM': -2, 'COMPROMÍS': -2, 'COMPROMíS': -2, 'UP-EUPV': -2, # Podemos/Compromís - left
    'BLOC-EV': -2, 'UPV-BN': -2, 'IU-UPV': -2, 'BNV-EV': -2, # Bloc Nacionalista - quite left, not extreme
    'PSOE': -1, 'PSPV-PSOE': -1, 'PSOE-P': -1, 'P.S.O.E.': -1, # PSOE - left but quite more moderate than the above
    'GANEMOS': -2, # left coalition
    'VOX': 3, # extreme right
    "C's": 2, 'PP': 2,  'UV': 2, 'C Va': 2, 'Cs': 2, 'PARTIDO POPULAR': 2, 'FAP': 2, 'CEV': 2, # parties similar/adjacent to PP or Ciudadanos - right, not extreme
    'CDS': 1, 'UPyD': 1, 'UNIO-UNIÓN': 1, 'PDP-CV': 1, 'UV-LVE': 1, # moderate alternatives to the right parties above
    'UV-FICVA-CCV': 2, # (valencia) nationalist right
    'UPV': -3, 'PTE-UC': -3,# extreme left
    'EUPV': -3, 'ENTESA': -3, "L'ENTESA": -3, # Esquerra Unida / Izquierda Unida - extreme left / Marxist
    'PACMA': -2,
}


nationalist_index_reg = {
    'LV': 0 , 'VERDES': 0, 'EU-EV': 0, 'VERDS': 0, 'EUPV-VERDS-ERPV-AS:AC': 0, 'EUPV-EV-ERPV-AS:AC': 0, # Green Party - not nationalist
    'COMPROMÍS PV': 0,  'UNIDES PODEM-EUPV': 0, 'COMPROMS': 0, 'COALICIÓ COMPROMÍS': 0, 
    'PODEMOS/PODEM': 0, 'COMPROMÍS': 0, 'COMPROMíS': 0, 'UP-EUPV': 0, # Podemos/Compromís - not nationalist
    'BLOC-EV': 1, 'UPV-BN': 1, 'IU-UPV': 1, 'BNV-EV': 1, # Bloc Nacionalista - nationalist
    'PSOE': 0, 'PSPV-PSOE': 0, 'PSOE-P': 0, 'P.S.O.E.': 0, # PSOE - not nationalist
    'GANEMOS': 0, # left coalition, not nationalist
    'VOX': 0, # extreme right but not nationalist for Valencia
    "C's": 0, 'PP': 0,  'UV': 0, 'C Va': 0, 'Cs': 0, 'PARTIDO POPULAR': 0, 'FAP': 0, 'CEV': 0, # not nationalist
    'CDS': 0, 'UPyD': 0, 'UNIO-UNIÓN': 0, 'PDP-CV': 0, 'UV-LVE': 0, # not nationalist
    'UV-FICVA-CCV': 1, # nationalist
    'UPV': 1, 'PTE-UC': 0, # UPV was nationalist, PTE-UC not
    'EUPV': 0, 'ENTESA': 0, "L'ENTESA": 0, # not nationalist
    'PACMA': 0, # animal rights, not nationalist
}

df_agg_reg_filtered['party_index'] = df_agg_reg_filtered['candidate_siglas'].str.strip().map(political_index_reg)
df_agg_reg_filtered['indep_bool'] = df_agg_reg_filtered['candidate_siglas'].str.strip().map(nationalist_index_reg)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_agg_reg_filtered['party_index'] = df_agg_reg_filtered['candidate_siglas'].str.strip().map(political_index_reg)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_agg_reg_filtered['indep_bool'] = df_agg_reg_filtered['candidate_siglas'].str.strip().map(nationalist_index_reg)


## Votes to incumbent government

### General elections

The idea in this section is to create a column with the vote proportion per location and year to the incumbent government.
- General elections: see https://es.wikipedia.org/wiki/Anexo:Presidentes_del_Gobierno_de_Espa%C3%B1a. Incumbent government for each election (i.e., party in the government before that election):
  - 1996: PSOE,
  - 2000: PP,
  - 2004: PP,
  - 2008: PSOE,
  - 2011: PSOE,
  - 2015: PP,
  - 2016: PP,
  - 2019: PP (but Sánchez won a vote of no confidence towards the PP government) - will consider as incumbent PP for both elections of 2019,
  - 2023: PSOE + UP.

In [19]:
political_index_gen = {
    'LV': -2 , 'LV-LV': -2, 'LVE': -2, 'EV': -2, 'LV-E': -2, 'LV-LV': -2, # Green Party - quite left, not extreme
    'PODEMOS-COMPROMÍS': -2,'PODEMOS-COMPROMÍS-EUPV': -2, 'COMPROMÍS-Q': -2, # Podemos/Compromís - left
    'MÉS COMPROMÓS': -2, 'PODEMOS-EUPV': -2, 
    'VOX': 3,
    'BLOC-VERDS': -2, 'BLOC-EV': -2, 'BLOC-IDPV-EV-EE': -2, 'UPV-BN': -2, # Bloc Nacionalista - quite left, not extreme
    'PSOE-Prog.': -1, 'PSOE': -1, # PSOE - left but quite more moderate than the above
    'ERPV': -2, 'EUPV-IR': -2, # Esquerra Republicana - left
    "C's": 2,  'Cs': 2, 'PP': 2, 'RUIZ-MATEOS': 2, 'CP': 2, 'UV': 2,# parties similar/adjacent to PP - right, not extreme
    'CDS': 1, 'PRD': 1, 'UPyD': 1, # moderate alternatives to the right parties above
    'P.CANNABIS': -3, 'MUC': -3, 'UPV': -3, 'PTE-UC': -3,# extreme left
    'IU': -3, 'IU-EU': -3, 'EUPV-EV': -3, 'EUPV-LV': -3, 'EU-PV': -3, 'EUPV': -3,  'EUPV-UPeC': -3, 'ENTESA': -3, # Esquerra Unida / Izquierda Unida - extreme left / Marxist
    'PACMA': -2,
}

In [34]:
df_agg_gen_filtered[df_agg_gen_filtered['year_election'] == '2023'].candidate_siglas.unique()

array([], dtype=object)

In [20]:
incumbent_parties_by_year_gen = {
    "1996": {"PSC", 'PSOE-Prog.', 'PSOE'},
    "2000": {"PP"},
    "2004": {"PP"},
    "2008": {"PSC", 'PSOE-Prog.', 'PSOE'},
    "2011": {"PSC", 'PSOE-Prog.', 'PSOE'},
    "2015": {"PP"},
    "2016": {"PP"},
    "2019": {"PP"},
    "2023": {"PSC", 'PSOE-Prog.', 'PSOE'} # ARREGLAR: "SUMAR - ECP"}
}

def is_incumbent(row):
    year = str(row["year_election"])  # Convert year to string for dictionary lookup
    party = row["candidate_siglas"]
    return 1 if year in incumbent_parties_by_year_gen and party in incumbent_parties_by_year_gen[year] else 0

df_agg_gen_filtered["incumbent_party"] = df_agg_gen_filtered.apply(is_incumbent, axis=1)

df_agg_gen_filtered.tail(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_agg_gen_filtered["incumbent_party"] = df_agg_gen_filtered.apply(is_incumbent, axis=1)


Unnamed: 0,year_election,election_number,candidate_siglas,candidate_fullname,candidate_votes,total_votes_year,vote_proportion,party_index,indep_bool,incumbent_party
242,2016,1,PODEMOS-COMPROMÍS-EUPV,Compromís-Podemos-Eupv: A la Valenciana,655895.0,2569820.0,0.25523,-2.0,0.0,0
243,2016,1,PP,Partido Popular,917405.0,2569820.0,0.356992,2.0,0.0,1
244,2016,1,PSOE,Partido Socialista Obrero Español,537984.0,2569820.0,0.209347,-1.0,0.0,0
255,2019,2,Cs,Ciudadanos-Partido de la Ciudadanía,195367.0,2506637.0,0.07794,2.0,0.0,0
258,2019,2,MÉS COMPROMÍS,Més Compromís,175016.0,2506637.0,0.069821,,,0
260,2019,2,PACMA,Partido Animalista contra el Maltrato Animal,27125.0,2506637.0,0.010821,-2.0,0.0,0
264,2019,2,PODEMOS-EUPV,Unidas Podemos-Unides Podem,337714.0,2506637.0,0.134728,-2.0,0.0,0
265,2019,2,PP,Partido Popular,583108.0,2506637.0,0.232626,2.0,0.0,1
266,2019,2,PSOE,Partido Socialista Obrero Español,697276.0,2506637.0,0.278172,-1.0,0.0,0
271,2019,2,VOX,VOX,466900.0,2506637.0,0.186266,3.0,0.0,0


### Regional elections

Below we map the incumbent parties for the elections: PP ruled in valencia for 20 consecutive years, so it is the incumbent party up to 2015. In the 2015 election, the left coalition of PSOE + Compromís took over, being substituted by PSOE + Unides Podem in 2019. (Notice that a party becomes the incumbent party in the successive election, so the party elected in 2019 is the incumbent party of 2023.)

Notice also that in the dictionary we included the `candidate_siglas` value as it appears for that year, as every year the party initials were listed differently for some parties (for instance for PSOE: `PSOE`, `P.S.O.E`, `PSV` (Valencian branch of the party))

In [23]:
# Define the dictionary with integer keys
incumbent_parties_by_year_reg = {
    "1999": {"PP"}, #PP
    "2003": {"PP"}, #PP
    "2006": {"PP"}, #PP
    "2011": {"PP"}, #PP
    "2015": {"PP"}, #PP
    "2015": {"PP"}, #PP
    "2019": {'P.S.O.E.', 'COMPROMíS'},  # PSOE + COMPROMÍS
    "2023": {"PSOE",'UP-EUPV'} # PSOE + UNIDES PODEM
}

def is_incumbent(row):
    year = str(row["year_election"])  # Convert year to string for dictionary lookup
    party = row["candidate_siglas"]
    return 1 if year in incumbent_parties_by_year_reg and party in incumbent_parties_by_year_reg[year] else 0

df_agg_reg_filtered["incumbent_party"] = df_agg_reg_filtered.apply(is_incumbent, axis=1)

df_agg_reg_filtered[['year_election', 'election_number', 'candidate_siglas', 'party_index', 'indep_bool','incumbent_party']].tail(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_agg_reg_filtered["incumbent_party"] = df_agg_reg_filtered.apply(is_incumbent, axis=1)


Unnamed: 0,year_election,election_number,candidate_siglas,party_index,indep_bool,incumbent_party
182,2019,1,PP,2,0,0
186,2019,1,UNIDES PODEM-EUPV,-2,0,0
187,2019,1,VOX,3,0,0
191,2023,1,COMPROMS,-2,0,0
193,2023,1,Cs,2,0,0
198,2023,1,PACMA,-2,0,0
200,2023,1,PP,2,0,0
201,2023,1,PSOE,-1,0,1
206,2023,1,UP-EUPV,-2,0,1
207,2023,1,VOX,3,0,0


# 4. Final Index computation

Below we, first, filter the initial dataframe by those parties that we considered relevant and, next, we compute the final indices. We do that by doing a weighted aggregation of the index columns' values by the vote proportion. We do that for each unique combination of `CUSEC` + `year_election` + `election_number`.

In [26]:
# FOR GENERAL ELECTIONS:
df_gen_filtered = df_gen[df_gen['candidate_siglas'].isin(df_agg_gen_filtered['candidate_siglas'])]

df_gen_final = df_gen_filtered.merge(
    df_agg_gen_filtered[['year_election', 'election_number', 'candidate_siglas', 'vote_proportion', 'party_index', 'indep_bool', 'incumbent_party']], 
    on=['year_election', 'election_number', 'candidate_siglas'], 
    how='left')

# FOR REGIONAL ELECTIONS:
df_reg_filtered = df_reg[df_reg['candidate_siglas'].isin(df_agg_reg_filtered['candidate_siglas'])]

df_reg_final = df_reg_filtered.merge(
    df_agg_reg_filtered[['year_election', 'election_number', 'candidate_siglas', 'vote_proportion', 'party_index', 'indep_bool', 'incumbent_party']], 
    on=['year_election', 'election_number', 'candidate_siglas'], 
    how='left')

In [None]:
df_reg_final.columns

['province_name', 'municipality_name', 'votes_parties', 'census', 'turnout', 'valid_votes',
       'blank_ballots', 'abstention', 'null_votes', 'turnout_rate', 'blanc_ballot_rate',]

Index(['year_election', 'election_number', 'code_province', 'province_name',
       'code_municipality', 'municipality_name', 'code_district',
       'code_section', 'votes_parties', 'census', 'turnout', 'valid_votes',
       'blank_ballots', 'abstention', 'null_votes', 'candidate_code',
       'candidate_siglas', 'candidate_fullname', 'candidate_votes',
       'turnout_rate', 'blanc_ballot_rate', 'CUSEC', 'vote_proportion',
       'party_index', 'indep_bool', 'incumbent_party'],
      dtype='object')

In [28]:
#### GENERAL elections ####
df_gen_final['weighted_party_index'] = df_gen_final['party_index'] * df_gen_final['vote_proportion']
df_gen_final['weighted_indep_index'] = df_gen_final['indep_bool'] * df_gen_final['vote_proportion']
df_gen_final['weighted_incumbent_index'] = df_gen_final['incumbent_party'] * df_gen_final['vote_proportion']

# we create and agg_dict with a map of the columns we want to sum (the weighted indices)
agg_dict = {
    'weighted_party_index': 'sum',
    'weighted_indep_index': 'sum',
    'weighted_incumbent_index': 'sum'
}

# + a map of the columns for which we want to keep the first value 
# (these values are equal accross all instances of a unique CUSEC, year and election number combination)
cols_to_keep = ['province_name', 'municipality_name', 'votes_parties', 'census', 'turnout', 'valid_votes',
                'blank_ballots', 'abstention', 'null_votes', 'turnout_rate', 'blanc_ballot_rate',]
for col in cols_to_keep:
    agg_dict[col] = 'first'

# aggregate weighted values to get one row per CUSEC
df_gen_final = df_gen_final.groupby(['year_election', 'election_number', 'CUSEC'], as_index=False).agg(agg_dict)

# rename columns for clarity
rename_map = {'weighted_party_index': 'political_index',
                             'weighted_indep_index': 'indep_index',
                             'weighted_incumbent_index': 'incumbent_index'}
df_gen_final.rename(columns=rename_map, inplace=True)

#### REGIONAL ELECTIONS #### 
# we apply the exact same process 
df_reg_final['weighted_party_index'] = df_reg_final['party_index'] * df_reg_final['vote_proportion']
df_reg_final['weighted_indep_index'] = df_reg_final['indep_bool'] * df_reg_final['vote_proportion']
df_reg_final['weighted_incumbent_index'] = df_reg_final['incumbent_party'] * df_reg_final['vote_proportion']

df_reg_final = df_reg_final.groupby(['year_election', 'election_number', 'CUSEC'], as_index=False).agg(agg_dict)

df_reg_final.rename(columns=rename_map, inplace=True)

In [29]:
df_gen_final.head()

Unnamed: 0,year_election,election_number,CUSEC,political_index,indep_index,incumbent_index,province_name,municipality_name,votes_parties,census,turnout,valid_votes,blank_ballots,abstention,null_votes,turnout_rate,blanc_ballot_rate
0,1986,1,1299001001,0.008611,0.019424,0.0,,Extranjeros Castellón,173.0,1047.0,174.0,173.0,0.0,873.0,1.0,0.166189,0.0
1,1986,1,399001001,0.008611,0.019424,0.0,,Extranjeros Alicante,812.0,3327.0,829.0,812.0,0.0,2498.0,17.0,0.249173,0.0
2,1986,1,4699001001,0.008611,0.019424,0.0,,Extranjeros Valencia,1756.0,7398.0,1756.0,1756.0,0.0,5642.0,0.0,0.237361,0.0
3,1986,1,3300101001,0.008611,0.019424,0.0,Alicante,"Atzúbia, l'",354.0,438.0,364.0,355.0,1.0,74.0,9.0,0.83105,0.002747
4,1986,1,3300201001,0.017221,0.038848,0.0,Alicante,Agost,599.0,748.0,614.0,602.0,3.0,134.0,12.0,0.820856,0.004886


In [57]:
df_reg_final.head()

Unnamed: 0,year_election,election_number,CUSEC,political_index,indep_index,incumbent_index,province_name,municipality_name,votes_parties,census,turnout,valid_votes,blank_ballots,abstention,null_votes,turnout_rate,blanc_ballot_rate
0,1987,1,1299001001,0.148676,0.080168,0.0,,Extranjeros Castellón,137.0,361.0,139,137.0,0,222.0,2,0.385042,0.0
1,1987,1,399001001,0.148676,0.080168,0.0,,Extranjeros Alicante,505.0,1294.0,569,506.0,1,725.0,63,0.439722,0.001757
2,1987,1,4699001001,0.148676,0.080168,0.0,,Extranjeros Valencia,1249.0,2983.0,1255,1249.0,0,1728.0,6,0.420717,0.0
3,1987,1,3300101001,0.148676,0.080168,0.0,Alicante,"Atzúbia, l'",374.0,443.0,376,376.0,2,67.0,0,0.848758,0.005319
4,1987,1,3300201001,0.297351,0.160336,0.0,Alicante,Agost,584.0,702.0,589,584.0,0,113.0,5,0.839031,0.0
