# Session attendees exploration

Notebook for merging two datasets:

- [../data/session_29-12-2020_attendees.csv](../data/session_29-12-2020_attendees.csv): dataset with the list of attendees of the session (including not only senators but also the session's president and secretaries). This dataset was manually enhanced with the senators' vote decisions.
- [../data/senators_data.csv](../data/senators_data.csv): dataset downloaded from the [Senate webpage](https://www.senado.gob.ar/), with information about the senators (their term in office, their party, etc).

In [5]:
import os
import pandas as pd

from utilities import preprocess_name

In [6]:
# Resolve the data folder relative to the directory the kernel runs in:
# the project root is taken to be the parent of the working directory.
here = os.getcwd()
project_path = os.path.split(here)[0]
data_path = os.path.join(project_path, "data")

In [7]:
# The attendees file is pipe-separated and has no header row, so the column
# names are supplied here.
session_attendees_path = os.path.join(data_path, "session_29-12-2020_attendees.csv")
session_attendees = pd.read_csv(
    session_attendees_path,
    sep="|",
    header=None,
    names=["role", "name"],
)
session_attendees.head(2)

Unnamed: 0,role,name
0,PRESIDENTA,Cristina Fernandez De Kirchner
1,PRESIDENTA PROVISIONAL,Claudia Ledesma Abdala


In [8]:
# Votes file: comma-separated (the pandas default), read with its own header row.
session_votes_path = os.path.join(data_path, "session_29-12-2020_votes.csv")
session_votes = pd.read_csv(session_votes_path)
session_votes.head(2)

Unnamed: 0,name,vote
0,Ana Claudia Almirón,positivo
1,Roberto Gustavo Basualdo,negativo


In [9]:
# (rows, columns) of the attendees table before the votes are merged in.
session_attendees.shape

(82, 2)

In [10]:
# Attach each attendee's vote. A left merge keeps every attendee, but it would
# silently duplicate an attendee whose name appears more than once in the votes
# file, so the row count is checked explicitly instead of only by eye.
n_attendees_before_merge = len(session_attendees)
session_attendees = session_attendees.merge(session_votes, how="left", on="name")
assert len(session_attendees) == n_attendees_before_merge, (
    "Merging votes changed the number of attendee rows; "
    "check session_votes for duplicated names."
)
session_attendees.shape

(82, 3)

In [11]:
# Senators' reference data, read with pandas' default CSV settings.
senators_data_path = os.path.join(data_path, "senators_data.csv")
senators_data = pd.read_csv(senators_data_path)
senators_data.head(2)

Unnamed: 0,Senador,Nº de Orden,Período Legal,Período Real,Provincia,Partido Político o alianza,Reemplazo,Observaciones
0,VEGA MARÍA CLARA DEL VALLE,514,20-12-2019 al 09-12-2023,20-12-2019 al Sin Fecha,LA RIOJA,CAMBIEMOS FUERZA CÍVICA RIOJANA,BRIZUELA Y DORIA DE CARA OLGA INES,COMPLETA PERIODO DE SENADORA OLGA I. BRIZUELA ...
1,MONTENEGRO GERARDO ANTENOR,446,10-12-2019 al 09-12-2025,10-12-2019 al Sin Fecha,SANTIAGO DEL ESTERO,FRENTE DE TODOS,MONTENEGRO GERARDO ANTENOR,Sin Observaciones


In [12]:
# (rows, columns) of the senators reference table.
senators_data.shape

(72, 8)

In [13]:
# Add a preprocessed version of the person name to both tables; these columns
# are the ones compared when matching attendees to senators.
session_attendees["prep_name"] = session_attendees["name"].apply(preprocess_name)
senators_data["prep_senador"] = senators_data["Senador"].apply(preprocess_name)

In [14]:
# Preprocessed vs. original attendee names, side by side.
session_attendees.loc[:, ["prep_name", "name"]].head()

Unnamed: 0,prep_name,name
0,cristina de fernandez kirchner,Cristina Fernandez De Kirchner
1,abdala claudia ledesma,Claudia Ledesma Abdala
2,lousteau martin,Martín Lousteau
3,closs fabian maurice,Maurice Fabián Closs
4,elena laura machado rodriguez,Laura Elena Rodríguez Machado


In [15]:
# Preprocessed vs. original senator names, side by side.
senators_data.loc[:, ["prep_senador", "Senador"]].head()

Unnamed: 0,prep_senador,Senador
0,clara del maria valle vega,VEGA MARÍA CLARA DEL VALLE
1,antenor gerardo montenegro,MONTENEGRO GERARDO ANTENOR
2,dario edgardo kueider,KUEIDER EDGARDO DARÍO
3,maris olalla stella,OLALLA STELLA MARIS
4,estela sapag silvia,SAPAG SILVIA ESTELA


In [16]:
# Lookup of the form {preprocessed senator name: {"Senador": original name}}.
names_mapping = (
    senators_data
    .set_index("prep_senador")
    .loc[:, ["Senador"]]
    .to_dict(orient="index")
)

In [17]:
def map_name(name: str, mapping: dict=names_mapping) -> list:
    """Return the senators whose name tokens match those of ``name``.

    Two names match when the whitespace-separated tokens of one form a subset
    of the tokens of the other, so token order and extra middle names or
    surnames on either side do not prevent a match.

    Parameters
    ----------
    name : str
        Attendee name, as a whitespace-separated string of tokens.
    mapping : dict
        Maps each senator key (whitespace-separated tokens) to a dict holding
        the senator's name under the ``"Senador"`` key. Defaults to
        ``names_mapping`` as it was when this function was defined.

    Returns
    -------
    list
        The ``"Senador"`` value of every matching entry, in mapping order.
        Empty when nothing matches or when ``name`` contains no tokens.
    """
    name_tokens = set(name.split())
    # The empty set is a subset of every set, so a blank name would otherwise
    # "match" every senator in the mapping.
    if not name_tokens:
        return []

    matches = []
    for key, value in mapping.items():
        key_tokens = set(key.split())
        # Same reasoning in the other direction: a blank key must never match.
        if not key_tokens:
            continue
        if name_tokens <= key_tokens or key_tokens <= name_tokens:
            matches.append(value["Senador"])
    return matches

In [18]:
def postprocess_name(name: list):
    """Collapse a list of matched names into a single value.

    Returns ``None`` when ``name`` is empty (or otherwise falsy). Otherwise the
    list must hold exactly one entry, which is returned as a string; more than
    one entry fails the assertion.
    """
    if not name:
        return None
    assert len(name)==1, f"{name} with several matches."
    return " ".join(name)

In [19]:
# Look up the matching senator(s) for each attendee, then collapse the list of
# matches into a single name (or None when there is no match).
session_attendees["senador"] = (
    session_attendees["prep_name"]
    .apply(map_name)
    .apply(postprocess_name)
)
session_attendees.head(2)

Unnamed: 0,role,name,vote,prep_name,senador
0,PRESIDENTA,Cristina Fernandez De Kirchner,,cristina de fernandez kirchner,
1,PRESIDENTA PROVISIONAL,Claudia Ledesma Abdala,negativo,abdala claudia ledesma,LEDESMA ABDALA DE ZAMORA CLAUDIA


In [20]:
# Bring in the senators' data through the matched name, then drop the helper
# columns that were only needed for the matching step.
session_attendees = session_attendees.merge(
    senators_data, how="left", left_on="senador", right_on="Senador"
).drop(columns=["prep_name", "prep_senador", "senador"])
session_attendees.head(2)

Unnamed: 0,role,name,vote,Senador,Nº de Orden,Período Legal,Período Real,Provincia,Partido Político o alianza,Reemplazo,Observaciones
0,PRESIDENTA,Cristina Fernandez De Kirchner,,,,,,,,,
1,PRESIDENTA PROVISIONAL,Claudia Ledesma Abdala,negativo,LEDESMA ABDALA DE ZAMORA CLAUDIA,509.0,10-12-2019 al 09-12-2025,10-12-2019 al Sin Fecha,SANTIAGO DEL ESTERO,FRENTE CÍVICO POR SANTIAGO,ZAMORA GERARDO,Sin Observaciones


In [21]:
# Keep only the rows whose role is "PRESENTES", restrict to the columns of
# interest and give the Spanish column names English equivalents.
session_attendees = session_attendees.loc[
    lambda frame: frame["role"] == "PRESENTES",
    ["name", "vote", "Senador", "Provincia", "Partido Político o alianza"],
].rename(
    columns={
        "Senador": "senator",
        "Provincia": "province",
        "Partido Político o alianza": "party",
    }
)

In [22]:
# Output path is derived from the input path:
# "..._attendees.csv" becomes "..._senators.csv".
session_senators_path = session_attendees_path.replace(
    "_attendees.csv", "_senators.csv"
)
session_attendees.to_csv(path_or_buf=session_senators_path, index=False)