# Data description

In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from utilities import save_dataframe

In [None]:
# Resolve the folders used by the notebook relative to the working directory:
# the project root is taken to be the parent of the current directory.
here = os.getcwd()
project_path = os.path.dirname(here)
data_path, visualizations_path = (
    os.path.join(project_path, folder) for folder in ("data", "visualizations")
)

In [None]:
# Load the per-senator records for the session from the data folder and
# preview the first two rows.
senators_path = os.path.join(
    data_path,
    "session_29-12-2020_senators.csv",
)
senators = pd.read_csv(filepath_or_buffer=senators_path)
senators.head(n=2)

In [None]:
# Display pandas' descriptive summary of the senators table.
senators.describe()

In [None]:
# Count how many senators belong to each party and export the resulting
# table to the visualizations folder.
senators_count = senators["party"].value_counts().reset_index()
save_dataframe(
    senators_count,
    folder=visualizations_path,
    filename="senators_count",
)

In [None]:
# Build a party x vote table holding the number of senators for each
# combination; combinations with no senators are filled with 0.
senators_vote = (
    senators
    .groupby(["party", "vote"])
    .size()
    # size() yields an unnamed Series, so reset_index() labels the counts 0.
    .reset_index()
    # Join each party's total from senators_count on the shared column names
    # (presumably just "party" — confirm against senators_count's columns).
    .merge(senators_count)
    .assign(party=lambda x: x.party.str.title())
    # Assumes senators_count exposes the per-party total as "count"
    # (the pandas >= 2.0 value_counts naming) — TODO confirm.
    .rename(columns={0: "n_votes", "count": "n_senators"})
    .sort_values(by=["n_senators", "party"], ascending=[False, True])
    .reset_index(drop=True)
    # NOTE(review): pivot_table sorts its result by default, so the ordering
    # by n_senators set up above does not appear to carry into the final
    # table; pass sort=False if that ordering is intended — confirm.
    # The former margins_name=["chicho", "cacho"] placeholder was removed:
    # margins_name must be a string and is only used when margins=True.
    .pivot_table(
        index=["party"],
        columns=["vote"],
        values=["n_votes"],
        fill_value=0,
    )
)
# Drop the outer "n_votes" column level so columns are just the vote labels,
# and blank the columns-axis name.
senators_vote.columns = senators_vote.columns.get_level_values(1)
senators_vote.columns.name = ""

In [None]:
# Stacked bar chart: one bar per party, one segment per vote type,
# saved as a PNG in the visualizations folder.
fig, ax = plt.subplots(figsize=(10, 5))
senators_vote.plot.bar(stacked=True, ax=ax)
ax.set(
    xlabel="",
    ylabel="Cantidad de senadores",
    title="Voto por alianza o partido",
)
# Tilt the party names so long labels stay readable.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment="right",
)
ax.legend(title="Voto")
fig.tight_layout()
plt.savefig(os.path.join(visualizations_path, "senators_vote.png"))

In [None]:
# Turn the party index back into a regular column, then export the vote
# columns in a fixed order.
senators_vote.reset_index(inplace=True)
save_dataframe(
    senators_vote.loc[
        :, ["party", "positivo", "negativo", "ausente", "abstención"]
    ],
    folder=visualizations_path,
    filename="senators_vote",
)