In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

dtypes = {
    "Con": "category",
    "Contp": "category",
    "Reg": "category",
    "Stage":"category"
}

custom_date_parser = lambda x: datetime.strptime(x, "%Y-%m-%d")

df_pax = pd.read_csv(
    "csv_in/pax_data_1868_agreements_17-04-21.csv",
    dtype=dtypes,
    parse_dates=['Dat'],
     date_parser=custom_date_parser
)

df_gender_pax = pd.read_csv('csv_in/pax_data_371_agreements_15-04-21.csv',
    dtype=dtypes,
    parse_dates=['Dat'],
    date_parser=custom_date_parser
)

In [2]:
df_pax.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1868 entries, 0 to 1867
Columns: 267 entries, Con to ImSrc
dtypes: category(4), datetime64[ns](1), float64(5), int64(245), object(12)
memory usage: 3.8+ MB


In [3]:
df_gender_pax.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 371 entries, 0 to 370
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Con            371 non-null    category      
 1   Contp          371 non-null    category      
 2   PP             371 non-null    int64         
 3   PPName         371 non-null    object        
 4   Reg            371 non-null    category      
 5   AgtId          371 non-null    int64         
 6   Ver            371 non-null    int64         
 7   Agt            371 non-null    object        
 8   Dat            371 non-null    datetime64[ns]
 9   Status         371 non-null    object        
 10  Lgt            371 non-null    int64         
 11  N_characters   371 non-null    int64         
 12  Agtp           371 non-null    object        
 13  Stage          371 non-null    category      
 14  StageSub       366 non-null    object        
 15  Part           366 non-

In [4]:
#Comprobación de que los casos que aparecen en el dataset de mujeres coinciden con los casos del dataset total.
filtro = df_pax["GeWom"] == 1
df_pax.loc[filtro, "Con"].count() == df_gender_pax["Con"].count()





True

In [5]:
#Valores nulos en las vases de datos
df_pax.isNull()


AttributeError: 'DataFrame' object has no attribute 'isNull'

In [None]:
# Evolución anual porcentual del número de tratados con referencias a mujeres
by_year = df_pax.groupby([df_pax['Dat'].dt.year])

geWon_by_year = by_year["GeWom"].agg(["sum","count"]).reset_index()

geWon_by_year.to_csv("csv_out/agrupados_total.csv")


In [None]:
fig,ax=plt.subplots(figsize=(10,8))

ax.plot(geWon_by_year.reset_index()["Dat"], geWon_by_year["sum"]*100/geWon_by_year["count"])

In [None]:
# Evolución anual porcentual por región del número de tratados con referencias a mujeres

fig,ax=plt.subplots(6,1,figsize=(10,10))
for i,reg in enumerate(df_pax["Reg"].cat.categories):
    df_reg = df_pax.loc[df_pax["Reg"]==reg]
    
    geWon_reg_by_year = df_reg.groupby([df_reg['Dat'].dt.year])["GeWom"].agg(["sum","count"]).reset_index()
    geWon_reg_by_year.to_csv("csv_out/agrupados_total_{0}.csv".format(reg))
    
    
    ax[i].plot(geWon_reg_by_year.reset_index()["Dat"], geWon_reg_by_year["sum"]*100/geWon_reg_by_year["count"])
    ax[i].set_title(reg)




In [None]:
# Total de tratados firmados antes y despues del 2001
filtro_T_pre_UNSCR1325 = df_pax['Dat'].dt.year <2001
filtro_T_pos_UNSCR1325 = df_pax['Dat'].dt.year >=2001
pre_UNSCR1325 = df_pax.loc[filtro_T_pre_UNSCR1325, "Con"].count()
pos_UNSCR1325 = df_pax.loc[filtro_T_pos_UNSCR1325, "Con"].count()

print("Totales pre UNSCR1325: {0} - Totales pos UNSCR1325: {1}".format(pre_UNSCR1325,pos_UNSCR1325))

In [None]:
# Total de tratados con referencias a mujeres  antes y despues del 2001
filtro_W_pre_UNSCR1325 = df_gender_pax['Dat'].dt.year <2001
filtro_W_pos_UNSCR1325 = df_gender_pax['Dat'].dt.year >=2001
pre_UNSCR1325 = df_gender_pax.loc[filtro_W_pre_UNSCR1325, "Con"].count()
pos_UNSCR1325 = df_gender_pax.loc[filtro_W_pos_UNSCR1325, "Con"].count()

print("Totales gender pre UNSCR1325: {0} - Totales gender pos UNSCR1325: {1}".format(pre_UNSCR1325,pos_UNSCR1325))

In [None]:
# Total de referencias por cada bloque de la base de datos gender PAX
bloques_pre_UNSCR1325_totales = df_gender_pax.loc[filtro_W_pre_UNSCR1325, ["WggPar","WggGrp","WggIntLaw","WggNewInst","WggVio","WggTraJus","WggInstRef","WggDdr","WggDev"]].agg(["sum","count"])
bloques_pre_UNSCR1325_totales.to_csv("csv_out/bloques_pre_UNSCR1325_totales.csv")

bloques_pos_UNSCR1325_totales = df_gender_pax.loc[filtro_W_pos_UNSCR1325, ["WggPar","WggGrp","WggIntLaw","WggNewInst","WggVio","WggTraJus","WggInstRef","WggDdr","WggDev"]].agg(["sum","count"])
bloques_pos_UNSCR1325_totales.to_csv("csv_out/bloques_pos_UNSCR1325_totales.csv")

In [None]:
# referencias a violencia de genero en gender PAX
WggVio_pre_UNSCR1325_totales = df_gender_pax.loc[filtro_W_pre_UNSCR1325, ["WggVio","WggVioSex","WggVioGen","WggVioProt","WggVioOth"]].agg(["sum","count"])
WggVio_pre_UNSCR1325_totales.to_csv("csv_out/WggVio_pre_UNSCR1325_totales.csv")

WggVio_pos_UNSCR1325_totales = df_gender_pax.loc[filtro_W_pos_UNSCR1325, ["WggVio","WggVioSex","WggVioGen","WggVioProt","WggVioOth"]].agg(["sum","count"])
WggVio_pos_UNSCR1325_totales.to_csv("csv_out/WggVio_pos_UNSCR1325_totales.csv")


In [None]:
# referencias a Participacion en gender PAX
WggPar_pre_UNSCR1325_totales = df_gender_pax.loc[filtro_W_pre_UNSCR1325, ["WggPar","WggGenQuot","WggEffPart","WggCitizen","WggParOth"]].agg(["sum","count"])
WggPar_pre_UNSCR1325_totales.to_csv("csv_out/WggPar_pre_UNSCR1325_totales.csv")

WggPar_pos_UNSCR1325_totales = df_gender_pax.loc[filtro_W_pos_UNSCR1325, ["WggPar","WggGenQuot","WggEffPart","WggCitizen","WggParOth"]].agg(["sum","count"])
WggPar_pos_UNSCR1325_totales.to_csv("csv_out/WggPar_pos_UNSCR1325_totales.csv")
