# Compute the Rice index of the votes by age range

In [None]:
import pandas as pd

In [None]:
# Path of the CSV file describing the MPs (loaded into `dfp`)
INPATH_MP = "../processed_data/parlementaires.csv"
# Path of the CSV file holding the votes (loaded into `dfv`)
INPATH_VOTES = "../processed_data/votes_finaux.csv"
# Reference year from which birth years are subtracted to get age limits
THISYEAR = 2022

In [None]:
# Read the MP table; its 'Unnamed: 0.1' column is exposed under the name 'ID'

dfp = pd.read_csv(INPATH_MP).rename(columns={'Unnamed: 0.1': 'ID'})


# Read the votes table

dfv = pd.read_csv(INPATH_VOTES)

In [None]:
# Auxiliary function

def extract_age(dataframe, age_list, mask=None, current_year=None):
    """
    Group the IDs of the MPs by age range.

    The age is computed from the birth year only: age = current_year - birth year.

    Inputs:
        dataframe: pandas dataframe of parlementaires.csv; it must provide the
            columns 'birthDate' and 'ID'. The dataframe is left unmodified.
        age_list: 2D list with age range (age not year):
            [range index][lower limit, upper limit]. The lower limit is
            inclusive and the upper limit exclusive, so that contiguous ranges
            such as [0, 40] and [40, 50] neither overlap nor leave a gap.
        mask: optional row selector handed to `dataframe.loc` before filtering
        current_year: year used to compute the ages (defaults to THISYEAR)
    Output:
        numOfPOI: list with one entry per range of age_list, each entry being
            the list of the IDs (as strings) of the MPs in that range
    """

    if current_year is None:
        current_year = THISYEAR

    # `is not None` rather than truthiness: the truth value of a pandas Series
    # or numpy array mask is ambiguous and raises a ValueError
    if mask is not None:
        dataframe = dataframe.loc[mask]

    # Work on a separate Series so that the caller's dataframe is not altered
    birth_years = pd.to_datetime(dataframe['birthDate']).dt.year

    numOfPOI = []
    for bounds in age_list:
        latest_birth_year = current_year - bounds[0]
        earliest_birth_year = current_year - bounds[1]
        # lower age limit inclusive (<=), upper age limit exclusive (>)
        in_range = (birth_years > earliest_birth_year) & (birth_years <= latest_birth_year)
        ID_list = dataframe.loc[in_range, "ID"].astype(str).values.tolist()
        numOfPOI.append(ID_list)

    return numOfPOI

In [None]:
# Separate the MPs by age ranges

age_list = [[0,40],[40,50],[50,60],[60,70],[70,120]]
mp_id_by_age = extract_age(dfp, age_list, mask=None)


# For each age range, build a copy of the votes from which the columns of the
# MPs belonging to all the other age ranges have been removed
dfv_ages = []
for i in range(len(mp_id_by_age)):
    # [:i] and [i+1:] select every range but the i-th one
    other_ages = sum(mp_id_by_age[:i], []) + sum(mp_id_by_age[i+1:], [])
    dfv_age = dfv.drop(other_ages, axis=1)
    dfv_ages.append(dfv_age)

## Rice index by age range

In [None]:
# Auxiliary functions
    
def count(string, row):
    """
    Count the number of occurrences of `string` in the row

    Inputs:
        string: value to look for
        row: pandas Series
    Output:
        number of entries of `row` equal to `string` (0 if there is none)
    """

    # Tally the values once; .get falls back to 0 when `string` never occurs
    return row.value_counts().get(string, 0)


def rice_index(row):
    """
    Compute the Rice index of the row

    The Rice index is |#Oui - #Non| / (#Oui + #Non), where #Oui and #Non are
    the numbers of 'Oui' and 'Non' entries of the row.

    Inputs:
        row: pandas Series of votes
    Output:
        the Rice index of the row, or NaN when the row contains neither
        'Oui' nor 'Non' (the index is undefined in that case)
    """

    count_yes = count('Oui', row)
    count_no = count('Non', row)
    total = count_yes + count_no

    # Without any 'Oui'/'Non' vote the ratio would be 0/0
    if total == 0:
        return float('nan')

    return abs(count_yes - count_no) / total

### Compute the relative Rice index of each age range (age_range_RI / total_RI)


In [None]:
# First compute the Rice index for each vote, without any age distinction

rice_indices_all_ages = dfv.apply(rice_index, axis=1)


# Compute the relative Rice index for each vote and each age class,
# then write one CSV file per age class, sorted by that relative index

for i, votes_age in enumerate(dfv_ages):
    column = f'Relative Rice index by age (range {i})'
    votes_age = votes_age.copy()  # leave the frames stored in dfv_ages untouched
    rice_indices_ages = votes_age.apply(rice_index, axis=1) / rice_indices_all_ages
    votes_age.insert(0, column, rice_indices_ages)
    # sort_values returns a new dataframe: keep it, otherwise the file is unsorted
    votes_age = votes_age.sort_values(column)
    votes_age.to_csv(f"../processed_data/relative_RI_age_range_{i}.csv")