In [None]:
import pandas as pd
import numpy as np
import openpyxl
import warnings
import sys
import os

# Get the directory where this notebook is located
notebook_dir = os.path.dirname(os.path.abspath(''))

# Add the PythonPrep directory to the path
pythonprep_dir = os.path.join(os.path.dirname(notebook_dir), 'PythonPrep') if 'PythonPrep' not in notebook_dir else notebook_dir
sys.path.append(pythonprep_dir)

from paths import main_path

warnings.filterwarnings("ignore")


In [None]:
path_input = main_path + "/Democracy/Democracy_Main/MainAnalysis/input"
file_path = f"{path_input}/outcomes/retirement_age/PAG_30122023023144784.csv"
df = pd.read_csv(file_path)
df = df[['Country', 'Year', 'Indicator', 'Value']]

df


Unnamed: 0,Country,Year,Indicator,Value
0,Australia,1970,"Effective labour market exit age, men",65.7
1,Australia,1980,"Effective labour market exit age, men",62.7
2,Australia,1990,"Effective labour market exit age, men",61.1
3,Australia,2000,"Effective labour market exit age, men",60.6
4,Australia,2005,"Effective labour market exit age, men",62.1
...,...,...,...,...
1170,European Union (27 countries),2016,"Effective labour market exit age, men",62.3
1171,European Union (27 countries),2017,"Effective labour market exit age, men",62.6
1172,European Union (27 countries),2018,"Effective labour market exit age, men",62.9
1173,European Union (27 countries),2019,"Effective labour market exit age, men",63.1


In [49]:
years_to_keep = [1970, 1980, 1990, 2000, 2010, 2020]
dfm = df[(df['Indicator'] == 'Effective labour market exit age, men') & (df['Year'].isin(years_to_keep))]
dfw = df[(df['Indicator'] == 'Effective labour market exit age, women') & (df['Year'].isin(years_to_keep))]

In [50]:
dfm['growth_rate'] = dfm.groupby('Country')['Value'].pct_change() * 100
dfw['growth_rate'] = dfw.groupby('Country')['Value'].pct_change() * 100

In [51]:
dfm = dfm[['Country', 'Year', 'Indicator', 'growth_rate']].dropna()
dfw = dfw[['Country', 'Year', 'Indicator', 'growth_rate']].dropna()

In [52]:
dfw

Unnamed: 0,Country,Year,Indicator,growth_rate
13,Australia,1980,"Effective labour market exit age, women",-7.849294
14,Australia,1990,"Effective labour market exit age, women",1.022147
15,Australia,2000,"Effective labour market exit age, women",-1.011804
17,Australia,2010,"Effective labour market exit age, women",5.110733
23,Australia,2020,"Effective labour market exit age, women",2.593193
...,...,...,...,...
1152,Romania,1980,"Effective labour market exit age, women",-5.362776
1153,Romania,1990,"Effective labour market exit age, women",-3.833333
1154,Romania,2000,"Effective labour market exit age, women",1.733102
1156,Romania,2010,"Effective labour market exit age, women",2.044293


In [53]:
df_pivotm = dfm.pivot(index='Country', columns='Year', values='growth_rate')
df_pivotm.columns = [f"effective_ret_age_men_{col}" for col in df_pivotm.columns]
df_pivotm['effective_ret_age_men_2000_2020'] = df_pivotm.mean(axis=1)
df_pivotw = dfw.pivot(index='Country', columns='Year', values='growth_rate')
df_pivotw.columns = [f"effective_ret_age_women_{col}" for col in df_pivotw.columns]
df_pivotw['effective_ret_age_women_2000_2020'] = df_pivotm.mean(axis=1)


In [54]:
df_merged = df_pivotm.merge(df_pivotw, on='Country', how='inner')
df = df_merged.reset_index()
df = df[['Country', 'effective_ret_age_men_2000_2020', 'effective_ret_age_women_2000_2020']]

In [55]:
df

Unnamed: 0,Country,effective_ret_age_men_2000_2020,effective_ret_age_women_2000_2020
0,Argentina,-0.698584,-0.698584
1,Australia,-0.445989,-0.445989
2,Austria,2.383251,2.383251
3,Belgium,-0.566246,-0.566246
4,Brazil,-1.775856,-1.775856
5,Bulgaria,0.219807,0.219807
6,Canada,-0.072624,-0.072624
7,Chile,-0.293998,-0.293998
8,China (People's Republic of),0.738805,0.738805
9,Colombia,-2.160453,-2.160453


In [56]:
df1 = df.rename(columns={
    'Country': 'country'
})

In [57]:
df1.country.nunique()

52

In [62]:
df1

Unnamed: 0,country,effective_ret_age_men_2000_2020,effective_ret_age_women_2000_2020
0,Argentina,-0.698584,-0.698584
1,Australia,-0.445989,-0.445989
2,Austria,2.383251,2.383251
3,Belgium,-0.566246,-0.566246
4,Brazil,-1.775856,-1.775856
5,Bulgaria,0.219807,0.219807
6,Canada,-0.072624,-0.072624
7,Chile,-0.293998,-0.293998
8,China,0.738805,0.738805
9,Colombia,-2.160453,-2.160453


In [60]:
country_rename_dict = {
    "China (People's Republic of)": "China",
}

df1['country'] = df1['country'].replace(country_rename_dict)

In [None]:
df1.to_csv(f"{path_input}/outcomes/old_outcomes/retirement_age.csv", index=False)



In [None]:
##############

In [None]:
df = pd.read_csv(main_path + '/Democracy/PythonData/average-effective-retirement-men.csv')
df1 = pd.read_csv(main_path + '/Democracy/PythonData/average-effective-retirement-women.csv')




In [24]:
df

Unnamed: 0,Entity,Code,Year,"Average effective age of retirement, men (OECD)"
0,Argentina,ARG,1982,66.600000
1,Argentina,ARG,1983,66.400000
2,Argentina,ARG,1984,66.200000
3,Argentina,ARG,1985,65.800000
4,Argentina,ARG,1986,66.600000
...,...,...,...,...
2148,United States,USA,2014,65.900000
2149,United States,USA,2015,66.100000
2150,United States,USA,2016,66.800000
2151,United States,USA,2017,67.599998


In [25]:
df = df[(df['Year'] >= 2001) & (df['Year'] <= 2019)]
df1 = df1[(df1['Year'] >= 2001) & (df1['Year'] <= 2019)]

In [26]:
df['growth_rate_m'] = df.groupby('Entity')['Average effective age of retirement, men (OECD)'].pct_change() * 100
df['growth_rate_w'] = df1.groupby('Entity')['Average effective age of retirement, women (OECD)'].pct_change() * 100


In [27]:
df = df.drop(['Code', 'Average effective age of retirement, men (OECD)'], axis=1)

In [28]:
df

Unnamed: 0,Entity,Year,growth_rate_m,growth_rate_w
19,Argentina,2001,,
20,Argentina,2002,0.142653,-2.008608
21,Argentina,2003,0.142450,-1.464129
22,Argentina,2004,-1.280228,-0.445765
23,Argentina,2005,-1.152738,-1.641791
...,...,...,...,...
2148,United States,2014,0.918836,-0.308166
2149,United States,2015,0.303490,0.154560
2150,United States,2016,1.059002,0.925926
2151,United States,2017,1.197603,0.764528


In [35]:
periods = {
    '2001-2019': (2002, 2018),
}

result_df = pd.DataFrame()

for period, (start_year, end_year) in periods.items():
    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_ret_m = period_df.groupby('Entity')['growth_rate_m'].mean().rename(period)
    
    result_df = pd.concat([result_df, avg_ret_m], axis=1)

result_df.reset_index(inplace=True)
result_df['country'] = result_df['index']
result_df = result_df.drop('index', axis=1)
result_df = result_df[['country'] + [col for col in result_df.columns if col != 'country']]

result_df.rename(columns={
    '2001-2019': 'ret_men_2001_2018',
}, inplace=True)

result_df.head()

Unnamed: 0,country,ret_men_2001_2018
0,Argentina,-0.296411
1,Australia,0.298101
2,Austria,0.390202
3,Belgium,0.309075
4,Brazil,-0.014541


In [46]:
periods = {
    '2001-2019': (2002, 2018),
}

result_df1 = pd.DataFrame()

for period, (start_year, end_year) in periods.items():
    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_ret_m = period_df.groupby('Entity')['growth_rate_w'].mean().rename(period)
    
    result_df1 = pd.concat([result_df1, avg_ret_w], axis=1)

result_df1.reset_index(inplace=True)
result_df1['country'] = result_df1['index']
result_df1 = result_df1.drop('index', axis=1)
result_df1 = result_df1[['country'] + [col for col in result_df1.columns if col != 'country']]

result_df1.rename(columns={
    '2001-2019': 'ret_women_2001_2018',
}, inplace=True)

result_df1 = result_df1.rename(columns={
    'ret_women_2002_2018':'ret_women_2001_2018'
})
result_df1.head()

Unnamed: 0,country,ret_women_2001_2018
0,Argentina,-0.464886
1,Australia,0.40048
2,Austria,0.252171
3,Belgium,0.353911
4,Brazil,-0.047265


In [47]:
result_df.columns = result_df.columns.str.lower().str.strip()
result_df1.columns = result_df1.columns.str.lower().str.strip()

if result_df.index.name == 'country':
    result_df = result_df.reset_index()
if result_df1.index.name == 'country':
    result_df1 = result_df1.reset_index()

print("Columns in result_df:", result_df.columns.tolist())
print("Columns in result_df1:", result_df1.columns.tolist())

result_df['country'] = result_df['country'].astype(str)
result_df1['country'] = result_df1['country'].astype(str)

merged_df = pd.merge(result_df, result_df1, on='country', how='outer')

print(merged_df.head())

Columns in result_df: ['country', 'ret_men_2001_2018']
Columns in result_df1: ['country', 'ret_women_2001_2018']
     country  ret_men_2001_2018  ret_women_2001_2018
0  Argentina          -0.296411            -0.464886
1  Australia           0.298101             0.400480
2    Austria           0.390202             0.252171
3    Belgium           0.309075             0.353911
4     Brazil          -0.014541            -0.047265


In [50]:
merged_df = merged_df.dropna()

In [None]:
merged_df.to_csv(f"{path_input}/outcomes/old_outcomes/retirement_age.csv", index=False)

