# Lectura y procesado de las distintas tablas

## 1. Salario Medio

In [116]:
import pandas as pd 

# Load the average-salary CSV, keep only the four columns needed, give them
# the notebook's common names (Country / Year / ...) and order the rows by
# country and then by year.
avg_salary = (
    pd.read_csv(
        "datos/avg_salary_oecd_95_22.csv",
        usecols=["REF_AREA", "TIME_PERIOD", "Unit of measure", "OBS_VALUE"],
    )
    .rename(columns={
        "REF_AREA": "Country",
        "TIME_PERIOD": "Year",
        "Unit of measure": "Currency",
        "OBS_VALUE": "Average Salary",
    })
    .sort_values(['Country', 'Year'])
)

In [117]:
# Preview the first rows of the average-salary table.
avg_salary.head()

Unnamed: 0,Country,Currency,Year,Average Salary
890,AUS,Australian dollar,1995,38243.74364
891,AUS,Australian dollar,1996,40124.963008
892,AUS,Australian dollar,1997,41921.611476
893,AUS,Australian dollar,1998,43097.816839
894,AUS,Australian dollar,1999,44579.886525


## 2. Índice de Precios al Consumo

In [118]:
# Load the CPI CSV, keep the location / time / value columns, rename them to
# Country / Year / CPI and order the rows by country and then by year.
cpi = (
    pd.read_csv(
        "datos/cpi_oecd_95_22.csv",
        usecols=["LOCATION", "TIME", "Value"],
    )
    .rename(columns={
        "LOCATION": "Country",
        "TIME": "Year",
        "Value": "CPI",
    })
    .sort_values(['Country', 'Year'])
)

In [119]:
# Preview the first rows of the CPI table.
cpi.head()

Unnamed: 0,Country,Year,CPI
0,AUS,1995,4.627767
1,AUS,1996,2.615385
2,AUS,1997,0.224887
3,AUS,1998,0.860135
4,AUS,1999,1.483129


## 3. Producto Interior Bruto

In [120]:
import pandas as pd 
import numpy as np

# Load the GDP CSV, keep the location / time / value columns, rename them to
# Country / Year / GDP and order the rows by country and then by year.
gdp = (
    pd.read_csv(
        "datos/gdp_oecd_95_22.csv",
        usecols=["LOCATION", "TIME", "Value"],
    )
    .rename(columns={
        "LOCATION": "Country",
        "TIME": "Year",
        "Value": "GDP",
    })
    .sort_values(['Country', 'Year'])
)

Calculamos una nueva columna con el crecimiento porcentual del PIB de un año al siguiente, dentro de cada país.

In [122]:

# Keep the rows ordered by country and year: pct_change() below compares each
# row with the previous row of the same country, so row order defines what
# "previous" means.
gdp = gdp.sort_values(['Country', 'Year'])

# Percentage change of GDP with respect to the previous row of the same
# country. The first row of every country has no predecessor, so its growth
# is NaN.
# NOTE(review): the change is computed row-to-row, not year-to-year; if a
# country is missing a year in the CSV, the value spans that gap - confirm the
# series has no missing years.
gdp['GDP Growth'] = gdp.groupby('Country')['GDP'].pct_change() * 100

# The previous groupby(...).fillna(np.nan) step was removed: filling NaN with
# NaN changes nothing, and SeriesGroupBy.fillna is deprecated in recent pandas
# releases.

gdp.head()

Unnamed: 0,Country,Year,GDP,GDP Growth
1644,ALB,1997,8556.079,
1645,ALB,1998,9451.895,10.469936
1646,ALB,1999,10792.583,14.18433
1647,ALB,2000,11927.776,10.51827
1648,ALB,2001,13159.431,10.32594


## 4. Brecha de Género en los Salarios

In [123]:
import pandas as pd 

# Load the gender-gap CSV, keep the area / period / value columns, rename them
# to Country / Year / Gender Gap and order the rows by country and then year.
gender_gap = (
    pd.read_csv(
        "datos/gender_gap_oecd_95_22.csv",
        usecols=["REF_AREA", "TIME_PERIOD", "OBS_VALUE"],
    )
    .rename(columns={
        "REF_AREA": "Country",
        "TIME_PERIOD": "Year",
        "OBS_VALUE": "Gender Gap",
    })
    .sort_values(['Country', 'Year'])
)

In [124]:
# Preview the first rows of the gender-gap table.
gender_gap.head()

Unnamed: 0,Country,Year,Gender Gap
20,AUS,1995,14.478114
21,AUS,1997,15.254237
22,AUS,1998,13.242009
23,AUS,1999,14.285714
24,AUS,2000,17.2


## 5. Ratio entre Salario Mínimo y el Medio/Mediano

In [125]:
import pandas as pd 

# Load the minimum-wage ratio CSV in long form (one row per country, year and
# aggregation code) and pivot it to wide form: one row per (Country, Year)
# and one column per distinct AVG_MED code. aggfunc='first' keeps the first
# value when a (Country, Year, AVG_MED) combination appears more than once.
min_to_avg_salary = (
    pd.read_csv(
        "datos/min_to_avg_salary_oecd_95_22.csv",
        usecols=["REF_AREA", "TIME_PERIOD", "AGGREGATION_OPERATION", "OBS_VALUE"],
    )
    .rename(columns={
        "REF_AREA": "Country",
        "TIME_PERIOD": "Year",
        "AGGREGATION_OPERATION": "AVG_MED",
        "OBS_VALUE": "min_to_avg_salary",
    })
    .pivot_table(
        index=['Country', 'Year'],
        columns='AVG_MED',
        values='min_to_avg_salary',
        aggfunc='first',
    )
    .reset_index()
)

# Give the pivoted columns readable names.
# NOTE(review): labels are assigned by position, so this assumes the CSV holds
# exactly two AVG_MED codes and that the "average" code sorts before the
# "median" code - confirm against the source file.
min_to_avg_salary.columns = [
    'Country',
    'Year',
    'Ratio Minimum and Average Salaries',
    'Ratio Minimum and Median Salaries',
]
min_to_avg_salary = min_to_avg_salary.sort_values(['Country', 'Year'])

In [126]:
# Preview the first rows of the pivoted minimum-wage ratio table.
min_to_avg_salary.head()

Unnamed: 0,Country,Year,Ratio Minimum and Average Salaries,Ratio Minimum and Median Salaries
0,AUS,1995,54.190476,61.402878
1,AUS,1996,52.501878,60.449827
2,AUS,1997,51.269615,59.9
3,AUS,1998,52.150838,60.715447
4,AUS,1999,50.908399,59.110429


## 6. Población de cada país

In [127]:
# Load the population CSV, keep the area / period / value columns, rename them
# to Country / Year / Population and order the rows by country and then year.
population = (
    pd.read_csv(
        "datos/population_oecd_95_22.csv",
        usecols=["REF_AREA", "TIME_PERIOD", "OBS_VALUE"],
    )
    .rename(columns={
        "REF_AREA": "Country",
        "TIME_PERIOD": "Year",
        "OBS_VALUE": "Population",
    })
    .sort_values(['Country', 'Year'])
)

In [128]:
# Preview the first rows of the population table.
population.head()

Unnamed: 0,Country,Year,Population
644,AUS,1995,18004882.0
645,AUS,1996,18224767.0
646,AUS,1997,18423037.0
647,AUS,1998,18607584.0
648,AUS,1999,18812264.0


# Obtención del Dataset Final

In [131]:
# Assemble the final table on the (Country, Year) key.
# The first join is an outer join, so country/year pairs that appear in only
# one of the salary or gender-gap tables are kept. Every later table is
# left-joined, so it adds columns but never introduces new (Country, Year)
# pairs.
dataset = (
    avg_salary
    .merge(gender_gap, how='outer', on=['Country', 'Year'])
    .merge(cpi, how='left', on=['Country', 'Year'])
    .merge(gdp, how='left', on=['Country', 'Year'])
    .merge(min_to_avg_salary, how='left', on=['Country', 'Year'])
    .merge(population, how='left', on=['Country', 'Year'])
)

dataset.head()

Unnamed: 0,Country,Currency,Year,Average Salary,Gender Gap,CPI,GDP,GDP Growth,Ratio Minimum and Average Salaries,Ratio Minimum and Median Salaries,Population
0,AUS,Australian dollar,1995,38243.74364,14.478114,4.627767,404102.992,,54.190476,61.402878,18004882.0
1,AUS,Australian dollar,1996,40124.963008,,2.615385,424472.904,5.040772,52.501878,60.449827,18224767.0
2,AUS,Australian dollar,1997,41921.611476,15.254237,0.224887,450962.701,6.240633,51.269615,59.9,18423037.0
3,AUS,Australian dollar,1998,43097.816839,13.242009,0.860135,478691.653,6.148835,52.150838,60.715447,18607584.0
4,AUS,Australian dollar,1999,44579.886525,14.285714,1.483129,510947.993,6.738438,50.908399,59.110429,18812264.0


In [133]:
# Write the merged dataset to a CSV file in the current working directory.
# NOTE(review): to_csv() also writes the row index as an unnamed first column
# by default; pass index=False if readers of this file do not expect it -
# confirm with the downstream consumers before changing.
dataset.to_csv("dataset_oecd_95_22.csv")