# Mental health (MH) of older workers, retirement age and working conditions

## Preprocessing SHARE data

Load libraries

In [1]:
import os
import sys

# Make the parent directory importable so that the `utils` package below resolves.
# "../" is resolved against the current working directory, so this assumes the
# notebook is started from its own folder — TODO confirm.
src_path = os.path.abspath("../")
sys.path.append(src_path)

# NOTE(review): star imports hide where the names used in later cells
# (import_share_stata, the *_preprocessing helpers, pd, ...) come from;
# consider explicit imports once the required names are known.
from utils.common import *
from utils.retirement import *
from utils.share import *

# Helper brought in by one of the star imports above; presumably it loads the
# third-party libraries used below (e.g. pandas as `pd`) — verify in utils.
import_libraries()

Preprocess **SHARELIFE data**: apply the initial filters and create the first derived variables

In [2]:
# Load data: Stata modules from the wave 7 folder that feed the SHARELIFE frame.
# NOTE(review): this is an absolute, machine-specific path; it must exist on the
# machine running the notebook.
folder_path = "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/sharew7_rel8-0-0_ALL_datasets_stata/"
file_names = [
    "cv_r.dta",
    "technical_variables.dta",
    "dn.dta",
    "re.dta",
]
# Identifier columns passed to the loader as `merge_columns`; the household,
# partner and couple ids carry the wave-7 suffix.
merge_columns = [
    "mergeid",
    "hhid7",
    "mergeidp7",
    "coupleid7",
    "country",
    "language",
]

sharelife = import_share_stata(
    merge_columns=merge_columns,
    file_names=file_names,
    folder_path=folder_path,
)

In [3]:
# Run the SHARELIFE preprocessing helper; it reports each step in the printed log.
# The result is bound back to `sharelife`, so re-running this cell on its own
# feeds already-preprocessed data into the helper — re-run the load cell first.
sharelife = sharelife_preprocessing(sharelife)

Gender, country, 1st year in country - formatted, age 50+ filter - applied
Years of education - calculated
Current ISCO - identified, those changed job - deleted
Years of contribution, 1st year of contribution - calculated
Those worked less than 10 years / started work before age of 10 - deleted


In [4]:
# Display the preprocessed SHARELIFE frame (the rendering is truncated to the first and last rows)
sharelife

Unnamed: 0,mergeid,country,gender,yrbirth,mobirth,age2017,yr1country,yrseducation,isco,yrscontribution2017,yr1contribution
0,AT-001215-01,Austria,1,1939.0,3.0,78.0,1939.0,15.0,2221,62.0,1956
1,AT-001492-01,Austria,1,1952.0,2.0,65.0,1952.0,11.0,4110,49.0,1969
2,AT-002136-01,Austria,0,1951.0,11.0,66.0,1951.0,20.0,2261,40.0,1978
3,AT-002136-03,Austria,1,1953.0,6.0,64.0,1953.0,14.0,3251,48.0,1970
4,AT-002525-02,Austria,0,1942.0,10.0,75.0,1942.0,11.0,4412,62.0,1956
...,...,...,...,...,...,...,...,...,...,...,...
39899,SK-995042-01,Slovakia,1,1958.0,2.0,59.0,1958.0,10.0,9211,44.0,1974
39900,SK-995042-02,Slovakia,0,1955.0,5.0,62.0,1955.0,10.0,8211,47.0,1971
39901,SK-996004-01,Slovakia,1,1961.0,10.0,56.0,1961.0,13.0,4414,37.0,1981
39902,SK-999958-01,Slovakia,0,1946.0,6.0,71.0,1946.0,11.0,8342,54.0,1964


Preprocess **additional data from SHARE waves 6-8**, where the ISCO occupation code is available

In [5]:
# Load data: read the same three modules from each of waves 6-8 and stack them.
# NOTE(review): the folders are absolute, machine-specific paths.
wave_numbers = [6, 7, 8]
folders = [
    f"/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/sharew{i}_rel8-0-0_ALL_datasets_stata"
    for i in wave_numbers
]
file_names = ["cv_r.dta", "dn.dta", "ep.dta"]

datasets = []
# Walk the wave numbers alongside their folders instead of parsing the number
# back out of the folder name.
for wave, folder in zip(wave_numbers, folders):
    # The household, partner and couple id columns are suffixed with the wave number.
    merge_columns = [
        "mergeid",
        f"hhid{wave}",
        f"mergeidp{wave}",
        f"coupleid{wave}",
        "country",
        "language",
    ]

    folder_dataset = import_share_stata(
        folder_path=folder,
        file_names=file_names,
        merge_columns=merge_columns,
        convert_categoricals=True,
    )
    # Tag every row with its wave of origin before the waves are stacked.
    folder_dataset["wave"] = wave
    datasets.append(folder_dataset)

# Stack the per-wave frames row-wise and renumber the index from 0.
sharelife_add = pd.concat(datasets, sort=False, axis=0).reset_index(drop=True)

In [6]:
# Preprocess the wave 6-8 data; the SHARELIFE frame is passed in as the second
# argument and the helper reports each step in the printed log.
# The result is bound back to `sharelife_add`, so re-running this cell on its own
# feeds already-preprocessed data into the helper — re-run the load cell first.
sharelife_add = sharelife_add_preprocessing(sharelife_add, sharelife)

Gender, country, 1st year in country - formatted, age 50+ filter - applied
Years of education - calculated
Current ISCO - identified, those changed job - deleted
Years of contribution, 1st year of contribution - calculated
Those worked less than 10 years / started work before age of 10 - deleted


In [7]:
# Display the preprocessed additional (waves 6-8) frame
sharelife_add

Unnamed: 0,mergeid,country,gender,yrbirth,mobirth,age2015,age2017,age2020,yr1country,yrseducation,isco,yrscontribution2017,yr1contribution
0,AT-001492-02,Austria,0,1951.0,11.0,64.0,,,1951,13.0,3115,48.0,1970
1,AT-017298-01,Austria,0,1943.0,6.0,72.0,,,1943,5.0,7131,50.0,1968
2,AT-037219-02,Austria,1,1953.0,3.0,62.0,,,1953,13.0,1120,43.0,1972
3,AT-038101-02,Austria,1,1959,8.0,,,61,1959,17.0,5414,37.0,1976
4,AT-272996-01,Austria,0,1953.0,11.0,62.0,,,1953,18.0,2412,44.0,1974
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1627,SI-884726-01,Slovenia,1,1961.0,9.0,54.0,,,1961,12.0,2521,38.0,1980
1628,SI-893992-01,Slovenia,0,1961.0,12.0,54.0,,,1961,11.0,6111,39.0,1979
1629,SI-914744-01,Slovenia,0,1954.0,9.0,61.0,,,1954,12.0,3118,43.0,1975
1630,SI-919029-01,Slovenia,1,1955.0,12.0,60.0,,,1955,17.0,1223,41.0,1977


In [8]:
# Stack the main SHARELIFE frame and the additional wave 6-8 frame row-wise,
# numbering the combined rows from 0. Columns present in only one of the two
# frames are filled with missing values for the other frame's rows.
df = pd.concat([sharelife, sharelife_add], axis=0, ignore_index=True)

Preprocess **main data from SHARE waves 4-6**

In [9]:
# Load data: read the same six modules from each of waves 4-6 and stack them.
# NOTE(review): the folders are absolute, machine-specific paths.
wave_numbers = [4, 5, 6]
folders = [
    f"/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/sharew{i}_rel8-0-0_ALL_datasets_stata"
    for i in wave_numbers
]
file_names = ["cv_r.dta", "dn.dta", "ep.dta", "ch.dta", "gv_health.dta", "as.dta"]

datasets = []
# Walk the wave numbers alongside their folders instead of parsing the number
# back out of the folder name.
for wave, folder in zip(wave_numbers, folders):
    # The household, partner and couple id columns are suffixed with the wave number.
    merge_columns = [
        "mergeid",
        f"hhid{wave}",
        f"mergeidp{wave}",
        f"coupleid{wave}",
        "country",
        "language",
    ]

    folder_dataset = import_share_stata(
        folder_path=folder,
        file_names=file_names,
        merge_columns=merge_columns,
        convert_categoricals=True,
    )
    # Tag every row with its wave of origin before the waves are stacked.
    folder_dataset["wave"] = wave
    datasets.append(folder_dataset)

# Stack the per-wave frames row-wise and renumber the index from 0.
share = pd.concat(datasets, sort=False, axis=0).reset_index(drop=True)

In [10]:
# Preprocess the wave 4-6 data; `df` (the concatenated SHARELIFE frames) is passed
# in as the second argument and the helper reports each step in the printed log.
# The result is bound back to `share`, so re-running this cell on its own feeds
# already-preprocessed data into the helper — re-run the load cell first.
share = share_preprocessing(share, df)

Those without ISCO codes - deleted
Current year, age, number of children and living with a partner - imputed
Currently not working and eligible to special pensions - deleted
Job status, industry of employment - added
Household income, investments, life insurance - added
Physical and mental health indicators - added


In [11]:
# Display the preprocessed wave 4-6 frame.
# NOTE(review): the rendered rows repeat the same mergeid/wave with different
# `thinc` values — presumably multiple imputations; confirm this is intended.
share

Unnamed: 0,mergeid,wave,year,age,nb_children,nb_grandchildren,partnerinhh,job_status,industry,thinc,...,investment,life_insurance,sphus,sphus2,chronic,chronic2,eurod,eurodcat,affective_suffering,motivation_lack
0,AT-001492-02,4,2011,60.0,6.0,7.0,1,Employee,Manufacturing,42001.308594,...,0.0,0.0,4,1,2.0,1,1.0,0,0,0
1,AT-001492-02,4,2011,60.0,6.0,7.0,1,Employee,Manufacturing,42000.503906,...,0.0,0.0,4,1,2.0,1,1.0,0,0,0
2,AT-001492-02,4,2011,60.0,6.0,7.0,1,Employee,Manufacturing,42000.742188,...,0.0,0.0,4,1,2.0,1,1.0,0,0,0
3,AT-001492-02,4,2011,60.0,6.0,7.0,1,Employee,Manufacturing,42001.257812,...,0.0,0.0,4,1,2.0,1,1.0,0,0,0
4,AT-001492-02,4,2011,60.0,6.0,7.0,1,Employee,Manufacturing,42001.980469,...,0.0,0.0,4,1,2.0,1,1.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85410,SI-999649-01,6,2015,55.0,2.0,2.0,1,Private sector employee,Manufacturing,14297.854492,...,1.0,1.0,2,0,0.0,0,1.0,0,0,0
85411,SI-999649-01,6,2015,55.0,2.0,2.0,1,Private sector employee,Manufacturing,58336.046875,...,1.0,1.0,2,0,0.0,0,1.0,0,0,0
85412,SI-999649-01,6,2015,55.0,2.0,2.0,1,Private sector employee,Manufacturing,23655.869141,...,1.0,1.0,2,0,0.0,0,1.0,0,0,0
85413,SI-999649-01,6,2015,55.0,2.0,2.0,1,Private sector employee,Manufacturing,14535.304688,...,1.0,1.0,2,0,0.0,0,1.0,0,0,0


In [12]:
# Merge with Sharelife data: a left join on `mergeid`, so every row of `share`
# is kept and rows with no SHARELIFE match get missing values in those columns.
# NOTE(review): `df` is both an input and the result, so this cell is not
# idempotent — re-running it alone would merge `share` with the merged frame.
df = pd.merge(share, df, how="left", on=["mergeid"])

Final preprocessing for **full SHARE dataset**

In [13]:
# Final preprocessing of the merged frame; the helper reports each step in the
# printed log. The result is bound back to `df`, so re-running this cell on its
# own feeds already-processed data into the helper — re-run the merge cell first.
df = share_final_preprocessing(df)

Current years of contribution - calculated
Data types - corrected
Retirement age, work horizon and work horizon change by reforms - calculated
Longitudinal weights imputed in STATA - added


In [14]:
# Display the final dataset (the rendering is truncated to the first and last rows)
df

Unnamed: 0,mergeid,wave,year,age,nb_children,nb_grandchildren,partnerinhh,job_status,industry,thinc,...,yr1country,yrseducation,isco,yr1contribution,yrscontribution,retirement_age,work_horizon,work_horizon_change,dw_w4,my_wgt
0,AT-009262-01,4,2011,56.0,1.0,2.0,1,Self-employed,Wholesale and retail trade,29620.000000,...,1955.0,12.0,5211,1973,39.0,60.0,4.0,0.0,679.236943,1173.063477
1,AT-009262-01,4,2011,56.0,1.0,2.0,1,Self-employed,Wholesale and retail trade,29620.000000,...,1955.0,12.0,5211,1973,39.0,60.0,4.0,0.0,679.236943,1173.063477
2,AT-009262-01,4,2011,56.0,1.0,2.0,1,Self-employed,Wholesale and retail trade,29620.000000,...,1955.0,12.0,5211,1973,39.0,60.0,4.0,0.0,679.236943,1173.063477
3,AT-009262-01,4,2011,56.0,1.0,2.0,1,Self-employed,Wholesale and retail trade,29620.000000,...,1955.0,12.0,5211,1973,39.0,60.0,4.0,0.0,679.236943,1173.063477
4,AT-009262-01,4,2011,56.0,1.0,2.0,1,Self-employed,Wholesale and retail trade,29620.000000,...,1955.0,12.0,5211,1973,39.0,60.0,4.0,0.0,679.236943,1173.063477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32345,SI-994179-01,6,2015,56.0,1.0,1.0,1,Private sector employee,Manufacturing,24126.246094,...,1959.0,12.0,3343,1979,36.0,60.0,4.0,0.0,71.858020,273.041016
32346,SI-994179-01,6,2015,56.0,1.0,1.0,1,Private sector employee,Manufacturing,14939.283203,...,1959.0,12.0,3343,1979,36.0,60.0,4.0,0.0,71.858020,273.041016
32347,SI-994179-01,6,2015,56.0,1.0,1.0,1,Private sector employee,Manufacturing,33814.000000,...,1959.0,12.0,3343,1979,36.0,60.0,4.0,0.0,71.858020,273.041016
32348,SI-994179-01,6,2015,56.0,1.0,1.0,1,Private sector employee,Manufacturing,19210.865234,...,1959.0,12.0,3343,1979,36.0,60.0,4.0,0.0,71.858020,273.041016


In [15]:
# Save resulting dataset
# NOTE(review): absolute, machine-specific output location.
output_path = "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/results/share_clean_w456.csv"

# to_csv does not create missing parent directories and raises an OSError when
# the results folder is absent, so create it first (no-op if it already exists).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# index=False: the row index is a plain counter and is not written to the file.
df.to_csv(output_path, index=False)