# Mental health (MH) of older workers, retirement age and working conditions

## Preprocessing SHARE data

Load libraries

In [1]:
import os
import sys

# Add the parent of the current working directory to sys.path so that the
# `utils` imports below can resolve (assumes the `utils` package lives there).
src_path = os.path.abspath("../")
sys.path.append(src_path)

# NOTE(review): star imports hide where later names come from (e.g. `pd` and the
# preprocessing helpers are used below without an explicit import in this notebook).
from utils.common import *
from utils.retirement import *
from utils.share import *

# Project helper brought in by the star imports above; presumably loads the
# third-party libraries used by the notebook — TODO confirm.
import_libraries()

Preprocess **SHARELIFE data**, apply the initial filters and create the initial variables

In [2]:
# Stata files to read for the SHARELIFE step; only wave 7 is requested.
file_names = ["cv_r.dta", "technical_variables.dta", "dn.dta", "re.dta"]
sharelife = import_share_stata1(file_names=file_names, waves=[7])

In [3]:
# Rebinds `sharelife` to the preprocessed result. Re-running this cell on its own
# would feed already-preprocessed data back in, so re-run the import cell first.
sharelife = sharelife_preprocessing(sharelife)

Gender, country, 1st year in country - formatted, age 50+ filter - applied
Years of education - calculated
Current ISCO - identified, those changed job - deleted
Years of contribution, 1st year of contribution - calculated
Those worked less than 10 years / started work before age of 10 - deleted


Preprocess **additional data from SHARE waves 6-8**, where ISCO codes are available

In [4]:
# Stata files to read for the additional data; waves 6, 7 and 8 are requested.
# Note: `file_names` is reused (overwritten) from the SHARELIFE import cell.
file_names = ["cv_r.dta", "dn.dta", "ep.dta"]
sharelife_add = import_share_stata1(file_names=file_names, waves=[6, 7, 8])

In [5]:
# The already-preprocessed `sharelife` frame is passed as a second argument; how the
# helper uses it is defined in the project code and is not visible here.
# Rebinds `sharelife_add`, so re-run the import cell above before re-running this one.
sharelife_add = sharelife_add_preprocessing(sharelife_add, sharelife)

Gender, country, 1st year in country - formatted, age 50+ filter - applied
Years of education - calculated
Current ISCO - identified, those changed job - deleted
Years of contribution, 1st year of contribution - calculated
Those worked less than 10 years / started work before age of 10 - deleted


In [6]:
# Stack the main and additional datasets row-wise under a fresh 0..n-1 index
df = pd.concat([sharelife, sharelife_add], ignore_index=True)

Preprocess **main data from SHARE waves 4-6**

In [7]:
# Stata files to read for the main SHARE data; waves 4, 5 and 6 are requested.
file_names = ["cv_r.dta", "dn.dta", "ep.dta", "ch.dta", "gv_health.dta", "as.dta"]
# Unlike the two earlier imports, this call passes convert_categoricals=True
# (presumably forwarded to the Stata reader — TODO confirm in import_share_stata1).
share = import_share_stata1(
    file_names=file_names, waves=[4, 5, 6], convert_categoricals=True
)

In [8]:
# `df` (the concatenated SHARELIFE frames) is passed as a second argument; how the
# helper uses it is not visible here. Rebinds `share`, so this cell is not
# idempotent: re-run the import cell above before re-running it.
share = share_preprocessing(share, df)

Those without ISCO codes - deleted
Current year, age, number of children and living with a partner - imputed
Currently not working and eligible to special pensions - deleted
Job status, industry of employment - added
Household income, investments, life insurance - added
Physical and mental health indicators - added


In [9]:
# Attach the Sharelife columns to every SHARE row, matching on the respondent id;
# a left join keeps all rows of `share`
df = pd.merge(left=share, right=df, how="left", on=["mergeid"])

Final preprocessing for **full SHARE dataset**

In [10]:
# Rebinds `df` to the fully preprocessed dataset. This cell is not idempotent:
# re-run the merge cell above before re-running it.
df = share_final_preprocessing(df)

Current years of contribution - calculated
Data types - corrected
Retirement age, work horizon and work horizon change by reforms - calculated
Longitudinal weights imputed in STATA - added


In [11]:
# Sanity check: number of rows flagged as duplicates (expected to be 0).
df.duplicated().sum()

0

In [12]:
# Visual inspection of the final dataset (the rendered output is truncated to the
# first and last rows and columns).
df

Unnamed: 0,mergeid,hhid,wave,year,age,nb_children,nb_grandchildren,partnerinhh,job_status,industry,...,yr1country,yrseducation,isco,yr1contribution,yrscontribution,retirement_age,work_horizon,work_horizon_change,dw_w4,my_wgt
0,AT-009262-01,AT-009262-A,4,2011,56.0,1.0,2.0,1,Self-employed,Wholesale and retail trade,...,1955.0,12.0,5211,1973,39.0,60.0,4.0,0.0,679.236943,1173.063477
1,AT-009376-01,AT-009376-A,4,2011,53.0,2.0,0.0,1,Civil servant,"Transport, storage and communication",...,1958.0,3.0,3512,1973,39.0,60.0,7.0,0.0,667.741527,1417.228760
2,AT-011070-01,AT-011070-A,4,2011,59.0,2.0,0.0,1,Civil servant,Education,...,1952.0,14.0,2330,1974,38.0,60.0,1.0,0.0,323.719923,559.074463
3,AT-018536-01,AT-018536-A,4,2011,51.0,1.0,1.0,0,Employee,Manufacturing,...,1960.0,11.0,7211,1975,37.0,60.0,9.0,0.0,719.208342,1526.463013
4,AT-020977-02,AT-020977-A,4,2011,57.0,2.0,0.0,1,Self-employed,Other community,...,1954.0,6.0,2642,1979,20.0,60.0,3.0,0.0,559.672543,966.572083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5589,SI-982513-01,SI-982513-A,6,2015,58.0,2.0,0.0,0,Self-employed,Manufacturing,...,1957.0,13.0,2146,1977,39.0,60.0,2.0,0.0,79.721030,302.864777
5590,SI-983385-01,SI-983385-A,6,2015,60.0,2.0,2.0,1,Self-employed,"Agriculture, hunting, forestry, fishing",...,1955.0,16.0,5211,1985,30.0,65.0,5.0,0.0,106.589878,404.941071
5591,SI-991974-01,SI-991974-A,6,2015,58.0,0.0,0.0,1,Public sector employee,Other community,...,1957.0,11.0,9129,1976,21.0,65.0,7.0,0.0,76.036545,288.918274
5592,SI-993692-01,SI-993692-A,6,2015,55.0,2.0,0.0,1,Self-employed,Other community,...,1960.0,16.0,2353,1984,32.0,63.0,8.0,0.0,112.548171,427.652557


In [13]:
# Save the resulting dataset as CSV, without the row index.
# The destination can be overridden through the SHARE_CLEAN_CSV environment variable
# so the notebook can run on machines other than the author's. When the variable is
# unset or empty, the original location is used unchanged.
output_path = os.environ.get("SHARE_CLEAN_CSV") or (
    "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/results/share_clean_w456.csv"
)
df.to_csv(output_path, index=False)