# Mental health (MH) of older workers, retirement age and working conditions

## Preprocessing SHARE data

Load libraries

In [1]:
import os
import sys

# Put the parent directory on the import path so the `utils` imports that follow
# can be resolved. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path each time.
src_path = os.path.abspath("../")
if src_path not in sys.path:
    sys.path.append(src_path)

# Project-local helpers are pulled in with star imports. Names used later in this
# notebook without an explicit import (e.g. `pd`, `import_share_stata1`,
# `share_preprocessing`) therefore have to come from these modules.
# NOTE(review): which module provides which name is not visible from here — confirm
# in `utils` before replacing the star imports with explicit ones.
from utils.common import *
from utils.retirement import *
from utils.share import *

# Called for its side effects only; the return value is discarded.
# NOTE(review): presumably sets up the third-party libraries — confirm in `utils`.
import_libraries()

Preprocess **SHARELIFE data**, apply first filters and create first variables

In [2]:
# Stata files requested for the SHARELIFE import; only wave 7 is read.
file_names = [
    "cv_r.dta",
    "technical_variables.dta",
    "dn.dta",
    "re.dta",
]
sharelife = import_share_stata1(waves=[7], file_names=file_names)

In [3]:
# Rebinds `sharelife` to the preprocessed result, so the raw import is no longer
# reachable under this name. Re-running this cell on its own would feed already
# processed data back into the helper; re-run the import cell first.
sharelife = sharelife_preprocessing(sharelife)

Initial n obs: 63248
Gender, country, 1st year in country - formatted, age 50+ filter - applied
N obs after processing gender and age: 56486
Years of education - calculated
N obs after processing education years: 56486
Current ISCO - identified
N obs after isco job changes: 41945
Years of contribution, 1st year of contribution - calculated
N obs after contribution years: 41945


Preprocess **additional data from SHARE waves 6-8** where an ISCO code is available

In [4]:
# Stata files requested from SHARE waves 6, 7 and 8 for the supplementary sample;
# the loader is asked to convert categoricals.
file_names = ["cv_r.dta", "dn.dta", "ep.dta"]
sharelife_add = import_share_stata1(
    waves=[6, 7, 8],
    file_names=file_names,
    convert_categoricals=True,
)

In [5]:
# The second argument is the already preprocessed SHARELIFE frame; per the log this
# call prints, observations already present in SHARELIFE are dropped from the
# additional sample. Rebinds `sharelife_add`, so re-run the import cell before
# re-running this one.
sharelife_add = sharelife_add_preprocessing(sharelife_add, sharelife)

N obs initial: 192020
N obs dropping missing isco: 10679
N obs after drop already present in Sharelife: 6843
Gender, country, 1st year in country - formatted, age 50+ filter - applied
N obs after gender and age: 3262
Years of education - calculated
N obs after education: 3262
Current ISCO - identified, those changed job - deleted
N obs after job and isco: 2527
Years of contribution, 1st year of contribution - calculated
N obs after contribution years: 2527


In [6]:
# Stack the main SHARELIFE sample and the additional sample row-wise and
# renumber the rows with a fresh 0..n-1 index.
df = pd.concat([sharelife, sharelife_add], ignore_index=True)

Preprocess **main data from SHARE waves 4-6**

In [7]:
# Stata files requested from SHARE waves 4, 5 and 6 for the main panel;
# the loader is asked to convert categoricals.
file_names = [
    "cv_r.dta",
    "dn.dta",
    "ep.dta",
    "ch.dta",
    "gv_health.dta",
    "as.dta",
]
share = import_share_stata1(
    waves=[4, 5, 6],
    file_names=file_names,
    convert_categoricals=True,
)

In [8]:
# `df` at this point is the concatenated SHARELIFE frame (main + additional sample);
# it is passed in alongside the raw wave 4-6 data.
# NOTE(review): how the helper uses `df` is not visible from this notebook — confirm
# in `utils`. Rebinds `share`, so re-run the import cell before re-running this one.
share = share_preprocessing(share, df)

Initial n obs: 192150
Those without ISCO codes - deleted
N obs with ISCO: 74637
N obs after age calculation: 74637
N obs after defining number of children: 74637
Current year, age, number of children and living with a partner - imputed
N obs after leaving only employed: 23465
N obs after deleting special conditions pension: 19572
Currently not working and eligible to special pensions - deleted
N obs after defining industry: 19572
Job status, industry of employment - added
N obs after defining finance: 19572
Household income, investments, life insurance - added
N obs after dropping missing sphus:19561
N obs after dropping missing chronic:19559
N obs after dropping missing eurod:19136
Physical and mental health indicators - added
N obs after health: 19136


In [9]:
# Attach the SHARELIFE columns to the wave 4-6 panel. A left join on `mergeid`
# keeps every row of `share`, whether or not it has a SHARELIFE match.
df = share.merge(right=df, how="left", on="mergeid")

**Final full SHARE dataset**

In [10]:
# Rebinds `df` to the output of the final preprocessing step. Per the log it prints,
# this covers contribution years, data types, retirement age / work horizon and
# weights. Re-running this cell alone would process the already-final frame again.
df = share_final_preprocessing(df)

N obs initial: 19136
Current years of contribution - calculated, those with less 10 years - deleted
Data types - corrected
N obs after data types: 17871
Retirement age, work horizon and work horizon change by reforms - calculated
N obs after work horizon change: 13548
Longitudinal and crossectional weights - added
N obs after weights: 13484


In [11]:
# Distinct countries remaining in the final dataset.
df["country"].unique()

array(['Austria', 'Belgium', 'Czech Republic', 'Switzerland', 'Germany',
       'Denmark', 'Estonia', 'Spain', 'France', 'Hungary', 'Italy',
       'Netherlands', 'Poland', 'Portugal', 'Sweden', 'Slovenia',
       'Luxembourg', 'Greece'], dtype=object)

In [12]:
# Number of distinct `mergeid` values in the final dataset.
df["mergeid"].nunique()

5871

In [13]:
# Distribution of rows per `mergeid`: how many ids appear once, twice, and so on.
df["mergeid"].value_counts().value_counts()

count
1    1922
2    1412
3    1410
4    1127
Name: count, dtype: int64

In [14]:
# Per-country summary statistics of `work_horizon_change_minimum`.
df.groupby("country")["work_horizon_change_minimum"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Austria,972.0,0.993827,0.078365,0.0,1.0,1.0,1.0,1.0
Belgium,1778.0,0.656637,0.828796,0.0,0.0,0.0,1.0,3.0
Czech Republic,826.0,1.075993,1.105002,0.33,0.33,0.5,0.67,4.33
Denmark,911.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Estonia,1411.0,0.465273,0.43254,0.0,0.0,0.5,1.0,1.0
France,883.0,0.087588,0.167087,0.0,0.0,0.0,0.0,1.0
Germany,1427.0,0.005578,0.021291,0.0,0.0,0.0,0.0,0.09
Greece,48.0,0.479167,0.850271,0.0,0.0,0.0,0.25,2.0
Hungary,58.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Italy,885.0,0.959605,1.342112,0.0,0.0,0.0,1.5,6.25


In [15]:
# Save resulting dataset.
# The output directory can be overridden with the MH_RESULTS_DIR environment
# variable so the notebook can be run on other machines. The default is the
# original hard-coded location, so behaviour is unchanged when the variable is
# not set. The directory must already exist; it is not created here.
results_dir = os.environ.get(
    "MH_RESULTS_DIR",
    "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/results",
)
df.to_csv(os.path.join(results_dir, "share_clean_w46.csv"), index=False)