# MH of older workers, retirement age and working conditions

## Preprocessing EWCS data

Load libraries

In [2]:
import os
import sys

# Make the parent of the current working directory importable so that the
# local `utils` package (imported just below) can be found.
src_path = os.path.abspath("../")
sys.path.append(src_path)

# NOTE(review): star imports hide where names come from. `pd` and the
# `ewcs_preprocessing` / `*_index` helpers used further down are never imported
# explicitly, so they presumably arrive through these two imports or through
# `import_libraries()` - confirm, and consider explicit imports.
from utils.common import *
from utils.ewcs import *

import_libraries()
import pyreadstat  # provides `read_dta`, used to load the Stata files

Load data

In [2]:
# Root folder of the project datasets. Set the MH_DATA_DIR environment variable
# to run the notebook on another machine; the fallback is the location that
# used to be hard-coded in every path, so the default behaviour is unchanged.
DATA_DIR = os.environ.get(
    "MH_DATA_DIR",
    "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets",
)

# `read_dta` returns the data frame together with the file's metadata object.
ewcs, meta_ewcs = pyreadstat.read_dta(
    os.path.join(DATA_DIR, "EWCS 1991-2015 UKDA ISCO.DTA")
)
# Separate 2010 file; it is handed to the index helpers alongside `ewcs`.
df10, meta_df10 = pyreadstat.read_dta(
    os.path.join(
        DATA_DIR,
        "UKDA-2010",
        "stata",
        "stata11",
        "ewcs_2010_version_ukda_6_dec_2011.dta",
    )
)

Apply some initial formatting

In [3]:
# Replace the raw frame with the output of `ewcs_preprocessing` (defined outside
# this notebook, presumably in `utils.ewcs`; its steps are not visible here).
# NOTE(review): `ewcs` is overwritten, so re-running this cell feeds already
# processed data back into the helper - re-run the load cell first.
ewcs = ewcs_preprocessing(ewcs, meta_ewcs)

Calculate social environment index

In [4]:
# Compute the social-environment index and attach it to the main frame with a
# left join on respondent id and survey year.
soc = social_environment_index(ewcs, df10)
merge_keys = ["id", "year"]
ewcs = ewcs.merge(right=soc, how="left", on=merge_keys)

JQI social environment
        count       mean        std        min        25%        50%  \
year                                                                   
2010  30836.0  82.983566  14.530474  12.500000  75.000000  87.500000   
2015  32121.0  81.258367  15.335162   4.545455  72.727273  81.818182   

            75%    max  
year                    
2010  93.750000  100.0  
2015  90.909091  100.0  


Calculate prospects index

In [5]:
# Compute the prospects index and attach it to the main frame with a left join
# on respondent id and survey year.
pro = prospects_index(ewcs, df10)
merge_keys = ["id", "year"]
ewcs = ewcs.merge(right=pro, how="left", on=merge_keys)

JQI prospects
        count       mean        std  min   25%   50%   75%    max
year                                                             
2010  38556.0  55.655086  23.024819  0.0  37.5  50.0  75.0  100.0
2015  75016.0  58.081776  24.106244  0.0  50.0  62.5  75.0  100.0


Calculate the intensity index (a higher value means lower work intensity, i.e. the scale is inverted relative to the original index)

In [6]:
# The result is named `intensity`, not `int`: binding it to `int` would shadow
# the built-in type for every cell executed afterwards in this kernel.
intensity = intensity_index(ewcs, df10)
# Left join on respondent id and survey year, like the other index merges.
ewcs = ewcs.merge(intensity, on=["id", "year"], how="left")

JQI intensity
        count       mean        std       min        25%        50%  \
year                                                                  
2010  27063.0  56.353951  15.846348  4.651163  46.511628  58.139535   
2015  29742.0  46.339840  13.663985  2.325581  37.209302  46.511628   

            75%        max  
year                        
2010  67.441860  95.348837  
2015  55.813953  83.720930  


Add the original slim intensity index (`intens_slim`) under the name `jqi_intensity_slim`

In [7]:
# Give the existing slim intensity column the common "jqi" prefix so that the
# prefix-based column selections in later cells pick it up.
ewcs = ewcs.rename({"intens_slim": "jqi_intensity_slim"}, axis="columns")

Weight the indexes by multiplying each one by the design weight (`w4`)

In [8]:
# Base (unweighted) index columns. Columns that already end in "_w" are skipped
# so that re-running this cell does not create "..._w_w" columns.
indexes = [
    col for col in ewcs.columns if col.startswith("jqi") and not col.endswith("_w")
]

# Multiply every index by the design weight "w4", row by row.
# NOTE(review): the products are no longer bounded by 0-100, and a plain mean of
# these columns equals a weighted mean only if the weights average to 1 within
# each group - confirm that this is the intended weighting. The min-max rescale
# that was sketched here in commented-out form has been dropped as dead code.
for index in indexes:
    ewcs[f"{index}_w"] = ewcs[index] * ewcs["w4"]

Calculate the overall work quality index, unweighted and weighted

In [9]:
# Add the overall work-quality columns: the `describe()` output further down
# shows `jqi_sum` and `jqi_sum_w` once this call has run.
# NOTE(review): how the components are combined is decided inside
# `sum_wq_index`, which is not visible here (presumably `utils.ewcs`) - confirm.
ewcs = sum_wq_index(ewcs)

JQI working quality index (no weights applied)
count    32404.000000
mean        64.073738
std         14.215559
min          0.000000
25%         55.306274
50%         65.647126
75%         74.489796
max        100.000000
Name: jqi_sum, dtype: float64


In [18]:
# Summary statistics for every index column (names starting with "jqi").
jqi_columns = [name for name in ewcs.columns if name.startswith("jqi")]
ewcs[jqi_columns].describe()

Unnamed: 0,jqi_monthly_earnings,jqi_skills_discretion,jqi_physical_environment,jqi_intensity_slim,jqi_working_time_quality,jqi_social_environment,jqi_prospects,jqi_intensity,jqi_monthly_earnings_w,jqi_skills_discretion_w,jqi_physical_environment_w,jqi_intensity_slim_w,jqi_working_time_quality_w,jqi_social_environment_w,jqi_prospects_w,jqi_intensity_w,jqi_sum,jqi_sum_w
count,65202.0,87447.0,87396.0,87074.0,87474.0,62843.0,75016.0,43095.0,65202.0,87447.0,87396.0,87074.0,87474.0,62843.0,75016.0,43095.0,32404.0,32404.0
mean,1292.053881,54.523368,82.444204,41.405899,80.877826,82.103914,58.081776,52.436397,1308.800347,55.222525,82.287359,42.16492,80.765759,83.221899,59.240747,53.229981,64.073738,426.540919
std,894.318605,21.352796,15.171728,25.429236,25.474538,14.969458,24.106244,15.867991,1321.659853,41.771544,51.353039,39.29712,55.504969,51.794004,45.633881,35.91451,14.215559,256.389994
min,0.061566,0.0,0.0,0.0,0.0,4.545455,0.0,2.325581,0.061566,0.0,0.0,0.0,0.0,3.419318,0.0,1.118388,0.0,40.274174
25%,678.17511,39.495897,74.358971,20.0,75.0,75.0,50.0,41.860465,504.532195,26.543939,46.110233,15.165782,42.246874,47.048745,28.552163,28.620025,55.306274,242.474423
50%,1119.056641,56.655251,85.897438,40.0,92.625,86.363636,62.5,53.488372,927.416344,44.496044,69.355974,31.757358,67.584193,70.259827,47.922033,43.877567,65.647126,363.010284
75%,1672.594849,71.136307,93.589745,61.111111,100.0,93.75,75.0,65.116279,1669.3191,72.333791,103.227782,56.798218,104.515792,104.282914,77.494812,67.557572,74.489796,536.864293
max,16216.21582,100.0,100.0,100.0,100.0,100.0,100.0,95.348837,39009.339588,557.253507,579.118,509.930977,589.389375,579.118,569.1671,379.659558,100.0,3098.650791


Aggregate by country and ISCO code (4-digit and 3-digit)

In [10]:
indexes = [col for col in ewcs.columns if col.startswith("jqi")]
ewcs = ewcs[["country", "isco"] + indexes]  # keep grouping keys and indexes only


def _aggregate_by_country_isco(frame):
    """Average every index within each (country, isco) cell.

    Survey years are pooled, because year is not a grouping key. Cells with a
    missing value in any index are dropped and the row index is renumbered.
    """
    cell_means = frame.groupby(["country", "isco"]).mean().reset_index()
    return cell_means.dropna().reset_index(drop=True)


# 4 digits
ewcs_4digits = _aggregate_by_country_isco(ewcs)

# 3 digits: drop the last ISCO digit on a copy. `ewcs` itself is left untouched,
# so re-running this cell does not truncate the codes a second time and no
# assignment is made on a column-sliced frame.
ewcs_3digits = _aggregate_by_country_isco(ewcs.assign(isco=ewcs["isco"] // 10))

Delete ISCO codes with fewer than 10 observations (rows of the country-level aggregated data, i.e. countries in which the code appears)

In [11]:
MIN_ROWS_PER_ISCO = 10  # an ISCO code must occur in at least this many rows


def _drop_rare_isco(frame, min_rows=MIN_ROWS_PER_ISCO):
    """Keep only the ISCO codes that occur in at least `min_rows` rows of `frame`.

    The aggregated frames hold one row per (country, isco) pair, so the count of
    a code is the number of countries it appears in, not the number of
    respondents.
    """
    rows_per_isco = frame["isco"].value_counts()
    frequent_codes = rows_per_isco[rows_per_isco >= min_rows].index
    return frame[frame["isco"].isin(frequent_codes)].reset_index(drop=True)


# NOTE(review): the totals 436 and 130 in the messages are hard-coded;
# presumably the number of distinct codes before filtering - confirm.
# 4 digits
ewcs_4digits = _drop_rare_isco(ewcs_4digits)
print(f"4 digit isco codes: {ewcs_4digits.isco.nunique()} out of 436")
# 3 digits
ewcs_3digits = _drop_rare_isco(ewcs_3digits)
print(f"3 digit isco codes: {ewcs_3digits.isco.nunique()} out of 130")

4 digit isco codes: 301 out of 436
3 digit isco codes: 120 out of 130


In [12]:
# Share of 4-digit ISCO codes retained, computed from the filtered frame rather
# than copied by hand from the printed count (436 is the total quoted there).
ewcs_4digits["isco"].nunique() / 436

0.6903669724770642

In [13]:
# Share of 3-digit ISCO codes retained, computed from the filtered frame rather
# than copied by hand from the printed count (130 is the total quoted there).
ewcs_3digits["isco"].nunique() / 130

0.9230769230769231

Final steps

In [14]:
# Results folder inside the datasets directory. The MH_DATA_DIR environment
# variable overrides the dataset root; the fallback is the location that used
# to be hard-coded here, so the default output paths are unchanged.
results_dir = os.path.join(
    os.environ.get(
        "MH_DATA_DIR",
        "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets",
    ),
    "results",
)
os.makedirs(results_dir, exist_ok=True)  # first run on a new machine

# Save one CSV per aggregation level, without the row index.
for digits, frame in (("4digits", ewcs_4digits), ("3digits", ewcs_3digits)):
    frame.to_csv(
        os.path.join(results_dir, f"work_quality_indexes_country_{digits}.csv"),
        index=False,
    )

In [5]:
# Re-load the 3-digit file from the results folder. The MH_DATA_DIR environment
# variable overrides the dataset root; the fallback is the location that used
# to be hard-coded here, so the default path is unchanged.
results_dir = os.path.join(
    os.environ.get(
        "MH_DATA_DIR",
        "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets",
    ),
    "results",
)
df = pd.read_csv(
    os.path.join(results_dir, "work_quality_indexes_country_3digits.csv")
)

In [6]:
# Summary statistics of the re-loaded country x 3-digit-ISCO file.
df.describe()

Unnamed: 0,isco,jqi_monthly_earnings,jqi_skills_discretion,jqi_physical_environment,jqi_intensity_slim,jqi_working_time_quality,jqi_social_environment,jqi_prospects,jqi_intensity,jqi_monthly_earnings_w,jqi_skills_discretion_w,jqi_physical_environment_w,jqi_intensity_slim_w,jqi_working_time_quality_w,jqi_social_environment_w,jqi_prospects_w,jqi_intensity_w,jqi_sum,jqi_sum_w
count,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0,3446.0
mean,472.791642,1401.792425,57.406258,82.418325,43.372117,82.475219,82.788918,59.612155,52.644003,1477.713578,60.419001,85.848572,45.956389,85.888083,87.104222,63.106655,55.672988,63.86079,445.104321
std,257.791423,716.885553,15.521725,10.135572,12.761427,13.594434,6.88528,12.740693,9.051601,924.038247,25.154956,26.296442,19.968486,28.450239,27.233036,23.595679,22.418293,10.769736,181.032944
min,100.0,143.447372,11.529144,36.217949,6.666667,16.625,43.75,0.0,6.976744,163.451552,7.871451,16.088718,3.03078,13.321457,21.796524,0.0,6.036474,7.064369,71.956141
25%,243.0,871.476917,45.086337,76.119439,35.0,75.826471,78.863636,51.136364,47.093023,831.055509,41.955901,67.860777,32.606265,66.945652,69.437907,46.945301,40.916232,56.548283,327.301197
50%,412.0,1270.805254,57.867381,84.469511,43.19224,85.667688,83.096591,60.0,52.52508,1278.882221,56.472838,82.993326,43.662104,83.460374,83.747404,60.168951,52.049967,64.924263,418.054773
75%,731.0,1762.416361,70.839485,90.780117,51.296297,92.625,87.463914,68.181818,58.139535,1873.062743,75.41153,100.421155,55.956724,102.468957,101.7132,75.974322,65.931414,71.927919,529.85101
max,962.0,6309.148438,97.842735,100.0,100.0,100.0,100.0,100.0,90.697674,6955.451291,238.865552,307.781727,194.579223,333.4302,333.599027,282.2761,236.324177,94.777364,1716.446564
