# Add working conditions data from EWCS

### Load libraries

In [1]:
import pandas as pd
import pyreadstat

### Load data

In [2]:
df, meta = pyreadstat.read_dta(
    "/Users/alexandralugova/Documents/GitHub/MH-old-workers/data/datasets/EWCS 1991-2015 UKDA ISCO.DTA"
)
variable_labels = meta.column_labels

### Choose only necessary data, tranform where needed

Identify and name the aggregated indexes for work conditions

In [None]:
labels_to_search = [
    "JQI Monthly earnings",
    "JQI Skills and discretion index",
    "JQI social environment",
    "JQI Physical environment index",
    "JQI Intensity index",
    "JQI Prospects index",
    "JQI Working time quality index",
]

# Iterate through metadata to find labels and print corresponding variable names
for index, label in enumerate(meta.column_labels):
    if label in labels_to_search:
        print(label, ":", meta.column_names[index])

In [None]:
df = df.rename(
    columns={
        "adincome_mth": "jqi_monthly_earnings",
        "wq": "jqi_skills_discretion",
        "goodsoc": "jqi_social_environment",
        "envsec": "jqi_physical_environment",
        "intens": "jqi_intensity",
        "prosp": "jqi_prospects",
        "wlb": "jqi_working_time_quality",
    }
)

Choose columns

In [19]:
df = df[
    [
        "countid",
        "year",
        "ISCO_08",
        "jqi_monthly_earnings",
        "jqi_skills_discretion",
        "jqi_social_environment",
        "jqi_physical_environment",
        "jqi_intensity",
        "jqi_prospects",
        "jqi_working_time_quality",
    ]
]

Define country names and choose the needed

In [21]:
countid_mapping = meta.value_labels["COUNTID"]
df["countid"] = df["countid"].map(countid_mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['countid'] = df['countid'].map(countid_mapping)


In [23]:
countries = [
    "Austria",
    "Belgium",
    "Czech Republic",
    "Denmark",
    "Estonia",
    "France",
    "Germany",
    "Italy",
    "Slovenia",
    "Spain",
    "Switzerland",
]
df = df[df["countid"].isin(countries)].reset_index(drop=True)

Leave only waves 5 and 6 (2010 and 2015)

In [25]:
df = df[df.year >= 2010].reset_index(drop=True)

Drop lines with missing isco codes

In [34]:
df = df.dropna(subset="ISCO_08").reset_index(drop=True)

Rename some variables

In [36]:
df = df.rename(columns={"countid": "country", "ISCO_08": "isco"})

Calculate social environment, job intensity, job prospects and working time quality indexes for 2010

In [37]:
df

Unnamed: 0,country,year,isco,jqi_monthly_earnings,jqi_skills_discretion,jqi_social_environment,jqi_physical_environment,jqi_intensity,jqi_prospects,jqi_working_time_quality
0,Belgium,2015,8141,1530.153076,8.536879,94.444443,66.666664,31.759258,25.000000,69.12500
1,Belgium,2015,5141,,64.617447,,91.025642,13.148149,50.000000,84.50000
2,Belgium,2015,1323,1800.180054,76.372818,,76.923080,27.037039,50.000000,56.31250
3,Belgium,2015,7115,1575.157593,42.832993,97.916664,98.717949,9.444445,62.500000,86.84375
4,Belgium,2015,8322,1278.127930,30.789835,74.166664,97.435898,30.833334,41.666664,74.28125
...,...,...,...,...,...,...,...,...,...,...
34701,Slovenia,2010,9111,99.932480,41.638947,,84.615387,,,
34702,Slovenia,2010,5131,624.578003,48.814400,,84.615387,,,
34703,Slovenia,2010,5141,437.204590,60.634476,,80.769234,,,
34704,Slovenia,2010,3113,749.493591,89.690376,,97.435898,,,


Aggregate on the level of isco, year and country