In [None]:
import os
import pandas as pd

In [None]:
# Output directory for the processed census tables; the path is relative,
# so it resolves against the notebook's working directory.
OUT_PATH = os.path.join(
    '../..',
    'data',
    'processed',
    'census',
)

In [None]:
# Maps lower-cased raw column headers to the short names used in the rest of
# the notebook (the loader lower-cases the headers before applying this map).
column_name_mapper = dict(
    obs_value='value',
    c2021_eastat_7_name='variable_name',
    c2021_age_7_name='age',
    c_sex_name='gender',
)

Load in the raw data

In [None]:
# Read the raw census extract and normalise it in a single chain:
#   1. lower-case every column header,
#   2. rename the headers listed in `column_name_mapper` to their short names,
#   3. drop the geography/measure descriptor columns that are not used below,
#   4. strip surrounding whitespace from the variable names so later exact
#      string matches on `variable_name` behave as expected.
raw_data = (
    pd.read_csv('../../data/raw/census-employment.csv')
    .rename(columns=str.lower)
    .rename(columns=column_name_mapper)
    .drop(columns=['geography', 'geography_type', 'measures_name'])
    .assign(variable_name=lambda frame: frame['variable_name'].str.strip())
)

Sum the unemployed and economically inactive counts (both excluding full-time students) to produce a NEET-like figure.

In [None]:
# Build a NEET-like series by combining the two categories that exclude
# full-time students: the unemployed and the economically inactive.
fake_neet = (
    raw_data
    .loc[raw_data['variable_name'].isin([
        'Economically active (excluding full-time students): Unemployed',
        'Economically inactive (excluding full-time students)'
    ])]
    # `assign` returns a new frame, so the relabel never writes into a slice
    # of `raw_data` (the original `.loc[:, ...] = ...` on the filtered frame
    # is the pattern that triggers pandas' SettingWithCopyWarning).
    .assign(variable_name='Unemployed or economically inactive and not in full-time education')
    # Both source categories now share one label, so grouping on the key
    # columns collapses each pair of rows into a single summed row.
    # NOTE(review): `.sum()` aggregates every non-key column, not only
    # `value` - confirm no other columns remain in `raw_data` at this point.
    .groupby(['date', 'geography_name', 'geography_code', 'age', 'gender', 'variable_name'])
    .sum()
    .reset_index()
)

Concatenate the raw data and the NEET-like frame, pivot so that each variable name becomes a column, and calculate each variable as a percentage of the Total column. Then melt the result back into long format and save it to `employment-status.csv`.

In [None]:
# Stack the raw rows and the derived NEET-like rows, then pivot to a wide
# table: one row per (date, geography, age, gender) and one column per
# variable name, holding the counts from `value`.
# Note: `pivot` raises ValueError if any (index, variable_name) pair is duplicated.
pivot_index = ['date', 'geography_code', 'geography_name', 'age', 'gender']
data = pd.concat([
    raw_data,
    fake_neet,
]).pivot(index=pivot_index, columns='variable_name', values='value')

# Express every variable as a percentage of the row's 'Total' (1 d.p.), drop
# the now-redundant 'Total' column, and melt back to long format while
# keeping the pivot index on each row.
# NOTE(review): assumes the raw data contains a 'Total' variable - confirm.
rates_long = (
    data
    .div(data['Total'], axis=0)
    .mul(100)
    .round(1)
    .drop(columns='Total')
    .melt(value_name='rate', ignore_index=False)
)

# `to_csv` does not create missing directories, so make sure the output
# folder exists before writing (a no-op when it is already there).
os.makedirs(OUT_PATH, exist_ok=True)
rates_long.to_csv(os.path.join(OUT_PATH, 'employment-status.csv'))