In [7]:
import os
import pandas as pd

In [2]:
# Load the 'Local Authority Data' sheet of the NatCen workbook.
raw = pd.read_excel(
    '../../data/reference/Copy of NatCen 2023 - Administrative data on factors associated with youth NEET rates - Dataset.xlsx',
    sheet_name='Local Authority Data',
)

# Move the identifying columns into the index first, so that only the
# remaining (factor) column labels have surrounding whitespace stripped.
raw = raw.set_index(['Local Authority Code', 'Local Authority Name', 'Group'])
raw.columns = raw.columns.str.strip()

In [3]:
def z_score(series: pd.Series):
    """Standardise ``series``: subtract its mean, then divide by its standard deviation.

    Both statistics come from pandas' defaults (``Series.mean`` and
    ``Series.std``), so the result has the same index as the input.
    """
    centre = series.mean()
    spread = series.std()
    return series.sub(centre).div(spread)

def construct_weights(df, weighting=2, key_metrics=None):
    """Build a per-column weight vector for ``df``.

    Every column starts with a weight of 1; columns whose label appears in
    ``key_metrics`` receive ``weighting`` instead. Labels in ``key_metrics``
    that are not columns of ``df`` are silently ignored.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose column labels index the returned weights.
    weighting : int or float, default 2
        Weight given to the key metrics.
    key_metrics : iterable of column labels, optional
        Columns to emphasise. When omitted, the five factors listed in the
        body are used.

    Returns
    -------
    pd.Series
        Weights indexed by ``df.columns``.
    """
    if key_metrics is None:
        key_metrics = (
            'Qualification below level 2 (age 16-24)',
            'Pupils with SEN support',
            'Disability (age < 25)',
            'IMD Health',
            'Economic inactivity (NEET)',
        )

    w = pd.Series(1, index=df.columns)
    is_key = w.index.isin(list(key_metrics))

    if isinstance(weighting, float) and not weighting.is_integer() and is_key.any():
        # A fractional weight cannot be stored in the integer Series; cast
        # explicitly rather than relying on implicit upcasting during
        # assignment, which pandas has deprecated.
        w = w.astype(float)

    w.loc[is_key] = weighting
    return w

def weighted_average(df, column_weights):
    """Return, for each row, the weighted sum of its columns divided by the total weight.

    ``column_weights`` is a Series keyed by column label; it is turned into a
    dict so that each column of ``df`` is multiplied by its own weight.
    """
    total_weight = column_weights.sum()
    multipliers = column_weights.to_dict()
    contributions = df.mul(multipliers) / total_weight
    return contributions.sum(axis=1)

def score(df):
    """Append the summary score columns to ``df``.

    The returned frame holds the original columns followed by:

    * ``'Total Score'`` - the plain row sum;
    * ``'Weighted scores (double)'`` - weighted average with key metrics weighted 2;
    * ``'Weighted scores (triple)'`` - weighted average with key metrics weighted 3.
    """
    total = df.sum(axis=1).rename('Total Score')

    weighted_columns = [
        weighted_average(df, construct_weights(df, factor)).rename(label)
        for factor, label in (
            (2, 'Weighted scores (double)'),
            (3, 'Weighted scores (triple)'),
        )
    ]

    return pd.concat([df, total, *weighted_columns], axis=1)

# Standardise every factor column of `raw`, then append the score columns.
data = score(raw.apply(z_score))

In [8]:
# Make sure the output folder exists, then write the scores in long format
# (one row per index entry and variable), keeping the index in the CSV.
os.makedirs('../../data/processed/yff/', exist_ok=True)
(
    data
    .melt(ignore_index=False)
    .to_csv('../../data/processed/yff/neet-factors.csv')
)