In [6]:
import os
import pandas as pd

from ons import get_sheet_names, detect_id_row, get_headers, clean_age_column, clean_measure_column, clean_measure_type_column, add_group, load_data, create_date_column, melt_table

In [7]:
# Path to the raw A06 workbook (.xls), relative to the notebook's working directory.
# NOTE(review): presumably the ONS A06 labour-market tables — confirm the source.
a06_file = '../../data/raw/a06-latest.xls'

Get a list of sheets

In [8]:
# Sheet names for the workbook; later cells iterate over sheets[1:], skipping the first entry.
# NOTE(review): presumably the first sheet holds no data series — confirm against the workbook.
sheets = get_sheet_names(a06_file)

Find the row with the IDs

In [9]:
# Detected once for the whole file and then passed to both the header and data loaders for every sheet.
# NOTE(review): assumes the ID row sits at the same position on each sheet — confirm.
id_row = detect_id_row(a06_file)

Get the headers for every sheet except the first, based on the ID row

In [10]:
# Build a single header table covering every sheet except the first.
# Each sheet's headers are read using the shared ID row, passed through the
# age / measure / measure-type cleaning helpers, and then handed to add_group
# together with the sheet name. The per-sheet tables are stacked and ordered
# by index; that index is what the data cell later joins 'variable_name' to.
headers = pd.concat(
    objs=[
        get_headers(a06_file, sheet_name, id_row=id_row)
        .pipe(clean_age_column)
        .pipe(clean_measure_column)
        .pipe(clean_measure_type_column)
        .pipe(add_group, sheet_name)
        for sheet_name in sheets[1:]
    ]
).sort_index()

Save the metadata file

In [11]:
# Destination for the combined header metadata, relative to the notebook's working directory.
METADATA_FILE = '../../data/metadata/labour-market/a06-codes.csv'
# Create the parent folder if it is missing; exist_ok=True keeps re-runs of this cell from failing.
os.makedirs(name=os.path.dirname(METADATA_FILE), exist_ok=True)
# The index is written out (pandas default): it is the key the data cell joins 'variable_name' against.
headers.to_csv(path_or_buf=METADATA_FILE)

Load and reshape the data for every sheet except the first, then attach the header metadata

In [12]:
# Assemble the combined dataset from every sheet except the first.
# Each sheet is loaded with the shared ID row, given a date column, melted,
# and then joined to the header metadata by matching 'variable_name' against
# the headers index. The join is an inner join (the pandas default, spelled
# out here), so rows whose variable has no header entry are dropped.
# Finally the per-sheet frames are stacked and ordered by date, then variable.
data = pd.concat(
    objs=[
        load_data(a06_file, sheet_name, id_row)
        .pipe(create_date_column)
        .pipe(melt_table)
        .merge(headers, how='inner', left_on='variable_name', right_index=True)
        for sheet_name in sheets[1:]
    ]
).sort_values(['date', 'variable_name'])

Save the data file

In [13]:
# Output location for the combined, reshaped dataset, relative to the notebook's working directory.
DATA_FILE = '../../data/processed/labour-market/a06.csv'
# Create the parent folder if it is missing; exist_ok=True keeps re-runs of this cell from failing.
os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
# Leave the row index out of the CSV. Use the documented boolean (index=False)
# rather than index=None, which only worked because None happens to be falsy.
data.to_csv(DATA_FILE, index=False)