In [1]:
import os
import pandas as pd
import zipfile

In [2]:
def read_csvs_from_zip(zip_path: str, csv_path: str, *args, **kwargs) -> pd.DataFrame:
    with zipfile.ZipFile(zip_path) as zip:
        with zip.open(csv_path) as csv:
            data = pd.read_csv(csv, *args, **kwargs)
    return data

Load `children_in_poverty` data

In [3]:
children_in_poverty = pd.read_excel(
    '../../data/raw/neet-factors/children-in-low-income-families-local-area-statistics-2014-to-2022.ods',
    sheet_name='4_Absolute_Local_Authority',
    skiprows=9,
    usecols=[1, 17],
    names=['geography_code', 'Children in poverty'],
    index_col=0
).iloc[:, 0].mul(100).round(1)

Load `children_looked_after` data

In [4]:
children_looked_after = read_csvs_from_zip(
          '../../data/raw/neet-factors/cla-all.zip',
          'data/cla_number_and_rate_per_10k_children.csv',
          usecols=[
            'time_period',
            'geographic_level',
            'population_count',
            'new_la_code',
            'rate_per_10000'
          ],
          index_col='new_la_code'
        )

children_looked_after = children_looked_after.loc[
  (children_looked_after.geographic_level == 'Local authority') &
  (children_looked_after.population_count == 'Children looked after at 31 March each year') &
  (children_looked_after.time_period == 2022),
  ["rate_per_10000"]
].rename(columns={
  'rate_per_10000': 'Children looked after'
})

Load `health_disability` data

In [12]:
health_disability_2021 = pd.read_csv('../../data/raw/neet-factors/health_disability_2021.csv', index_col='Lower tier local authorities Code')

sum = health_disability_2021.loc[
  (health_disability_2021['Disability (3 categories)'] == 'Disabled under the Equality Act') &
  (health_disability_2021['Age (C) (4 categories)'].isin(['Aged 15 years and under', 'Aged 16 to 24 years' ])),
  "Observation"
].groupby('Lower tier local authorities Code').sum()

total = health_disability_2021.loc[
  (health_disability_2021['Age (C) (4 categories)'].isin(['Aged 15 years and under', 'Aged 16 to 24 years' ])),
  "Observation"
].groupby('Lower tier local authorities Code').sum()

health_disability_2021 = (sum/total).mul(100).to_frame('Disability (age < 25)')

In [52]:
family_disability_2021 = pd.read_csv(
  filepath_or_buffer='../../data/raw/neet-factors/family_disability_2021.csv',
  index_col='Lower tier local authorities Code'
)

sum = family_disability_2021.loc[
  family_disability_2021['Disability - Equality act disabled (4 categories) Code'].isin([1]),
  "Observation"
].groupby('Lower tier local authorities Code').sum()

total = family_disability_2021.loc[
  :,
  "Observation"
].groupby('Lower tier local authorities Code').sum()

family_disability_2021 = (sum/total).mul(100).to_frame('Disability (age < 25)')

Load `family_disability_2021` data

Create base data frame

In [39]:
local_authorities = pd.read_csv('../../data/reference/local_authorities.csv', index_col=[0])

Collate all layers into a single file

In [40]:
data = (
  local_authorities
    .join(children_in_poverty)
    .join(children_looked_after)
    .join(health_disability_2021)
    .join(family_disability_2021)
    .set_index(['Local Authority Name', 'Group'], append=True)
  )

Save to a CSV file

In [41]:
SOURCES_CSV='../../data/processed/yff/neet-factors-sources.csv'
os.makedirs(os.path.dirname(SOURCES_CSV), exist_ok=True)
data.melt(ignore_index=False).to_csv(SOURCES_CSV)

In [42]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Children in poverty,Children looked after,Disability (age < 25)
Local Authority Code,Local Authority Name,Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
E06000001,Hartlepool,"Services, Manufacturing and Mining Legacy",21.7,155,10.124733
E06000002,Middlesbrough,Manufacturing Traits,31.8,150,8.978449
E06000003,Redcar and Cleveland,"Services, Manufacturing and Mining Legacy",22.0,125,10.040150
E06000004,Stockton-on-Tees,"Services, Manufacturing and Mining Legacy",19.0,133,9.688022
E06000005,Darlington,"Services, Manufacturing and Mining Legacy",20.5,123,9.393366
...,...,...,...,...,...
E09000029,Sutton,Suburban Traits,8.3,48,7.783485
E09000030,Tower Hamlets,London Cosmopolitan,21.8,52,6.975956
E09000031,Waltham Forest,Ethnically Diverse Metropolitan Living,16.8,53,5.819617
E09000032,Wandsworth,London Cosmopolitan,9.4,41,6.468367


In [43]:
children_looked_after


Unnamed: 0_level_0,Children looked after
new_la_code,Unnamed: 1_level_1
E06000001,155
E06000002,150
E06000003,125
E06000004,133
E06000005,123
...,...
E09000024,26
E09000026,37
E09000027,29
E09000029,48


In [44]:
data.columns

Index(['Children in poverty', 'Children looked after',
       'Disability (age < 25)'],
      dtype='object')