In [24]:
import os
import pandas as pd
# Repository root, expressed relative to this notebook's working directory.
TOP_DIR = '../../..'
# Input directory the raw CSV is read from, and output directory the
# processed CSV is written to; both are built from TOP_DIR.
RAW_DATA_DIR = '/'.join((TOP_DIR, 'data-raw'))
DATA_DIR = '/'.join((TOP_DIR, 'data'))

In [25]:
# Path of the raw free-school-meals CSV inside the raw-data directory.
fsm_csv_path = f'{RAW_DATA_DIR}/school-pupils-characteristics/data/spc_pupils_fsm.csv'

# Only these columns are loaded; every other column in the CSV is ignored.
fsm_columns = [
    'time_period',
    'region_code',
    'new_la_code',
    'phase_type_grouping',
    'fsm',
    'percent_of_pupils',
]

data = pd.read_csv(fsm_csv_path, usecols=fsm_columns)

In [33]:
# Reshape the raw free-school-meals table into one long table keyed by a single
# `geography_code` column, indexed by `date`, holding one measure (`value`).

# only getting data for the 3 northern regions, then renaming columns.
# `rename` without `inplace` returns a new frame, so `data` is left untouched.
fsm = (
    data.loc[data.region_code.isin(['E12000001', 'E12000002', 'E12000003'])]
    .rename(columns={'time_period': 'date', 'new_la_code': 'la_code'})
)

# the geo codes are split into two columns, so reshaping to merge these into one long column.
# Rows with no LA code are kept under their region code; rows with an LA code
# are kept under that LA code. `region_code` cannot be missing at this point
# because the `isin` filter above already excludes NaN, so only `la_code`
# needs checking.
fsm_regions = (
    fsm[fsm.la_code.isna()]
    .drop(columns='la_code')
    .rename(columns={'region_code': 'geography_code'})
)
fsm_la = (
    fsm[fsm.la_code.notna()]
    .drop(columns='region_code')
    .rename(columns={'la_code': 'geography_code'})
)
fsm = pd.concat([fsm_regions, fsm_la]).set_index('date')
# Rounds every numeric column to 1 decimal place (the `date` index is unaffected).
fsm = fsm.round(1)

# filtering by this measure according to gov.uk https://explore-education-statistics.service.gov.uk/methodology/schools-pupils-and-their-characteristics-methodology
# `assign` builds a new frame rather than setting a column on a `.loc` slice,
# which avoids pandas' SettingWithCopyWarning.
fsm = (
    fsm.loc[fsm['fsm'] == 'known to be eligible for free school meals (used for FSM in Performance Tables)']
    .assign(variable_name='percent_of_pupils')
    .rename(columns={'percent_of_pupils': 'value'})
)

In [34]:
# Write the processed table to the output data directory. The frame's index
# is written as well, since `to_csv` is called with its default settings.
output_path = os.path.join(DATA_DIR, 'school-pupils-characteristics/free_school_meals.csv')
fsm.to_csv(output_path)