In [53]:
import os
import pandas as pd
# Base directory (a relative path, three levels up) from which the raw and
# processed data folders below are derived.
TOP_DIR = '../../..'
RAW_DATA_DIR = TOP_DIR + '/data-raw'
DATA_DIR = TOP_DIR + '/data'

In [54]:
# Read the free-school-meals pupil CSV from the raw data folder, loading only
# the listed columns.
spc_fsm_csv = f'{RAW_DATA_DIR}/school-pupils-characteristics/data/spc_pupils_fsm.csv'
wanted_columns = [
    'time_period',
    'region_code',
    'new_la_code',
    'phase_type_grouping',
    'fsm',
    'percent_of_pupils',
]
data = pd.read_csv(spc_fsm_csv, usecols=wanted_columns)

In [55]:
# Keep only the rows for the 3 northern regions and give the date and
# local-authority columns their output names.
northern_region_codes = ['E12000001', 'E12000002', 'E12000003']
fsm = (
    data.loc[data['region_code'].isin(northern_region_codes)]
    .rename(columns={'time_period': 'date', 'new_la_code': 'la_code'})
)

# The geography codes live in two columns (region and local authority).
# Split the rows into region-level rows (no la_code) and LA-level rows
# (la_code present), collapse each to a single `geography_code` column,
# then stack them into one long table.
region_known = ~fsm['region_code'].isna()
la_missing = fsm['la_code'].isna()

fsm_regions = (
    fsm.loc[region_known & la_missing]
    .drop(columns='la_code')
    .rename(columns={'region_code': 'geography_code'})
)
fsm_la = (
    fsm.loc[region_known & ~la_missing]
    .drop(columns='region_code')
    .rename(columns={'la_code': 'geography_code'})
)

# Index by date and round the numeric columns to one decimal place.
fsm = pd.concat([fsm_regions, fsm_la]).set_index('date').round(1)

# Filter to this measure, following the gov.uk methodology:
# https://explore-education-statistics.service.gov.uk/methodology/schools-pupils-and-their-characteristics-methodology
is_selected_measure = (
    fsm['fsm'] == 'known to be eligible for free school meals (used for FSM in Performance Tables)'
)
fsm = fsm.loc[is_selected_measure].rename(columns={'percent_of_pupils': 'value'})

# Label what the `value` column holds.
fsm['variable_name'] = 'percent_of_pupils'



In [56]:
# @TODO Temporarily getting rid of any geo codes that don't have all years.
# Will need some sort of fill-na, or speak to Stuart.

# A geography code is kept only when it contributes exactly this many rows.
# NOTE(review): 56 is presumably (number of years) x (number of phase
# groupings) in the current extract -- confirm, and update when the data changes.
EXPECTED_ROWS_PER_CODE = 56

# reset_index() returns a new frame rather than mutating `fsm` in place, and
# any `count` column left over from an earlier run of this cell is dropped, so
# re-running the cell does not make the merge emit count_x / count_y.
fsm_long = fsm.reset_index().drop(columns='count', errors='ignore')

# Rows per geography code. Both output columns are named explicitly because a
# bare value_counts().reset_index() labels them differently in pandas 1.x and
# pandas 2.x.
safe_codes = (
    fsm_long['geography_code']
    .value_counts()
    .rename_axis('geography_code')
    .reset_index(name='count')
)

# Attach the per-code row count to every row, then keep the complete codes.
# The helper `count` column is left on the frame, as before.
fsm = fsm_long.merge(safe_codes, on='geography_code')
fsm = fsm[fsm['count'] == EXPECTED_ROWS_PER_CODE].set_index('geography_code')
fsm

Unnamed: 0_level_0,date,phase_type_grouping,fsm,value,variable_name,count
geography_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
E12000001,202223,Non-maintained special school,known to be eligible for free school meals (us...,34.6,percent_of_pupils,56
E12000001,202223,State-funded AP school,known to be eligible for free school meals (us...,63.7,percent_of_pupils,56
E12000001,202223,State-funded nursery,known to be eligible for free school meals (us...,3.1,percent_of_pupils,56
E12000001,202223,State-funded primary,known to be eligible for free school meals (us...,30.2,percent_of_pupils,56
E12000001,202223,State-funded secondary,known to be eligible for free school meals (us...,27.6,percent_of_pupils,56
...,...,...,...,...,...,...
E06000007,201516,Non-maintained special school,known to be eligible for free school meals (us...,70.3,percent_of_pupils,56
E06000007,201516,State-funded secondary,known to be eligible for free school meals (us...,9.3,percent_of_pupils,56
E06000007,201516,State-funded nursery,known to be eligible for free school meals (us...,0.0,percent_of_pupils,56
E06000007,201516,State-funded special school,known to be eligible for free school meals (us...,39.4,percent_of_pupils,56


In [57]:
# Save the final table as CSV inside the processed-data directory.
output_csv = os.path.join(DATA_DIR, 'school-pupils-characteristics/free_school_meals.csv')
fsm.to_csv(output_csv)