# Overall counts

This notebook calculates the overall counts for both the standard and symbol surveys (number of responses for each survey, and number of schools these were from).

This is saved as a dictionary.

## Set-up

### Packages and file paths

In [1]:
# Import required packages
from dataclasses import dataclass
import os
import pandas as pd
import pickle

In [2]:
# File paths
@dataclass(frozen=True)
class Paths:
    '''Stores paths to data and files'''
    data = '../data/survey_data'
    standard = 'standard_synthetic_data_raw.csv'
    symbol = 'symbol_synthetic_data_raw.csv'
    overall_counts = 'nd_overall_counts.pkl'


paths = Paths()

### Import raw data

In [3]:
standard = pd.read_csv(os.path.join(paths.data, paths.standard))
standard.head()

Unnamed: 0,gender,transgender,sexual_orientation,neurodivergent,birth_parent1,birth_parent2,birth_you,birth_you_age,autonomy_pressure,autonomy_express,...,peer_talk_listen_lab,peer_talk_helpful_lab,peer_talk_if_lab,accept_peer_lab,year_group_lab,fsm_lab,sen_lab,ethnicity_lab,english_additional_lab,school_lab
0,4.0,2.0,6.0,3.0,2.0,,2.0,1.0,2.0,5.0,...,Fully,Somewhat helpful,,Not at all,Year 10,Non-FSM,Non-SEN,Ethnic minority,,School E
1,,2.0,1.0,3.0,3.0,2.0,3.0,8.0,4.0,2.0,...,,,Very uncomfortable,Slightly,Year 10,Non-FSM,Non-SEN,Ethnic minority,No,School D
2,2.0,3.0,4.0,1.0,1.0,1.0,1.0,1.0,5.0,4.0,...,,,Very comfortable,Not at all,Year 10,Non-FSM,Non-SEN,White British,No,School E
3,2.0,5.0,5.0,,2.0,2.0,,3.0,1.0,2.0,...,,,Uncomfortable,,Year 10,Non-FSM,Non-SEN,White British,No,School G
4,5.0,3.0,4.0,1.0,1.0,3.0,3.0,2.0,5.0,2.0,...,Slightly,Somewhat helpful,,Not at all,,Non-FSM,Non-SEN,White British,Yes,School B


In [4]:
symbol = pd.read_csv(os.path.join(paths.data, paths.symbol))
symbol.head()

Unnamed: 0,symbol_family,symbol_home,symbol_friends,symbol_choice,symbol_things,symbol_health,symbol_future,symbol_school,symbol_free,symbol_life,...,symbol_school_lab,symbol_free_lab,symbol_life_lab,gender_lab,year_group_lab,fsm_lab,sen_lab,ethnicity_lab,english_additional_lab,school_lab
0,2.0,2,1.0,1.0,3.0,1,1.0,2.0,2.0,3.0,...,Ok,Ok,Sad,Male,Year 11,FSM,SEN,Ethnic minority,Yes,School B
1,,1,1.0,2.0,2.0,3,2.0,,2.0,1.0,...,,Ok,Happy,Female,Year 10,Non-FSM,Non-SEN,Ethnic minority,Yes,School A
2,,1,2.0,2.0,2.0,1,2.0,2.0,1.0,1.0,...,Ok,Happy,Happy,Female,Year 11,FSM,Non-SEN,White British,Yes,School B
3,,2,3.0,,2.0,1,2.0,1.0,2.0,3.0,...,Happy,Ok,Sad,Female,Year 11,Non-FSM,SEN,White British,Yes,School B
4,3.0,1,2.0,2.0,2.0,2,1.0,2.0,2.0,1.0,...,Ok,Ok,Happy,Female,Year 10,FSM,Non-SEN,White British,No,School B


## Counts for entire survey

Note: This assumes that all pupils in the dataframes are valid entries (i.e. responses where there was no actual response to any question have been deleted). Likewise, assumes all pupils have a school provided (which should be the case).

In [5]:
dict = {
    'standard_pupils': len(standard),
    'standard_schools': len(standard.school_lab.unique()),
    'symbol_pupils': len(symbol),
    'symbol_schools': len(symbol.school_lab.unique())}
dict['total_pupils'] = dict['standard_pupils'] + dict['symbol_pupils']
dict['total_schools'] = dict['standard_schools'] + dict['symbol_schools']
dict

{'standard_pupils': 800,
 'standard_schools': 7,
 'symbol_pupils': 60,
 'symbol_schools': 2,
 'total_pupils': 860,
 'total_schools': 9}

## Save dictionary

In [6]:
with open(os.path.join(paths.data, paths.overall_counts), 'wb') as f:
    pickle.dump(dict, f)