# Preview of the data structures at each processing step in the creation of the synthetic standard survey school dashboard

Please note: The data structures will differ for the symbol survey and public dashboards.

For a **diagram and written summary** of the data processing for the synthetic dashboard, please see `data_guide.md`. This notebook accompanies that guide, providing a preview of the data columns and types at key points in the process.

In [1]:
# Import packages required to produce this notebook
import pandas as pd

In [2]:
def describe_data(filepath):
    '''
    Import a CSV file and summarise it: print its shape, show its first
    five rows, and print the name and data type of each column

    Parameters
    ----------
    filepath : string
        Filepath of dataset to import and describe
    '''
    data = pd.read_csv(filepath)

    # Dimensions of the dataframe as (rows, columns)
    print(data.shape)

    # First five rows, shown using the notebook's rich display
    display(data.head())

    # Name and type of each column - pandas display limits are lifted
    # temporarily so that the list is not truncated for wide datasets
    show_everything = ('display.max_rows', None,
                       'display.max_columns', None)
    with pd.option_context(*show_everything):
        print(data.dtypes)

### Raw data

In [3]:
describe_data('data/survey_data/KailoBeeWellStandard_DATA_2023-11-06_1152.csv')

(23, 860)


Unnamed: 0,record_id,redcap_event_name,redcap_survey_identifier,id_and_consent_timestamp,consent_answer,id_and_consent_complete,survey_questions_default_order_timestamp,gender,transgender,sexual_orientation,...,places_barriers_shuffle_6___1,places_barriers_shuffle_6___2,places_barriers_shuffle_6___3,places_barriers_shuffle_6___4,places_barriers_shuffle_6___5,places_barriers_shuffle_6___6,places_barriers_shuffle_6___7,places_barriers_shuffle_6___8,places_barriers_shuffle_6___9,survey_questions_shuffle_6_complete
0,1,survey_completion_arm_1,,2023-10-06 12:11:47,1,2,,,,,...,,,,,,,,,,
1,2,survey_completion_arm_1,,2023-10-06 13:06:38,1,2,,,,,...,,,,,,,,,,
2,3,survey_completion_arm_1,,2023-10-10 16:00:13,1,2,,,,,...,,,,,,,,,,
3,4,survey_completion_arm_2,,2023-10-10 16:00:27,1,2,,,,,...,,,,,,,,,,
4,5,survey_completion_arm_1,,2023-10-11 11:39:58,1,2,,,,,...,,,,,,,,,,


record_id                                     int64
redcap_event_name                            object
redcap_survey_identifier                     object
id_and_consent_timestamp                     object
consent_answer                                int64
id_and_consent_complete                       int64
survey_questions_default_order_timestamp     object
gender                                      float64
transgender                                 float64
sexual_orientation                          float64
neurodivergent                              float64
birth_parent1                               float64
birth_parent2                               float64
birth_you                                   float64
birth_you_age                               float64
autonomy_pressure                           float64
autonomy_express                            float64
autonomy_decide                             float64
autonomy_told                               float64
autonomy_mys

### Headings

Headings is a dataset which only has column headings and does not contain any column entries.

In [4]:
# Import the headings file
head = pd.read_csv('data/survey_data/headings.csv')

# Dimensions of the dataframe as (rows, columns)
print(head.shape)

# List the name of every column in the dataframe
list(head.columns)

(0, 122)


['Unnamed: 0',
 'record_id',
 'gender',
 'transgender',
 'sexual_orientation',
 'neurodivergent',
 'birth_parent1',
 'birth_parent2',
 'birth_you',
 'birth_you_age',
 'autonomy_pressure',
 'autonomy_express',
 'autonomy_decide',
 'autonomy_told',
 'autonomy_myself',
 'autonomy_choice',
 'life_satisfaction',
 'optimism_future',
 'optimism_best',
 'optimism_good',
 'optimism_work',
 'wellbeing_optimistic',
 'wellbeing_useful',
 'wellbeing_relaxed',
 'wellbeing_problems',
 'wellbeing_thinking',
 'wellbeing_close',
 'wellbeing_mind',
 'esteem_satisfied',
 'esteem_qualities',
 'esteem_well',
 'esteem_value',
 'esteem_good',
 'stress_control',
 'stress_overcome',
 'stress_confident',
 'stress_way',
 'appearance_happy',
 'appearance_feel',
 'negative_lonely',
 'negative_unhappy',
 'negative_like',
 'negative_cry',
 'negative_school',
 'negative_worry',
 'negative_sleep',
 'negative_wake',
 'negative_shy',
 'negative_scared',
 'lonely',
 'support_ways',
 'support_look',
 'sleep',
 'physical_da

### Synthetic pupil dataset

In [5]:
describe_data('data/survey_data/synthetic_data_raw.csv')

(800, 280)


Unnamed: 0,gender,transgender,sexual_orientation,neurodivergent,birth_parent1,birth_parent2,birth_you,birth_you_age,autonomy_pressure,autonomy_express,...,peer_talk_listen_lab,peer_talk_helpful_lab,peer_talk_if_lab,accept_peer_lab,year_group_lab,fsm_lab,sen_lab,ethnicity_lab,english_additional_lab,school_lab
0,4.0,2.0,6.0,3.0,2.0,1.0,2.0,1.0,2.0,5.0,...,Fully,Somewhat helpful,Very uncomfortable,Not at all,Year 10,Non-FSM,Non-SEN,Ethnic minority,No,School E
1,1.0,2.0,1.0,3.0,3.0,2.0,3.0,8.0,4.0,2.0,...,Mostly,Very helpful,Very uncomfortable,Slightly,Year 10,,,Ethnic minority,,School D
2,2.0,3.0,4.0,1.0,1.0,1.0,1.0,1.0,5.0,4.0,...,Mostly,Very helpful,Very comfortable,Not at all,Year 10,Non-FSM,Non-SEN,White British,No,School E
3,2.0,5.0,5.0,2.0,2.0,2.0,1.0,3.0,1.0,2.0,...,Fully,Somewhat helpful,Uncomfortable,Mostly,Year 10,,Non-SEN,White British,No,School G
4,5.0,3.0,4.0,1.0,1.0,3.0,3.0,2.0,5.0,2.0,...,Slightly,Somewhat helpful,Uncomfortable,Not at all,,Non-FSM,Non-SEN,White British,Yes,School B


gender                      float64
transgender                 float64
sexual_orientation          float64
neurodivergent              float64
birth_parent1               float64
birth_parent2               float64
birth_you                   float64
birth_you_age               float64
autonomy_pressure           float64
autonomy_express            float64
autonomy_decide             float64
autonomy_told               float64
autonomy_myself             float64
autonomy_choice             float64
life_satisfaction           float64
optimism_future             float64
optimism_best               float64
optimism_good               float64
optimism_work               float64
wellbeing_optimistic        float64
wellbeing_useful            float64
wellbeing_relaxed           float64
wellbeing_problems          float64
wellbeing_thinking          float64
wellbeing_close             float64
wellbeing_mind              float64
esteem_satisfied            float64
esteem_qualities            

### Aggregated dataset with scores and RAG ratings

In [6]:
describe_data('data/survey_data/aggregate_scores_rag.csv')

(2016, 17)


Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,total_pupils,group_n,group_wt_mean,group_wt_std,lower,upper,rag,variable_lab,description
0,autonomy_score,17.865854,82.0,School A,All,All,All,All,487.0,7.0,17.831622,0.297966,17.533657,18.129588,average,Autonomy,How 'in control' young people feel of their life
1,life_satisfaction_score,5.105263,114.0,School A,All,All,All,All,725.0,7.0,5.055172,0.32948,4.725693,5.384652,average,Life satisfaction,How satisfied young people feel with their life
2,optimism_score,12.02521,119.0,School A,All,All,All,All,729.0,7.0,12.065844,0.261534,11.804309,12.327378,average,Optimism,Young people's hopefulness and confidence for ...
3,wellbeing_score,21.151163,86.0,School A,All,All,All,All,538.0,7.0,20.964684,0.452018,20.512666,21.416702,average,Psychological wellbeing,How positive and generally happy young people ...
4,esteem_score,12.739726,73.0,School A,All,All,All,All,476.0,7.0,12.689076,0.379053,12.310023,13.068129,average,Self-esteem,How much young people value themselves


variable           object
mean              float64
count             float64
school_lab         object
year_group_lab     object
gender_lab         object
fsm_lab            object
sen_lab            object
total_pupils      float64
group_n           float64
group_wt_mean     float64
group_wt_std      float64
lower             float64
upper             float64
rag                object
variable_lab       object
description        object
dtype: object


### Aggregated dataset with non-demographic question responses

In [7]:
describe_data('data/survey_data/aggregate_responses.csv')

(6678, 13)


Unnamed: 0,cat,cat_lab,count,percentage,measure,n_responses,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,group,measure_lab
0,"[1, 2, 3, 4, 5, nan]","['1 - Completely not true', '2', '3', '4', '5 ...","[19, 28, 20, 17, 27, 17]","[14.84375, 21.875, 15.625, 13.28125, 21.09375,...",autonomy_pressure,128.0,School A,All,All,All,All,autonomy,I feel pressured in my life
1,"[1, 2, 3, 4, 5, nan]","['1 - Completely not true', '2', '3', '4', '5 ...","[27, 29, 21, 25, 26, 0]","[21.09375, 22.65625, 16.40625, 19.53125, 20.31...",autonomy_express,128.0,School A,All,All,All,All,autonomy,I generally feel free to express my ideas and ...
2,"[1, 2, 3, 4, 5, nan]","['1 - Completely not true', '2', '3', '4', '5 ...","[25, 25, 13, 32, 20, 13]","[19.53125, 19.53125, 10.15625, 25.0, 15.625, 1...",autonomy_decide,128.0,School A,All,All,All,All,autonomy,I feel like I am free to decide for myself how...
3,"[1, 2, 3, 4, 5, nan]","['1 - Completely not true', '2', '3', '4', '5 ...","[23, 24, 16, 31, 21, 13]","[17.96875, 18.75, 12.5, 24.21875, 16.40625, 10...",autonomy_told,128.0,School A,All,All,All,All,autonomy,In my daily life I often have to do what I am ...
4,"[1, 2, 3, 4, 5, nan]","['1 - Completely not true', '2', '3', '4', '5 ...","[30, 22, 23, 20, 23, 10]","[23.4375, 17.1875, 17.96875, 15.625, 17.96875,...",autonomy_myself,128.0,School A,All,All,All,All,autonomy,I feel I can pretty much be myself in daily si...


cat                object
cat_lab            object
count              object
percentage         object
measure            object
n_responses       float64
school_lab         object
year_group_lab     object
gender_lab         object
fsm_lab            object
sen_lab            object
group              object
measure_lab        object
dtype: object


### Aggregated dataset with overall counts

In [8]:
describe_data('data/survey_data/overall_counts.csv')

(63, 6)


Unnamed: 0,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab
0,128.0,School A,All,All,All,All
1,60.0,School A,Year 8,All,All,All
2,55.0,School A,Year 10,All,All,All
3,16.0,School A,All,Girl,All,All
4,24.0,School A,All,Boy,All,All


count             float64
school_lab         object
year_group_lab     object
gender_lab         object
fsm_lab            object
sen_lab            object
dtype: object


### Aggregated dataset with demographic question responses

In [9]:
describe_data('data/survey_data/aggregate_demographic.csv')

(210, 11)


Unnamed: 0,cat,cat_lab,count,percentage,measure,n_responses,school_lab,school_group,school_group_lab,plot_group,measure_lab
0,"[1, 2, 3, 4, 5, 6, nan]","['Girl', 'Boy', 'Non-binary', 'I describe myse...","[16, 24, 15, 20, 17, 22, 14]","[12.5, 18.75, 11.71875, 15.625, 13.28125, 17.1...",gender,128.0,School A,1,Your school,gender,Gender
1,"[1, 2, 3, 4, 5, nan]","['Yes', 'No', 'Prefer not to say', 'I describe...","[25, 23, 19, 29, 30, 2]","[19.53125, 17.96875, 14.84375, 22.65625, 23.43...",transgender,128.0,School A,1,Your school,gender,Do you consider yourself to be transgender?
2,"[1, 2, 3, 4, 5, 6, nan]","['Bi/pansexual', 'Gay/lesbian', 'Heterosexual/...","[22, 26, 15, 20, 29, 16, 0]","[17.1875, 20.3125, 11.71875, 15.625, 22.65625,...",sexual_orientation,128.0,School A,1,Your school,sexual_orientation,Sexual orientation
3,"[1, 2, 3, nan]","['Yes', 'No', 'Unsure', 'No response']","[47, 30, 38, 13]","[36.71875, 23.4375, 29.6875, 10.15625]",neurodivergent,128.0,School A,1,Your school,neuro,Do you identify as neurodivergent?
4,"[1, 2, 3, nan]","['Yes', 'No', ""I don't know"", 'No response']","[39, 45, 40, 4]","[30.46875, 35.15625, 31.25, 3.125]",birth_parent1,128.0,School A,1,Your school,birth,Was birth parent 1 born outside the UK?


cat                  object
cat_lab              object
count                object
percentage           object
measure              object
n_responses         float64
school_lab           object
school_group          int64
school_group_lab     object
plot_group           object
measure_lab          object
dtype: object
