# Aggregate responses

Aggregate the synthetic person-level data to find the proportion who gave each response to each question.

## Set-up

### Packages and file paths

In [17]:
# Import required packages
from collections import defaultdict
from dataclasses import dataclass
import numpy as np
import os
import pandas as pd

In [18]:
# File paths
@dataclass(frozen=True)
class Paths:
    '''Stores paths to data and files'''
    survey = '../data/survey_data'
    synthetic_data = 'synthetic_data_raw.csv'
    aggregate = 'aggregate_responses.csv'


paths = Paths()

### Import raw data

In [19]:
data = pd.read_csv(os.path.join(paths.survey, paths.synthetic_data))
data.head()

Unnamed: 0,gender,transgender,sexual_orientation,neurodivergent,birth_parent1,birth_parent2,birth_you,birth_you_age,autonomy_pressure,autonomy_express,...,peer_talk_listen_lab,peer_talk_helpful_lab,peer_talk_if_lab,accept_peer_lab,year_group_lab,fsm_lab,sen_lab,ethnicity_lab,english_additional_lab,school_lab
0,4.0,2.0,6.0,3.0,2.0,1.0,2.0,1.0,,5.0,...,Fully,Somewhat helpful,Very uncomfortable,Not at all,Year 10,Non-FSM,Non-SEN,Ethnic minority,No,School E
1,1.0,2.0,1.0,3.0,3.0,2.0,3.0,8.0,4.0,2.0,...,Mostly,Very helpful,Very uncomfortable,Slightly,Year 10,Non-FSM,Non-SEN,Ethnic minority,No,
2,2.0,,4.0,,,1.0,1.0,1.0,5.0,4.0,...,,Very helpful,Very comfortable,Not at all,Year 10,Non-FSM,Non-SEN,White British,No,School E
3,2.0,5.0,5.0,2.0,2.0,2.0,1.0,3.0,,2.0,...,Fully,Somewhat helpful,Uncomfortable,Mostly,Year 10,Non-FSM,Non-SEN,White British,No,School G
4,5.0,3.0,4.0,1.0,1.0,3.0,3.0,2.0,5.0,2.0,...,Slightly,Somewhat helpful,Uncomfortable,Not at all,Year 8,Non-FSM,Non-SEN,,Yes,School B


In [20]:
data.columns

Index(['gender', 'transgender', 'sexual_orientation', 'neurodivergent',
       'birth_parent1', 'birth_parent2', 'birth_you', 'birth_you_age',
       'autonomy_pressure', 'autonomy_express',
       ...
       'peer_talk_listen_lab', 'peer_talk_helpful_lab', 'peer_talk_if_lab',
       'accept_peer_lab', 'year_group_lab', 'fsm_lab', 'sen_lab',
       'ethnicity_lab', 'english_additional_lab', 'school_lab'],
      dtype='object', length=281)

## Find the proportion giving each response to each measure, within a given group

In [21]:
# Find columns that we want to count responses for
response_col = [col for col in data.columns if col.endswith('_lab')]
response_col.remove('school_lab')


def aggregate(dataset):
    '''
    Aggregates a given column by finding the proportion giving each answer
    for that column
    Inputs:
    - dataset - dataframe, to use for operation
    '''
    # Initialise list to store the counts for each measure
    responses = []

    # Look through each of the columns of interest
    for col in response_col:

        # Find the name of the numeric version of the column, then filter
        # to just the column and it's numeric version
        numeric = col.replace('_lab', '')

        # Count the number of pupils with each response for that column
        # Then convert to percentages
        df = dataset[[numeric, col]].value_counts(dropna=False).reset_index(name='count')
        df[col] = df[col].fillna('Missing')
        df['percentage'] = round((df['count'] / df['count'].sum()) * 100, 1)

        # Reformat dataframe, sorting by the numeric version of the column,
        # and renaming the categories with generic name
        df = df.sort_values(by=numeric)
        df = df.rename(columns={
            numeric: 'cat',
            col: 'cat_lab'})

        # Convert to series with each column as array and add name of measure
        series = pd.Series(df.to_dict(orient='list'))
        series['measure'] = numeric

        # Count number of non-NaN responses for that column
        series['n_responses'] = dataset[numeric].count()

        # Append to list
        responses.append(series.to_frame().T)
    
    # Combine into a single dataframe and return
    return(pd.concat(responses))

In [22]:
# Create the groups - school alone or combined with a filter
groups = [['school_lab']]
filters = ['year_group_lab', 'gender_lab', 'fsm_lab', 'sen_lab']
for group in filters:
    groups.append(['school_lab'] + [group])
groups

[['school_lab'],
 ['school_lab', 'year_group_lab'],
 ['school_lab', 'gender_lab'],
 ['school_lab', 'fsm_lab'],
 ['school_lab', 'sen_lab']]

In [23]:
res_list = []

# For each of the grouping methods
for grouping in groups:
    # Group the dataframe and loop through those subsets of the dataframe
    for group_name, df_group in data.groupby(grouping):
        # Perform aggregation
        res = aggregate(df_group)
        # Save name of group for that filter (e.g. school = school A)
        for i in range(len(grouping)):
            res[grouping[i]] = group_name[i]
        # Append result to list
        res_list.append(res)
    # Print progress status
    print(f'Complete: {grouping}')

# Combine results from list into a single dataframe
result = pd.concat(res_list)

Complete: ['school_lab']
Complete: ['school_lab', 'year_group_lab']
Complete: ['school_lab', 'gender_lab']
Complete: ['school_lab', 'fsm_lab']
Complete: ['school_lab', 'sen_lab']


In [24]:
# Set NaN for the filter labs as All
result[filters] = result[filters].fillna('All')

In [25]:
# Hide results where n<10
result.loc[result['n_responses'] < 10, ['count', 'percentage', 'n_responses']] = np.nan

In [26]:
# Preview dataframe
result

Unnamed: 0,cat,cat_lab,count,percentage,measure,n_responses,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab
0,"[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan]","[Girl, Boy, Non-binary, I describe myself in a...","[14, 24, 18, 17, 12, 16, 11]","[12.5, 21.4, 16.1, 15.2, 10.7, 14.3, 9.8]",gender,101,School A,All,All,All,All
0,"[1.0, 2.0, 3.0, 4.0, 5.0, nan]","[Yes, No, Prefer not to say, I describe myself...","[23, 16, 14, 25, 27, 7]","[20.5, 14.3, 12.5, 22.3, 24.1, 6.2]",transgender,105,School A,All,All,All,All
0,"[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan]","[Bi/pansexual, Gay/lesbian, Heterosexual/strai...","[14, 23, 12, 18, 27, 15, 3]","[12.5, 20.5, 10.7, 16.1, 24.1, 13.4, 2.7]",sexual_orientation,109,School A,All,All,All,All
0,"[1.0, 2.0, 3.0, nan]","[Yes, No, Unsure, Missing]","[42, 29, 30, 11]","[37.5, 25.9, 26.8, 9.8]",neurodivergent,101,School A,All,All,All,All
0,"[1.0, 2.0, 3.0, nan]","[Yes, No, I don't know, Missing]","[35, 40, 34, 3]","[31.2, 35.7, 30.4, 2.7]",birth_parent1,109,School A,All,All,All,All
...,...,...,...,...,...,...,...,...,...,...,...
0,"[8.0, 10.0]","[Year 8, Year 10]","[22, 19]","[53.7, 46.3]",year_group,41,School G,All,All,All,SEN
0,"[0.0, 1.0, nan]","[Non-FSM, FSM, Missing]","[14, 25, 2]","[34.1, 61.0, 4.9]",fsm,39,School G,All,All,All,SEN
0,[1.0],[SEN],[41],[100.0],sen,41,School G,All,All,All,SEN
0,"[1.0, 2.0, nan]","[Ethnic minority, White British, Missing]","[12, 24, 5]","[29.3, 58.5, 12.2]",ethnicity,36,School G,All,All,All,SEN


## Add groups for each measure

This uses add_keys() like `1_create_synthetic_data.ipynb`.

In [27]:
# Initialise dictionary of groups
groups = defaultdict(str)

# Define function for adding multiple keys with the same value
def add_keys(value, keys):
    '''
    Add multiple keys with the same value to the dictionary
    Inputs:
    value: String which is the value for all the keys
    keys: Array with the keys
    '''
    groups.update(dict.fromkeys(keys, value))

# Add the groups to the dictionary
add_keys('demographic', [
    'gender',
    'transgender',
    'sexual_orientation',
    'neurodivergent',
    'birth_parent1',
    'birth_parent2',
    'birth_you',
    'birth_you_age',
    'young_carer',
    'care_experience',
    'year_group',
    'fsm',
    'sen',
    'ethnicity',
    'english_additional'])

add_keys('autonomy', [
    'autonomy_pressure',
    'autonomy_express',
    'autonomy_decide',
    'autonomy_told',
    'autonomy_myself',
    'autonomy_choice'])
add_keys('life_satisfaction', ['life_satisfaction'])
add_keys('optimism', [
    'optimism_future',
    'optimism_best',
    'optimism_good',
    'optimism_work'])
add_keys('wellbeing', [
    'wellbeing_optimistic',
    'wellbeing_useful',
    'wellbeing_relaxed',
    'wellbeing_problems',
    'wellbeing_thinking',
    'wellbeing_close',
    'wellbeing_mind'])
add_keys('esteem', [
    'esteem_satisfied',
    'esteem_qualities',
    'esteem_well',
    'esteem_value',
    'esteem_good'])
add_keys('stress', [
    'stress_control',
    'stress_overcome',
    'stress_confident',
    'stress_way'])
add_keys('appearance', ['appearance_happy', 'appearance_feel'])
add_keys('negative', [
    'negative_lonely',
    'negative_unhappy',
    'negative_like',
    'negative_cry',
    'negative_school',
    'negative_worry',
    'negative_sleep',
    'negative_wake',
    'negative_shy',
    'negative_scared'])
add_keys('lonely', ['lonely'])
add_keys('support', ['support_ways', 'support_look'])

add_keys('sleep', ['sleep'])
add_keys('physical', ['physical_days', 'physical_hours'])
add_keys('free_like', ['free_like'])
add_keys('media', ['media_hours'])
add_keys('places', [
    'places_freq',
    'places_barriers___1',
    'places_barriers___2',
    'places_barriers___3',
    'places_barriers___4',
    'places_barriers___5',
    'places_barriers___6',
    'places_barriers___7',
    'places_barriers___8',
    'places_barriers___9'])
add_keys('school_belong', ['school_belong'])
add_keys('staff_relationship', [
    'staff_interest', 'staff_believe', 'staff_best', 'staff_listen'])

add_keys('talk', [
    'staff_talk', 'staff_talk_listen', 'staff_talk_helpful', 'staff_talk_if',
    'home_talk', 'home_talk_listen', 'home_talk_helpful', 'home_talk_if',
    'peer_talk', 'peer_talk_listen', 'peer_talk_helpful', 'peer_talk_if'])
add_keys('accept', [
    'accept_staff', 'accept_home', 'accept_local', 'accept_peer'])

add_keys('home_relationship', [
    'home_interest', 'home_believe', 'home_best', 'home_listen'])
add_keys('home_happy', ['home_happy'])

add_keys('local_env', ['local_safe', 'local_support', 'local_trust',
                       'local_neighbours', 'local_places'])
add_keys('discrim', ['discrim_race', 'discrim_gender', 'discrim_orientation',
                     'discrim_disability', 'discrim_faith'])
add_keys('belong_local', ['belong_local'])
add_keys('wealth', ['wealth'])
add_keys('future', ['future_options', 'future_interest', 'future_support'])
add_keys('climate', ['climate'])
add_keys('social', [
    'social_along', 'social_time', 'social_support', 'social_hard'])
add_keys('bully', ['bully_physical', 'bully_other', 'bully_cyber'])

# Preview the dictionary
groups

defaultdict(str,
            {'gender': 'demographic',
             'transgender': 'demographic',
             'sexual_orientation': 'demographic',
             'neurodivergent': 'demographic',
             'birth_parent1': 'demographic',
             'birth_parent2': 'demographic',
             'birth_you': 'demographic',
             'birth_you_age': 'demographic',
             'young_carer': 'demographic',
             'care_experience': 'demographic',
             'year_group': 'demographic',
             'fsm': 'demographic',
             'sen': 'demographic',
             'ethnicity': 'demographic',
             'english_additional': 'demographic',
             'autonomy_pressure': 'autonomy',
             'autonomy_express': 'autonomy',
             'autonomy_decide': 'autonomy',
             'autonomy_told': 'autonomy',
             'autonomy_myself': 'autonomy',
             'autonomy_choice': 'autonomy',
             'life_satisfaction': 'life_satisfaction',
             'opt

In [28]:
# Add groups and preview
result['group'] = result['measure'].map(groups)
result.head()

Unnamed: 0,cat,cat_lab,count,percentage,measure,n_responses,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,group
0,"[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan]","[Girl, Boy, Non-binary, I describe myself in a...","[14, 24, 18, 17, 12, 16, 11]","[12.5, 21.4, 16.1, 15.2, 10.7, 14.3, 9.8]",gender,101,School A,All,All,All,All,demographic
0,"[1.0, 2.0, 3.0, 4.0, 5.0, nan]","[Yes, No, Prefer not to say, I describe myself...","[23, 16, 14, 25, 27, 7]","[20.5, 14.3, 12.5, 22.3, 24.1, 6.2]",transgender,105,School A,All,All,All,All,demographic
0,"[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan]","[Bi/pansexual, Gay/lesbian, Heterosexual/strai...","[14, 23, 12, 18, 27, 15, 3]","[12.5, 20.5, 10.7, 16.1, 24.1, 13.4, 2.7]",sexual_orientation,109,School A,All,All,All,All,demographic
0,"[1.0, 2.0, 3.0, nan]","[Yes, No, Unsure, Missing]","[42, 29, 30, 11]","[37.5, 25.9, 26.8, 9.8]",neurodivergent,101,School A,All,All,All,All,demographic
0,"[1.0, 2.0, 3.0, nan]","[Yes, No, I don't know, Missing]","[35, 40, 34, 3]","[31.2, 35.7, 30.4, 2.7]",birth_parent1,109,School A,All,All,All,All,demographic


## Add labels for each measure

<mark>what to do about media time on each type</mark>

In [29]:
# Define labels
labels = {
    'gender': 'Gender',
    'transgender': 'Do you consider yourself to be transgender?',
    'sexual_orientation': 'Sexual orientation',
    'neurodivergent': 'Do you identify as neurodivergent?',
    'autonomy_pressure': 'I feel pressured in my life',
    'autonomy_express': 'I generally feel free to express my ideas and opinions',
    'autonomy_decide': 'I feel like I am free to decide for myself how to live my life',
    'autonomy_told': 'In my daily life I often have to do what I am told',
    'autonomy_myself': 'I feel I can pretty much be myself in daily situations',
    'life_satisfaction': 'Overall, how satisfied are you with your life nowadays?',
    'optimism_future': 'I am optimistic about my future',
    'optimism_best': 'In uncertain times, I expect the best',
    'optimism_good': 'I think good things are going to happen to me',
    'optimism_work': 'I believe that things will work out, no matter how difficult they seem',
    'wellbeing_optimistic': '''I've been feeling optimistic about the future''',
    'wellbeing_useful': '''I've been feeling useful''',
    'wellbeing_relaxed': '''I've been feeling relaxed''',
    'wellbeing_problems': '''I've been dealing with problems well''',
    'wellbeing_thinking': '''I've been thinking clearly''',
    'wellbeing_close': '''I've been feeling close to other people''',
    'wellbeing_mind': '''I've been able to make up my own mind about things''',
    'esteem_satisfied': 'On the whole, I am satisfied with myself',
    'esteem_qualities': 'I feel that I have a number of good qualities',
    'esteem_well': 'I am able to do things as well as most other people',
    'esteem_value': 'I am a person of value',
    'esteem_good': 'I feel good about myself',
    'stress_control': 'In the last month, how often have you felt that you were unable to control the important things in your life?',
    'stress_overcome': 'In the last month, how often have you felt difficulties were piling up so high that you could not overcome them?',
    'stress_confident': 'In the last month, how often have you felt confident about your ability to handle your personal problems?',
    'stress_way': 'In the last month, how often have you felt that things were going your way?',
    'appearance_happy': 'How happy are you with your appearance (the way that you look)?',
    'appearance_feel': 'My appearance affects how I feel about myself',
    'negative_lonely': 'I feel lonely',
    'negative_unhappy': 'I am unhappy',
    'negative_like': 'Nobody likes me',
    'negative_cry': 'I cry a lot',
    'negative_school': 'I worry when I am at school',
    'negative_worry': 'I worry a lot',
    'negative_sleep': 'I have problems sleeping',
    'negative_wake': 'I wake up in the night',
    'negative_shy': 'I am shy',
    'negative_scared': 'I feel scared',
    'lonely': 'How often do you feel lonely?',
    'support_ways': 'I have ways to support myself (e.g. to cope, or help myself feel better)',
    'support_look': 'I know where to look for advice on how to support myself',
    'sleep': 'Is the amount of sleep you normally get enough for you to feel awake and concentrate on your school work during the day?',
    'physical_days': 'How many days in a usual week are you physically active? Remember, we are interested in things that make you hot, sweaty, and/or breathe faster.',
    'physical_hours': 'On the days that you are physically active, think about all the different activities you typically do over the course of the day. How long on average do you spend being physically active?',
    'free_like': 'How often can you do things that you like in your free time?',
    'media_hours': 'On a normal weekday during term time, how much time do you spend on social media? For example, sites or apps like TikTok, Instagram, and Snapchat',
    'places_freq': 'How many activities/places are there in your local area, that you choose to or would want to go to in your free time?',
    'places_barriers___1': '''There's nothing to do''',
    'places_barriers___2': '''I'm unable to get there and back''',
    'places_barriers___3': '''It's too expensive (to get there or take part)''',
    'places_barriers___4': 'Poor weather',
    'places_barriers___5': 'I have no-one to go with',
    'places_barriers___6': '''It's too busy''',
    'places_barriers___7': 'I feel uncomfortable/anxious about other people who might be there',
    'places_barriers___8': '''My parents/carers don't allow me to go''',
    'places_barriers___9': 'Other',
    'school_belong': 'I feel that I belong/belonged at my school',
    'staff_interest': 'Is interested in my schoolwork',
    'staff_believe': 'Believes that I will be a success',
    'staff_best': 'Wants me to do my best',
    'staff_listen': 'Listens to me when I have something to say',
    'staff_talk': 'Have you ever talked with an adult at your school about feeling down (e.g. stressed, sad, anxious)?',
    'staff_talk_listen': 'Did you feel listened to?',
    'staff_talk_helpful': 'Did they provide advice that you found helpful?',
    'staff_talk_if': 'How would you feel about speaking with an adult at your school when you are feeling down?',
    'accept_staff': 'Do you feel accepted as you are by adults at your school?',
    'home_interest': 'Is interested in my schoolwork',
    'home_believe': 'Believes that I will be a success',
    'home_best': 'Wants me to do my best',
    'home_listen': 'Listens to me when I have something to say',
    'home_talk': 'Have you ever talked with one of your parents/carers about feeling down (e.g. stressed, sad, anxious)?',
    'home_talk_listen': 'Did you feel listened to?',
    'home_talk_helpful': 'Did they provide advice that you found helpful?',
    'home_talk_if': 'How would you feel about speaking with one of your parents/carers when you are feeling down?',
    'accept_home': 'Do you feel accepted as you are by your parents/carers?',
    'home_happy': 'How happy are you with the home that you live in?',
    'young_carer': 'In the last year, have you regularly taken on caring responsibilities for a family member - e.g. due to illness, disability, mental health condition or drug/alcohol dependency?',
    'care_experience': 'Are you or have you ever been in care (living in a foster placement, residential placement, or private/kinship care)?',
    'local_safe': 'How safe do you feel when in your local area?',
    'local_support': 'People around here support each other with their wellbeing',
    'local_trust': 'You can trust people around here',
    'local_neighbours': 'I could ask for help or a favour from neighbours',
    'local_places': 'There are good places to spend your free time (e.g., leisure centres, parks, shops)',
    'discrim_race': 'Your race, skin colour or where you were born',
    'discrim_gender': 'Your gender',
    'discrim_orientation': 'Your sexual orientation',
    'discrim_disability': 'Disability',
    'discrim_faith': 'Your religion/faith',
    'belong_local': 'I feel like I belong in my local area',
    'accept_local': 'Do you feel accepted as you are by people in your local area?',
    'wealth': 'Compared to your friends, is your family richer, poorer or about the same?',
    'future_options': 'How many options are available?',
    'future_interest': 'How do you feel about the options available?',
    'future_support': 'Do you feel (or think you would feel) supported to explore options that interest you, even if no-one else around you has done them before?',
    'climate': 'How often do you worry about the impact of climate change on your future?',
    'social_along': 'I get along with people around me',
    'social_time': 'People like to spend time with me',
    'social_support': 'I feel supported by my friends',
    'social_hard': 'My friends care about me when times are hard (for example if I am sick or have done something wrong)',
    'bully_physical': 'How often do you get physically bullied at school? By this we mean getting hit, pushed around, threatened, or having belongings stolen',
    'bully_other': 'How often do you get bullied in other ways at school? By this we mean insults, slurs, name calling, threats, getting left out or excluded by others, or having rumours spread about you on purpose',
    'bully_cyber': 'How often do you get cyber-bullied? By this we mean someone sending mean text or online messages about you, creating a website making fun of you, posting pictures that make you look bad online, or sharing them with others',
    'peer_talk': 'Have you ever talked with another person your age about feeling down (e.g. stressed, sad, anxious)?',
    'peer_talk_listen': 'Did you feel listened to?',
    'peer_talk_helpful': 'Did they provide advice that you found helpful?',
    'peer_talk_if': 'How would you feel about speaking with another person your age when you are feeling down?',
    'accept_peer': 'Do you feel accepted as you are by other people your age?'
}

In [30]:
result['measure'].drop_duplicates()

0                gender
0           transgender
0    sexual_orientation
0        neurodivergent
0         birth_parent1
            ...        
0            year_group
0                   fsm
0                   sen
0             ethnicity
0    english_additional
Name: measure, Length: 121, dtype: object

In [31]:
# Add labels and preview
result['measure_lab'] = result['measure'].map(labels)
result.head()

Unnamed: 0,cat,cat_lab,count,percentage,measure,n_responses,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,group,measure_lab
0,"[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan]","[Girl, Boy, Non-binary, I describe myself in a...","[14, 24, 18, 17, 12, 16, 11]","[12.5, 21.4, 16.1, 15.2, 10.7, 14.3, 9.8]",gender,101,School A,All,All,All,All,demographic,Gender
0,"[1.0, 2.0, 3.0, 4.0, 5.0, nan]","[Yes, No, Prefer not to say, I describe myself...","[23, 16, 14, 25, 27, 7]","[20.5, 14.3, 12.5, 22.3, 24.1, 6.2]",transgender,105,School A,All,All,All,All,demographic,Do you consider yourself to be transgender?
0,"[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan]","[Bi/pansexual, Gay/lesbian, Heterosexual/strai...","[14, 23, 12, 18, 27, 15, 3]","[12.5, 20.5, 10.7, 16.1, 24.1, 13.4, 2.7]",sexual_orientation,109,School A,All,All,All,All,demographic,Sexual orientation
0,"[1.0, 2.0, 3.0, nan]","[Yes, No, Unsure, Missing]","[42, 29, 30, 11]","[37.5, 25.9, 26.8, 9.8]",neurodivergent,101,School A,All,All,All,All,demographic,Do you identify as neurodivergent?
0,"[1.0, 2.0, 3.0, nan]","[Yes, No, I don't know, Missing]","[35, 40, 34, 3]","[31.2, 35.7, 30.4, 2.7]",birth_parent1,109,School A,All,All,All,All,demographic,


## Save to csv

In [32]:
result.to_csv(os.path.join(paths.survey, paths.aggregate), index=False)