## Create RAG ratings for the aggregate data

## Set-up

### Packages and file paths

In [1]:
# Import required packages
from dataclasses import dataclass
import math
import numpy as np
import os
import pandas as pd
#from statsmodels.stats.weightstats import DescrStatsW

# Set display options
pd.set_option('display.max_rows', 100)

In [2]:
# File paths
@dataclass(frozen=True)
class Paths:
    '''Paths to data and files'''
    survey = '../data/survey_data'
    aggregate = 'aggregate_scores.csv'
    rag = 'aggregate_scores_rag.csv'


paths = Paths()

## Import aggregate scores

In [3]:
data = pd.read_csv(os.path.join(paths.survey, paths.aggregate))
data.tail()

Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab
2074,wealth_score,0.392857,28.0,School G,All,All,All,Non-SEN
2075,future_score,7.46875,16.0,School G,All,All,All,Non-SEN
2076,climate_score,0.644444,45.0,School G,All,All,All,Non-SEN
2077,social_score,11.567568,37.0,School G,All,All,All,Non-SEN
2078,bully_score,7.243243,37.0,School G,All,All,All,Non-SEN


## Find weighted mean and SD within each group

In [4]:
def descriptives(values, counts):
    '''
    Calculates:
    * Total sample size (across the schools)
    * Total number of groups without NaN (e.g. schools, areas) used in calc
    * Weighted average of the means
    * Standard deviation of the means
    This normalises weights so they sum 1 (and so they can't all be 0).
    It returns the biased variance and is like a weighted version of np.std().
    For small samples, may want to alter to unbiased variance.
    Based on: https://stackoverflow.com/questions/2413522/weighted-standard-deviation-in-numpy
    Inputs:
    - values - series, to calculate mean and std from
    - counts - series, number of students, used to weight calculations
    Output:
    - result - series with each of the calculations (index is name of calc)
    '''
    # Total sample size
    n_pupils = counts.sum(skipna=True)

    # Total number of groups used in other calc - count number of non-NaN rows
    n_groups = counts.count()

    # Weighted mean
    average = np.average(values, weights=counts)

    # Weighted std
    variance = np.average((values-average)**2, weights=counts)
    std = math.sqrt(variance)

    # Combine into a series
    result = pd.Series(
        [n_pupils, n_groups, average, std],
        index=['total_pupils', 'group_n', 'group_wt_mean', 'group_wt_std'])
    return(result)

In [5]:
groups = ['variable', 'year_group_lab', 'gender_lab', 'fsm_lab', 'sen_lab']
wt_mean = (data
           .groupby(groups)
           .apply(lambda x: descriptives(x['mean'], x['count']))
           .reset_index())
wt_mean

Unnamed: 0,variable,year_group_lab,gender_lab,fsm_lab,sen_lab,total_pupils,group_n,group_wt_mean,group_wt_std
0,accept_score,All,All,All,All,613.0,7.0,10.003263,0.262738
1,accept_score,All,All,All,Non-SEN,326.0,7.0,9.984663,0.437538
2,accept_score,All,All,All,SEN,224.0,6.0,,
3,accept_score,All,All,FSM,All,320.0,7.0,9.965625,0.207753
4,accept_score,All,All,Non-FSM,All,267.0,7.0,10.056180,0.510847
...,...,...,...,...,...,...,...,...,...
292,wellbeing_score,All,All,Non-FSM,All,215.0,7.0,21.241860,0.520790
293,wellbeing_score,All,Boy,All,All,69.0,5.0,,
294,wellbeing_score,All,Girl,All,All,26.0,2.0,,
295,wellbeing_score,Year 10,All,All,All,203.0,6.0,,


In [6]:
rag = pd.merge(data, wt_mean, how='left', on=groups)
rag

Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,total_pupils,group_n,group_wt_mean,group_wt_std
0,birth_you_age_score,7.850427,117.0,School A,All,All,All,All,742.0,7.0,7.873315,0.281498
1,autonomy_score,18.220779,77.0,School A,All,All,All,All,500.0,7.0,17.922000,0.436777
2,life_satisfaction_score,5.065041,123.0,School A,All,All,All,All,788.0,7.0,5.076142,0.304727
3,optimism_score,11.841463,82.0,School A,All,All,All,All,572.0,7.0,12.006993,0.388618
4,wellbeing_score,21.466667,75.0,School A,All,All,All,All,471.0,7.0,20.966030,0.527471
...,...,...,...,...,...,...,...,...,...,...,...,...
2074,wealth_score,0.392857,28.0,School G,All,All,All,Non-SEN,283.0,7.0,0.332155,0.050042
2075,future_score,7.468750,16.0,School G,All,All,All,Non-SEN,167.0,7.0,7.356287,0.454812
2076,climate_score,0.644444,45.0,School G,All,All,All,Non-SEN,396.0,7.0,0.515152,0.066324
2077,social_score,11.567568,37.0,School G,All,All,All,Non-SEN,280.0,7.0,11.792857,0.492870


## Create RAG column based on whether 1SD above or below

<mark>remember the reverse scored things</mark>

In [7]:
# Find 1 SD above and below mean
rag['lower'] = rag['group_wt_mean'] - rag['group_wt_std']
rag['upper'] = rag['group_wt_mean'] + rag['group_wt_std']

In [8]:
# Create RAG column
conditions = [(rag['mean'] <= rag['lower']),
              (rag['mean'] > rag['lower']) & (rag['mean'] < rag['upper']),
              (rag['mean'] >= rag['upper'])]
choices = ['below', 'average', 'above']
rag.loc[:, 'rag'] = np.select(conditions, choices, default=np.nan)

Show some examples

In [9]:
test = rag[
    (rag['variable'] == 'autonomy_score') &
    (rag['year_group_lab'] == 'All') &
    (rag['gender_lab'] == 'All') &
    (rag['fsm_lab'] == 'All') &
    (rag['sen_lab'] == 'All')]
test

Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,total_pupils,group_n,group_wt_mean,group_wt_std,lower,upper,rag
1,autonomy_score,18.220779,77.0,School A,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,average
298,autonomy_score,17.884615,78.0,School B,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,average
595,autonomy_score,17.492754,69.0,School C,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,average
892,autonomy_score,17.764706,68.0,School D,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,average
1189,autonomy_score,18.6,70.0,School E,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,above
1486,autonomy_score,17.225352,71.0,School F,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,below
1783,autonomy_score,18.253731,67.0,School G,All,All,All,All,500.0,7.0,17.922,0.436777,17.485223,18.358777,average


In [10]:
test = data[
    (data['variable'] == 'accept_score') &
    (data['year_group_lab'] == 'Year 10') &
    (data['gender_lab'] == 'All') &
    (data['fsm_lab'] == 'All') &
    (data['sen_lab'] == 'All')]
test

Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab
86,accept_score,10.282609,46.0,School A,Year 10,All,All,All
383,accept_score,10.02439,41.0,School B,Year 10,All,All,All
680,accept_score,9.877193,57.0,School C,Year 10,All,All,All
977,accept_score,9.521739,46.0,School D,Year 10,All,All,All
1274,accept_score,10.2,35.0,School E,Year 10,All,All,All
1571,accept_score,,,School F,Year 10,All,All,All
1868,accept_score,10.35,40.0,School G,Year 10,All,All,All


In [11]:
test = rag[
    (rag['variable'] == 'accept_score') &
    (rag['year_group_lab'] == 'Year 10') &
    (rag['gender_lab'] == 'All') &
    (rag['fsm_lab'] == 'All') &
    (rag['sen_lab'] == 'All')]
test

Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,total_pupils,group_n,group_wt_mean,group_wt_std,lower,upper,rag
86,accept_score,10.282609,46.0,School A,Year 10,All,All,All,265.0,6.0,,,,,
383,accept_score,10.02439,41.0,School B,Year 10,All,All,All,265.0,6.0,,,,,
680,accept_score,9.877193,57.0,School C,Year 10,All,All,All,265.0,6.0,,,,,
977,accept_score,9.521739,46.0,School D,Year 10,All,All,All,265.0,6.0,,,,,
1274,accept_score,10.2,35.0,School E,Year 10,All,All,All,265.0,6.0,,,,,
1571,accept_score,,,School F,Year 10,All,All,All,265.0,6.0,,,,,
1868,accept_score,10.35,40.0,School G,Year 10,All,All,All,265.0,6.0,,,,,


## Set to NaN if not relevant

In [12]:
# Birth you age score is just average birth age and not needed as a "score"
# Overall count is the count of pupils in each group (not for specific variable)
mask = rag['variable'].isin(['birth_you_age_score', 'overall_count'])
nan_col = ['group_n', 'group_wt_mean', 'group_wt_std', 'lower', 'upper', 'rag']
rag.loc[mask, nan_col] = np.nan
rag[mask]

Unnamed: 0,variable,mean,count,school_lab,year_group_lab,gender_lab,fsm_lab,sen_lab,total_pupils,group_n,group_wt_mean,group_wt_std,lower,upper,rag
0,birth_you_age_score,7.850427,117.0,School A,All,All,All,All,742.0,,,,,,
33,birth_you_age_score,8.467742,62.0,School A,Year 8,All,All,All,408.0,,,,,,
66,birth_you_age_score,7.038462,52.0,School A,Year 10,All,All,All,320.0,,,,,,
99,birth_you_age_score,8.735294,17.0,School A,All,Girl,All,All,108.0,,,,,,
132,birth_you_age_score,7.642857,28.0,School A,All,Boy,All,All,131.0,,,,,,
165,birth_you_age_score,7.663636,55.0,School A,All,All,FSM,All,373.0,,,,,,
198,birth_you_age_score,7.844828,58.0,School A,All,All,Non-FSM,All,331.0,,,,,,
231,birth_you_age_score,7.607143,56.0,School A,All,All,All,SEN,275.0,,,,,,
264,birth_you_age_score,8.566667,45.0,School A,All,All,All,Non-SEN,388.0,,,,,,
297,birth_you_age_score,7.748,125.0,School B,All,All,All,All,742.0,,,,,,


## Add labels

In [13]:
var_lab = {
    'autonomy_score': 'Autonomy',
    'life_satisfaction_score': 'Life satisfaction',
    'optimism_score': 'Optimism',
    'wellbeing_score': 'Psychological wellbeing',
    'esteem_score': 'Self-esteem',
    'stress_score': 'Stress and coping',
    'appearance_score': 'Feelings around appearance',
    'negative_score': 'Negative affect',
    'lonely_score': 'Loneliness',
    'support_score': 'Supporting own wellbeing',
    'sleep_score': 'Sleep',
    'physical_score': 'Physical activity',
    'free_like_score': 'Free time',
    'media_score': 'Social media use',
    'places_score': 'Places to go and things to do',
    'talk_score': 'Talking about feelings',
    'accept_score': 'Acceptance',
    'school_belong_score': 'School connection',
    'staff_relationship_score': 'Support from staff',
    'home_relationship_score': 'Support from parents/carers',
    'home_happy_score': 'Home environment',
    'local_env_score': 'Local environment',
    'discrim_score': 'Discrimination',
    'belong_local_score': 'Local connection',
    'wealth_score': 'Relative wealth',
    'future_score': 'Future opportunities',
    'climate_score': 'Climate change',
    'social_score': 'Support from friends',
    'bully_score': 'Bullying'
}

In [14]:
# Add label column
rag['variable_lab'] = rag['variable'].map(var_lab)

# View the labels
rag[['variable', 'variable_lab']].drop_duplicates()

Unnamed: 0,variable,variable_lab
0,birth_you_age_score,
1,autonomy_score,Autonomy
2,life_satisfaction_score,Life satisfaction
3,optimism_score,Optimism
4,wellbeing_score,Psychological wellbeing
5,esteem_score,Self-esteem
6,stress_score,Stress and coping
7,appearance_score,Feelings around appearance
8,negative_score,Negative affect
9,lonely_score,Loneliness


## Add descriptions

In [15]:
describe = {
    'autonomy_score': '''How 'in control' young people feel of their life''',
    'life_satisfaction_score': 'How satisfied young people feel with their life',
    'optimism_score': '''Young people's hopefulness and confidence for the future''',
    'wellbeing_score': 'How positive and generally happy young people feel regarding their life',
    'esteem_score': 'How much young people value themselves',
    'stress_score': 'Managing stress levels and coping with difficulties',
    'appearance_score': '''Young people's feelings around the way that they look''',
    'negative_score': 'The frequency with which young people experience emotional difficulties',
    'lonely_score': 'How often young people feel lonely',
    'support_score': '''Young people's knowledge on supporting themselves and looking for advice''',
    'sleep_score': 'How much sleep young people get',
    'physical_score': 'How physically active young people are',
    'free_like_score': 'How often young people can do things that they like in their free time',
    'media_score': 'How much time young people spend on social media',
    'places_score': '''Whether young people feel there are places to go and things to do in their free time''',
    'talk_score': '''How positively/negatively young people feel about talking with others about feeling down''',
    'accept_score': 'Whether young people feel accepted by different groups of people in their life',
    'school_belong_score': 'Feelings of belonging at school',
    'staff_relationship_score': 'The support received from adults at school',
    'home_relationship_score': 'The support received from adults at home',
    'home_happy_score': '''Young people's feelings regarding the home that they live in''',
    'local_env_score': 'How young people feel regarding the area where they live',
    'discrim_score': 'Whether young people feel discriminated against',
    'belong_local_score': '''Young people's feelings of belonging in their local area''',
    'wealth_score': 'Whether young people feel their family is richer, poorer or the same as their friends',
    'future_score': 'How young people feel regarding the future options for work, education or training in their local area',
    'climate_score': 'Worries regarding climate change',
    'social_score': 'The support young people receive from their peers',
    'bully_score': 'The frequency with which young people experience different types of bullying'
}

In [16]:
rag['description'] = rag['variable'].map(describe)
rag[['variable_lab', 'description']].drop_duplicates()

Unnamed: 0,variable_lab,description
0,,
1,Autonomy,How 'in control' young people feel of their life
2,Life satisfaction,How satisfied young people feel with their life
3,Optimism,Young people's hopefulness and confidence for ...
4,Psychological wellbeing,How positive and generally happy young people ...
5,Self-esteem,How much young people value themselves
6,Stress and coping,Managing stress levels and coping with difficu...
7,Feelings around appearance,Young people's feelings around the way that th...
8,Negative affect,The frequency with which young people experien...
9,Loneliness,How often young people feel lonely


## Save as csv file

In [17]:
rag.to_csv(os.path.join(paths.survey, paths.rag), index=False)