# Aggregate scores and create RAG ratings

Aggregate the synthetic person-level standard survey responses to find the mean scores by area, then generate RAG ratings based on the scores.

## Set-up

### Packages and file paths

In [1]:
# Import required packages
from dataclasses import dataclass
import numpy as np
import os
import pandas as pd

# Import functions from our dashboard package
from kailo_beewell_dashboard.synthesise_aggregate import (
    aggregate_scores, results_by_site_and_group)
from kailo_beewell_dashboard.synthesise_scores import create_rag_ratings
from kailo_beewell_dashboard.topic_labels import (
    topic_name_dict, topic_description_dict)

In [2]:
# File paths
@dataclass(frozen=True)
class Paths:
    '''Stores paths to data and files'''
    # Each attribute is annotated so that @dataclass registers it as a real
    # field (unannotated class attributes are ignored by @dataclass, leaving
    # an empty repr and no way to override a path via the constructor).

    # Directory containing the survey data files
    survey: str = '../../data/survey_data'
    # File name of the synthetic raw data read by this notebook
    synthetic_data: str = 'standard_synthetic_data_raw_msoa.csv'
    # File name that the aggregated scores with RAG ratings are saved to
    aggregate_score: str = 'standard_area_aggregate_scores_rag.csv'


paths = Paths()

### Import raw data

In [3]:
# Build the path to the synthetic raw data, load it, and preview the first rows
synthetic_data_path = os.path.join(paths.survey, paths.synthetic_data)
data = pd.read_csv(synthetic_data_path)
data.head()

Unnamed: 0,gender,transgender,sexual_orientation,neurodivergent,birth_parent1,birth_parent2,birth_you,birth_you_age,autonomy_pressure,autonomy_express,...,peer_talk_helpful_lab,peer_talk_if_lab,accept_peer_lab,year_group_lab,fsm_lab,sen_lab,ethnicity_lab,english_additional_lab,school_lab,msoa
0,4.0,2.0,6.0,3.0,2.0,,2.0,1.0,2.0,5.0,...,Somewhat helpful,,Not at all,Year 10,Non-FSM,Non-SEN,Ethnic minority,,School E,Torridge 001
1,,2.0,1.0,3.0,3.0,2.0,3.0,8.0,4.0,2.0,...,,Very uncomfortable,Slightly,Year 10,Non-FSM,Non-SEN,Ethnic minority,No,School D,North Devon 001
2,2.0,3.0,4.0,1.0,1.0,1.0,1.0,1.0,5.0,4.0,...,,Very comfortable,Not at all,Year 10,Non-FSM,Non-SEN,White British,No,School E,North Devon 007
3,2.0,5.0,5.0,,2.0,2.0,,3.0,1.0,2.0,...,,Uncomfortable,,Year 10,Non-FSM,Non-SEN,White British,No,School G,North Devon 006
4,5.0,3.0,4.0,1.0,1.0,3.0,3.0,2.0,5.0,2.0,...,Somewhat helpful,,Not at all,,Non-FSM,Non-SEN,White British,Yes,School B,Torridge 003


## Find mean score by MSOA

In [4]:
# Template of the aggregate results in which every question has mean NaN and
# count 0. It is used when a site has no pupils of a particular subgroup
# (i.e. no-one in certain FSM/SEN/gender/year)
no_pupils = aggregate_scores(data).assign(mean=np.nan, count=0)
no_pupils.head()

Unnamed: 0,variable,mean,count
0,autonomy_score,,0
1,life_satisfaction_score,,0
2,optimism_score,,0
3,wellbeing_score,,0
4,esteem_score,,0


In [5]:
# Aggregate by site, using MSOA as the site column and no pupil subgroup
# (group_type='none')
agg = results_by_site_and_group(
    data=data,
    agg_func=aggregate_scores,
    no_pupils=no_pupils,
    group_type='none',
    site_col='msoa',
)

# Hide results when n<10 by blanking both the mean and the count
below_threshold = agg['count'].lt(10)
agg.loc[below_threshold, ['mean', 'count']] = np.nan

agg.head()

Unnamed: 0,variable,mean,count,msoa
0,autonomy_score,18.4375,16.0,North Devon 001
1,life_satisfaction_score,6.041667,24.0,North Devon 001
2,optimism_score,12.307692,26.0,North Devon 001
3,wellbeing_score,23.230769,13.0,North Devon 001
4,esteem_score,11.764706,17.0,North Devon 001


In [6]:
# Tabulate each column's dtype next to whether it contains any missing values
types, null = agg.dtypes, agg.isna().any()
pd.DataFrame([types, null]).transpose()

Unnamed: 0,0,1
variable,object,False
mean,float64,True
count,float64,True
msoa,object,False


## Set some MSOAs to appear suppressed (as if n<10)

In [7]:
# Blank the mean and count for a few MSOAs so that they look the same as
# areas whose results were hidden for having n<10
suppressed_msoa = ['North Devon 013', 'North Devon 014', 'Torridge 007']
agg.loc[agg['msoa'].isin(suppressed_msoa), ['mean', 'count']] = np.nan

## Add RAG ratings

In [8]:
# Generate RAG ratings from the aggregated scores and preview the first rows
rag = create_rag_ratings(agg)
rag.head(n=5)

Unnamed: 0,variable,mean,count,msoa,total_pupils,group_n,group_wt_mean,group_wt_std,lower,upper,rag
0,autonomy_score,18.4375,16.0,North Devon 001,426.0,20.0,17.852113,0.724008,17.128105,18.57612,average
1,life_satisfaction_score,6.041667,24.0,North Devon 001,617.0,20.0,4.946515,0.479829,4.466687,5.426344,above
2,optimism_score,12.307692,26.0,North Devon 001,622.0,20.0,12.069132,0.446459,11.622673,12.51559,average
3,wellbeing_score,23.230769,13.0,North Devon 001,466.0,20.0,21.006438,0.939872,20.066566,21.946309,above
4,esteem_score,11.764706,17.0,North Devon 001,391.0,19.0,12.741688,0.587938,12.15375,13.329626,below


## Add names and descriptions for the topics

In [9]:
# Look up the display name for each topic and store it as a label column
topic_labels = rag['variable'].map(topic_name_dict)
rag['variable_lab'] = topic_labels

# View each variable alongside its label
rag.loc[:, ['variable', 'variable_lab']].drop_duplicates()

Unnamed: 0,variable,variable_lab
0,autonomy_score,Autonomy
1,life_satisfaction_score,Life satisfaction
2,optimism_score,Optimism
3,wellbeing_score,Psychological wellbeing
4,esteem_score,Self-esteem
5,stress_score,Stress and coping
6,appearance_score,Feelings around appearance
7,negative_score,Negative affect
8,lonely_score,Loneliness
9,support_score,Supporting own wellbeing


In [10]:
# Look up the description for each topic and store it as a new column
topic_descriptions = rag['variable'].map(topic_description_dict)
rag['description'] = topic_descriptions

# View each topic label alongside its description
rag.loc[:, ['variable_lab', 'description']].drop_duplicates()

Unnamed: 0,variable_lab,description
0,Autonomy,\nHow 'in control' young people feel of their ...
1,Life satisfaction,\nHow satisfied young people feel with their life
2,Optimism,\nYoung people's hopefulness and confidence fo...
3,Psychological wellbeing,\nHow positive and generally happy young peopl...
4,Self-esteem,\nHow much young people value themselves
5,Stress and coping,\nManaging stress levels and coping with diffi...
6,Feelings around appearance,\nYoung people's feelings around the way that ...
7,Negative affect,\nThe frequency with which young people experi...
8,Loneliness,\nHow often young people feel lonely
9,Supporting own wellbeing,\nYoung people's knowledge on supporting thems...


## Save results

In [11]:
# Write the scores with RAG ratings to CSV, recording missing values as 'NULL'
aggregate_score_path = os.path.join(paths.survey, paths.aggregate_score)
rag.to_csv(aggregate_score_path, index=False, na_rep='NULL')