In [2]:
from afs_mapping_target_families.getters.processed.combined_data import get_combined_data
from afs_mapping_target_families import PROJECT_DIR
import altair as alt
from nesta_ds_utils.viz.altair import formatting, saving
from statistics import mean, median
import scipy.stats
# Chart theme setup from nesta_ds_utils; presumably registers/enables the Nesta Altair theme
# for every chart created below — TODO confirm against the nesta_ds_utils docs.
formatting.setup_theme()

In [3]:
# Load the combined dataset and keep only the rows whose `date` is 'Annual'.
asq_data = get_combined_data()
annual_data = asq_data.loc[asq_data["date"] == "Annual"]

# Keep only rows where a response rate is available (the column uses a sentinel string
# when it is not) and the four columns needed for the plots.
# Rows and columns are selected in a single .loc call and explicitly copied so that the
# column assignments made in later cells operate on an independent frame rather than on
# a slice of `annual_data` (avoids pandas' SettingWithCopyWarning).
has_response_rate = (
    annual_data["response_rate_combined"] != "Could Not Calculate Response Rate"
)
asq_data_rr = annual_data.loc[
    has_response_rate,
    ["la_name", "response_rate_combined", "total_kids_hv", "ONS code"],
].copy()


2023-10-30 08:48:04,662 - botocore.credentials - INFO - Found credentials in shared credentials file: ~/.aws/credentials


## Plots used in public-facing report

In [4]:
# Convert the two plotting columns to floats; the response rate is additionally scaled
# by 100 (proportion -> percentage).
# `.assign` builds a new frame instead of writing into a filtered slice, and `astype`
# replaces the per-element `apply(lambda x: float(x))`.
# NOTE: this cell is not idempotent — re-running it without re-running the load cell
# multiplies the response rate by 100 again.
asq_data_rr = asq_data_rr.assign(
    response_rate_combined=asq_data_rr["response_rate_combined"].astype(float) * 100,
    total_kids_hv=asq_data_rr["total_kids_hv"].astype(float),
)

In [13]:
# Remote GeoJSON with the 2021 UK county / unitary authority boundaries.
geojson_url = "https://raw.githubusercontent.com/VolcanoBlue13/uk_geojson_topojson_datasets/main/geojson/UK/County_Unitary_Authority/C_UA_UK_2021_boundaries_ultra_generalised.geojson"

# The file is read as JSON and the geometries are taken from its "features" property.
boundary_format = alt.DataFormat(property="features", type="json")
regions = alt.Data(url=geojson_url, format=boundary_format)

# Vega expression used as the colour condition in the map: true only for areas that
# have a response rate greater than zero.
alternative_condition = "datum.response_rate_combined > 0"

In [10]:
# Preview the first rows of the response-rate table (the columns used by the plots below).
asq_data_rr.head()

Unnamed: 0,la_name,response_rate_combined,total_kids_hv,ONS code
600,County Durham,90.175496,4733.0,E06000047
601,Darlington,104.372764,1035.0,E06000005
602,Gateshead,78.049486,1843.0,E08000037
603,Hartlepool,91.217124,1003.0,E06000001
604,Middlesbrough,93.288233,1623.0,E06000002


In [14]:
# Choropleth of the combined response rate per local authority.
# `regions` and `alternative_condition` are defined in the boundary-setup cell.
alt.Chart(regions).mark_geoshape(stroke="white").transform_lookup(
    # We want the CTYUA21CD field to be the linking column in the regions data.
    lookup="properties.CTYUA21CD",
    from_=alt.LookupData(
        asq_data_rr,
        "ONS code",
        # Lookup fields must be plain column names: the ":Q" type shorthand is only
        # understood by encoding channels, so it is dropped here to make the
        # `response_rate_combined` column actually reach the geo features.
        ["response_rate_combined", "la_name"],
    ),
).encode(
    color=alt.condition(
        alternative_condition,
        # Areas satisfying the condition are coloured by response rate, with the
        # colour scale domain fixed to 30-100.
        alt.Color(
            "response_rate_combined:Q",
            scale=alt.Scale(scheme="redyellowblue", domain=[30, 100]),
        ),
        # Everything else (e.g. areas with no matched row) is drawn in light grey.
        alt.value("lightgrey"),
    )
)

Response rate histogram

In [11]:
# note: I made some formatting fixes in ppt in the report (i.e. axis labels, etc.)
# Histogram of the combined response rate: binned on x (at most 40 bins), row count on y.
hist = (
    alt.Chart(asq_data_rr)
    .mark_bar()
    .encode(
        x=alt.X("response_rate_combined:Q", bin=alt.Bin(maxbins=40)),
        y="count()",
    )
    .configure_mark(color="blue")
)

In [7]:
# These I manually added to the response rate histogram in ppt
print("The mean response rate is {}".format(mean(asq_data_rr["response_rate_combined"])))
# The second figure is the median, so label it as such (it was previously printed as "mean").
print("The median response rate is {}".format(median(asq_data_rr["response_rate_combined"])))

The mean response rate is 71.62664042738425
The mean response rate is 75.61166109436319


Response rate vs. population scatterplot

In [14]:
# axis labels / other formatting also done manually in ppt
# Scatter of the total_kids_hv column (x) against the combined response rate (y).
scatter = (
    alt.Chart(asq_data_rr)
    .mark_point()
    .encode(
        alt.X("total_kids_hv:Q"),
        alt.Y("response_rate_combined:Q"),
    )
    .configure_mark(color="blue")
)

In [15]:
# Pearson correlation between the total_kids_hv column and the combined response rate.
correlation = scipy.stats.pearsonr(
    asq_data_rr["total_kids_hv"],
    asq_data_rr["response_rate_combined"],
)
# Index 0 of the result is the correlation coefficient, index 1 the p-value.
pearson_r = correlation[0]
p_value = correlation[1]
print(
    "The correlation coefficient between number of kids and reponse rate is {} with p-value {}".format(
        pearson_r, p_value
    )
)

The correlation coefficient between number of kids and reponse rate is -0.060570066256134786 with p-value 0.4787511084134485
