In [None]:
import os 
import pandas as pd
from pathlib import Path

from survey_util import load_raw_survey_data, count_by_school_type, calculate_percentage
from fuzzywuzzy import process

In [None]:
# Input and output locations, relative to this notebook.
DATA_DIR = Path('../../data')
SURVEY_DIR = DATA_DIR / 'survey'
RAW_DIR = SURVEY_DIR / 'raw'

# Folder that receives the CSV files written by the cells below.
VIZ_DIR = Path('../../src/_data/viz/survey/phase_2')

# Survey responses and the number of rows loaded; the cells below divide by
# base_respondents to turn answer counts into percentages.
raw_data = load_raw_survey_data()
base_respondents = len(raw_data)

SCHOOLS_DATA = pd.read_csv(DATA_DIR / 'leeds_schools_public.csv')

# Only these columns are read from the school characteristics file.
dfe_columns = [
    'time_period',
    'la_name',
    'school_urn',
    'school_name',
    'phase_type_grouping',
    'typeofestablishment_name',
    'ward_code',
    'ward_name',
    'school_postcode',
    'school_size',
]
dfe_data = pd.read_csv(
    DATA_DIR / 'spc_ees_school_characteristics.csv',
    usecols=dfe_columns,
)



In [None]:
# Survey question columns summarised in the headline statistics.
# Their order here is the row order of the headlines table.
headline_questions = [
    '05_overall_rating_arts_offer', # rating question; the headline counts 'Excellent' answers
    '06_pupil_arts_entitlement', # Yes/No question; the headline counts 'Yes' answers
    '07_additional_arts_funding', # Yes/No
    '10_signposting_beyond_curriculum', # Yes/No
    # '13_arts_facilities', # left out of the headlines: answer is a list of facilities
    '14_rating_extra_curricular_arts_enrichment_offering', # rating question; counts 'Excellent'
    '16_arts_award', # Yes/No
    '18_external_orgs_arts_performances', # Yes/No
    # '18a_arts_performance_external_org_subject_areas', # left out of the headlines: answer is a list of subjects
    '24_partnerships_external_orgs', # Yes/No
]


Headline stats

In [None]:

# Calculate values for headlines


def percent_of_respondents(column, answer):
    """Return the whole-number percentage of respondents who gave `answer` to `column`.

    The denominator is `base_respondents` (every loaded response), so missing
    answers count against the percentage. The result is rounded down.
    """
    # .eq() treats missing answers as non-matching and yields 0 (instead of
    # raising) when nobody gave this answer at all.
    matching = int(raw_data[column].eq(answer).sum())
    # Integer floor division avoids float error: int(29 / 100 * 100) == 28.
    return matching * 100 // base_respondents


excellent_arts_curriculum = percent_of_respondents('05_overall_rating_arts_offer', 'Excellent')
pupil_arts_entitlement = percent_of_respondents('06_pupil_arts_entitlement', 'Yes')
additional_arts_funding = percent_of_respondents('07_additional_arts_funding', 'Yes')
signposting_beyond_curriculum = percent_of_respondents('10_signposting_beyond_curriculum', 'Yes')
excellent_extra_curricular_arts = percent_of_respondents('14_rating_extra_curricular_arts_enrichment_offering', 'Excellent')
arts_award = percent_of_respondents('16_arts_award', 'Yes')
external_orgs_arts_performances = percent_of_respondents('18_external_orgs_arts_performances', 'Yes')
partnerships_external_orgs = percent_of_respondents('24_partnerships_external_orgs', 'Yes')

# Title, value and note are keyed by question so a row can never be paired
# with the wrong question if headline_questions is reordered or extended.
headline_details = {
    '05_overall_rating_arts_offer': (
        "Arts curriculum",
        excellent_arts_curriculum,
        "Rated their arts curriculum as Excellent",
    ),
    '06_pupil_arts_entitlement': (
        "Arts entitlement",
        pupil_arts_entitlement,
        "Said they have a commitment to pupil entitlement in the arts",
    ),
    '07_additional_arts_funding': (
        "Additional arts funding",
        additional_arts_funding,
        "Said they seek additional funding for arts curriculum and enrichment",
    ),
    '10_signposting_beyond_curriculum': (
        "Signposting",
        signposting_beyond_curriculum,
        "Said they sign-post additional creative opportunities for students",
    ),
    '14_rating_extra_curricular_arts_enrichment_offering': (
        "Extra-curricular offering",
        excellent_extra_curricular_arts,
        "Rated their extra-curricular arts offering as Excellent",
    ),
    '16_arts_award': (
        "Arts award",
        arts_award,
        "Said their school offers an Arts Award",
    ),
    '18_external_orgs_arts_performances': (
        "External practitioners",
        external_orgs_arts_performances,
        "Said they invited external creative practitioners into the school",
    ),
    '24_partnerships_external_orgs': (
        "Local partnerships",
        partnerships_external_orgs,
        "Said their school has a partnership with local arts organisations",
    ),
}

# Create DataFrame for headlines: one row per entry of headline_questions.
# A question without an entry in headline_details raises KeyError here rather
# than silently shifting labels.
headline_rows = []
for question in headline_questions:
    title, value, note = headline_details[question]
    headline_rows.append(
        {'Question': question, 'Title': title, 'Value': value, 'Note': note, 'Suffix': '%'}
    )

headlines = pd.DataFrame(
    headline_rows,
    columns=['Question', 'Title', 'Value', 'Note', 'Suffix'],
).set_index('Question')

headlines.to_csv(VIZ_DIR / 'headlines.csv')


## Overview of Respondents

Survey responses by school type

## Arts Provision in Schools

Commitment to the arts

Signposting headlines

In [None]:
# Respondents answering 'Yes' to each question.
signposting = raw_data.loc[raw_data['10_signposting_beyond_curriculum'] == 'Yes']
competitions = raw_data.loc[raw_data['09_competitions_showcases'] == 'Yes']

# (headline, matching respondents, note). Each note sits next to the headline
# it describes; previously the notes were listed in the opposite order to the
# rows, so the two notes were swapped in the output file.
headline_rows = [
    (
        'Competitions & Showcases',
        competitions,
        'of schools enter pupils into competitions or showcases - whether that is at a local community or regional/national level',
    ),
    (
        'Signposting beyond the curriculum',
        signposting,
        'of schools actively sign-post for pupils to develop creative skills and participate in arts opportunities beyond the curriculum',
    ),
]

signposting_headlines = pd.DataFrame(
    [
        {
            'Headlines': headline,
            # Whole-number percentage of all respondents, rounded down. Integer
            # division avoids float error such as int(29 / 100 * 100) == 28.
            'Value': len(matching) * 100 // base_respondents,
            'Suffix': '%',
            'Note': note,
        }
        for headline, matching, note in headline_rows
    ],
    columns=['Headlines', 'Value', 'Suffix', 'Note'],
)

signposting_headlines.to_csv(VIZ_DIR / 'signposting_headlines.csv', index=False)



External practitioners

In [None]:
# Respondents answering 'Yes' to each partnership question.
partnerships_external_orgs = raw_data.loc[raw_data['24_partnerships_external_orgs'] == 'Yes']
external_orgs_arts_performances = raw_data.loc[raw_data['18_external_orgs_arts_performances'] == 'Yes']
partnerships_local_schools = raw_data.loc[raw_data['25_partnerships_local_schools'] == 'Yes']

# (headline, matching respondents, note) - kept together so labels, values
# and notes cannot drift out of step.
headline_rows = [
    (
        'Partnerships',
        partnerships_external_orgs,
        'of all respondents said their school has a partnership with any local arts organisations',
    ),
    (
        'Delivering activities',
        external_orgs_arts_performances,
        'of all respondents said their school invited external practitioners to deliver creative activities',
    ),
    (
        'School partnerships',
        partnerships_local_schools,
        'of all respondents said their school partners with other schools to support the arts curriculum',
    ),
]

external_partnerships_headlines = pd.DataFrame(
    [
        {
            'Headlines': headline,
            # Whole-number percentage of all respondents, rounded down. Integer
            # division avoids float error such as int(29 / 100 * 100) == 28.
            'Value': len(matching) * 100 // base_respondents,
            'Suffix': '%',
            'Note': note,
        }
        for headline, matching, note in headline_rows
    ],
    columns=['Headlines', 'Value', 'Suffix', 'Note'],
)

external_partnerships_headlines.to_csv(VIZ_DIR / 'external_partnerships_headlines.csv', index=False)

Subject areas of the organisations and creative practitioners engaging with schools:

In [None]:
subject_col = '18a_arts_performance_external_org_subject_areas'

# One row per (response, subject area): the comma-separated answer is split
# and exploded. Assumes raw_data's index is named 'unique_response_number', so
# reset_index() produces the column dropped here - TODO confirm.
subject_mentions = (
    raw_data[['02_school_name', '03_school_type', subject_col]]
    .dropna()
    .reset_index()
    .assign(**{subject_col: lambda frame: frame[subject_col].str.split(',')})
    .explode(subject_col)
    .drop(columns={'unique_response_number'})
)

# Count mentions per school type and subject area; the only remaining value
# column is the school name, which therefore carries the count.
subject_counts = (
    subject_mentions
    .groupby(['03_school_type', subject_col])
    .count()
    .reset_index()
    .rename(columns={
        '03_school_type': 'School Type',
        subject_col: 'Subject Area',
        '02_school_name': 'Count'
    })
)

counts_by_school_type = count_by_school_type(raw_data)

# calculate_percentage comes from survey_util and is applied row by row;
# presumably it relates each Count to its school type's total - verify there.
subject_counts['Percent'] = subject_counts.apply(
    calculate_percentage, axis=1, args=(counts_by_school_type,)
)

subject_areas = (
    subject_counts
    .pivot_table(columns=['School Type'], index=['Subject Area'], values=['Percent'])
    .reset_index()
    .set_index('Subject Area')
)

# NOTE(review): labels are assigned by position, so this expects exactly four
# school-type columns in this order - confirm against the data.
subject_areas.columns = ['Primary', 'Secondary', 'Special School', 'Through School']

# Subject areas with no mentions for a school type become 0; astype(int)
# drops any fractional part.
subject_areas = subject_areas.fillna(0).astype(int)

subject_areas.to_csv(VIZ_DIR / 'subject_areas_headlines.csv', index=True)

subject_areas



Community arts programmes:

In [None]:
# Respondents answering 'Yes' to each programme question.
community_based_arts_programmes = raw_data.loc[raw_data['26_community_based_arts_programmes'] == 'Yes']
national_arts_programmes = raw_data.loc[raw_data['23_national_programmes'] == 'Yes']

# (headline, matching respondents, note) - kept together so labels, values
# and notes cannot drift out of step.
headline_rows = [
    (
        'Community-based arts programmes',
        community_based_arts_programmes,
        'of all respondents said their school is part of a local/community-based arts programme.',
    ),
    (
        'National arts programmes',
        national_arts_programmes,
        'of all respondents said their school is part of a national arts programme.',
    ),
]

community_programmes_headlines = pd.DataFrame(
    [
        {
            'Headlines': headline,
            # Whole-number percentage of all respondents, rounded down. Integer
            # division avoids float error such as int(29 / 100 * 100) == 28.
            'Value': len(matching) * 100 // base_respondents,
            'Suffix': '%',
            'Note': note,
        }
        for headline, matching, note in headline_rows
    ],
    columns=['Headlines', 'Value', 'Suffix', 'Note'],
)

community_programmes_headlines.to_csv(VIZ_DIR / 'community_programs_headlines.csv', index=False)

## Facilities

In [None]:
# One row per (response, facility): the comma-separated answer is split and
# exploded. Assumes raw_data's index is named 'unique_response_number', so
# reset_index() produces the column dropped here - TODO confirm.
facility_mentions = (
    raw_data[['02_school_name', '03_school_type', '13_arts_facilities']]
    .dropna()
    .reset_index()
    .assign(**{'13_arts_facilities': lambda frame: frame['13_arts_facilities'].str.split(',')})
    .explode('13_arts_facilities')
    .drop(columns={'unique_response_number'})
)

# Number of mentions of each facility within each school type.
facilities = (
    facility_mentions
    .groupby(['03_school_type', '13_arts_facilities'])
    .size()
    .reset_index(name='Count')
)

# NOTE(review): computed here but not used anywhere else in this cell.
counts_by_school_type = count_by_school_type(raw_data)

# 'Total' is the sum of facility mentions for the school type, so 'Percent'
# is a share of mentions within that school type.
mentions_per_type = facilities.groupby('03_school_type')['Count'].transform('sum').round(0)
facilities['Total'] = mentions_per_type
facilities['Percent'] = (
    facilities['Count'].div(facilities['Total']).mul(100).round(0).astype(int)
)

# Wide view: one row per facility, one column per school type, plus a row total.
facilities_pivot = facilities.pivot_table(
    index='13_arts_facilities',
    columns='03_school_type',
    values='Percent',
    fill_value=0,
)
facilities_pivot['Total'] = facilities_pivot.sum(axis=1).round(0).astype(int)
facilities_pivot = facilities_pivot.reset_index()
facilities_pivot.columns.name = None
facilities_pivot = facilities_pivot.rename(columns={
    'Primary': 'Primary School',
    'Secondary': 'Secondary School',
    'Special': 'Special School',
    'Through': 'Through School',
})

# NOTE(review): the long-form table is what gets written and displayed;
# facilities_pivot is built above but not saved - confirm which one the
# visualisation expects.
facilities.to_csv(VIZ_DIR / 'facilities.csv', index=False)

facilities

## Funding & Support

Funding sources for provision of the arts curriculum and enrichment activities:

In [None]:
# TODO: add analysis of funding sources for the arts curriculum and enrichment activities