In [13]:
# Import key libraries

import os

import pandas as pd
import numpy as np

import plotly.express as px

import chart_studio
import chart_studio.plotly as py
import chart_studio.tools as tls

In [None]:
# Setup Chart Studio
# Credentials are read from the environment so the API key never lives in the notebook.
# The key that used to be hard-coded in this cell has been exposed and should be
# regenerated (Chart Studio: profile > settings > regenerate key).
username = os.environ.get('CHART_STUDIO_USERNAME', 'mrcrookes') # your username
api_key = os.environ.get('CHART_STUDIO_API_KEY') # your api key - never paste it here

if api_key:
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
else:
    # Nothing is written, so any credentials saved on this machine earlier stay as they are
    print('CHART_STUDIO_API_KEY is not set - Chart Studio credentials were not updated')

In [30]:
# Qualification level to analyse; it is also the worksheet name read from both workbooks
level = 'National_5'
level_label = ' '.join(level.split('_'))

# Both workbooks are read the same way: ODF engine, the sheet for `level`,
# skipping the first two rows of the sheet
ods_options = dict(engine = "odf", sheet_name = level, skiprows = 2)

ind_df = pd.read_excel("attainment-statistics-(august)-2022-provisional-centre-independent.ods",
                       **ods_options)

state_df = pd.read_excel("attainment-statistics-(august)-2022-provisional-centre-education-authority.ods",
                         **ods_options)

In [31]:
def tidy_up_1(df):
    """First tidy-up pass over a raw attainment sheet.

    Works on a copy, so the frame passed in is left untouched.

    Steps:
      * spaces in column names become underscores
      * the marker strings '[z]', '[c]' and '[low]' become NaN
      * the final row is removed
      * every column with 'Percentage' in its name is removed

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet as loaded by ``pd.read_excel``; column names must be strings.

    Returns
    -------
    pandas.DataFrame
        A new, tidied frame. An empty input gives an empty result instead of
        raising.
    """
    tidy = df.copy()

    # Remove spaces and replace with '_' (literal match, not a regex)
    tidy.columns = tidy.columns.str.replace(' ', '_', regex=False)

    # Marker strings that stand in for a number become missing values
    tidy = tidy.replace(['[z]', '[c]', '[low]'], np.nan)

    # Remove last row (by position, so it also works on an empty frame
    # or one whose index labels repeat)
    tidy = tidy.iloc[:-1]

    # Remove columns with Percentage in the heading
    return tidy.drop(columns=tidy.filter(regex='Percentage').columns)
    
# Run the first tidy-up pass on both sectors.
# Careful: tidy_up_1 removes the last row each time, so re-running this cell
# without reloading the data strips one more row from each table.
ind_df, state_df = tidy_up_1(ind_df), tidy_up_1(state_df)

In [32]:
def tidy_up_2(df, years=None):
    """Shorten the count column names and split cumulative counts into per-grade counts.

    For each year the columns are renamed

        Grade_A_Count_<year>     -> As_<year>
        Grades_A-B_Count_<year>  -> A-Bs_<year>
        Grades_A-C_Count_<year>  -> A-Cs_<year>
        Grades_A-D_Count_<year>  -> A-Ds_<year>
        No_Award_Count_<year>    -> NAs_<year>

    then Bs/Cs/Ds are derived by subtracting neighbouring cumulative columns,
    and the cumulative A-B / A-C / A-D columns are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the underscore-style column names listed above.
        It is not modified.
    years : iterable of str or int, optional
        Years to process. Defaults to 2022 down to 2018.

    Returns
    -------
    pandas.DataFrame
        A new frame; the derived Bs_/Cs_/Ds_ columns are appended year by year
        after the existing columns.

    Raises
    ------
    KeyError
        If one of the cumulative count columns for a requested year is missing.
    """
    if years is None:
        years = ['2022', '2021', '2020', '2019', '2018']
    years = [str(year) for year in years]

    # Sort out column titles: one rename covering every year at once
    short_prefixes = {
        'Grade_A_Count_': 'As_',
        'Grades_A-B_Count_': 'A-Bs_',
        'Grades_A-C_Count_': 'A-Cs_',
        'Grades_A-D_Count_': 'A-Ds_',
        'No_Award_Count_': 'NAs_',
    }
    new_names = {
        old + year: new + year
        for year in years
        for old, new in short_prefixes.items()
    }
    out = df.rename(columns=new_names)  # returns a new frame, so df stays as it was

    # Calculate number of Bs, Cs and Ds from the cumulative counts
    cumulative_cols = []
    for year in years:
        out['Bs_' + year] = out['A-Bs_' + year] - out['As_' + year]
        out['Cs_' + year] = out['A-Cs_' + year] - out['A-Bs_' + year]
        out['Ds_' + year] = out['A-Ds_' + year] - out['A-Cs_' + year]
        cumulative_cols += ['A-Bs_' + year, 'A-Cs_' + year, 'A-Ds_' + year]

    # Remove the cumulative columns now that the per-grade counts exist
    return out.drop(columns=cumulative_cols)
        
# Run the second tidy-up pass on both sectors.
# This cell cannot be re-run on its own output: tidy_up_2 reads the cumulative
# A-B / A-C / A-D columns and removes them, so a second pass raises a KeyError.
ind_df, state_df = tidy_up_2(ind_df), tidy_up_2(state_df)

In [33]:
# Preview the first rows of the tidied independent-school table
ind_df.head()

Unnamed: 0,Subject,As_2022,NAs_2022,Entries_2022,As_2021,NAs_2021,Entries_2021,As_2020,NAs_2020,Entries_2020,...,Ds_2021,Bs_2020,Cs_2020,Ds_2020,Bs_2019,Cs_2019,Ds_2019,Bs_2018,Cs_2018,Ds_2018
0,Accounting,15.0,,20.0,25.0,0.0,35,30.0,0.0,35.0,...,0.0,5.0,0.0,0.0,5.0,5.0,5.0,5.0,5.0,0.0
1,Administration and IT,10.0,,30.0,20.0,0.0,35,10.0,0.0,25.0,...,0.0,10.0,5.0,0.0,10.0,5.0,5.0,10.0,5.0,5.0
2,Applications of Mathematics,75.0,5.0,145.0,75.0,15.0,155,75.0,5.0,135.0,...,5.0,20.0,25.0,10.0,20.0,5.0,10.0,5.0,15.0,5.0
3,Art and Design,405.0,,535.0,370.0,,460,395.0,,520.0,...,0.0,95.0,25.0,5.0,85.0,30.0,10.0,105.0,35.0,10.0
4,Biology,1110.0,35.0,1630.0,1100.0,40.0,1555,1030.0,10.0,1560.0,...,55.0,295.0,190.0,35.0,265.0,150.0,80.0,280.0,145.0,70.0


In [34]:
# Convert into long format

def tolong(df):
    """Reshape a wide per-subject table into one row per (Subject, column).

    Every column other than 'Subject' is unpivoted into a 'Count' value.
    The former column name is then split at its last underscore: the part
    before it becomes 'Grades', the part after it becomes 'Year'.

    Returns a new frame with the columns Subject, Count, Grades, Year.
    """
    long_df = df.melt(id_vars=['Subject'], var_name='abc', value_name='Count')

    # Take the temporary column of original headings out of the frame
    headings = long_df.pop('abc')

    long_df['Grades'] = [heading.rsplit('_', 1)[0] for heading in headings]
    long_df['Year'] = [heading.split('_')[-1] for heading in headings]

    return long_df
    
# Reshape both sector tables to long format, then preview the state-school one
ind_ldf, state_ldf = tolong(ind_df), tolong(state_df)

state_ldf.head()

Unnamed: 0,Subject,Count,Grades,Year
0,Accounting,425.0,As,2022
1,Administration and IT,1665.0,As,2022
2,Applications of Mathematics,3350.0,As,2022
3,Art and Design,4075.0,As,2022
4,Biology,6220.0,As,2022


In [35]:
# Label every row with the sector it came from
ind_ldf = ind_ldf.assign(School='Ind')
state_ldf = state_ldf.assign(School='State')

ind_ldf.head()

Unnamed: 0,Subject,Count,Grades,Year,School
0,Accounting,15.0,As,2022,Ind
1,Administration and IT,10.0,As,2022,Ind
2,Applications of Mathematics,75.0,As,2022,Ind
3,Art and Design,405.0,As,2022,Ind
4,Biology,1110.0,As,2022,Ind


In [1]:
# Plot graph: entry and grade counts per subject for independent schools, one row of bars per year

fig = px.bar(ind_ldf, x = "Subject", y = "Count", 
                color = "Grades",
                barmode = 'overlay',
                facet_row = 'Year',
                width = 1000,
                height = 2000,
                labels={"Count": "Students"},
                category_orders = {'Grades' : ['As', 'Bs', 'Cs', 'Ds', 'NAs', 'Entries']},
                title = 'Entry and grade counts for Independent Schools at ' + level_label
                )

# write_html does not create missing folders, so make sure ./graphs exists
# before saving (otherwise a fresh checkout fails here)
os.makedirs('./graphs', exist_ok=True)
fig.write_html('./graphs/counts_ind_' + level + '.html')

fig.show()

NameError: name 'px' is not defined

In [37]:
# Publish the current `fig` (built by the cell above) to Chart Studio;
# the value returned by py.plot is shown as this cell's output
py.plot(fig, filename=f'counts_ind_{level}', auto_open=True)

# To get the HTML embed snippet, run the line below with your own chart URL
#tls.get_embed('https://plotly.com/~mrcrookes/14/')

'https://plotly.com/~mrcrookes/27/'

In [38]:
# Plot graph: entry and grade counts per subject for state schools, one row of bars per year

fig = px.bar(state_ldf, x = "Subject", y = "Count", 
                color = "Grades",
                barmode = 'overlay',
                facet_row = 'Year',
                width = 1000,
                height = 2000,
                labels={"Count": "Students"},
                category_orders = {'Grades' : ['As', 'Bs', 'Cs', 'Ds', 'NAs', 'Entries']},
                title = 'Entry and grade counts for State Schools at ' + level_label
                )

# write_html does not create missing folders, so make sure ./graphs exists
# before saving (otherwise a fresh checkout fails here)
os.makedirs('./graphs', exist_ok=True)
fig.write_html('./graphs/counts_state_' + level + '.html')

fig.show()

In [39]:
# Publish the current `fig` (built by the cell above) to Chart Studio;
# the value returned by py.plot is shown as this cell's output
py.plot(fig, filename=f'counts_state_{level}', auto_open=True)

# To get the HTML embed snippet, run the line below with your own chart URL
#tls.get_embed('https://plotly.com/~mrcrookes/14/')

'https://plotly.com/~mrcrookes/29/'

In [10]:
# Keep only the grade rows: 'Entries' rows are removed from state_ldf from here on
state_ldf = state_ldf.loc[state_ldf['Grades'] != 'Entries']

# Grade distribution for state schools on a percentage scale.
# Options left switched off: color = "Subject", facet_row = 'Year', width = 1000, height = 1000
fig = px.histogram(
    state_ldf,
    x = "Grades",
    y = "Count",
    histnorm = 'percent',
    barmode = 'overlay',
    labels = {"Count": "Students"},
    category_orders = {'Grades' : ['Entries', 'As', 'Bs', 'Cs', 'Ds', 'NAs']},
)

# Hover text shows the grade and the percentage rounded to a whole number
fig.update_traces(hovertemplate = 'Grade = %{x}, <br>Percentage = %{y:.0f}')

# Fixed y-range, the same as the independent-school chart, so the two can be compared
fig.update_yaxes(title = "%", range = [0,65])

fig.show()

In [67]:
# Keep only the grade rows: 'Entries' rows are removed from ind_ldf from here on
ind_ldf = ind_ldf.loc[ind_ldf['Grades'] != 'Entries']

# Grade distribution for independent schools on a percentage scale.
# Options left switched off: color = "Subject", facet_row = 'Year', width = 1000, height = 1000
fig = px.histogram(
    ind_ldf,
    x = "Grades",
    y = "Count",
    histnorm = 'percent',
    barmode = 'overlay',
    labels = {"Count": "Students"},
    category_orders = {'Grades' : ['Entries', 'As', 'Bs', 'Cs', 'Ds', 'NAs']},
)

# Hover text shows the grade and the percentage rounded to a whole number
fig.update_traces(hovertemplate = 'Grade = %{x}, <br>Percentage = %{y:.0f}')

# Fixed y-range, the same as the state-school chart, so the two can be compared
fig.update_yaxes(title = "%", range = [0,65])

fig.show()

In [26]:
# Combine the two sector tables into one frame.
# The rows are stacked with concat rather than joined with merge: both frames have
# the same columns, and the School column says which sector each row belongs to.
# ignore_index=True gives the result a fresh 0..n-1 index instead of repeating
# the row labels of each input.
dfc = pd.concat([state_ldf, ind_ldf], ignore_index=True)

dfc.head()

Unnamed: 0,Subject,Count,Grades,Year,School
0,Accounting,15.0,As,2022,State
1,Art and Design (Design),185.0,As,2022,State
2,Art and Design (Expressive),410.0,As,2022,State
3,Biology,570.0,As,2022,State
4,Business Management,110.0,As,2022,State


In [62]:
# Keep only the grade rows of the combined table
dfc = dfc.loc[dfc['Grades'] != 'Entries']

# Grade distribution on a percentage scale, one facet row per School value.
# Options left switched off: color = "Subject", width = 1000, height = 1000
fig = px.histogram(
    dfc,
    x = "Grades",
    y = "Count",
    histnorm = 'percent',
    barmode = 'overlay',
    facet_row = 'School',
    hover_data = {'School':False, 'Count':False},  # hide these two fields in the hover box
    labels = {"Count": "Students"},
    category_orders = {'Grades' : ['Entries', 'As', 'Bs', 'Cs', 'Ds', 'NAs']},
)

# Unlike the single-sector charts, no custom hovertemplate and no fixed
# y-range ([0,65]) are applied here; only the axis title is set
fig.update_yaxes(title = "%")

fig.show()