In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the gender demographics workbook and keep only the rows whose `term`
# contains the (case-sensitive) substring 'summer'.
# `na=False` makes a missing term count as "not summer"; without it the mask
# contains NaN for blank cells and the row selection raises a ValueError.
df = pd.read_excel("gender_demographics.xlsx")
summer_df = df[df.term.str.contains('summer', na=False)]

# Same load-and-filter for the ethnicity demographics workbook.
df_2 = pd.read_excel("ethnicity_demographics.xlsx")
summer_df_2 = df_2[df_2.term.str.contains('summer', na=False)]

In [3]:
def _count_bars(data, x_field, y_title, color):
    """Return a bar chart (18px bars) counting the rows of ``data`` per ``x_field``.

    The y axis shows ``count()`` on a fixed 0-300 domain and is labelled
    ``y_title``; ``color`` is handed unchanged to the colour channel.
    """
    row_count = alt.Y(
        "count()",
        scale=alt.Scale(domain=(0, 300)),
        axis=alt.Axis(title=y_title),
    )
    return alt.Chart(data).mark_bar(size=18).encode(
        x=x_field,
        y=row_count,
        color=color,
    )


# Multi-selection keyed on the x encoding of whichever chart it is added to.
click = alt.selection_multi(encodings=["x"])

# Term chart: carries the selection; bars outside it are drawn light gray.
base = (
    _count_bars(
        summer_df,
        "term",
        "Number of Students",
        alt.condition(click, "term", alt.value("lightgray")),
    )
    .add_selection(click)
    .properties(height=300, width=100)
)

# Gender and ethnicity charts: rows are filtered by the selection made above.
gender = (
    _count_bars(summer_df, "gender", "", "term")
    .transform_filter(click)
    .properties(height=300, width=80)
)

ethnicity = (
    _count_bars(summer_df_2, "ethnicity", "", "term")
    .transform_filter(click)
    .properties(height=300, width=140)
)

# Lay the three charts out left to right.
combo = base | gender | ethnicity

# Last expression of the cell: the titled dashboard is what gets displayed.
combo.properties(
    title={
        "text": "Students Gender and Ethnicity by Term",
        "subtitle": "Click on the bars in the term graph to highlight those students in the gender and ethnicity graphs",
    }
)

In [4]:
# Load per-term average attendance and keep the summer terms.
# `na=False` makes a missing term count as "not summer"; without it the mask
# contains NaN for blank cells and the row selection raises a ValueError.
df_3 = pd.read_csv("average_attendance.csv")
summer_df_3 = df_3[df_3.term.str.contains('summer', na=False)]

# The column key keeps its original (misspelled) spelling because that is the
# name this notebook reads from the data; only the text shown on the chart is
# corrected, via an explicit axis title.
attendance_field = "average attendace (%)"
attendance_title = "Average Attendance (%)"

# Shared encoding for the solid line and its point markers, so the two layers
# cannot drift apart. The y axis is fixed to the full 0-100 % range.
attendance_base = alt.Chart(summer_df_3).encode(
    y=alt.Y(attendance_field, scale=alt.Scale(domain=(0, 100)), title=attendance_title),
    x="term",
).properties(
    width=200
)

line = attendance_base.mark_line()

# Two hard-coded points joined by a dashed segment from summer 2019 to
# summer 2021.
# NOTE(review): presumably this bridges a summer 2020 term with no attendance
# value, and 74 / 79 mirror the 2019 and 2021 rows of the CSV — confirm.
summer_2020_df = pd.DataFrame({
    'term': ["summer 2019", "summer 2021"],
    attendance_field: [74, 79],
})

summer_2020_chart = alt.Chart(summer_2020_df).mark_line(strokeDash=[4, 4]).encode(
    # Same explicit title as the other layers so the layered y axis shows one label.
    y=alt.Y(attendance_field, title=attendance_title),
    x='term',
)

point = attendance_base.mark_point()

# Last expression of the cell: solid line, dashed bridge and markers layered
# into one chart.
(line + summer_2020_chart + point).properties(
    title="Average Attendance Over Terms"
)

In [5]:
# Read the SAYO-T workbook into a DataFrame (pandas' default sheet selection).
sayot_workbook = "SAYO_T_formatted.xlsx"
df_sayot = pd.read_excel(sayot_workbook)

In [6]:
# Legend categories paired with their colours. Keeping each pair in a single
# mapping stops the two lists given to the colour scale from getting out of
# step; `domain` and `range_` are derived from it in insertion order.
category_colors = {
    'Adults': 'blue',
    'Communication': 'blue',
    'Critical Thinking': 'blue',
    "English Language Skills (Combined)": 'green',
    "Math Skills (Combined)": 'red',
    "Peers": 'blue',
    "Perseverance": 'blue',
    "Self Regulation": 'blue',
}
domain = list(category_colors)
range_ = list(category_colors.values())

# Single selection on the 'Category _Avg' field, bound to the legend, so the
# legend entries drive which category is selected.
selection = alt.selection_single(fields=['Category _Avg'], bind='legend')

# Pre vs. post scores, both axes fixed to 0-5. Points in the selection are
# drawn at 50 % opacity; points outside it get opacity 0 (invisible).
scatter = alt.Chart(df_sayot).mark_point().encode(
    x=alt.X('Pre_Test', scale=alt.Scale(domain=(0, 5)), title="Pre BtC Scores"),
    y=alt.Y('Post_Test', scale=alt.Scale(domain=(0, 5)), title="Post BtC Scores"),
    color=alt.Color("Category _Avg", scale=alt.Scale(domain=domain, range=range_)),
    opacity=alt.condition(selection, alt.value(0.5), alt.value(0)),
).properties(
    width=400,
    height=400,
    title={
        "text": "Average Scores of Students in BtC '22 from SAYO-T",
        # Spelling of "improvement" and "different" corrected in the displayed text.
        "subtitle": "Anything above orange line is improvement, select the legend to see different tests",
    },
).add_selection(selection)

# Endpoints of the y = x reference diagonal: a point above it has a post score
# higher than its pre score.
# Note: this rebinds `line` to a DataFrame; the attendance cell earlier in the
# notebook uses the same name for a chart. The name is kept so any later use
# of `line` keeps working.
line = pd.DataFrame({
    'pre_scores': [0, 5],
    'post_scores': [0, 5],
})

# Dashed orange diagonal drawn from those two endpoints.
line_plot = alt.Chart(line).mark_line(strokeDash=[2, 2], color='#ff7533').encode(
    x='pre_scores',
    y='post_scores',
)

# Last expression of the cell: the diagonal layered underneath the scatter.
line_plot + scatter