In [1]:
import pandas as pd
import altair as alt

In [3]:
# Load the gender and ethnicity demographics workbooks and keep only the rows
# whose `term` value contains the substring "summer".
# na=False makes rows with a missing term count as "no match"; without it
# str.contains yields NaN for those rows and pandas refuses to use the result
# as a boolean mask.
df = pd.read_excel("gender_demographics.xlsx")
summer_df = df[df.term.str.contains('summer', na=False)]

df_2 = pd.read_excel("ethnicity_demographics.xlsx")
summer_df_2 = df_2[df_2.term.str.contains('summer', na=False)]

# Multi-selection on the `term` field, driven by clicking legend entries.
selection = alt.selection_multi(fields=['term'], bind='legend')

# Row count per term (this panel is not tied to the legend selection).
total = alt.Chart(summer_df).mark_bar().encode(
    y="count()",
    x="term"
)

# Row count per gender, coloured by term; bars of terms that are not part of
# the current selection are drawn at opacity 0.2 instead of 1.
gender = alt.Chart(summer_df).mark_bar().encode(
    y="count()",
    x="gender",
    color="term",
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_selection(
    selection
)

# Same view for ethnicity, built from the second workbook and sharing the
# same selection object as the gender panel.
ethnicity = alt.Chart(summer_df_2).mark_bar().encode(
    y="count()",
    x="ethnicity",
    color="term",
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_selection(
    selection
)

# Last expression of the cell: render the three panels side by side.
total | gender | ethnicity

In [5]:
# Single-item selection used to colour the clicked bar of the term chart.
single = alt.selection_single()

# Row count per term; bars outside the selection are drawn light gray.
total = (
    alt.Chart(summer_df)
    .mark_bar()
    .encode(
        x="term",
        y="count()",
        color=alt.condition(single, 'term', alt.value('lightgray')),
    )
    .add_selection(single)
)


def _summer_2022_bars(data, category):
    """Bar chart of row counts per `category`, restricted to term == "summer 2022".

    The y scale is fixed to 0-120 so that charts built with this helper share
    the same vertical range.
    """
    only_summer_2022 = alt.FieldEqualPredicate(field='term', equal="summer 2022")
    count_axis = alt.Y("count()", scale=alt.Scale(domain=(0, 120)))
    bars = alt.Chart(data).mark_bar().encode(count_axis, x=category)
    return bars.transform_filter(only_summer_2022)


gender_2022 = _summer_2022_bars(summer_df, "gender")

ethnicity_2022 = _summer_2022_bars(summer_df_2, "ethnicity")

# Last expression of the cell: render the three panels side by side.
total | gender_2022 | ethnicity_2022

In [13]:
# Multi-selection keyed on the x encoding of whichever chart it is added to
# (the term chart below).
click = alt.selection_multi(encodings=['x'])

# Values shared by all three panels.
_bar_size = 18
_panel_height = 300


def _student_count_y(axis_title):
    """Return a count() y-encoding fixed to the 0-300 range with the given axis title."""
    return alt.Y(
        "count()",
        scale=alt.Scale(domain=(0, 300)),
        axis=alt.Axis(title=axis_title),
    )


# Term panel: owns the selection; bars outside it are drawn light gray.
base = (
    alt.Chart(summer_df)
    .mark_bar(size=_bar_size)
    .encode(
        x='term',
        y=_student_count_y('Number of Students'),
        color=alt.condition(click, 'term', alt.value('lightgray')),
    )
    .add_selection(click)
    .properties(height=_panel_height, width=100)
)

# Gender panel: rows are filtered by the selection made on the term panel.
gender = (
    alt.Chart(summer_df)
    .mark_bar(size=_bar_size)
    .encode(
        x="gender",
        y=_student_count_y(''),
        color="term",
    )
    .transform_filter(click)
    .properties(height=_panel_height, width=80)
)

# Ethnicity panel: same filtering, built from the ethnicity data.
ethnicity = (
    alt.Chart(summer_df_2)
    .mark_bar(size=_bar_size)
    .encode(
        x="ethnicity",
        y=_student_count_y(''),
        color="term",
    )
    .transform_filter(click)
    .properties(height=_panel_height, width=140)
)

combo = base | gender | ethnicity

# Last expression of the cell: the concatenated chart with title and subtitle.
dashboard_title = {
    "text": "Students Gender and Ethnicity by Term",
    "subtitle": "Click on the bars in the term graph to highlight those students in the gender and ethnicity graphs",
}
combo.properties(title=dashboard_title)

In [48]:
# Load the attendance table and keep only rows whose `term` contains "summer".
# na=False makes rows with a missing term count as "no match" instead of
# producing a NaN entry that pandas rejects in a boolean mask.
df_3 = pd.read_csv("average_attendance.csv")
summer_df_3 = df_3[df_3.term.str.contains('summer', na=False)]

# Encodings shared by the line and point layers.
# NOTE(review): the field name "average attendace (%)" is kept exactly as
# written because it presumably matches the CSV header - confirm before
# renaming. Only the displayed axis title is spelled correctly.
attendance_base = alt.Chart(summer_df_3).encode(
    y=alt.Y(
        "average attendace (%)",
        title="Average Attendance (%)",
        scale=alt.Scale(domain=(0, 100)),
    ),
    x="term"
).properties(
    width=200
)

line = attendance_base.mark_line()

point = attendance_base.mark_point()

# Last expression of the cell: line with point markers layered on top.
(line + point).properties(
    title="Average Attendance Over Terms"
)

In [49]:
# Pre/post score columns carried forward into the analysis frame.
sayo_t_score_columns = [
    'Pre_Adults_Mean', 'Pre_Comm_Mean', 'Pre_CritThink_Mean', 'Pre_Peers_Mean',
    'Pre_Persev_Mean', 'Pre_SelfReg_Mean', 'Avg_Pre_ELA_Combined', 'Avg_Pre_Math_Combined',
    'Post_Adults_Mean', 'Post_Comm_Mean', 'Post_CritThink_Mean', 'Post_Peers_Mean',
    'Post_Persev_Mean', 'Post_SelfReg_Mean', 'Avg_Post_ELA_Combined', 'Avg_Post_Math_Combined',
]

# Load the SAYO-T export and discard the StudentName column
# (presumably so student names never reach displayed output - confirm).
df_sayo_t = pd.read_csv("SAYO_T_22.csv")
df_sayo_t = df_sayo_t.drop(columns=["StudentName"])

# Select by label so a misspelled or missing column raises KeyError, rather
# than being silently created as an all-NaN column by the DataFrame constructor.
df_sayo_t_avg = df_sayo_t[sayo_t_score_columns]
df_sayo_t_avg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Pre_Adults_Mean         76 non-null     float64
 1   Pre_Comm_Mean           76 non-null     float64
 2   Pre_CritThink_Mean      76 non-null     float64
 3   Pre_Peers_Mean          76 non-null     float64
 4   Pre_Persev_Mean         76 non-null     float64
 5   Pre_SelfReg_Mean        76 non-null     float64
 6   Avg_Pre_ELA_Combined    31 non-null     float64
 7   Avg_Pre_Math_Combined   72 non-null     float64
 8   Post_Adults_Mean        66 non-null     float64
 9   Post_Comm_Mean          66 non-null     float64
 10  Post_CritThink_Mean     66 non-null     float64
 11  Post_Peers_Mean         66 non-null     float64
 12  Post_Persev_Mean        66 non-null     float64
 13  Post_SelfReg_Mean       66 non-null     float64
 14  Avg_Post_ELA_Combined   20 non-null     floa

In [10]:
# Binned heatmap of the "Adults" means: pre score on x, post score on y,
# rectangle colour = number of rows falling in each bin.
# Both axes use the same binning (at most 5 bins) and the same 0-5 scale.
_binned_0_to_5 = dict(bin=alt.Bin(maxbins=5), scale=alt.Scale(domain=(0, 5)))

adults_heatmap = alt.Chart(df_sayo_t_avg).mark_rect().encode(
    x=alt.X('Pre_Adults_Mean', **_binned_0_to_5),
    y=alt.Y('Post_Adults_Mean', **_binned_0_to_5),
    color=alt.Color('count()'),
)

# Last expression of the cell: render the chart.
adults_heatmap

In [11]:
# Read the formatted SAYO-T workbook and print its column/dtype summary.
sayo_t_formatted_path = "SAYO_T_formatted.xlsx"
df_4 = pd.read_excel(sayo_t_formatted_path)
df_4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Category _Avg  482 non-null    object 
 1   Pre_Test       482 non-null    float64
 2   Post_Test      482 non-null    float64
dtypes: float64(2), object(1)
memory usage: 11.4+ KB


In [39]:
# Explicit colour mapping for the "Category _Avg" values: the English and Math
# combined scores get their own colours (green, red); every other category is blue.
domain = ['Adults', 'Communication', 'Critical Thinking', "English Language Skills (Combined)", "Math Skills (Combined)",
         "Peers", "Perseverance", "Self Regulation"]
range_ = ['blue', 'blue', 'blue', 'green', 'red', 'blue', 'blue', 'blue']

# Single selection on the category field, driven by clicking a legend entry.
selection = alt.selection_single(fields=['Category _Avg'], bind='legend')

# Pre vs. post score scatter on matching 0-5 axes. Points of the selected
# category are drawn at opacity 0.5; all other points are fully transparent.
scatter = alt.Chart(df_4).mark_point().encode(
    x=alt.X('Pre_Test', scale=alt.Scale(domain=(0, 5)), title="Pre BtC Scores"),
    y=alt.Y('Post_Test', scale=alt.Scale(domain=(0, 5)), title="Post BtC Scores"),
    color=alt.Color("Category _Avg", scale=alt.Scale(domain=domain, range=range_)),
    opacity=alt.condition(selection, alt.value(0.5), alt.value(0))
).properties(
    width=400,
    height=400,
    title={"text": "Average Scores of Students in BtC '22 from SAYO-T",
          "subtitle": "Anything above the orange line is improvement; select the legend to see different tests"}
).add_selection(selection)

# Two-point frame describing the diagonal from (0, 0) to (5, 5), i.e. the
# reference where the post score equals the pre score.
line = pd.DataFrame({
    'pre_scores': [0, 5],
    'post_scores': [0, 5],
})

# Dashed orange (#ff7533) reference diagonal.
line_plot = alt.Chart(line).mark_line(strokeDash=[2, 2], color='#ff7533').encode(
    x='pre_scores',
    y='post_scores',
)

# Last expression of the cell: reference line underneath, scatter on top.
line_plot + scatter

In [None]:
# Preview the first five rows of the formatted SAYO-T frame.
df_4.head(n=5)

SyntaxError: invalid syntax (<ipython-input-14-c24063cd8f59>, line 1)