In [None]:
import pandas as pd
import altair as alt
# Turn off Altair's max-rows guard on chart data so the charts below can be
# built directly from `df` (presumably scores.csv holds more rows than the
# default limit allows -- TODO confirm).
alt.data_transformers.disable_max_rows()

In [None]:
# Load the scores table. Later cells read these columns from it:
# `grade`, `author_gender`, `valence`, `arousal` and `dominance`.
df = pd.read_csv('scores.csv')

In [None]:
# Derive two coarser groupings of `grade`; the grouped charts put them on the x axis.
#
# pd.cut uses right-closed intervals, so bins [0, 3, 6, 9, 12] mean
# (0, 3], (3, 6], (6, 9], (9, 12]. For whole-number grades 1..12 that is exactly
# the 1-3 / 4-6 / 7-9 / 10-12 split. Grades that are missing or fall outside
# (0, 12] get no label (NaN) instead of being silently counted in the top band,
# which is what a catch-all `else` branch would do.
df['grade_gap'] = pd.cut(
    df['grade'],
    bins=[0, 3, 6, 9, 12],
    labels=['Grade 1-3', 'Grade 4-6', 'Grade 7-9', 'Grade 10-12'],
).astype(object)  # keep plain string labels rather than a Categorical column

# School level: grades 1-5, 6-8 and 9-12, with the same handling of
# missing / out-of-range grades as above.
df['school'] = pd.cut(
    df['grade'],
    bins=[0, 5, 8, 12],
    labels=['Elementary School', 'Middle School', 'High School'],
).astype(object)

# Spot-check the mapping on a few rows. The sample size is capped so small
# frames do not raise, and the seed keeps the preview stable across re-runs.
df[['grade', 'grade_gap', 'school']].sample(n=min(10, len(df)), random_state=0)

## VAD (Valence, Arousal, Dominance)

### VAD_grade.png

In [None]:
def _grade_panel(metric, caption):
    """Build the three layers of one panel of the per-grade VAD figure.

    Args:
        metric: Column of ``df`` whose mean is drawn on the y axis.
        caption: Sub-figure label shown under the panel, e.g. ``'(a)'``.

    Returns:
        ``(by_gender, ci_band, overall)``: the male/female mean lines, the
        ``extent='ci'`` error band derived from those lines, and the dashed
        gray mean line computed over every row of ``df``.
    """
    mean_of_metric = f'mean({metric}):Q'
    grade_axis = alt.X('grade:Q', title='Grade', axis=alt.Axis(labelAngle=0))

    # Only rows whose author_gender is neither 'ambiguous' nor 'unknown' feed
    # the gendered lines; the overall line below is left unfiltered.
    gender_is_known = (
        (alt.datum.author_gender != 'ambiguous')
        & (alt.datum.author_gender != 'unknown')
    )
    by_gender = (
        alt.Chart(df)
        .transform_filter(gender_is_known)
        .mark_line(point=alt.OverlayMarkDef(filled=False, fill="white"))
        .encode(
            x=grade_axis,
            y=alt.Y(mean_of_metric, title=f'Avg. {metric}', scale=alt.Scale(zero=False)),
            color=alt.Color(
                'author_gender:N',
                scale=alt.Scale(domain=['male', 'female'], range=['mediumslateblue', 'red']),
                legend=alt.Legend(title=None, orient='top', offset=0),
            ),
        )
        .properties(title=alt.Title(caption, orient='bottom'))
    )

    # The band is derived from the gendered line chart, swapping only the mark.
    ci_band = by_gender.mark_errorband(extent='ci', opacity=0.3)

    overall = (
        alt.Chart(df)
        .mark_line(strokeDash=[2, 1])
        .encode(
            x=grade_axis,
            y=alt.Y(mean_of_metric, scale=alt.Scale(zero=False)),
            color=alt.ColorValue('gray'),
        )
    )
    return by_gender, ci_band, overall


# One panel per VAD dimension.
line_V, band_V, entire_V = _grade_panel('valence', '(a)')
line_A, band_A, entire_A = _grade_panel('arousal', '(b)')
line_D, band_D, entire_D = _grade_panel('dominance', '(c)')

# Layer each panel, place the three side by side, then apply figure-wide fonts.
alt.hconcat(
    alt.layer(line_V, band_V, entire_V),
    alt.layer(line_A, band_A, entire_A),
    alt.layer(line_D, band_D, entire_D),
).configure_axis(
    titleFontSize=16,
    labelFontSize=12,
).configure_legend(
    labelFontSize=14,
)

### VAD_grade_grouped.png

In [None]:
def _grade_gap_panel(metric, caption):
    """Build the three layers of one panel of the grade-band VAD figure.

    Args:
        metric: Column of ``df`` whose mean is drawn on the y axis.
        caption: Sub-figure label shown under the panel, e.g. ``'(a)'``.

    Returns:
        ``(by_gender, ci_band, overall)``: the male/female mean lines, the
        ``extent='ci'`` error band derived from those lines, and the dashed
        gray mean line computed over every row of ``df``.
    """
    mean_of_metric = f'mean({metric}):Q'
    # An explicit sort keeps the bands in grade order on the nominal axis.
    band_axis = alt.X(
        'grade_gap:N',
        title=None,
        axis=alt.Axis(labelAngle=0),
        sort=['Grade 1-3', 'Grade 4-6', 'Grade 7-9', 'Grade 10-12'],
    )

    # Only rows whose author_gender is neither 'ambiguous' nor 'unknown' feed
    # the gendered lines; the overall line below is left unfiltered.
    gender_is_known = (
        (alt.datum.author_gender != 'ambiguous')
        & (alt.datum.author_gender != 'unknown')
    )
    by_gender = (
        alt.Chart(df)
        .transform_filter(gender_is_known)
        .mark_line(point=alt.OverlayMarkDef(filled=False, fill="white"))
        .encode(
            x=band_axis,
            y=alt.Y(mean_of_metric, title=f'Avg. {metric}', scale=alt.Scale(zero=False)),
            color=alt.Color(
                'author_gender:N',
                scale=alt.Scale(domain=['male', 'female'], range=['mediumslateblue', 'red']),
                legend=alt.Legend(title=None, orient='top', offset=0),
            ),
        )
        .properties(title=alt.Title(caption, orient='bottom'), width=350)
    )

    # The band is derived from the gendered line chart, swapping only the mark.
    ci_band = by_gender.mark_errorband(extent='ci', opacity=0.3)

    overall = (
        alt.Chart(df)
        .mark_line(strokeDash=[2, 1])
        .encode(
            x=band_axis,
            y=alt.Y(mean_of_metric, scale=alt.Scale(zero=False)),
            color=alt.ColorValue('gray'),
        )
    )
    return by_gender, ci_band, overall


# One panel per VAD dimension.
line_V, band_V, entire_V = _grade_gap_panel('valence', '(a)')
line_A, band_A, entire_A = _grade_gap_panel('arousal', '(b)')
line_D, band_D, entire_D = _grade_gap_panel('dominance', '(c)')

# Layer each panel, place the three side by side, then apply figure-wide fonts.
alt.hconcat(
    alt.layer(line_V, band_V, entire_V),
    alt.layer(line_A, band_A, entire_A),
    alt.layer(line_D, band_D, entire_D),
).configure_axis(
    titleFontSize=16,
    labelFontSize=12,
).configure_legend(
    labelFontSize=14,
).configure_axisX(
    labelFontWeight='bold',
)

### VAD_school.png

In [None]:
def _school_panel(metric, caption):
    """Build the three layers of one panel of the school-level VAD figure.

    Args:
        metric: Column of ``df`` whose mean is drawn on the y axis.
        caption: Sub-figure label shown under the panel, e.g. ``'(a)'``.

    Returns:
        ``(by_gender, ci_band, overall)``: the male/female mean lines, the
        ``extent='ci'`` error band derived from those lines, and the dashed
        gray mean line computed over every row of ``df``.
    """
    mean_of_metric = f'mean({metric}):Q'
    # An explicit sort keeps the school levels in progression order.
    school_axis = alt.X(
        'school:N',
        title=None,
        axis=alt.Axis(labelAngle=0),
        sort=['Elementary School', 'Middle School', 'High School'],
    )

    # Only rows whose author_gender is neither 'ambiguous' nor 'unknown' feed
    # the gendered lines; the overall line below is left unfiltered.
    gender_is_known = (
        (alt.datum.author_gender != 'ambiguous')
        & (alt.datum.author_gender != 'unknown')
    )
    by_gender = (
        alt.Chart(df)
        .transform_filter(gender_is_known)
        .mark_line(point=alt.OverlayMarkDef(filled=False, fill="white"))
        .encode(
            x=school_axis,
            y=alt.Y(mean_of_metric, title=f'Avg. {metric}', scale=alt.Scale(zero=False)),
            color=alt.Color(
                'author_gender:N',
                scale=alt.Scale(domain=['male', 'female'], range=['mediumslateblue', 'red']),
                legend=alt.Legend(title=None, orient='top', offset=0),
            ),
        )
        .properties(title=alt.Title(caption, orient='bottom'), width=350)
    )

    # The band is derived from the gendered line chart, swapping only the mark.
    ci_band = by_gender.mark_errorband(extent='ci', opacity=0.3)

    overall = (
        alt.Chart(df)
        .mark_line(strokeDash=[2, 1])
        .encode(
            x=school_axis,
            y=alt.Y(mean_of_metric, scale=alt.Scale(zero=False)),
            color=alt.ColorValue('gray'),
        )
    )
    return by_gender, ci_band, overall


# One panel per VAD dimension.
line_V, band_V, entire_V = _school_panel('valence', '(a)')
line_A, band_A, entire_A = _school_panel('arousal', '(b)')
line_D, band_D, entire_D = _school_panel('dominance', '(c)')

# Layer each panel, place the three side by side, then apply figure-wide fonts.
alt.hconcat(
    alt.layer(line_V, band_V, entire_V),
    alt.layer(line_A, band_A, entire_A),
    alt.layer(line_D, band_D, entire_D),
).configure_axis(
    titleFontSize=16,
    labelFontSize=12,
).configure_legend(
    labelFontSize=14,
).configure_axisX(
    labelFontWeight='bold',
)