In [None]:
import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()

In [None]:
# Load the score table. Later cells read its `grade` and `author_gender`
# columns plus one numeric column per emotion.
scores_path = 'scores.csv'
df = pd.read_csv(scores_path)

In [None]:
# Bucket every row's grade two ways:
#   * `grade_gap` - three-grade bands (1-3, 4-6, 7-9, 10-12)
#   * `school`    - school level (grades 1-5, 6-8, 9-12)
#
# pd.cut uses right-closed intervals, so edges [0, 3, 6, 9, 12] produce
# (0, 3], (3, 6], (6, 9], (9, 12]; for whole-number grades 1-12 that is exactly
# the intended banding. Grades that are missing or fall outside (0, 12] become
# NaN instead of being silently filed under the last bucket, which is what a
# catch-all `else` branch does.
# NOTE(review): a fractional grade such as 3.5 lands in the next band up
# ('Grade 4-6'); confirm grades are always whole numbers.
grade_gap_edges = [0, 3, 6, 9, 12]
grade_gap_labels = ['Grade 1-3', 'Grade 4-6', 'Grade 7-9', 'Grade 10-12']
school_edges = [0, 5, 8, 12]
school_labels = ['Elementary School', 'Middle School', 'High School']

# .astype(object) converts the categorical result back to plain strings / NaN,
# so the new columns keep an ordinary object dtype.
gaps = pd.cut(df['grade'], bins=grade_gap_edges, labels=grade_gap_labels).astype(object)
df['grade_gap'] = gaps

schools = pd.cut(df['grade'], bins=school_edges, labels=school_labels).astype(object)
df['school'] = schools

# Preview a few rows to eyeball the mapping. The fixed seed keeps the preview
# stable across re-runs; min() avoids a ValueError when df has fewer than 10 rows.
df[['grade', 'grade_gap', 'school']].sample(n=min(10, len(df)), random_state=0)

## NRC Word-Emotion Association Lexicon
\- 8 basic emotions: anger, fear, anticipation, trust, surprise, sadness, joy, disgust

In [None]:
# Reshape wide -> long: each input row yields one output row per emotion,
# with the emotion's name in `emotion` and its value in `score`.
emo_id_cols = ['grade', 'author_gender', 'grade_gap', 'school']  # carried through unchanged
emo_score_cols = ['anger', 'fear', 'anticipation', 'trust', 'surprise', 'sadness', 'joy', 'disgust']

df_EMO_melted = df.melt(id_vars=emo_id_cols, value_vars=emo_score_cols,
                        var_name='emotion', value_name='score')

df_EMO_melted

### EMO_overall.png

In [None]:
# Mean score per grade, drawn as one line per emotion.
x_grade = alt.X('grade:Q').title('Grade')
# zero=False lets the y axis start near the data instead of at 0.
y_mean_score = alt.Y('mean(score)', title='Avg. emotion intensity', scale=alt.Scale(zero=False))
color_emotion = alt.Color('emotion').legend(title='Emotions')

(
    alt.Chart(df_EMO_melted, width=400)
    .mark_line()
    .encode(x_grade, y_mean_score, color_emotion)
    .configure_axis(titleFontSize=16, labelFontSize=12)
    .configure_legend(
        titleFontSize=14,
        labelFontSize=12,
        symbolSize=200,
        symbolStrokeWidth=3,
    )
)

### EMO_overall_gender.png

In [None]:
# Same per-emotion lines as the overall chart, split into one panel per author gender.

# Keep only rows whose gender is neither 'ambiguous' nor 'unknown'.
known_gender = (alt.datum.author_gender != 'ambiguous') & (alt.datum.author_gender != 'unknown')

# One panel per remaining gender value, ordered descending.
gender_panels = alt.Facet(
    'author_gender:O',
    title=None,
    sort='descending',
    header=alt.Header(labelFontSize=14),
)

(
    alt.Chart(df_EMO_melted, width=300)
    .transform_filter(known_gender)
    .mark_line()
    .encode(
        alt.X('grade:Q').title('Grade'),
        alt.Y('mean(score)', title='Avg. emotion intensity', scale=alt.Scale(zero=False)),
        alt.Color('emotion').legend(title='Emotions'),
    )
    .facet(facet=gender_panels, columns=2)
    .configure_axis(titleFontSize=16, labelFontSize=12)
    .configure_legend(
        titleFontSize=14,
        labelFontSize=12,
        symbolSize=200,
        symbolStrokeWidth=3,
    )
)

### EMO_grade_grouped.png

In [None]:
def _grade_gap_x():
    """Return a fresh x channel for the grade bands, in ascending band order."""
    return alt.X(
        'grade_gap:N',
        title=None,
        axis=alt.Axis(labelAngle=0),
        sort=['Grade 1-3', 'Grade 4-6', 'Grade 7-9', 'Grade 10-12'],
    )


# Keep only rows whose gender is neither 'ambiguous' nor 'unknown'.
known_gender = (alt.datum.author_gender != 'ambiguous') & (alt.datum.author_gender != 'unknown')

gender_color = alt.Color(
    'author_gender:N',
    scale=alt.Scale(domain=['male', 'female'], range=['mediumslateblue', 'red']),
    legend=alt.Legend(title=None, orient='top', offset=0),
)

# Per-gender mean score across grade bands, with hollow point markers.
line = (
    alt.Chart(df_EMO_melted, width=350)
    .transform_filter(known_gender)
    .mark_line(point=alt.OverlayMarkDef(filled=False, fill="white"))
    .encode(
        x=_grade_gap_x(),
        y=alt.Y('mean(score):Q', title='Avg. emotion intensity', scale=alt.Scale(zero=False)),
        color=gender_color,
    )
)

# Dashed gray reference line: mean over all rows (no gender filter applied).
entire = (
    alt.Chart(df_EMO_melted)
    .mark_line(strokeDash=[2,1])
    .encode(
        x=_grade_gap_x(),
        y=alt.Y('mean(score):Q', scale=alt.Scale(zero=False)),
        color=alt.ColorValue('gray'),
    )
)

# Confidence-interval band; derived from `line`, so it reuses that chart's
# filter, encodings and width.
band = line.mark_errorband(extent='ci', opacity=0.3)

# Layer the three charts, then draw one panel per emotion, four per row.
emotion_panels = alt.Facet('emotion:O', title=None, header=alt.Header(labelFontSize=14))

(
    (line + entire + band)
    .facet(facet=emotion_panels, columns=4)
    .configure_axis(titleFontSize=16, labelFontSize=12)
    .configure_legend(labelFontSize=14)
    .configure_axisX(labelFontWeight='bold')
)

### EMO_school.png

In [None]:
# Per-emotion panels showing mean score by school level, split by author gender.

# Explicit left-to-right order for the x axis. The x field is `school`, so the
# sort list must contain school levels; a list of emotion names matches none of
# the values and leaves the axis order unspecified.
school_order = ['Elementary School', 'Middle School', 'High School']

# Per-gender mean score across school levels, with hollow point markers.
# Rows whose gender is 'ambiguous' or 'unknown' are filtered out.
line = alt.Chart(df_EMO_melted).transform_filter(
    (alt.datum.author_gender != 'ambiguous') & (alt.datum.author_gender != 'unknown')
).mark_line(
    point=alt.OverlayMarkDef(filled=False, fill="white")
).encode(
    y=alt.Y('mean(score):Q', title='Avg. emotion intensity').scale(zero=False), 
    x=alt.X('school:N', 
            title=None, 
            axis=alt.Axis(labelAngle=0), 
            sort=school_order
           ),
    color=alt.Color(
        'author_gender:N', 
        scale=alt.Scale(domain=['male', 'female'], range=['mediumslateblue', 'red']),
        legend=alt.Legend(title=None, orient='top', offset=0)
    )
).properties(
    width=350
)

# Dashed gray reference line: mean over all rows (no gender filter applied).
entire = alt.Chart(df_EMO_melted).mark_line(strokeDash=[2,1]).encode(
    y=alt.Y('mean(score):Q').scale(zero=False), 
    x=alt.X('school:N', 
            title=None, 
            axis=alt.Axis(labelAngle=0), 
            sort=school_order
           ),
    color=alt.ColorValue('gray')
)

# Confidence-interval band; derived from `line`, so it reuses that chart's
# filter, encodings and width.
band = line.mark_errorband(extent='ci', opacity=0.3)

# Layer the three charts, then draw one panel per emotion, four per row.
(line+entire+band).facet(
    facet=alt.Facet(
        'emotion:O', 
        title=None, 
        header=alt.Header(labelFontSize=14)
    ),
    columns=4
).configure_axis(
    titleFontSize=16, 
    labelFontSize=12
).configure_legend(
    labelFontSize=14
).configure_axisX(
    labelFontWeight='bold'
)