### In this Notebook we look at the most popular Evaluation Form

We explore the relationship between grade types and the grade received

In [None]:
import pandas as pd
import sas7bdat
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns',None) 

import matplotlib as mpl

# Use the seaborn "whitegrid" look instead of 'ggplot'.
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so select whichever name this
# installation actually provides. If neither exists the default style is kept.
style_candidates = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
available_style = next(
    (name for name in style_candidates if name in mpl.style.available),
    None,
)
if available_style is not None:
    mpl.style.use(available_style)

# Applied after the style sheet so these values override the style's defaults.
# mpl.rcParams and plt.rcParams are the same object, so one update covers both.
mpl.rcParams.update({
    # Typography
    'font.family': 'serif',
    'font.size': 14,
    'axes.labelsize': 13,
    'axes.titlesize': 16,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'axes.titleweight': 'bold',
    # Open frame: hide the top and right spines
    'axes.spines.top': False,
    'axes.spines.right': False,
    # Colour cycle taken from the tab10 colormap
    'axes.prop_cycle': plt.cycler('color', plt.cm.tab10.colors),
    # Figure size (inches), on-screen and saved resolution, background colour
    'figure.figsize': [10, 6],
    'figure.dpi': 200,
    'savefig.dpi': 300,
    'figure.facecolor': '#f8f8f8',
})

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [None]:
# Load the grade records from the SAS export into a DataFrame
filepath = '../data/karakterer.sas7bdat'
grade = pd.read_sas(
    filepath,
    format='sas7bdat',
    encoding='iso-8859-1',
)

In [None]:
# Load the course information and keep only the course name plus the two key
# columns (FagNummer, InstNr).
filepath = '../data/macom_fag.sas7bdat'
course = pd.read_sas(filepath, format='sas7bdat', encoding='iso-8859-1')

# Keep one row per (FagNummer, InstNr). Chaining the non-inplace call avoids
# mutating a column selection in place, which can raise SettingWithCopyWarning.
course = (
    course[['FagNavn', 'FagNummer', 'InstNr']]
    .drop_duplicates(subset=['FagNummer', 'InstNr'])
)

In [None]:
# Attach the course columns to each grade row, matching on FagNummer and InstNr.
# how='left' keeps every grade row, including those without a matching course.
grade = grade.merge(
    course,
    how='left',
    on=['FagNummer', 'InstNr'],
)


In [None]:
# Clean the grade column so that descriptions, means etc. can be computed.
# Cast to str first so the regex below can run on every value.
grade['Karakter'] = grade['Karakter'].astype(str)

# Keep only the first run of digits of each value; values without any digit
# become NaN. The raw string avoids the invalid-escape warning for '\d', and
# expand=False returns a Series rather than a one-column DataFrame.
grade['Karakter'] = grade['Karakter'].str.extract(r'(\d+)', expand=False)

# Only keep relevant grade values. The pattern does not capture a minus sign,
# so a value such as "-3" appears here as "3".
# NOTE(review): zero-padded strings such as "02" would not match "2" — confirm
# the raw format of the Karakter column.
grades = ["3", "0", "2", "4", "7", "10", "12"]

# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise SettingWithCopyWarning.
grade = grade[grade['Karakter'].isin(grades)].copy()

grade['Karakter'] = grade['Karakter'].astype(int)

In [None]:
# Map interpretable names onto the exam-form codes.
# The dict is deliberately not called `map`, which would shadow the builtin
# for the rest of the session. Codes missing from this table become NaN.
exam_form_names = {
    'MDT': 'Oral',
    'SKR': 'Written',
    'SS': 'Large Written',
    'SAM': 'Collaboration',
}

grade['EvaleringsForm'] = grade['EvaleringsForm'].map(exam_form_names)

# Transform grade 3 to -3
grade['Karakter'] = grade['Karakter'].astype(int)
grade['Karakter'] = grade['Karakter'].replace({3: -3})

# One DataFrame per evaluation form; these hold all rows of that form
orl = grade[grade['EvaleringsForm'] == 'Oral']
lrg = grade[grade['EvaleringsForm'] == 'Large Written']
clb = grade[grade['EvaleringsForm'] == 'Collaboration']
wrt = grade[grade['EvaleringsForm'] == 'Written']

In [None]:
# Count the non-null elev_id entries for every (grade, evaluation form) pair.
# sort=False keeps the groups in order of first appearance.
fun = (
    grade
    .groupby(['Karakter', 'EvaleringsForm'], sort=False)
    .agg(students=('elev_id', 'count'))
    .reset_index()
)

In [None]:
# Grade distribution per evaluation form: keep only the (grade, form)
# combinations with at least 500 counted entries
test = fun[fun['students'] >= 500]


# Rescale the counts of one evaluation form so the most frequent grade ends up
# close to 1; the forms differ widely in size, so raw counts are hard to compare.
def scale_students(df):
    """Min-max scale the ``students`` column of *df* into [0.01, 0.99].

    The smallest count maps to 0.01 and the largest to 0.99. The input is not
    modified; a scaled copy is returned.

    Args:
        df: DataFrame with a numeric ``students`` column.

    Returns:
        A copy of *df* whose ``students`` column is rescaled. An empty frame
        is returned as an unchanged copy. If all counts are equal (including
        a single row), every value is set to 0.99 instead of dividing by zero.
    """
    scaled = df.copy()
    if scaled.empty:
        return scaled

    counts = scaled['students']
    lowest = counts.min()
    span = counts.max() - lowest

    if span == 0:
        # Every row is simultaneously the minimum and the maximum; treat it as
        # the maximum so the bar stays visible rather than becoming NaN.
        scaled['students'] = 0.99
    else:
        scaled['students'] = (counts - lowest) / span * 0.98 + 0.01
    return scaled

    
    # Separate the data for different evaluation forms and scale the 'students' column
# Split the filtered counts by evaluation form and rescale each form on its
# own, so every panel uses its own minimum and maximum.
oral = scale_students(test[test['EvaleringsForm'] == 'Oral'])
large = scale_students(test[test['EvaleringsForm'] == 'Large Written'])
colab = scale_students(test[test['EvaleringsForm'] == 'Collaboration'])
written = scale_students(test[test['EvaleringsForm'] == 'Written'])

# (title label, scaled counts to draw, full per-form grade rows for the statistics)
panels = [
    ('Oral', oral, orl),
    ('Written', written, wrt),
    ('Large Written', large, lrg),
    ('Collaboration', colab, clb),
]

# One subplot per evaluation form, stacked vertically
fig, axes = plt.subplots(len(panels), 1, figsize=(8, 8))

for ax, (label, counts, all_rows) in zip(axes, panels):
    # Bars: scaled count per grade for this evaluation form
    ax.bar(counts['Karakter'], counts['students'])

    # Title statistics are computed on all grade rows of the form, not on the
    # filtered and scaled counts.
    mean, median, std = all_rows['Karakter'].describe()[['mean', '50%', 'std']]
    ax.set_title(
        '{}: Mean={:.2f}, Median={:.2f}, Standard Deviation={:.2f}'.format(
            label, mean, median, std
        )
    )

    # Tick at the grades actually drawn in this panel (previously every panel
    # reused the Oral grades).
    ax.set_xticks(counts['Karakter'])

# Save the filtered (unscaled) counts as a pickle file
test.to_pickle('figures/scaled_evaluation_forms.pkl')

# Adjust subplot spacing and save the figure as a PDF file
fig.tight_layout()
plt.savefig('figures/scaled_examforms_multiple.pdf')

# Display the plot
plt.show()

In [None]:
# Count the non-null elev_id entries per evaluation form.
# sort=False keeps the groups in order of first appearance.
fun = (
    grade
    .groupby('EvaleringsForm', sort=False)
    .agg(students=('elev_id', 'count'))
    .reset_index()
)

# Keep the evaluation forms with more than 1000 entries, largest first
test = fun[fun['students'] > 1000].sort_values('students', ascending=False)

x = test['EvaleringsForm']
y = test['students']

# Bar chart: one bar per evaluation form
fig, ax = plt.subplots()
ax.bar(x, y)

ax.set_title('Number of Grades per Evaluation Form in Millions')
ax.set_xlabel('Evaluation form')
ax.set_ylabel('Count of grades distributed on evaluation form')
plt.xticks(x)
plt.tight_layout()

# Persist the plotted table as a pickle file
test.to_pickle('figures/evaluation_forms.pkl')

# Write the figure to a PDF file
plt.savefig('figures/evaluations_forms.pdf')

# Render the plot
plt.show()


In [None]:
# Count the non-null elev_id entries per grade type.
# sort=False keeps the groups in order of first appearance.
df = (
    grade
    .groupby('KarakterType', sort=False)
    .agg(students=('elev_id', 'count'))
    .reset_index()
)

df['KarakterType'] = df['KarakterType'].astype(str)

# Remove the grade type 'GST'. .copy() makes the filtered frame independent,
# so the column assignment below does not raise SettingWithCopyWarning.
df = df.loc[df['KarakterType'] != 'GST'].copy()

# Map interpretable names onto the grade-type codes.
# The dict is deliberately not called `map`, which would shadow the builtin.
grade_type_names = {
    'ÅRS': 'Yearly Assessment',
    'STA': 'Continuous Assessment',
    'EKS': 'Exam',
    'IPR': 'Internal Test',
}

# Codes missing from the table keep their raw code as label. Without the
# fallback they would become NaN, which the categorical bar axis cannot plot.
df['KarakterType'] = (
    df['KarakterType'].map(grade_type_names).fillna(df['KarakterType'])
)

# Sort by number of students descending
df = df.sort_values('students', ascending=False)

fig, ax = plt.subplots(figsize=(12, 6))

x = df['KarakterType']
y = df['students']

# Plot the number of students for each grade type
ax.bar(x, y)

ax.set_xlabel('Grade Type')
ax.set_ylabel('Number of evaluations per grade type')
ax.set_title('Count of Grades Based on the Grade Type in Millions')
plt.xticks(x)
plt.tight_layout()

# Save the plotted table as a pickle file
df.to_pickle('figures/Exa_type.pkl')

# Save the figure as a PDF file
plt.savefig('figures/exam_type.pdf')

# Display the plot
plt.show()
