### In this notebook we explore the most common reasons for being absent

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sas7bdat
import seaborn as sns

import matplotlib as mpl

# Global plotting configuration applied to every figure in this notebook.

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so choose whichever spelling this installation
# provides and fall back to the default style if neither exists.
_whitegrid_style = next(
    (
        name
        for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if name in mpl.style.available
    ),
    'default',
)
mpl.style.use(_whitegrid_style)

# Overrides go after style.use() so that they win over the style sheet.
# mpl.rcParams and plt.rcParams are the same object; one update keeps the
# settings in a single place.
mpl.rcParams.update({
    # Typography
    'font.family': 'serif',
    'font.size': 14,
    'axes.labelsize': 13,
    'axes.titlesize': 16,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'axes.titleweight': 'bold',
    # Axes appearance: hide the top and right spines, use the Set1 palette
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.prop_cycle': plt.cycler('color', plt.cm.Set1.colors),
    # Figure size and resolution (on screen and when saved)
    'figure.figsize': [10, 6],
    'figure.dpi': 200,
    'savefig.dpi': 300,
    'figure.facecolor': '#f8f8f8',
})

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [None]:
# Read the absence data from the SAS export.
filepath = '../data/macom_fravaer.sas7bdat'
df = pd.read_sas(filepath, format='sas7bdat', encoding='iso-8859-1')

# Drop duplicates: keep only the first row per (elev_id, LektionsNr) pair.
df = df.drop_duplicates(subset=['elev_id', 'LektionsNr'], keep='first')

# We are only interested in rows that have a registered absence reason.
# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignment below does not raise SettingWithCopyWarning.
df = df[df['Fraværsårsag'].notna()].copy()
df['Fraværsårsag'] = df['Fraværsårsag'].astype(str)

# Read student information.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
elev = pd.read_pickle('../data/clean_reg.pkl')

In [None]:
# Count the registered absences per absence reason.
# Despite its name, the 'students' column holds the number of non-null
# elev_id rows for each reason, not the number of distinct students. The name
# is kept because it is written to the pickle saved below.
fun = (
    df.groupby('Fraværsårsag', sort=False)
      .agg(students=('elev_id', 'count'))
      .reset_index()
)

# Leave out the "Andet" category. .copy() keeps the later column assignment
# from raising SettingWithCopyWarning.
fun = fun[fun['Fraværsårsag'] != 'Andet'].copy()

print(fun)

# Danish reason -> English label used on the chart.
# (Named reason_labels rather than `map` so the builtin is not shadowed.)
reason_labels = {
    'Sygdom': 'Illness',
    'Private forhold': 'Private Reason',
    'Kom for sent': 'Late for activity',
    'Skolerelaterede aktiviteter': 'School related activities',
}

# Translate the reasons. A reason missing from the dictionary keeps its
# original label instead of becoming NaN, which would break the categorical
# bar axis below.
reasons = fun['Fraværsårsag'].astype(str)
fun['Fraværsårsag'] = reasons.map(reason_labels).fillna(reasons)

print(fun)

# The (at most) ten reasons with the highest counts, largest first.
test = fun.sort_values('students', ascending=False).head(10)

x = test['Fraværsårsag']
y = test['students']

fig, ax = plt.subplots(figsize=(16, 8))
# String x-values make the axis categorical, so one tick per reason is
# created automatically; no explicit plt.xticks call is needed.
ax.bar(x, y, color='steelblue')

ax.set_xlabel('Reason for missing activity')
ax.set_ylabel('Count of reasons registered')
ax.set_title('The most popular reasons for being absent')
# Show plain integers on the y-axis instead of scientific notation.
ax.ticklabel_format(style='plain', axis='y')
plt.tight_layout()

# Save both the plotted data and the figure itself.
test.to_pickle('figures/reason_absence.pkl')
plt.savefig('figures/reason_absence.pdf')
plt.show()
