# Dropout Risk

In [1]:
import os
from pathlib import Path
import pandas as pd
from ast import literal_eval

import warnings

# Suppress every warning so that none of them show up in the cell outputs.
warnings.filterwarnings(action='ignore')

# Notebook-wide constants.
SEED = 59                  # fixed seed value (not referenced in the cells shown here)
font_family = "Calibri"    # font used by the table CSS rules

# Data is read from the 'data' folder located beside the working directory,
# i.e. one level up from wherever the notebook is executed.
CURR_PATH = Path.cwd()
data_path = CURR_PATH.parents[0] / 'data'

In [2]:
# CSS rules in the list-of-dicts form passed to Styler.set_table_styles below:
# one entry per selector, each holding (property, value) pairs.
styles = [
    # Header cells: centred 11pt text on white, framed by a thin grey border.
    {
        "selector": "th",
        "props": [
            ("font-size", "11pt"),
            ("text-align", "center"),
            ("font-family", font_family),
            ("background-color", "white"),
            ("border-style", "solid"),
            ("border-color", "grey"),
            ("border-width", "thin"),
        ],
    },
    # Body cells: same font and border as the headers; alignment and
    # background colour are intentionally not set here.
    {
        "selector": "td",
        "props": [
            ("font-size", "11pt"),
            ("font-family", font_family),
            ("border-style", "solid"),
            ("border-color", "grey"),
            ("border-width", "thin"),
        ],
    },
    # Place any table caption underneath the table.
    {"selector": "caption", "props": [("caption-side", "bottom")]},
]

## Best Dropout Risk Models

In [3]:
# Read the best-models table (semicolon-separated).
best_models = pd.read_csv(data_path / "best_models.csv", sep=";")

# Apply the shared table CSS, force a white cell background, show 'DT' with
# one decimal and 'P1'/'BACC'/'REC' with three, and drop the index column
# from the rendered table.
best_models_styled = (
    best_models.style
    .set_table_styles(styles)
    .set_properties(**{"background-color": "white"})
    .format("{:.1f}", subset=["DT"])
    .format("{:.3f}", subset=["P1", "BACC", "REC"])
    .hide(axis="index")
)
best_models_styled

PS,P,S,BACC,Train_Risk,Test_Risk,P1,#Feat,CO,DT,BAL,C,REC
AR2,AR,2,0.866,0.5,0.37069,0.353,38,0,0.3,S,RF,0.814
AR3,AR,3,0.935,0.5,0.37069,0.336,32,4,0.5,R,RF,0.884
CM2,CM,2,0.92,0.365796,0.632075,0.557,36,1,0.3,N,SVC,0.866
CM3,CM,3,0.927,0.5,0.632075,0.566,74,0,0.5,S,RF,0.881
PT2,PT,2,0.913,0.5,0.396226,0.358,16,3,0.3,S,LSVC,0.857
PT3,PT,3,0.882,0.5,0.396226,0.396,47,3,0.3,S,LSVC,0.857


## Changes in Dropout Risk

In [4]:
# Read the risk table (semicolon-separated).
risk_data = pd.read_csv(data_path / "risk_data.csv", sep=";")

# Quick check of what was loaded: frame dimensions followed by the distinct
# values found in column 'T'.
print(f"{risk_data.shape} {risk_data['T'].unique()}")
risk_data.head()

(72, 10) ['AN' 'AN1' 'AN2' 'AN3' 'AN4' 'GN']


Unnamed: 0,ST,PS,0_P1,1_P1,P1,T,0_P2,1_P2,P2,P2-P1
0,D,AR2,8,35,0.813953,AN,14,29,0.674419,-0.139535
1,D,AR2,8,35,0.813953,AN1,14,29,0.674419,-0.139535
2,D,AR2,8,35,0.813953,AN2,14,29,0.674419,-0.139535
3,D,AR2,8,35,0.813953,AN3,17,26,0.604651,-0.209302
4,D,AR2,8,35,0.813953,AN4,18,25,0.581395,-0.232558


In [5]:
# Pivot 'P2-P1' into an (ST, PS) x T grid using pivot_table's default
# aggregation (mean); margins=True appends the 'All' row and column.
pt = risk_data.pivot_table(
    values="P2-P1",
    index=["ST", "PS"],
    columns=["T"],
    margins=True,
)

# Three-decimal display with a diverging colour map. axis=None computes the
# gradient over the whole table rather than per row/column, and vmin/vmax
# pin the colour scale to the fixed range [-0.7, 0.7].
pt_styled = (
    pt.style
    .set_table_styles(styles)
    .format("{:.3f}")
    .background_gradient(cmap="PuOr", vmin=-0.7, vmax=0.7, axis=None)
)

pt_styled

Unnamed: 0_level_0,T,AN,AN1,AN2,AN3,AN4,GN,All
ST,PS,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
D,AR2,-0.14,-0.14,-0.14,-0.209,-0.233,-0.256,-0.186
D,AR3,-0.163,-0.233,-0.419,-0.535,-0.628,-0.605,-0.43
D,CM2,-0.09,-0.104,-0.104,-0.149,-0.209,-0.149,-0.134
D,CM3,-0.06,-0.09,-0.149,-0.179,-0.194,-0.164,-0.139
D,PT2,-0.238,-0.238,-0.238,-0.238,-0.286,-0.238,-0.246
D,PT3,0.048,0.048,0.048,0.0,0.0,-0.048,0.016
G,AR2,-0.068,-0.068,-0.068,-0.068,-0.055,-0.055,-0.064
G,AR3,0.027,0.014,0.0,-0.014,-0.014,-0.014,0.0
G,CM2,0.026,0.026,0.026,0.026,0.026,0.026,0.026
G,CM3,0.026,0.026,0.026,0.0,0.0,0.0,0.013
