In [135]:
import os

import matplotlib.pyplot as plt
import numpy as np
import optuna
import OpenLA as la
import pandas as pd
from sklearn.ensemble import RandomForestClassifier as RFC, AdaBoostClassifier as ABC
from sklearn.linear_model import Perceptron
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.neural_network import MLPClassifier as MLP
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC

In [136]:
def make_data_set(dim, cl_train, cl_test, vector_suffix=""):
    """Build binary at-risk train/test sets from student vectors and grades.

    For each of the two courses, the normalized student-vector CSV is read
    from ./data/vectors and inner-joined on ``userid`` with the course's
    grade table loaded through OpenLA from ./data/EduData_20221028.

    Parameters
    ----------
    dim : int
        Vector dimensionality; it is part of the vector file name.
    cl_train, cl_test : str
        Course ids used for the training and the test set.
    vector_suffix : str, optional
        Text inserted between ``dim`` and ``.csv`` in the vector file name
        (e.g. ``"_A20"``). The default ``""`` selects the plain files.

    Returns
    -------
    x_train, y_train, x_test, y_test
        Feature frames (every joined column except ``grade``) and integer
        label series where S/A/B -> 0 and C/D/F -> 1.

    Raises
    ------
    ValueError
        If a joined student has a grade that is missing or outside
        S/A/B/C/D/F, so that bad labels fail here instead of inside
        scikit-learn.
    """
    # os.path.join yields the same '.\\data\\...' strings on Windows as the
    # previously hard-coded literals, and valid paths on other platforms.
    edudata_dir = os.path.join(".", "data", "EduData_20221028")
    vectors_dir = os.path.join(".", "data", "vectors")
    grade_dict = {'S': 0, 'A': 0, 'B': 0, 'C': 1, 'D': 1, 'F': 1}

    def _load_course(course_id):
        """Return (features, labels) for one course, joined on userid."""
        file_name = "norm_Student_Vctors_course{}_{}dim{}.csv".format(
            course_id, dim, vector_suffix
        )
        vectors = pd.read_csv(os.path.join(vectors_dir, file_name)).set_index("userid")
        # NOTE(review): assumes grade_point_df() exposes 'userid' and 'grade'
        # columns, as the original code did -- confirm against OpenLA.
        grades = (
            la.CourseInformation(files_dir=edudata_dir, course_id=course_id)
            .grade_point_df()
            .set_index("userid")
        )
        joined = pd.merge(vectors, grades, left_index=True, right_index=True, how="inner")

        # Binarize only the grade column. Validation happens after the inner
        # join so students without a vector cannot trigger the error.
        labels = joined["grade"].map(grade_dict)
        unmapped = labels.isna()
        if unmapped.any():
            bad_values = sorted(joined.loc[unmapped, "grade"].astype(str).unique())
            raise ValueError(
                "Course {!r} has grades that cannot be binarized: {}".format(
                    course_id, bad_values
                )
            )
        return joined.drop(columns="grade"), labels.astype(int)

    x_train, y_train = _load_course(cl_train)
    x_test, y_test = _load_course(cl_test)
    return x_train, y_train, x_test, y_test

In [137]:
def at_risk_prediction(train_data, train_label, test_data, test_label, model_k="rfc", cv=3, verbose=3):
    """Compare a grid-search-tuned classifier with its default configuration.

    Parameters
    ----------
    train_data, train_label
        Features and binary labels used for the grid search and for fitting
        the default model.
    test_data, test_label
        Held-out features and labels used to compute both F1 scores.
    model_k : str
        Which classifier family to use: "rfc", "svc", "ada", "knn" or "mlp".
    cv : int, optional
        Number of cross-validation folds used by the grid search (default 3).
    verbose : int, optional
        Verbosity forwarded to GridSearchCV (default 3, one line per fit).

    Returns
    -------
    (best_params, f1_best, f1_nt)
        The best parameter dict found by the search, the test F1 of the best
        estimator, and the test F1 of the untuned default model.

    Raises
    ------
    ValueError
        If ``model_k`` is not one of the supported keys.
    """
    # Each entry maps a key to (factory for a fresh default estimator,
    # hyper-parameter grid). Factories are lambdas so that no estimator is
    # created before model_k has been validated.
    search_space = {
        "rfc": (
            lambda: RFC(random_state=42),
            {
                'n_estimators': [10, 20, 30, 50, 100],
                'criterion': ['gini', 'entropy', 'log_loss'],
                'max_depth': [10, 20, None],
            },
        ),
        "svc": (
            lambda: SVC(random_state=42),
            {
                "C": [10 ** i for i in range(-3, 3)],
                "kernel": ["linear", "poly", "rbf", "sigmoid"],
            },
        ),
        "ada": (
            lambda: ABC(random_state=42),
            {
                'n_estimators': [1, 10, 20, 30, 50, 100],
                'learning_rate': [0.1, 0.5, 1.0, 2.0, 5.0],
                # NOTE(review): 'SAMME.R' is reported as deprecated in recent
                # scikit-learn releases -- confirm against the installed version.
                'algorithm': ['SAMME', 'SAMME.R'],
            },
        ),
        "knn": (
            lambda: KNN(),
            {
                'n_neighbors': [3, 5, 7, 10],
                'weights': ["uniform", "distance"],
                'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
            },
        ),
        "mlp": (
            lambda: MLP(random_state=42, early_stopping=True),
            {
                'hidden_layer_sizes': [10, 50, 100, 200, 500, 1000],
                'activation': ['identity', 'logistic', 'tanh', 'relu'],
                'solver': ['lbfgs', 'sgd', 'adam'],
                'alpha': [10 ** -3, 10 ** -4, 10 ** -5],
                # The original list repeated 100, which only added redundant
                # fits. NOTE(review): the duplicate may have been meant as 1000.
                'max_iter': [50, 100, 200, 300, 500],
            },
        ),
    }
    if model_k not in search_space:
        raise ValueError(
            "Unknown model_k {!r}; expected one of {}".format(model_k, sorted(search_space))
        )
    make_model, model_params = search_space[model_k]

    # Select the best hyper-parameters by grid search, scored by F1 under
    # cv-fold cross-validation on the training data.
    gscv = GridSearchCV(make_model(), model_params, cv=cv, scoring='f1', verbose=verbose)
    gscv.fit(train_data, train_label)
    best = gscv.best_estimator_

    # Score the tuned model on the held-out course. f1_score expects
    # (y_true, y_pred), so the labels go first.
    pred = best.predict(test_data)
    f1_best = f1_score(test_label, pred)

    # Baseline: the same classifier family with default hyper-parameters.
    model_nt = make_model()
    model_nt.fit(train_data, train_label)
    pred_nt = model_nt.predict(test_data)
    f1_nt = f1_score(test_label, pred_nt)

    return gscv.best_params_, f1_best, f1_nt

### E2Vec k100

In [138]:
# Course ids that are paired up as (train, test) in the experiment loop,
# and the dimensionality of the student vectors to load.
cl_courses = [
    "D-2022",
    "D-2021",
    "A-2022",
    "A-2021",
]
dim = 100

In [139]:
# Cross-course evaluation: for every ordered (train, test) pair of distinct
# courses, tune and score each classifier family and record one result row.
columns = ["c_train", "c_test", "dim", "epoch", "model", "best_param", "f1_bestparam", "f1_default"]

# Rows are collected in a list and turned into a DataFrame once, instead of
# concatenating onto the frame on every iteration.
records = []
for cl_test in cl_courses:
    for cl_train in cl_courses:
        if cl_test == cl_train:
            continue
        x_train, y_train, x_test, y_test = make_data_set(dim, cl_train, cl_test)
        for model_select in ["rfc", "svc", "ada", "knn"]:
            best_param, best_param_f1, nt_f1 = at_risk_prediction(
                x_train, y_train, x_test, y_test, model_k=model_select
            )
            # Record the `dim` that was actually used to load the vectors;
            # 30 is written to the 'epoch' column as metadata only.
            records.append(
                [cl_train, cl_test, dim, 30, model_select, best_param, best_param_f1, nt_f1]
            )

results_df = pd.DataFrame(records, columns=columns)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.963 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.846 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.929 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.815 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.963 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.815 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.600 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.333 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.571 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.500 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.375 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.600 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.571 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.353 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.632 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.696 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.600 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.333 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.571 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.500 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.375 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.600 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.571 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.353 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.632 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.696 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.588 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.588 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.625 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.625 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.625 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.963 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.846 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.929 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.815 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.963 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.815 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.471 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.471 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.783 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.583 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.700 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.741 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.560 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.692 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.714 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.667 total time=   0.0s
[CV 2/3] END crite

In [None]:
# Display the full cross-course results table built by the experiment loop above.
results_df

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.612245,0.612245
1,D-2021,D-2022,100,30,svc,"{'C': 10, 'kernel': 'rbf'}",0.59375,0.603774
2,D-2021,D-2022,100,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 0.5, '...",0.727273,0.666667
3,D-2021,D-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 10, 'weig...",0.216216,0.390244
4,A-2022,D-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.629213,0.615385
5,A-2022,D-2022,100,30,svc,"{'C': 0.001, 'kernel': 'linear'}",0.52381,0.528
6,A-2022,D-2022,100,30,ada,"{'algorithm': 'SAMME.R', 'learning_rate': 2.0,...",0.456522,0.297297
7,A-2022,D-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 5, 'weigh...",0.547945,0.547945
8,A-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.516667,0.532258
9,A-2021,D-2022,100,30,svc,"{'C': 1, 'kernel': 'poly'}",0.508772,0.520325


### Table 10 

In [None]:
# Rows where the models were trained on A-2021 and tested on A-2022.
results_df.loc[
    (results_df["c_train"] == "A-2021") & (results_df["c_test"] == "A-2022")
]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
32,A-2021,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.666667,0.694444
33,A-2021,A-2022,100,30,svc,"{'C': 1, 'kernel': 'poly'}",0.535714,0.694444
34,A-2021,A-2022,100,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 2.0, '...",0.686567,0.702703
35,A-2021,A-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 7, 'weigh...",0.5,0.490566


### Table 11

In [None]:
# Rows where the models were trained on D-2021 and tested on D-2022.
results_df.loc[
    (results_df["c_train"] == "D-2021") & (results_df["c_test"] == "D-2022")
]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.612245,0.612245
1,D-2021,D-2022,100,30,svc,"{'C': 10, 'kernel': 'rbf'}",0.59375,0.603774
2,D-2021,D-2022,100,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 0.5, '...",0.727273,0.666667
3,D-2021,D-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 10, 'weig...",0.216216,0.390244


### Table 12

In [None]:
# Random-forest rows only, ordered by training course and then test course.
results_df.loc[results_df["model"] == "rfc"].sort_values(by=["c_train", "c_test"])

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
32,A-2021,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.666667,0.694444
20,A-2021,D-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.575342,0.581081
8,A-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.516667,0.532258
44,A-2022,A-2021,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.666667,0.684211
16,A-2022,D-2021,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.788462,0.808081
4,A-2022,D-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.629213,0.615385
40,D-2021,A-2021,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.408163,0.408163
28,D-2021,A-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.333333,0.333333
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.612245,0.612245
36,D-2022,A-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.633333,0.590164


### E2Vec k10

In [None]:
# Same course list as before, now with 10-dimensional student vectors.
cl_courses = [
    "D-2022",
    "D-2021",
    "A-2022",
    "A-2021",
]
dim = 10

In [None]:
# Cross-course evaluation for the 10-dimensional vectors: for every ordered
# (train, test) pair of distinct courses, tune and score each classifier family.
columns = ["c_train", "c_test", "dim", "epoch", "model", "best_param", "f1_bestparam", "f1_default"]

# Rows are collected in a list and turned into a DataFrame once, instead of
# concatenating onto the frame on every iteration.
records = []
for cl_test in cl_courses:
    for cl_train in cl_courses:
        if cl_test == cl_train:
            continue
        # Use the `dim` set in the preceding cell (10) rather than a repeated
        # literal, so the loaded vectors and the recorded value cannot diverge.
        x_train, y_train, x_test, y_test = make_data_set(dim, cl_train, cl_test)
        for model_select in ["rfc", "svc", "ada", "knn"]:
            best_param, best_param_f1, nt_f1 = at_risk_prediction(
                x_train, y_train, x_test, y_test, model_k=model_select
            )
            # 30 is written to the 'epoch' column as metadata only.
            records.append(
                [cl_train, cl_test, dim, 30, model_select, best_param, best_param_f1, nt_f1]
            )

results_k10_df = pd.DataFrame(records, columns=columns)

Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.769 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.857 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.769 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.889 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.741 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.667 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.923 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.714 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.609 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.222 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.400 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.222 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.476 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.316 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.545 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.609 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.222 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.400 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.222 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.476 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.316 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.545 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.471 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.556 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.471 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.400 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.471 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.333 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.500 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.526 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.353 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.588 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.769 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.857 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.769 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.889 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.741 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.667 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.923 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.714 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.636 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.640 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.667 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.696 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.786 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.583 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.759 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.692 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.667 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.609 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.222 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.400 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.222 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.476 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.316 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.545 total time=   0.0s
[CV 2/3] END crite

In [None]:
# Display the full results table for the 10-dimensional vectors built by the loop above.
results_k10_df

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.581818,0.561404
1,D-2021,D-2022,10,30,svc,"{'C': 1, 'kernel': 'poly'}",0.517241,0.478261
2,D-2021,D-2022,10,30,ada,"{'algorithm': 'SAMME.R', 'learning_rate': 0.1,...",0.615385,0.590164
3,D-2021,D-2022,10,30,knn,"{'algorithm': 'auto', 'n_neighbors': 3, 'weigh...",0.341463,0.25641
4,A-2022,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.405405,0.4
5,A-2022,D-2022,10,30,svc,"{'C': 0.001, 'kernel': 'linear'}",0.52381,0.44186
6,A-2022,D-2022,10,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 1.0, '...",0.388889,0.414634
7,A-2022,D-2022,10,30,knn,"{'algorithm': 'auto', 'n_neighbors': 3, 'weigh...",0.285714,0.333333
8,A-2021,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.49505,0.520833
9,A-2021,D-2022,10,30,svc,"{'C': 0.001, 'kernel': 'linear'}",0.52381,0.512


### Table 10

In [None]:
# Rows where the models were trained on A-2021 and tested on A-2022.
results_k10_df.loc[
    (results_k10_df["c_train"] == "A-2021") & (results_k10_df["c_test"] == "A-2022")
]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
32,A-2021,A-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.557377,0.539683
33,A-2021,A-2022,10,30,svc,"{'C': 0.001, 'kernel': 'linear'}",0.716049,0.657895
34,A-2021,A-2022,10,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 0.1, '...",0.533333,0.407407
35,A-2021,A-2022,10,30,knn,"{'algorithm': 'auto', 'n_neighbors': 7, 'weigh...",0.580645,0.626866


### Table 11

In [None]:
# Rows where the models were trained on D-2021 and tested on D-2022.
results_k10_df.loc[
    (results_k10_df["c_train"] == "D-2021") & (results_k10_df["c_test"] == "D-2022")
]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.581818,0.561404
1,D-2021,D-2022,10,30,svc,"{'C': 1, 'kernel': 'poly'}",0.517241,0.478261
2,D-2021,D-2022,10,30,ada,"{'algorithm': 'SAMME.R', 'learning_rate': 0.1,...",0.615385,0.590164
3,D-2021,D-2022,10,30,knn,"{'algorithm': 'auto', 'n_neighbors': 3, 'weigh...",0.341463,0.25641


### Table 12

In [None]:
# Random-forest rows only, ordered by training course and then test course.
results_k10_df.loc[results_k10_df["model"] == "rfc"].sort_values(by=["c_train", "c_test"])

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
32,A-2021,A-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.557377,0.539683
20,A-2021,D-2021,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.43956,0.673267
8,A-2021,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.49505,0.520833
44,A-2022,A-2021,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.666667,0.717949
16,A-2022,D-2021,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.520548,0.675
4,A-2022,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.405405,0.4
40,D-2021,A-2021,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.677419,0.646154
28,D-2021,A-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.410256,0.418605
0,D-2021,D-2022,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.581818,0.561404
36,D-2022,A-2021,10,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.44898,0.444444


### E2VecA

In [None]:
# Back to 100-dimensional student vectors for the E2VecA experiment.
dim = 100

In [None]:
def make_data_set_A(dim, cl_train, cl_test):
    """Build binary at-risk train/test sets from the ``_A20`` student vectors.

    For each of the two courses, the normalized ``..._A20.csv`` vector file
    is read from ./data/vectors and inner-joined on ``userid`` with the
    course's grade table loaded through OpenLA from ./data/EduData_20221028.

    Parameters
    ----------
    dim : int
        Vector dimensionality; it is part of the vector file name.
    cl_train, cl_test : str
        Course ids used for the training and the test set.

    Returns
    -------
    x_train, y_train, x_test, y_test
        Feature frames (every joined column except ``grade``) and integer
        label series where S/A/B -> 0 and C/D/F -> 1.

    Raises
    ------
    ValueError
        If a joined student has a grade that is missing or outside
        S/A/B/C/D/F, so that bad labels fail here instead of inside
        scikit-learn.
    """
    # os.path.join yields the same '.\\data\\...' strings on Windows as the
    # previously hard-coded literals, and valid paths on other platforms.
    edudata_dir = os.path.join(".", "data", "EduData_20221028")
    vectors_dir = os.path.join(".", "data", "vectors")
    grade_dict = {'S': 0, 'A': 0, 'B': 0, 'C': 1, 'D': 1, 'F': 1}

    def _load_course(course_id):
        """Return (features, labels) for one course, joined on userid."""
        file_name = "norm_Student_Vctors_course{}_{}dim_A20.csv".format(course_id, dim)
        vectors = pd.read_csv(os.path.join(vectors_dir, file_name)).set_index("userid")
        # NOTE(review): assumes grade_point_df() exposes 'userid' and 'grade'
        # columns, as the original code did -- confirm against OpenLA.
        grades = (
            la.CourseInformation(files_dir=edudata_dir, course_id=course_id)
            .grade_point_df()
            .set_index("userid")
        )
        joined = pd.merge(vectors, grades, left_index=True, right_index=True, how="inner")

        # Binarize only the grade column. Validation happens after the inner
        # join so students without a vector cannot trigger the error.
        labels = joined["grade"].map(grade_dict)
        unmapped = labels.isna()
        if unmapped.any():
            bad_values = sorted(joined.loc[unmapped, "grade"].astype(str).unique())
            raise ValueError(
                "Course {!r} has grades that cannot be binarized: {}".format(
                    course_id, bad_values
                )
            )
        return joined.drop(columns="grade"), labels.astype(int)

    x_train, y_train = _load_course(cl_train)
    x_test, y_test = _load_course(cl_test)
    return x_train, y_train, x_test, y_test

In [None]:
# Cross-course evaluation on the _A20 vectors: for every ordered (train, test)
# pair of distinct courses, tune and score each classifier family.
columns = ["c_train", "c_test", "dim", "epoch", "model", "best_param", "f1_bestparam", "f1_default"]

# Rows are collected in a list and turned into a DataFrame once, instead of
# concatenating onto the frame on every iteration.
records = []
for cl_test in cl_courses:
    for cl_train in cl_courses:
        if cl_test == cl_train:
            continue
        x_train, y_train, x_test, y_test = make_data_set_A(dim, cl_train, cl_test)
        for model_select in ["rfc", "svc", "ada", "knn"]:
            best_param, best_param_f1, nt_f1 = at_risk_prediction(
                x_train, y_train, x_test, y_test, model_k=model_select
            )
            # Record the `dim` that was actually used to load the vectors;
            # 30 is written to the 'epoch' column as metadata only.
            records.append(
                [cl_train, cl_test, dim, 30, model_select, best_param, best_param_f1, nt_f1]
            )

results_A_df = pd.DataFrame(records, columns=columns)

Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.733 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.750 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.923 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.929 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.929 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.800 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.600 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.526 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.267 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.353 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.267 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.609 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.444 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.250 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.667 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.600 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.526 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.267 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.353 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.267 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.609 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.444 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.250 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.667 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.556 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.500 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.600 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.632 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.533 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.526 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.632 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.625 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.632 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.632 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.733 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.750 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.923 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.929 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.929 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.800 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.833 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.609 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.692 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.720 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.692 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.857 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.720 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.692 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.857 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.526 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.267 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.353 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.267 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.609 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.444 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.250 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.526 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.571 total t

In [None]:
# Rows of results_A_df whose c_train is "A-2021" and whose c_test is "A-2022".
results_A_df.loc[results_A_df["c_train"].eq("A-2021") & results_A_df["c_test"].eq("A-2022")]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
32,A-2021,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.676056,0.708861
33,A-2021,A-2022,100,30,svc,"{'C': 10, 'kernel': 'sigmoid'}",0.712329,0.716049
34,A-2021,A-2022,100,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 0.1, '...",0.657534,0.678571
35,A-2021,A-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 10, 'weig...",0.657534,0.608696


In [None]:
# Rows of results_A_df whose c_train is "D-2021" and whose c_test is "D-2022".
results_A_df.loc[results_A_df["c_train"].eq("D-2021") & results_A_df["c_test"].eq("D-2022")]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.641509,0.62963
1,D-2021,D-2022,100,30,svc,"{'C': 1, 'kernel': 'poly'}",0.571429,0.541667
2,D-2021,D-2022,100,30,ada,"{'algorithm': 'SAMME', 'learning_rate': 2.0, '...",0.325581,0.586207
3,D-2021,D-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 3, 'weigh...",0.478261,0.465116


In [None]:
# Only the "rfc" rows of results_A_df, ordered by c_train and then c_test.
results_A_df.loc[results_A_df["model"].eq("rfc")].sort_values(by=["c_train", "c_test"])

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.641509,0.62963
4,A-2022,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.54321,0.56
8,A-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.529915,0.545455
12,D-2022,D-2021,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.825,0.831169
16,A-2022,D-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.763636,0.738739
20,A-2021,D-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.59854,0.585714
24,D-2022,A-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.263158,0.315789
28,D-2021,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.242424,0.342857
32,A-2021,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.676056,0.708861
36,D-2022,A-2021,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.5,0.489796


### E2VecD

In [None]:
def make_data_set_D(dim, cl_train, cl_test, vec_suffix="_D20"):
    """Build train/test feature frames and binary grade labels for two courses.

    For each course the student-vector CSV
    ``norm_Student_Vctors_course<course>_<dim>dim<vec_suffix>.csv`` under
    ``data/vectors`` is read and inner-joined on ``userid`` with the frame
    returned by ``la.CourseInformation(...).grade_point_df()``. Grade letters
    are mapped to 0 (S, A, B) or 1 (C, D, F); presumably 1 marks an at-risk
    student -- confirm against the caller.

    Parameters
    ----------
    dim : value formatted into the vector file name (the ``<dim>dim`` part).
    cl_train : course id used for the training split.
    cl_test : course id used for the test split.
    vec_suffix : str, default "_D20"
        Suffix placed between ``dim`` and ``.csv`` in the vector file name.
        The default reproduces the original hard-coded file names.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)`` where each ``x_*`` is the merged
        frame without its ``grade`` column and each ``y_*`` is that column.
    """
    import os  # local import: the notebook's import cell does not provide os

    # os.path.join produces the same strings as the original raw literals on
    # Windows, and valid forward-slash paths on every other platform.
    edudata_dir = os.path.join(".", "data", "EduData_20221028")
    vectors_dir = os.path.join(".", "data", "vectors")
    grade_dict = {'S': 0, 'A': 0, 'B': 0, 'C': 1, 'D': 1, 'F': 1}

    def _load_course(course_id):
        """Return (features, labels) for one course, joined on userid."""
        vec_name = "norm_Student_Vctors_course{}_{}dim{}.csv".format(course_id, dim, vec_suffix)
        vectors = pd.read_csv(os.path.join(vectors_dir, vec_name)).set_index('userid')
        grades = la.CourseInformation(files_dir=edudata_dir, course_id=course_id).grade_point_df().set_index("userid")
        # replace() is applied to the whole grade frame, exactly as before;
        # letters missing from grade_dict are left unchanged.
        grades = grades.replace(grade_dict)
        # Inner join: only students present in both the vectors and the grades survive.
        merged = pd.merge(vectors, grades, left_index=True, right_index=True, how="inner")
        return merged.drop(columns="grade"), merged["grade"]

    x_train, y_train = _load_course(cl_train)
    x_test, y_test = _load_course(cl_test)
    return x_train, y_train, x_test, y_test

In [None]:
columns = ["c_train","c_test","dim","epoch","model","best_param","f1_bestparam","f1_default"]

# One plain dict per (train course, test course, model). The frame is built
# once after the loop instead of growing it with pd.concat on every iteration,
# and each value keeps its own type (str / int / dict / float) rather than
# being pushed through a single mixed NumPy array.
# If the loop is interrupted, the rows gathered so far remain in result_rows_D.
result_rows_D = []

for cl_test in cl_courses:
    for cl_train in cl_courses:
        # Never train and test on the same course.
        if cl_test == cl_train:
            continue
        x_train, y_train, x_test, y_test = make_data_set_D(dim, cl_train, cl_test)
        for model_select in ["rfc", "svc", "ada", "knn"]:
            best_param, best_param_f1, nt_f1 = at_risk_prediction(
                x_train, y_train, x_test, y_test, model_k=model_select
            )
            result_rows_D.append({
                "c_train": cl_train,
                "c_test": cl_test,
                # Record the dim actually passed to make_data_set_D rather than a literal 100.
                "dim": dim,
                # NOTE(review): 30 is hard-coded; presumably the training epochs of the vectors -- confirm.
                "epoch": 30,
                "model": model_select,
                "best_param": best_param,
                "f1_bestparam": best_param_f1,
                "f1_default": nt_f1,
            })

results_D_df = pd.DataFrame(result_rows_D, columns=columns)

Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.692 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.815 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.769 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.750 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.880 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.750 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.880 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.769 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.762 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.778 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.625 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.700 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.625 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.636 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.762 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.778 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.625 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.700 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.625 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.636 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.533 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.500 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.556 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.533 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.737 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.632 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.533 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.737 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.500 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.692 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.696 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.815 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.769 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.750 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.880 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.800 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.750 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.880 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.769 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.545 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.786 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.640 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.692 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.714 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.720 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.741 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.759 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.741 total time=   0.0s
[CV 2/3] END crite

  self._df = pd.read_csv(file_path)


Fitting 3 folds for each of 45 candidates, totalling 135 fits
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.762 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.800 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=10;, score=0.588 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.778 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=20;, score=0.625 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.667 total time=   0.0s
[CV 2/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.700 total time=   0.0s
[CV 3/3] END criterion=gini, max_depth=10, n_estimators=30;, score=0.625 total time=   0.0s
[CV 1/3] END criterion=gini, max_depth=10, n_estimators=50;, score=0.636 total time=   0.0s
[CV 2/3] END crite

In [None]:
# Rows of results_D_df whose c_train is "A-2021" and whose c_test is "A-2022".
results_D_df.loc[results_D_df["c_train"].eq("A-2021") & results_D_df["c_test"].eq("A-2022")]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
32,A-2021,A-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.683544,0.7
33,A-2021,A-2022,100,30,svc,"{'C': 0.001, 'kernel': 'linear'}",0.716049,0.716049
34,A-2021,A-2022,100,30,ada,"{'algorithm': 'SAMME.R', 'learning_rate': 5.0,...",0.666667,0.537313
35,A-2021,A-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 7, 'weigh...",0.57971,0.657534


In [None]:
# Rows of results_D_df whose c_train is "D-2021" and whose c_test is "D-2022".
results_D_df.loc[results_D_df["c_train"].eq("D-2021") & results_D_df["c_test"].eq("D-2022")]

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.6,0.56
1,D-2021,D-2022,100,30,svc,"{'C': 10, 'kernel': 'poly'}",0.488889,0.390244
2,D-2021,D-2022,100,30,ada,"{'algorithm': 'SAMME.R', 'learning_rate': 0.5,...",0.542373,0.580645
3,D-2021,D-2022,100,30,knn,"{'algorithm': 'auto', 'n_neighbors': 3, 'weigh...",0.380952,0.216216


In [None]:
# Only the "rfc" rows of results_D_df, ordered by c_train and then c_test.
results_D_df.loc[results_D_df["model"].eq("rfc")].sort_values(by=["c_train", "c_test"])

Unnamed: 0,c_train,c_test,dim,epoch,model,best_param,f1_bestparam,f1_default
0,D-2021,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.6,0.56
4,A-2022,D-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.565217,0.565217
8,A-2021,D-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.509091,0.522523
12,D-2022,D-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.769231,0.775
16,A-2022,D-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.678261,0.654867
20,A-2021,D-2021,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.545455,0.626866
24,D-2022,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.434783,0.444444
28,D-2021,A-2022,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.324324,0.277778
32,A-2021,A-2022,100,30,rfc,"{'criterion': 'gini', 'max_depth': 10, 'n_esti...",0.683544,0.7
36,D-2022,A-2021,100,30,rfc,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.346154,0.580645
