In [1]:
# Data management
from google.colab import drive
import pandas as pd

# Additional Scikit-Learn imports
from sklearn.model_selection import StratifiedKFold

# Scikit-Learn's ML models
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, AdaBoostRegressor, AdaBoostClassifier

# Additional Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Fast.ai DNN Model
from fastai.tabular import *

print('Imports complete.')

Imports complete.


In [2]:
# Objects used to help manage the metrics data
class Metric:
    def __init__(self, name, fold):
        self.name = name
        self.fold_num = fold
        self.values = {}

    def __str__(self):
        return str({self.name: self.values})

    def __repr__(self):
        return str({self.name: self.values})

    def addValue(self, m_type, value):
        if m_type != None and value != None:
            self.values[m_type] = value

    def getValue(self, m_type):
        if m_type in self.values:
            return self.values[m_type]

    def getName(self):
        return self.name

    def getMeasures(self):
        # Retuns all the types of measurements (accuracy or time or whatever you have)
        return self.values.keys()

    def getValues(self):
        return self.values

    def containsType(self, m_type):
        # Checks to see if the measurement type (accuracy for example) is contained in here
        if type(m_type) == list:
            for m in m_type:
                if m not in self.values:
                    return False
            return True
        elif type(m_type) == str:
            if m_type in self.values:
                return True
            else:
                return False
        else:
            return False

    def getMetricWithMeasure(self, m_type):
        # Return a metric with only the data requested, which may be in list format if there is more than one measurement desired
        if type(m_type) == list:
            new_metric = Metric(self.name, fold=self.fold_num)
            for m in m_type:
                new_metric.addValue(m, self.values[m])

            return new_metric
        elif type(m_type) == str:
            new_metric = Metric(self.name, fold=self.fold_num)
            new_metric.addValue(m_type, self.values[m_type])

            return new_metric

class MetricsManager:
    def __init__(self):
        self.metrics_list = []
    
    def getMetrics(self, model_name='all', m_type='all'):
        # If they want everything, give them everything
        if model_name == 'all' and m_type == 'all':
            return self.metrics_list
        # If they want a list of models, the conditional in the lambda function changes a little bit
        elif type(model_name) == list:
            # This line is a blast! It searches through all of the metrics the manager knows about, and returns all the metrics that have both the name and metrics the user wants in a list
            return list(
                filter(
                    None, 
                    map( 
                        lambda m : m.getMetricWithMeasure(m_type) if (m.getName() in model_name) and (m.containsType(m_type) or m_type == 'all') else None, 
                        self.metrics_list
                    )
                )
            )
        # Return the data requested as per the terrible line below
        else:
            # This line is a blast! It searches through all of the metrics the manager knows about, and returns all the metrics that have both the name and metrics the user wants in a list
            return list(
                filter(
                    None, 
                    map( 
                        lambda m : m.getMetricWithMeasure(m_type) if (m.getName() == model_name or model_name == 'all') and (m.containsType(m_type) or m_type == 'all') else None, 
                        self.metrics_list
                    )
                )
            )

    def addMetric(self, metric):
        self.metrics_list.append(metric)

    def printMeasures(self, model='all', metrics='all'):
        # Acquire all of the metrics the user wants us to print first so there's no weird filtering going on later
        metrics = self.getMetrics(model_name=model, m_type=metrics)

        # Figure out all of the metrics that are going to be available and figure out their ordering
        #   If we are printing time and accuracy data, we want the columns to be consistent
        measurements = []
        for metric in metrics:
            metric_measures = metric.getMeasures()
            for measure in metric_measures:
                if measure not in measurements:
                    measurements.append(measure)

        # Formatting for the header, we need to print the model column name, then each of the values collected
        print('{:10}'.format('model'), end='')
        for measure in measurements:
            print('{:11}'.format(measure), end='')
        print('\n', end='')
        print('-------'*(len(measurements)+1))

        # Go through all of the models and print their data one line at a time
        printed_models = []
        for metric in metrics:
            metric_name = metric.getName()
            
            # If the model hasn't already been printed (this can happen if I have multiple folds for one classifier), then print the data
            if metric_name not in printed_models:
                print('{:9}'.format(metric_name), end='')

                # Get all of the values stored in this metric (it's in a dictionary)
                metric_values = metric.getValues()
                
                # Go through all of the measurement values in the order as determined above
                for measure in measurements:
                    if measure in metric_values:
                        # We need to go through all of the data and calculate the mean and std deviation from each fold
                        #  If there are no folds, then this won't cause any damage (Keep unique identifiers!)
                        vals = []
                        for m in metrics:
                            if m.getName() == metric_name:
                                vals.append(m.getValues()[measure])
                        # Print the calcuated mean and standard deviations
                        print('{:6.2f}\u00B1{:6<.2f}'.format(np.mean(vals), np.std(vals)), end='')
                    # If there is no value to print, just skip over this element
                    else:
                        print(' '*11, end='')
                # Make note of the model we just printed. We don't want any repeats
                printed_models.append(metric_name)
                print('\n', end='')

In [3]:
def train_and_eval_on(X, y, feature_set, metrics_manager):
    """
    train_and_eval_on function
        Description: This function will train all the models on the given feature set of the X (data) for predicting y (target) and add the acquired metrics 
          to the MetricsManager object from the user

        Args: 
            X => pd.DataFrame object containing the data
            y => pd.Series object containings the target classifications
            feature_set => list of features in X to use for training
            metrics_manager => MetricsManager object (custom)

        Returns:
            Nothing
        
        Keys used for the manager:
                        Random Forest => rf
                        Decision Tree => dt
                        k-Nearest Neighbors => knn
                        Support Vector Machine => svm
                        Logistic Regression => lr
                        Linear Discriminant Analysis => lda
                        AdaBoost => ab
                        Naive Bayes => nb
                        Keras-TensorFlow => keras
                        Fast.ai => fastai
    """

    # Select the given features within the data
    X = X[feature_set]

    print('Training with {} features'.format(len(X.columns)))

    # Create stratified, 10-fold cross validation object
    random_state = 0
    sss = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_state)

    i=1

    # Experiment with 10-fold cross validation
    for train_idx, test_idx in sss.split(X, y):

        print('fold num {}'.format(i))
        i+=1

        # Split the data into the training and testing sets
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        # Random Forest Model
        rf = RandomForestClassifier(random_state=random_state)
        rf.fit(X_train, y_train)

        y_pred = rf.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred, average='weighted')
        prec = precision_score(y_test, y_pred, average='weighted')

        m = Metric('rf', fold=i)
        m.addValue('acc', 100*acc)
        m.addValue('rec', 100*rec)
        m.addValue('prec', 100*prec)
        mm.addMetric(m)

        """# Decision Tree Model
        dt = DecisionTreeClassifier(random_state=random_state)
        dt.fit(X_train, y_train)
        score = dt.score(X_test, y_test)

        m = Metric('dt', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)

        # k-Nearest Neighbors Model
        knn = KNeighborsClassifier()
        knn.fit(X_train, y_train)
        score = knn.score(X_test, y_test)

        m = Metric('knn', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)

        # Support Vector Machine Model
        svm = SVC(random_state=random_state)
        svm.fit(X_train, y_train)
        score = svm.score(X_test, y_test)

        m = Metric('svm', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)

        # Logistic Regression Model
        lr = LogisticRegression(random_state=random_state)
        lr.fit(X_train, y_train)
        score = lr.score(X_test, y_test)

        m = Metric('lr', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)

        # Linear Discriminant Analysis Model
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)
        score = lda.score(X_test, y_test)

        m = Metric('lda', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)"""

        # AdaBoost Model
        ab = AdaBoostClassifier(random_state=random_state)
        ab.fit(X_train, y_train)

        y_pred = rf.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred, average='weighted')
        prec = precision_score(y_test, y_pred, average='weighted')

        m = Metric('ab', fold=i)
        m.addValue('acc', 100*acc)
        m.addValue('rec', 100*rec)
        m.addValue('prec', 100*prec)
        mm.addMetric(m)

        """# Naive Bayes Model
        nb = GaussianNB()
        nb.fit(X_train, y_train)
        score = nb.score(X_test, y_test)

        m = Metric('nb', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)

        # Keras-TensorFlow DNN Model
        dnn_keras = Sequential(layers=[
                                 Dense(128, kernel_regularizer=l2(0.001), activation='relu',input_shape=(len(X_train.columns),)),
                                 BatchNormalization(),
                                 Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
                                 BatchNormalization(),
                                 Dense(y_train.nunique(), activation='softmax')
        ])
        dnn_keras.compile(
            optimizer='adam', 
            loss='categorical_crossentropy', 
            metrics=['accuracy'])
        dnn_keras.fit(X_train, pd.get_dummies(y_train), epochs=100, verbose=0, batch_size=512)
        _, score = dnn_keras.evaluate(X_test, pd.get_dummies(y_test), verbose=0)

        m = Metric('keras', fold=i)
        m.addValue('acc', 100*score)
        mm.addMetric(m)"""

        # Fast.ai DNN Model
        data_fold = (TabularList.from_df(df, path=path, cont_names=X_train.columns, procs=[Categorify, Normalize])
                     .split_by_idxs(train_idx, test_idx)
                     .label_from_df(cols=dep_var)
                     .databunch(num_workers=0))
        dnn_fastai = tabular_learner(data_fold, layers=[200, 100], metrics=accuracy)
        dnn_fastai.fit_one_cycle(cyc_len=10, callbacks=None)
        _, score = dnn_fastai.validate()

        # acquire predictions
        y_pred = []
        #print('Length of test set: {}'.format(len(y_test)))
        for j in range(len(y_test)):
            row, clas, probs = dnn_fastai.predict(X_test.iloc[j])
            #print(clas)
            pred = 0
            if clas == tensor(1):
                pred = 1
            y_pred.append(pred)
            """actual = y_test.iloc[i]
            if pred == actual:
                print('Correct!')
            else:
                print('Incorrect!')
            print('Prediction: {}'.format(pred))
            print('Actual: {}'.format(actual))
            print('-'*20)"""

        acc = accuracy_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred, average='weighted')
        prec = precision_score(y_test, y_pred, average='weighted')

        m = Metric('fastai', fold=i)
        m.addValue('acc', 100*acc)
        m.addValue('rec', 100*rec)
        m.addValue('prec', 100*prec)
        mm.addMetric(m)

In [4]:
def balance_data(df, label_column):
    labels = df[label_column].unique()
    sample_length_list = []
    for i in range(len(labels)):
        samples = df.loc[ df[label_column] == labels[i] ]
        sample_length_list.append( len(samples) )
        #print('Number of {} samples: {}'.format(labels[i], len( samples )))

    random_state = 0
    smallest_count = min(sample_length_list)
    dfs = []
    for i in range(len(labels)):
        #dfs.append( df.loc[ df[label_column] == labels[i] ].sample(smallest_count) )

        # We are only sampling 40 purely for testing reasons to help speed up the dev process!
        # Uncomment the line above this to actually run the complete tests
        dfs.append( df.loc[ df[label_column] == labels[i] ].sample(40) )

    return pd.concat(dfs)

## Import Data

In [5]:
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [6]:
!ls /content/gdrive/My\ Drive/PredictingMOOCDropout

export.pkl  test_normalized_trimmed_features.csv
models	    train_normalized_trimmed_features.csv


In [7]:
path = '/content/gdrive/My Drive/PredictingMOOCDropout/'

# Import the training data
f_train = 'train_normalized_trimmed_features.csv'
df_train = pd.read_csv(path + f_train)

f_test = 'test_normalized_trimmed_features.csv'
df_test = pd.read_csv(path + f_test)

In [8]:
df_train.head()

Unnamed: 0,enroll_id,action_count,session_count,seek_video_count,play_video_count,pause_video_count,stop_video_count,load_video_count,problem_get_count,problem_check_count,problem_save_count,reset_problem_count,problem_check_correct_count,problem_check_incorrect_count,create_thread_count,create_comment_count,delete_thread_count,delete_comment_count,click_info_count,click_courseware_count,click_about_count,click_forum_count,click_progress_count,close_courseware_count,truth,username,course_id,age,gender,education,user_enroll_count,course_enroll_count,course_category
0,772,-0.228387,-0.228387,-0.243043,-0.296739,-0.333535,-0.059298,-0.480968,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.384412,-0.428333,-0.293695,-0.17735,-0.284796,-0.464309,1,5981,course-v1:TsinghuaX+70800232X+2015_T2,29,0,3,6,1333,0
1,773,-0.204883,-0.204883,-0.115253,-0.242027,-0.296012,-0.059298,-0.445267,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.27836,-0.394335,-0.159877,-0.17735,-0.284796,-0.428065,1,1544995,course-v1:TsinghuaX+70800232X+2015_T2,0,0,0,2,1333,0
2,774,-0.13932,-0.13932,-0.051358,-0.105248,-0.20846,-0.057709,-0.195358,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,0.14585,-0.139346,-0.427513,-0.17735,-0.284796,-0.138111,1,1072798,course-v1:TsinghuaX+70800232X+2015_T2,0,0,0,5,1333,0
3,776,-0.207357,-0.207357,-0.243043,-0.255705,-0.296012,-0.057709,-0.409566,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.27836,-0.360336,-0.159877,-0.17735,-0.284796,-0.391821,0,561867,course-v1:TsinghuaX+70800232X+2015_T2,37,0,6,6,1333,0
4,777,-0.225912,-0.225912,-0.243043,-0.296739,-0.333535,-0.059298,-0.480968,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.384412,-0.411334,-0.293695,-0.17735,-0.284796,-0.428065,1,1368125,course-v1:TsinghuaX+70800232X+2015_T2,0,0,0,11,1333,0


In [9]:
df_test.head()

Unnamed: 0,enroll_id,action_count,session_count,seek_video_count,play_video_count,pause_video_count,stop_video_count,load_video_count,problem_get_count,problem_check_count,problem_save_count,reset_problem_count,problem_check_correct_count,problem_check_incorrect_count,create_thread_count,create_comment_count,delete_thread_count,delete_comment_count,click_info_count,click_courseware_count,click_about_count,click_forum_count,click_progress_count,close_courseware_count,truth,username,course_id,age,gender,education,user_enroll_count,course_enroll_count,course_category
0,131072,-0.225912,-0.225912,-0.243043,-0.296739,-0.333535,-0.059298,-0.480968,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.490464,-0.428333,0.107758,-0.17735,-0.284796,-0.464309,1,1051125,course-v1:TsinghuaX+20220053X_2015_T2+2015_T2,0,0,0,7,687,0
1,131073,-0.224675,-0.224675,-0.243043,-0.296739,-0.333535,-0.059298,-0.445267,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.490464,-0.428333,-0.427513,-0.17735,-0.284796,-0.319333,0,255243,course-v1:TsinghuaX+20220053X_2015_T2+2015_T2,0,0,0,4,687,0
2,393217,-0.225912,-0.225912,-0.243043,-0.296739,-0.333535,-0.059298,-0.480968,-0.036598,-0.305844,0.175968,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.490464,-0.428333,-0.427513,-0.17735,-0.284796,-0.464309,1,3691982,course-v1:UC_BerkeleyX+ColWri2_1x_2015_T1+2016_TS,0,0,0,3,735,0
3,393221,-0.20612,-0.20612,-0.243043,-0.269383,-0.296012,-0.059298,-0.409566,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.27836,-0.326338,-0.159877,-0.17735,-0.284796,-0.355577,1,368763,course-v1:UC_BerkeleyX+ColWri2_1x_2015_T1+2016_TS,23,0,1,2,735,0
4,131079,-0.229624,-0.229624,-0.243043,-0.296739,-0.333535,-0.059298,-0.480968,-0.042412,-0.305844,-0.202161,-0.103969,-0.337528,-0.237532,-0.104921,-0.103794,-0.020105,-0.037288,-0.490464,-0.428333,-0.293695,-0.17735,-0.284796,-0.464309,1,938081,course-v1:TsinghuaX+20220053X_2015_T2+2015_T2,0,0,0,10,687,0


In [10]:
# Features that could lead to overfitting the models
bad_features = ['enroll_id', 'username', 'course_id']
df_train.drop(labels=bad_features, axis='columns', inplace=True)
df_test.drop(labels=bad_features, axis='columns', inplace=True)

In [11]:
df = pd.concat([df_train, df_test])

In [12]:
dep_var = 'truth'
X = df.loc[:, df.columns != dep_var]
y = df[dep_var]

## Experiments

In [13]:
mm = MetricsManager()
train_and_eval_on(X=X, y=y, feature_set=X.columns, metrics_manager=mm)

Training with 29 features
fold num 1


epoch,train_loss,valid_loss,accuracy,time
0,0.41455,0.42002,0.826767,00:15
1,0.395651,0.425486,0.825704,00:15
2,0.389056,0.433794,0.831243,00:15
3,0.413788,0.420696,0.829204,00:15
4,0.389053,0.413973,0.833592,00:15
5,0.389652,0.419809,0.832528,00:15
6,0.38912,0.436162,0.832573,00:15
7,0.395395,0.427314,0.834257,00:15
8,0.390161,0.422274,0.83142,00:15
9,0.39778,0.4125,0.826944,00:15


fold num 2


epoch,train_loss,valid_loss,accuracy,time
0,0.398441,0.411241,0.830091,00:15
1,0.41159,0.411468,0.837314,00:15
2,0.405057,0.409861,0.833991,00:15
3,0.403794,0.400838,0.839131,00:15
4,0.402891,0.455626,0.835497,00:16
5,0.377242,0.397548,0.837447,00:16
6,0.399563,0.432165,0.837625,00:16
7,0.38154,0.402418,0.83337,00:15
8,0.384515,0.406989,0.833946,00:16
9,0.391955,0.40485,0.831642,00:15


fold num 3


epoch,train_loss,valid_loss,accuracy,time
0,0.405307,0.41668,0.827912,00:15
1,0.411554,0.398179,0.834781,00:16
2,0.40088,0.405884,0.830349,00:16
3,0.406206,0.405797,0.833895,00:16
4,0.395193,0.402542,0.831413,00:16
5,0.401453,0.397937,0.836731,00:16
6,0.387468,0.398396,0.832831,00:16
7,0.398589,0.399902,0.835402,00:16
8,0.395371,0.396659,0.835623,00:15
9,0.390018,0.397639,0.835313,00:16


fold num 4


epoch,train_loss,valid_loss,accuracy,time
0,0.412618,0.427977,0.829596,00:15
1,0.399732,0.412888,0.832122,00:15
2,0.402457,0.404537,0.83487,00:15
3,0.404159,0.399636,0.836598,00:16
4,0.409944,0.406723,0.835712,00:16
5,0.38484,0.393645,0.837396,00:16
6,0.389966,0.401507,0.833452,00:16
7,0.394014,0.41504,0.833319,00:15
8,0.38735,0.425582,0.835978,00:15
9,0.396085,0.402072,0.834116,00:15


fold num 5


epoch,train_loss,valid_loss,accuracy,time
0,0.411962,0.410398,0.832432,00:15
1,0.403979,0.419702,0.828222,00:16
2,0.392298,0.396361,0.837795,00:15
3,0.409784,0.398827,0.837086,00:16
4,0.391494,0.403299,0.838282,00:16
5,0.395082,0.400826,0.838548,00:16
6,0.382762,0.394239,0.840144,00:16
7,0.374688,0.402858,0.836775,00:16
8,0.382462,0.394168,0.837883,00:15
9,0.384381,0.396434,0.83877,00:16


fold num 6


epoch,train_loss,valid_loss,accuracy,time
0,0.402421,0.448234,0.831767,00:15
1,0.408654,0.410086,0.830394,00:16
2,0.402895,0.41374,0.832521,00:16
3,0.392081,0.434127,0.834737,00:16
4,0.409635,0.437265,0.833319,00:15
5,0.385416,0.421761,0.835534,00:16
6,0.388855,0.420674,0.834471,00:16
7,0.389524,0.423686,0.834958,00:16
8,0.396078,0.475529,0.838016,00:16
9,0.391135,0.413772,0.835224,00:15


fold num 7


epoch,train_loss,valid_loss,accuracy,time
0,0.404844,0.43971,0.828975,00:15
1,0.412203,0.408678,0.833363,00:15
2,0.410921,0.400355,0.835357,00:15
3,0.399788,0.427749,0.83097,00:15
4,0.398958,0.407049,0.83713,00:15
5,0.374202,0.403951,0.836687,00:15
6,0.401004,0.400329,0.835402,00:15
7,0.385794,0.411235,0.835623,00:16
8,0.38374,0.40822,0.836908,00:15
9,0.387586,0.420127,0.834648,00:15


fold num 8


epoch,train_loss,valid_loss,accuracy,time
0,0.409407,0.420235,0.826316,00:15
1,0.399375,0.421103,0.836465,00:15
2,0.409664,0.413423,0.83385,00:15
3,0.405225,0.408622,0.834515,00:15
4,0.397223,0.409374,0.834382,00:15
5,0.396303,0.410655,0.831413,00:15
6,0.39437,0.419046,0.836022,00:15
7,0.388092,0.425941,0.836244,00:15
8,0.388516,0.41213,0.836908,00:15
9,0.411777,0.408138,0.838947,00:15


fold num 9


epoch,train_loss,valid_loss,accuracy,time
0,0.410407,0.413165,0.828133,00:15
1,0.402583,0.403766,0.833407,00:15
2,0.38373,0.401089,0.837529,00:15
3,0.397814,0.402699,0.836022,00:15
4,0.392914,0.403342,0.83323,00:15
5,0.385508,0.401522,0.837617,00:15
6,0.399406,0.412848,0.836687,00:15
7,0.38928,0.406288,0.834072,00:15
8,0.390613,0.399045,0.836775,00:15
9,0.390369,0.407285,0.836598,00:15


fold num 10


epoch,train_loss,valid_loss,accuracy,time
0,0.402577,0.416273,0.829596,00:15
1,0.400803,0.414526,0.831546,00:15
2,0.404662,0.437431,0.832964,00:15
3,0.38901,0.410341,0.829906,00:15
4,0.386004,0.412122,0.835003,00:15
5,0.387277,0.421293,0.834161,00:15
6,0.403783,0.405619,0.832388,00:15
7,0.384687,0.40639,0.830571,00:15
8,0.386701,0.40811,0.832831,00:15
9,0.38211,0.415304,0.832742,00:15


In [14]:
mm.printMeasures()

model     acc        rec        prec       
----------------------------
rf        83.87±0.19 83.87±0.19 82.99±0.21
ab        83.87±0.19 83.87±0.19 82.99±0.21
fastai    83.45±0.33 83.45±0.33 82.94±0.29
