In [32]:
import pandas as pd
import numpy as np

In [33]:
def get_avg_folds(df, models):
    """Average every metric over the rows (folds) recorded for each model.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-fold scores. Must contain a ``Model`` column and every metric
        column listed in ``metric_columns`` below.
    models : sequence of str
        Model names to report; the output rows follow this order.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``models`` (index ``0 .. len(models) - 1``) with
        the columns ``Model`` followed by the metrics. Each metric is the mean
        over that model's rows in ``df``, rounded to three decimals. A model
        with no rows in ``df`` gets NaN for every metric.
    """
    metric_columns = [
        "Accuracy",
        "Balanced Accuracy",
        "Precision-Macro",
        "Precision-Weighted",
        "Recall-Macro",
        "Recall-Weighted",
        "F1-Score-Macro",
        "F1-Score-Weighted",
    ]

    # Build plain records and create the frame once. Writing through
    # ``result.loc[i][col] = value`` is chained assignment: it only works when
    # ``result.loc[i]`` happens to be a view, and silently leaves the frame
    # untouched under pandas Copy-on-Write.
    rows = []
    for model in models:
        model_folds = df.loc[df["Model"] == model, metric_columns]
        # skipna=False keeps a NaN fold score visible in the average, exactly
        # as np.mean over the raw values would.
        fold_means = model_folds.mean(skipna=False).round(3)
        rows.append({"Model": model, **fold_means.to_dict()})

    return pd.DataFrame(rows, columns=["Model"] + metric_columns)

## FASTTEXT

In [34]:
# Load the per-fold fastText scores and discard the 'Unnamed: 0' column.
df = pd.read_excel("results/10-Fold-fasttext-davidson.xlsx").drop(columns='Unnamed: 0')
df

Unnamed: 0,Fold,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,1,AdaBoostClassifier,0.644,0.362,0.262,0.435,0.362,0.644,0.302,0.516
1,1,BaggingClassifier,0.994,0.911,0.973,0.994,0.911,0.994,0.936,0.994
2,1,BernoulliNB,0.869,0.781,0.783,0.884,0.781,0.869,0.759,0.872
3,1,CalibratedClassifierCV,0.988,0.851,0.867,0.987,0.851,0.988,0.858,0.988
4,1,DecisionTreeClassifier,0.989,0.910,0.965,0.989,0.910,0.989,0.932,0.989
...,...,...,...,...,...,...,...,...,...,...
245,10,RidgeClassifier,0.947,0.593,0.596,0.921,0.593,0.947,0.593,0.933
246,10,RidgeClassifierCV,0.947,0.593,0.596,0.921,0.593,0.947,0.593,0.933
247,10,SGDClassifier,0.973,0.694,0.721,0.969,0.694,0.973,0.707,0.971
248,10,SVC,0.975,0.804,0.838,0.975,0.804,0.975,0.818,0.975


In [35]:
# Collect every distinct model name once, in order of first appearance,
# rather than assuming the first 25 rows are exactly one complete fold.
models = df['Model'].drop_duplicates().to_list()
models

['AdaBoostClassifier',
 'BaggingClassifier',
 'BernoulliNB',
 'CalibratedClassifierCV',
 'DecisionTreeClassifier',
 'DummyClassifier',
 'ExtraTreeClassifier',
 'ExtraTreesClassifier',
 'GaussianNB',
 'KNeighborsClassifier',
 'LabelPropagation',
 'LabelSpreading',
 'LinearDiscriminantAnalysis',
 'LinearSVC',
 'LogisticRegression',
 'NearestCentroid',
 'PassiveAggressiveClassifier',
 'Perceptron',
 'QuadraticDiscriminantAnalysis',
 'RandomForestClassifier',
 'RidgeClassifier',
 'RidgeClassifierCV',
 'SGDClassifier',
 'SVC',
 'LGBMClassifier']

In [36]:
# Average each listed model's metrics over its rows in df (see get_avg_folds).
result = get_avg_folds(df, models)
result

Unnamed: 0,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,AdaBoostClassifier,0.631,0.372,0.287,0.492,0.367,0.631,0.297,0.52
1,BaggingClassifier,0.995,0.93,0.945,0.995,0.93,0.995,0.933,0.994
2,BernoulliNB,0.858,0.723,0.682,0.874,0.713,0.858,0.67,0.862
3,CalibratedClassifierCV,0.99,0.867,0.912,0.99,0.867,0.99,0.882,0.99
4,DecisionTreeClassifier,0.992,0.931,0.938,0.992,0.931,0.992,0.925,0.992
5,DummyClassifier,0.414,0.127,0.052,0.172,0.127,0.414,0.074,0.243
6,ExtraTreeClassifier,0.921,0.715,0.711,0.922,0.705,0.921,0.699,0.922
7,ExtraTreesClassifier,0.985,0.81,0.876,0.985,0.81,0.985,0.835,0.984
8,GaussianNB,0.923,0.897,0.717,0.946,0.886,0.923,0.74,0.932
9,KNeighborsClassifier,0.861,0.588,0.733,0.86,0.588,0.861,0.618,0.854


In [38]:
# Write the per-model averages for the fastText run to the results folder.
result.to_excel("results/avg-10-Fold-fasttext-davidson.xlsx")

## GLOVE

In [39]:
# Load the per-fold GloVe scores and discard the 'Unnamed: 0' column.
df = pd.read_excel("results/10-Fold-glove-davidson.xlsx").drop(columns='Unnamed: 0')
df

Unnamed: 0,Fold,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,1,AdaBoostClassifier,0.644,0.360,0.280,0.448,0.360,0.644,0.303,0.517
1,1,BaggingClassifier,0.994,0.913,0.973,0.994,0.913,0.994,0.937,0.994
2,1,BernoulliNB,0.768,0.663,0.524,0.817,0.663,0.768,0.555,0.786
3,1,CalibratedClassifierCV,0.987,0.891,0.919,0.987,0.891,0.987,0.903,0.987
4,1,DecisionTreeClassifier,0.990,0.932,0.920,0.991,0.932,0.990,0.924,0.990
...,...,...,...,...,...,...,...,...,...,...
245,10,RidgeClassifier,0.945,0.592,0.595,0.919,0.592,0.945,0.592,0.931
246,10,RidgeClassifierCV,0.945,0.592,0.595,0.919,0.592,0.945,0.592,0.931
247,10,SGDClassifier,0.974,0.699,0.726,0.969,0.699,0.974,0.711,0.971
248,10,SVC,0.974,0.791,0.856,0.974,0.791,0.974,0.815,0.973


In [40]:
# Collect every distinct model name once, in order of first appearance,
# rather than assuming the first 25 rows are exactly one complete fold.
models = df['Model'].drop_duplicates().to_list()
models

['AdaBoostClassifier',
 'BaggingClassifier',
 'BernoulliNB',
 'CalibratedClassifierCV',
 'DecisionTreeClassifier',
 'DummyClassifier',
 'ExtraTreeClassifier',
 'ExtraTreesClassifier',
 'GaussianNB',
 'KNeighborsClassifier',
 'LabelPropagation',
 'LabelSpreading',
 'LinearDiscriminantAnalysis',
 'LinearSVC',
 'LogisticRegression',
 'NearestCentroid',
 'PassiveAggressiveClassifier',
 'Perceptron',
 'QuadraticDiscriminantAnalysis',
 'RandomForestClassifier',
 'RidgeClassifier',
 'RidgeClassifierCV',
 'SGDClassifier',
 'SVC',
 'LGBMClassifier']

In [41]:
# Average each listed model's metrics over its rows in df (see get_avg_folds).
result = get_avg_folds(df, models)
result

Unnamed: 0,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,AdaBoostClassifier,0.64,0.408,0.344,0.543,0.408,0.64,0.328,0.536
1,BaggingClassifier,0.995,0.929,0.962,0.995,0.929,0.995,0.938,0.994
2,BernoulliNB,0.765,0.658,0.511,0.812,0.649,0.765,0.541,0.782
3,CalibratedClassifierCV,0.99,0.863,0.904,0.99,0.863,0.99,0.877,0.99
4,DecisionTreeClassifier,0.992,0.915,0.904,0.992,0.915,0.992,0.903,0.992
5,DummyClassifier,0.414,0.127,0.052,0.172,0.127,0.414,0.074,0.243
6,ExtraTreeClassifier,0.921,0.723,0.725,0.922,0.723,0.921,0.72,0.921
7,ExtraTreesClassifier,0.982,0.796,0.888,0.982,0.796,0.982,0.826,0.981
8,GaussianNB,0.92,0.897,0.706,0.945,0.886,0.92,0.729,0.93
9,KNeighborsClassifier,0.852,0.571,0.708,0.849,0.571,0.852,0.602,0.844


In [42]:
# Write the per-model averages for the GloVe run to the results folder.
result.to_excel("results/avg-10-Fold-glove-davidson.xlsx")

## BERT

In [43]:
# Load the per-fold BERT scores and discard the 'Unnamed: 0' column.
df = pd.read_excel("results/10-Fold-BERT-davidson.xlsx").drop(columns='Unnamed: 0')
df

Unnamed: 0,Fold,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,1,AdaBoostClassifier,0.642,0.357,0.262,0.434,0.357,0.642,0.301,0.515
1,1,BaggingClassifier,0.996,0.942,0.990,0.996,0.942,0.996,0.961,0.996
2,1,BernoulliNB,0.827,0.656,0.660,0.858,0.656,0.827,0.639,0.837
3,1,CalibratedClassifierCV,0.989,0.932,0.992,0.989,0.932,0.989,0.958,0.989
4,1,DecisionTreeClassifier,0.993,0.895,0.953,0.993,0.895,0.993,0.902,0.993
...,...,...,...,...,...,...,...,...,...,...
245,10,RidgeClassifier,0.948,0.595,0.597,0.922,0.595,0.948,0.595,0.934
246,10,RidgeClassifierCV,0.948,0.595,0.597,0.922,0.595,0.948,0.595,0.934
247,10,SGDClassifier,0.979,0.711,0.732,0.975,0.711,0.979,0.721,0.977
248,10,SVC,0.981,0.797,0.864,0.981,0.797,0.981,0.824,0.981


In [44]:
# Collect every distinct model name once, in order of first appearance,
# rather than assuming the first 25 rows are exactly one complete fold.
models = df['Model'].drop_duplicates().to_list()
models

['AdaBoostClassifier',
 'BaggingClassifier',
 'BernoulliNB',
 'CalibratedClassifierCV',
 'DecisionTreeClassifier',
 'DummyClassifier',
 'ExtraTreeClassifier',
 'ExtraTreesClassifier',
 'GaussianNB',
 'KNeighborsClassifier',
 'LabelPropagation',
 'LabelSpreading',
 'LinearDiscriminantAnalysis',
 'LinearSVC',
 'LogisticRegression',
 'NearestCentroid',
 'PassiveAggressiveClassifier',
 'Perceptron',
 'QuadraticDiscriminantAnalysis',
 'RandomForestClassifier',
 'RidgeClassifier',
 'RidgeClassifierCV',
 'SGDClassifier',
 'SVC',
 'LGBMClassifier']

In [45]:
# Average each listed model's metrics over its rows in df (see get_avg_folds).
result = get_avg_folds(df, models)
result

Unnamed: 0,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,AdaBoostClassifier,0.613,0.411,0.343,0.555,0.406,0.613,0.316,0.508
1,BaggingClassifier,0.996,0.943,0.979,0.996,0.943,0.996,0.952,0.996
2,BernoulliNB,0.825,0.656,0.655,0.848,0.646,0.825,0.617,0.831
3,CalibratedClassifierCV,0.991,0.852,0.903,0.99,0.841,0.991,0.861,0.99
4,DecisionTreeClassifier,0.994,0.942,0.946,0.994,0.942,0.994,0.931,0.994
5,DummyClassifier,0.414,0.127,0.052,0.172,0.127,0.414,0.074,0.243
6,ExtraTreeClassifier,0.904,0.731,0.729,0.903,0.722,0.904,0.716,0.903
7,ExtraTreesClassifier,0.989,0.824,0.89,0.988,0.824,0.989,0.846,0.988
8,GaussianNB,0.307,0.407,0.459,0.834,0.403,0.307,0.146,0.31
9,KNeighborsClassifier,0.912,0.661,0.773,0.91,0.661,0.912,0.692,0.909


In [46]:
# Write the per-model averages for the BERT run to the results folder.
result.to_excel("results/avg-10-Fold-BERT-davidson.xlsx")

## TFIDF

In [47]:
# Load the per-fold TF-IDF scores and discard the 'Unnamed: 0' column.
df = pd.read_excel("results/10-Fold-tfdif-davidson.xlsx").drop(columns='Unnamed: 0')
df

Unnamed: 0,Fold,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,1,AdaBoostClassifier,0.628,0.262,0.373,0.546,0.262,0.628,0.233,0.513
1,1,BaggingClassifier,0.995,0.940,0.988,0.995,0.940,0.995,0.959,0.995
2,1,BernoulliNB,0.883,0.753,0.687,0.913,0.753,0.883,0.697,0.888
3,1,CalibratedClassifierCV,0.990,0.910,0.992,0.990,0.910,0.990,0.944,0.990
4,1,DecisionTreeClassifier,0.993,0.927,0.988,0.993,0.927,0.993,0.952,0.993
...,...,...,...,...,...,...,...,...,...,...
245,10,RidgeClassifier,0.946,0.594,0.596,0.920,0.594,0.946,0.594,0.931
246,10,RidgeClassifierCV,0.946,0.594,0.596,0.920,0.594,0.946,0.594,0.931
247,10,SGDClassifier,0.976,0.699,0.724,0.971,0.699,0.976,0.710,0.974
248,10,SVC,0.973,0.738,0.844,0.972,0.738,0.973,0.765,0.972


In [48]:
# Collect every distinct model name once, in order of first appearance,
# rather than assuming the first 25 rows are exactly one complete fold.
models = df['Model'].drop_duplicates().to_list()
models

['AdaBoostClassifier',
 'BaggingClassifier',
 'BernoulliNB',
 'CalibratedClassifierCV',
 'DecisionTreeClassifier',
 'DummyClassifier',
 'ExtraTreeClassifier',
 'ExtraTreesClassifier',
 'GaussianNB',
 'KNeighborsClassifier',
 'LabelPropagation',
 'LabelSpreading',
 'LinearDiscriminantAnalysis',
 'LinearSVC',
 'LogisticRegression',
 'NearestCentroid',
 'PassiveAggressiveClassifier',
 'Perceptron',
 'QuadraticDiscriminantAnalysis',
 'RandomForestClassifier',
 'RidgeClassifier',
 'RidgeClassifierCV',
 'SGDClassifier',
 'SVC',
 'LGBMClassifier']

In [49]:
# Average each listed model's metrics over its rows in df (see get_avg_folds).
result = get_avg_folds(df, models)
result

Unnamed: 0,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,AdaBoostClassifier,0.633,0.346,0.352,0.564,0.346,0.633,0.29,0.533
1,BaggingClassifier,0.996,0.95,0.979,0.996,0.95,0.996,0.956,0.995
2,BernoulliNB,0.883,0.774,0.755,0.911,0.774,0.883,0.74,0.888
3,CalibratedClassifierCV,0.99,0.876,0.922,0.989,0.876,0.99,0.884,0.989
4,DecisionTreeClassifier,0.995,0.945,0.949,0.995,0.933,0.995,0.931,0.994
5,DummyClassifier,0.414,0.127,0.052,0.172,0.127,0.414,0.074,0.243
6,ExtraTreeClassifier,0.968,0.851,0.837,0.968,0.839,0.968,0.832,0.968
7,ExtraTreesClassifier,0.994,0.928,0.974,0.994,0.928,0.994,0.942,0.994
8,GaussianNB,0.246,0.303,0.489,0.811,0.299,0.246,0.263,0.368
9,KNeighborsClassifier,0.802,0.484,0.628,0.805,0.484,0.802,0.515,0.792


In [50]:
# Write the per-model averages for the TF-IDF run to the results folder.
result.to_excel("results/avg-10-Fold-tfdif-davidson.xlsx")

## WORD2VEC

In [51]:
# Load the per-fold word2vec scores and discard the 'Unnamed: 0' column.
df = pd.read_excel("results/10-Fold-word2vec-davidson.xlsx").drop(columns='Unnamed: 0')
df

Unnamed: 0,Fold,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,1,AdaBoostClassifier,0.644,0.360,0.261,0.434,0.360,0.644,0.301,0.515
1,1,BaggingClassifier,0.993,0.914,0.961,0.993,0.914,0.993,0.933,0.993
2,1,BernoulliNB,0.800,0.665,0.552,0.828,0.665,0.800,0.578,0.809
3,1,CalibratedClassifierCV,0.986,0.880,0.966,0.986,0.880,0.986,0.902,0.986
4,1,DecisionTreeClassifier,0.988,0.867,0.942,0.988,0.867,0.988,0.884,0.988
...,...,...,...,...,...,...,...,...,...,...
245,10,RidgeClassifier,0.946,0.596,0.596,0.920,0.596,0.946,0.595,0.932
246,10,RidgeClassifierCV,0.946,0.596,0.596,0.920,0.596,0.946,0.595,0.932
247,10,SGDClassifier,0.975,0.707,0.724,0.970,0.707,0.975,0.715,0.972
248,10,SVC,0.973,0.800,0.856,0.973,0.800,0.973,0.822,0.973


In [52]:
# Collect every distinct model name once, in order of first appearance,
# rather than assuming the first 25 rows are exactly one complete fold.
models = df['Model'].drop_duplicates().to_list()
models

['AdaBoostClassifier',
 'BaggingClassifier',
 'BernoulliNB',
 'CalibratedClassifierCV',
 'DecisionTreeClassifier',
 'DummyClassifier',
 'ExtraTreeClassifier',
 'ExtraTreesClassifier',
 'GaussianNB',
 'KNeighborsClassifier',
 'LabelPropagation',
 'LabelSpreading',
 'LinearDiscriminantAnalysis',
 'LinearSVC',
 'LogisticRegression',
 'NearestCentroid',
 'PassiveAggressiveClassifier',
 'Perceptron',
 'QuadraticDiscriminantAnalysis',
 'RandomForestClassifier',
 'RidgeClassifier',
 'RidgeClassifierCV',
 'SGDClassifier',
 'SVC',
 'LGBMClassifier']

In [53]:
# Average each listed model's metrics over its rows in df (see get_avg_folds).
result = get_avg_folds(df, models)
result

Unnamed: 0,Model,Accuracy,Balanced Accuracy,Precision-Macro,Precision-Weighted,Recall-Macro,Recall-Weighted,F1-Score-Macro,F1-Score-Weighted
0,AdaBoostClassifier,0.636,0.395,0.308,0.501,0.39,0.636,0.317,0.525
1,BaggingClassifier,0.995,0.929,0.948,0.995,0.929,0.995,0.934,0.995
2,BernoulliNB,0.802,0.699,0.557,0.83,0.69,0.802,0.587,0.812
3,CalibratedClassifierCV,0.99,0.878,0.905,0.99,0.878,0.99,0.882,0.99
4,DecisionTreeClassifier,0.992,0.889,0.912,0.992,0.889,0.992,0.891,0.992
5,DummyClassifier,0.414,0.127,0.052,0.172,0.127,0.414,0.074,0.243
6,ExtraTreeClassifier,0.911,0.7,0.703,0.912,0.7,0.911,0.699,0.911
7,ExtraTreesClassifier,0.984,0.801,0.888,0.983,0.801,0.984,0.832,0.983
8,GaussianNB,0.919,0.897,0.704,0.944,0.885,0.919,0.728,0.929
9,KNeighborsClassifier,0.854,0.565,0.695,0.851,0.565,0.854,0.596,0.846


In [None]:
# Write the per-model averages for the word2vec run to the results folder.
result.to_excel("results/avg-10-Fold-word2vec-davidson.xlsx")