In [24]:
# Import 
from pathlib import Path
import bz2
import _pickle as cPickle
import pandas as pd

# Resolve the compressed pickle relative to the notebook's working directory.
current_dir = Path.cwd()
relative_path = 'data/compressed_preprocessed.pbz2'
frame_path = current_dir.joinpath(relative_path)

# NOTE(review): unpickling can execute arbitrary code; only load files from a
# trusted source.
# The context manager closes the bz2 handle as soon as the frame is loaded, and
# the handle gets its own name instead of temporarily occupying `df`.
with bz2.BZ2File(str(frame_path), 'rb') as compressed_file:
    df = cPickle.load(compressed_file)

In [25]:
# Summarize the column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 157336 entries, 1131838 to 1289200
Data columns (total 27 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   seconds_remaining      157336 non-null  float64
 1   calc_dist              157336 non-null  float64
 2   calc_angle             157336 non-null  float64
 3   event_zone_Def_Zone    157336 non-null  uint8  
 4   event_zone_Neu_Zone    157336 non-null  uint8  
 5   event_zone_Off_Zone    157336 non-null  uint8  
 6   shot_type_Backhand     157336 non-null  uint8  
 7   shot_type_Deflected    157336 non-null  uint8  
 8   shot_type_Slap         157336 non-null  uint8  
 9   shot_type_Snap         157336 non-null  uint8  
 10  shot_type_Tip_In       157336 non-null  uint8  
 11  shot_type_Wrap_around  157336 non-null  uint8  
 12  shot_type_Wrist        157336 non-null  uint8  
 13  is_playoff             157336 non-null  uint8  
 14  is_overtime            157336

In [26]:
# Lift the column display cap so the preview below shows every column.
pd.options.display.max_columns = None
df.head(5)

Unnamed: 0,seconds_remaining,calc_dist,calc_angle,event_zone_Def_Zone,event_zone_Neu_Zone,event_zone_Off_Zone,shot_type_Backhand,shot_type_Deflected,shot_type_Slap,shot_type_Snap,shot_type_Tip_In,shot_type_Wrap_around,shot_type_Wrist,is_playoff,is_overtime,is_event_team_home,is_rebound,is_extra_attacker,is_empty_net,is_goal,period,players_shooting,skaters_shooting,fwds_shooting,players_defending,skaters_defending,fwds_defending
1131838,0.975833,0.11576,-0.332952,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,6,5,3,6,5,3
1131839,0.959167,0.279206,0.060475,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,6,5,3,6,5,3
1131840,0.95,0.252486,-0.159478,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,6,5,3,6,5,3
1131841,0.9,0.191132,0.1358,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,6,5,3,6,5,3
1131842,0.836667,0.095056,-0.108,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,6,5,3,6,5,3


In [27]:
# Per-column summary statistics, transposed so that each feature is one row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
seconds_remaining,157336.0,0.493305,0.289879,0.0,0.238333,0.495833,0.745833,1.0
calc_dist,157336.0,0.181072,0.12077,0.0,0.093284,0.161315,0.245565,1.0
calc_angle,157336.0,-0.003648,0.192532,-1.0,-0.147584,0.0,0.143004,0.8858
event_zone_Def_Zone,157336.0,0.013665,0.116096,0.0,0.0,0.0,0.0,1.0
event_zone_Neu_Zone,157336.0,0.022423,0.148056,0.0,0.0,0.0,0.0,1.0
event_zone_Off_Zone,157336.0,0.963912,0.186511,0.0,1.0,1.0,1.0,1.0
shot_type_Backhand,157336.0,0.063412,0.243703,0.0,0.0,0.0,0.0,1.0
shot_type_Deflected,157336.0,0.019798,0.139307,0.0,0.0,0.0,0.0,1.0
shot_type_Slap,157336.0,0.161826,0.368292,0.0,0.0,0.0,0.0,1.0
shot_type_Snap,157336.0,0.135773,0.342549,0.0,0.0,0.0,0.0,1.0


In [28]:
# Feature matrix: every column except the target; target vector: `is_goal`.
X = df.drop(columns=['is_goal'])
y = df['is_goal']

In [29]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; stratifying on the target keeps the
# goal rate the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=0,
)

In [30]:
def goal_prob(y):
    """Return the fraction of positive (goal) labels in ``y``.

    Parameters
    ----------
    y : array-like
        0/1 labels, e.g. a pandas Series, a NumPy array or a plain list.

    Returns
    -------
    float
        Mean of the labels, i.e. the share of entries equal to 1.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty (the same exception type ``sum(y)/len(y)`` raised).
    """
    import numpy as np  # local import: numpy is not imported at the top of the notebook

    labels = np.asarray(y)
    if labels.size == 0:
        raise ZeroDivisionError("goal_prob() needs at least one label")
    # Average in float64: the builtin sum() over small-integer NumPy scalars
    # (e.g. uint8 predictions) can wrap around at 255 under NumPy 2 promotion rules.
    return float(labels.mean(dtype=np.float64))
# Report the goal rate over the full target column `y` (before the train/test split).
print(f"Probability of goal in sample: {goal_prob(y):.3f}")

Probability of goal in sample: 0.051


In [31]:
from sklearn import metrics

# Adapted from code found at https://imbalanced-learn.org/.
# Original page has been updated. The code can be found using the Wayback Machine at
# https://web.archive.org/web/20200919051559/https://imbalanced-learn.org/stable/auto_examples/applications/plot_impact_imbalanced_classes.html
def evaluate_classifier(clf, df_scores, clf_name=None):
    """Fit ``clf`` on the training split and add its test-split metrics to a table.

    The estimator is fitted on the notebook-level ``X_train``/``y_train`` and
    evaluated on ``X_test``/``y_test``.

    Parameters
    ----------
    clf : estimator or Pipeline
        Object exposing ``fit`` and ``predict``. It is fitted in place.
    df_scores : pandas.DataFrame
        Score table accumulated so far, indexed by classifier name. The passed
        frame is not modified; an updated table is returned.
    clf_name : str, optional
        Row label for this run. Defaults to the class name of ``clf``, or of
        its final step when ``clf`` is a Pipeline.

    Returns
    -------
    pandas.DataFrame
        ``df_scores`` with the row for ``clf_name`` replaced or appended, with
        all values rounded to three decimals.
    """
    from sklearn.pipeline import Pipeline

    if clf_name is None:
        final_estimator = clf[-1] if isinstance(clf, Pipeline) else clf
        clf_name = final_estimator.__class__.__name__

    clf.fit(X_train, y_train)
    # Predict once and derive every metric from the same predictions. Taking
    # accuracy from clf.score() and the rest from a second predict() call gives
    # inconsistent rows for estimators whose predictions are random
    # (e.g. a stratified dummy classifier).
    y_pred = clf.predict(X_test)

    scores = {
        'Accuracy': metrics.accuracy_score(y_test, y_pred),
        'Balanced Acc.': metrics.balanced_accuracy_score(y_test, y_pred),
        # zero_division=0 on all three: a model that never predicts a goal
        # scores 0 without emitting an undefined-metric warning.
        'F1': metrics.f1_score(y_test, y_pred, zero_division=0),
        'Precision': metrics.precision_score(y_test, y_pred, zero_division=0),
        'Recall': metrics.recall_score(y_test, y_pred, zero_division=0),
        'Pred. Goal %': goal_prob(y_pred)
    }
    clf_score = pd.DataFrame(scores, index=[clf_name])

    # Re-running a cell replaces the previous row instead of duplicating it.
    if clf_name in df_scores.index:
        df_scores = df_scores.drop(clf_name)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # Empty frames are skipped so the first call does not concatenate with an
    # all-empty table.
    frames = [frame for frame in (df_scores, clf_score) if not frame.empty]
    return pd.concat(frames).round(decimals=3)


df_scores = pd.DataFrame()

In [32]:
from sklearn.dummy import DummyClassifier

# Baseline that always predicts the most frequent class.
clf = DummyClassifier(strategy="most_frequent")
df_scores = evaluate_classifier(
    clf=clf,
    df_scores=df_scores,
    clf_name="Dummy (Most Frequent)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0


In [33]:
# Baseline that draws random predictions following the training class
# distribution. The draws are seeded so this row is reproducible on re-run.
clf = DummyClassifier(strategy="stratified", random_state=0)
df_scores = evaluate_classifier(clf, df_scores, "Dummy (Stratified)")
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05


In [34]:
from imblearn.pipeline import make_pipeline as make_pipeline_with_sampler
from sklearn.preprocessing import StandardScaler

def evaluate_classifier_with_sampler(clf, df_scores, sampler=None, clf_name=None):
    """Score ``clf`` behind a scaling step and an optional resampling step.

    Builds a pipeline of ``StandardScaler`` -> ``sampler`` -> ``clf`` and hands
    it to ``evaluate_classifier``. ``sampler`` may be left as ``None``, in which
    case ``None`` is passed as the middle step.

    Parameters
    ----------
    clf : estimator placed last in the pipeline.
    df_scores : score table passed through to ``evaluate_classifier``.
    sampler : optional resampler placed between the scaler and ``clf``.
    clf_name : optional row label passed through to ``evaluate_classifier``.

    Returns
    -------
    Whatever ``evaluate_classifier`` returns for the assembled pipeline.
    """
    steps = (StandardScaler(), sampler, clf)
    assembled = make_pipeline_with_sampler(*steps)
    return evaluate_classifier(assembled, df_scores, clf_name)

In [35]:
from sklearn.linear_model import LogisticRegression

# Plain logistic regression: no resampling step, default class weights.
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    clf_name="Logistic Regression",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003


In [36]:
# Logistic regression using class_weight='balanced' instead of a resampling step.
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(class_weight='balanced'),
    df_scores=df_scores,
    clf_name="Logistic Regression (Balanced)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357


In [37]:
from imblearn.under_sampling import RandomUnderSampler

# Logistic regression preceded by a seeded RandomUnderSampler step.
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    sampler=RandomUnderSampler(random_state=0),
    clf_name="Logistic Regression (Random Under-sampling)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361


In [38]:
from imblearn.over_sampling import RandomOverSampler

# Logistic regression preceded by a seeded RandomOverSampler step.
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    sampler=RandomOverSampler(random_state=0),
    clf_name="Logistic Regression (Random Over-sampling)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358


In [39]:
from imblearn.over_sampling import SMOTE

# Logistic regression preceded by a seeded SMOTE step.
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    sampler=SMOTE(random_state=0),
    clf_name="Logistic Regression (SMOTE)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367


In [40]:
from imblearn.over_sampling import ADASYN

# Logistic regression preceded by a seeded ADASYN step.
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    sampler=ADASYN(random_state=0),
    clf_name="Logistic Regression (ADASYN)",
)
df_scores

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s finished


Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393


In [41]:
from imblearn.under_sampling import TomekLinks

# Logistic regression preceded by a TomekLinks step (default settings).
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    sampler=TomekLinks(),
    clf_name="Logistic Regression (TomekLinks)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003


In [42]:
from imblearn.under_sampling import EditedNearestNeighbours 

# Logistic regression preceded by an EditedNearestNeighbours step (default settings).
df_scores = evaluate_classifier_with_sampler(
    clf=LogisticRegression(),
    df_scores=df_scores,
    sampler=EditedNearestNeighbours(),
    clf_name="Logistic Regression (ENN)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [43]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with no resampling step. The forest is seeded so the row is
# reproducible, matching the random_state=0 used by the other seeded
# components in this notebook.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores, clf_name="Random Forest"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [44]:
# Random forest using class_weight='balanced' instead of a resampling step.
# Seeded so the row is reproducible on re-run.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(class_weight='balanced', random_state=0),
    df_scores,
    clf_name="Random Forest (Balanced)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [45]:
# Random forest preceded by a seeded RandomUnderSampler step. The forest is
# seeded as well; otherwise the row changes on every run despite the seeded sampler.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores,
    sampler=RandomUnderSampler(random_state=0),
    clf_name="Random Forest (Random Under-sampling)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [46]:
# Random forest preceded by a seeded RandomOverSampler step. The forest is
# seeded as well; otherwise the row changes on every run despite the seeded sampler.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores,
    sampler=RandomOverSampler(random_state=0),
    clf_name="Random Forest (Random Over-sampling)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [47]:
# Random forest preceded by a seeded SMOTE step. The forest is seeded as well;
# otherwise the row changes on every run despite the seeded sampler.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores,
    sampler=SMOTE(random_state=0),
    clf_name="Random Forest (SMOTE)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [48]:
# Random forest preceded by a seeded ADASYN step. The forest is seeded as well;
# otherwise the row changes on every run despite the seeded sampler.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores,
    sampler=ADASYN(random_state=0),
    clf_name="Random Forest (ADASYN)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [50]:
# Random forest preceded by a TomekLinks step. The forest is seeded so the
# row is reproducible on re-run.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores,
    sampler=TomekLinks(),
    clf_name="Random Forest (TomekLinks)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [51]:
# Random forest preceded by an EditedNearestNeighbours step. The forest is
# seeded so the row is reproducible on re-run.
df_scores = evaluate_classifier_with_sampler(
    RandomForestClassifier(random_state=0),
    df_scores,
    sampler=EditedNearestNeighbours(),
    clf_name="Random Forest (ENN)"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [52]:
from sklearn.svm import LinearSVC

# Linear SVM (dual=False): no resampling step, default class weights.
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    clf_name="SVM",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [53]:
# Linear SVM using class_weight='balanced' instead of a resampling step.
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False, class_weight='balanced'),
    df_scores=df_scores,
    clf_name="SVM (Balanced)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [54]:
# Linear SVM preceded by a seeded RandomUnderSampler step.
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    sampler=RandomUnderSampler(random_state=0),
    clf_name="SVM (Random Under-sampling)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [55]:
# Linear SVM preceded by a seeded RandomOverSampler step.
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    sampler=RandomOverSampler(random_state=0),
    clf_name="SVM (Random Over-sampling)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [56]:
# Linear SVM preceded by a seeded SMOTE step.
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    sampler=SMOTE(random_state=0),
    clf_name="SVM (SMOTE)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [57]:
# Linear SVM preceded by a seeded ADASYN step.
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    sampler=ADASYN(random_state=0),
    clf_name="SVM (ADASYN)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [58]:
# Linear SVM preceded by a TomekLinks step (default settings).
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    sampler=TomekLinks(),
    clf_name="SVM (TomekLinks)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [59]:
# Linear SVM preceded by an EditedNearestNeighbours step (default settings).
df_scores = evaluate_classifier_with_sampler(
    clf=LinearSVC(dual=False),
    df_scores=df_scores,
    sampler=EditedNearestNeighbours(),
    clf_name="SVM (ENN)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [60]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

# Seeded histogram gradient boosting with no resampling step.
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    clf_name="GradBoost",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [62]:
# Seeded histogram gradient boosting preceded by a seeded RandomUnderSampler step.
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    sampler=RandomUnderSampler(random_state=0),
    clf_name="GradBoost (Random Under-sampling)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [63]:
# Seeded histogram gradient boosting preceded by a seeded RandomOverSampler step.
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    sampler=RandomOverSampler(random_state=0),
    clf_name="GradBoost (Random Over-sampling)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [64]:
# Seeded histogram gradient boosting preceded by a seeded SMOTE step.
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    sampler=SMOTE(random_state=0),
    clf_name="GradBoost (SMOTE)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [65]:
# Seeded histogram gradient boosting preceded by a seeded ADASYN step.
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    sampler=ADASYN(random_state=0),
    clf_name="GradBoost (ADASYN)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [66]:
# Seeded histogram gradient boosting preceded by a TomekLinks step (default settings).
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    sampler=TomekLinks(),
    clf_name="GradBoost (TomekLinks)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [67]:
# Seeded histogram gradient boosting preceded by an EditedNearestNeighbours step.
df_scores = evaluate_classifier_with_sampler(
    clf=HistGradientBoostingClassifier(random_state=0),
    df_scores=df_scores,
    sampler=EditedNearestNeighbours(),
    clf_name="GradBoost (ENN)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [68]:
from imblearn.ensemble import BalancedBaggingClassifier

# Balanced bagging with the library's default base learner.
# The explicit `base_estimator=None` was dropped: None is already the default,
# and the keyword was renamed to `estimator` in newer imbalanced-learn
# releases, where passing the old name warns or raises. Omitting it gives the
# same model on every version.
df_scores = evaluate_classifier_with_sampler(
    BalancedBaggingClassifier(random_state=0, n_jobs=-1),
    df_scores,
    clf_name="Balanced Bagging (Decision Tree)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [69]:
# Balanced bagging wrapped around a seeded HistGradientBoosting model.
# The wrapped estimator is passed positionally on purpose: it is the first
# parameter of BalancedBaggingClassifier both before and after the
# `base_estimator` -> `estimator` rename in imbalanced-learn, so this call
# does not depend on which keyword the installed release accepts.
df_scores = evaluate_classifier_with_sampler(
    BalancedBaggingClassifier(
        HistGradientBoostingClassifier(random_state=0),
        random_state=0,
        n_jobs=-1,
    ),
    df_scores,
    clf_name="Balanced Bagging (GradBoost)",
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [71]:
from imblearn.ensemble import RUSBoostClassifier

# RUSBoost with library defaults apart from the fixed seed; no external
# sampler is passed to the evaluation helper for this model.
rus_boost = RUSBoostClassifier(random_state=0)
df_scores = evaluate_classifier_with_sampler(rus_boost, df_scores, clf_name="RUS Boost")
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [72]:
from imblearn.ensemble import EasyEnsembleClassifier

# EasyEnsemble with a fixed seed and n_jobs=-1; evaluated without an
# external sampler, and df_scores is rebound to the helper's result.
easy_ensemble = EasyEnsembleClassifier(random_state=0, n_jobs=-1)
df_scores = evaluate_classifier_with_sampler(easy_ensemble, df_scores, clf_name="Easy Ensemble")
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006


In [73]:
from imblearn.ensemble import BalancedRandomForestClassifier

# Balanced random forest with a fixed seed and n_jobs=-1; evaluated without
# an external sampler, and df_scores is rebound to the helper's result.
balanced_forest = BalancedRandomForestClassifier(random_state=0, n_jobs=-1)
df_scores = evaluate_classifier_with_sampler(
    balanced_forest, df_scores, clf_name="Balanced Random Forest"
)
df_scores

Unnamed: 0,Accuracy,Balanced Acc.,F1,Precision,Recall,Pred. Goal %
Dummy (Most Frequent),0.949,0.5,0.0,0.0,0.0,0.0
Dummy (Stratified),0.904,0.507,0.065,0.065,0.064,0.05
Logistic Regression,0.949,0.512,0.049,0.494,0.026,0.003
Logistic Regression (Balanced),0.662,0.674,0.172,0.098,0.688,0.357
Logistic Regression (Random Under-sampling),0.659,0.673,0.171,0.097,0.688,0.361
Logistic Regression (Random Over-sampling),0.661,0.672,0.171,0.098,0.685,0.358
Logistic Regression (SMOTE),0.652,0.672,0.169,0.096,0.694,0.367
Logistic Regression (ADASYN),0.628,0.667,0.163,0.092,0.711,0.393
Logistic Regression (TomekLinks),0.949,0.514,0.057,0.476,0.03,0.003
Logistic Regression (ENN),0.949,0.527,0.101,0.479,0.057,0.006
