# Binary Classification with probability threshold for the "ok" class

In this notebook we try to improve the better-performing classifiers (SVC, Random Forest and the voting Ensemble), each with the feature set that worked best for it individually. To do so, we train a binary classifier for the classes "good" and "bad" on a training set that contains only "good" and "bad" samples, i.e. the "ok" samples are dropped. We then wrap the binary classifier in an architecture that predicts all three classes from the prediction probability and a threshold: if no class probability exceeds the threshold, we predict "ok". For each classifier we tune the threshold on the validation set and then evaluate on the test set. The same procedure is also applied to Logistic Regression at the end of the notebook.

In [0]:
# Mount Google Drive (Colab only); the feature files are read from /content/drive below.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# package imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm, metrics, model_selection
from ast import literal_eval
import time
from sklearn.multiclass import OneVsRestClassifier
import statistics
from sklearn.svm import SVC #Quelle Buch
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
import time

In [0]:
def train_test_valid_split(df,upsampling=True, print_distribution = False, drop_1 = False):
    """Split ``df`` into stratified train, validation and test partitions (70% / 15% / 15%).

    Both splits are stratified on the ``Reviewer_Score`` column and use a fixed
    ``random_state`` of 42.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with a ``Reviewer_Score`` column holding the class labels (0, 1, 2).
    upsampling : bool, default True
        Rebalance the *training* partition only: class 0 is randomly downsampled
        to the size of class 1, classes 1 and 2 are kept as they are, and the
        result is shuffled. Despite the parameter name no rows are duplicated,
        so class 2 keeps its original size.
    print_distribution : bool, default False
        Plot the class frequencies of the three partitions as bar charts
        (after rebalancing, before ``drop_1`` is applied).
    drop_1 : bool, default False
        Remove every class-1 ("ok") row from all three partitions.

    Returns
    -------
    tuple
        ``(train, valid, test)`` -- note that ``valid`` comes before ``test``.
    """
    # 70% training, then the remaining 30% is halved into 15% test / 15% validation.
    train, holdout = train_test_split(df, test_size=0.3, stratify=df["Reviewer_Score"], random_state=42)
    test, valid = train_test_split(holdout, test_size=0.5, stratify=holdout["Reviewer_Score"], random_state=42)

    if upsampling:
        train_0 = train[train["Reviewer_Score"] == 0]
        train_1 = train[train["Reviewer_Score"] == 1]
        train_2 = train[train["Reviewer_Score"] == 2]
        if len(train_0) > 0:
            # Count by label (not by position in np.unique) and cap at 1.0:
            # sampling without replacement cannot return more rows than exist.
            fraction = min(1.0, len(train_1) / len(train_0))
            train_0 = train_0.sample(frac=fraction, random_state=42)
        # DataFrame.append was removed in pandas 2.0; concat keeps the same row order.
        train = pd.concat([train_0, train_1, train_2])
        train = train.sample(frac=1, random_state=42)

    if print_distribution:
        # One bar series per partition, drawn on the same axes.
        for partition in (train, test, valid):
            unique, counts = np.unique(partition["Reviewer_Score"], return_counts=True)
            plt.bar(unique, counts)
        plt.title('Class Frequency')
        plt.xlabel('Class')
        plt.ylabel('Frequency')
        plt.show()

    if drop_1:
        train = train.loc[train['Reviewer_Score'] != 1]
        valid = valid.loc[valid['Reviewer_Score'] != 1]
        test = test.loc[test['Reviewer_Score'] != 1]
    return train,valid,test

def load_from_source(base_path="/content/drive/My Drive/Feature_generated_sets"):
  """Load all feature sets from Google Drive.

  Parameters
  ----------
  base_path : str
      Directory that contains the ``fast_text``, ``doc2vec`` and ``BOW``
      sub-folders. Defaults to the shared Google Drive folder.

  Returns
  -------
  list of list of pandas.DataFrame
      One inner list per feature type, in this order:
      ``[0]`` fast text (without / with stop-word removal),
      ``[1]`` Doc2Vec (pretrained / own-trained),
      ``[2]`` BOW (TF / TF-IDF).

  Raises
  ------
  ValueError
      If a configured file is neither ``.csv`` nor ``.pkl``.
  """
  # One list of relative file paths per feature type; the returned frames keep this layout.
  feature_filepaths = [
      ["/fast_text/fast_text_nonswr_features.pkl", "/fast_text/fast_text_swr_features.pkl"],
      ["/doc2vec/Pretrained_withScore.csv", "/doc2vec/Owntrained_withScore.csv"],
      ["/BOW/tf_561-woerter.pkl", "/BOW/tfidf_561-woerter.pkl"],
  ]
  dataframes = []
  for feature_type_filepaths in feature_filepaths:
    feature_type_dataframes = []
    for feature_filepath in feature_type_filepaths:
      full_path = base_path + feature_filepath
      if feature_filepath.endswith(".csv"):
        df = pd.read_csv(full_path)
      elif feature_filepath.endswith(".pkl"):
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        df = pd.read_pickle(full_path)
      else:
        # Previously an unknown extension silently re-used the previous frame.
        raise ValueError("Unsupported feature file type: " + feature_filepath)
      # Drop the index column that a to_csv round trip leaves behind.
      if 'Unnamed: 0' in df.columns:
        df = df.drop(columns='Unnamed: 0')
      feature_type_dataframes.append(df)
    dataframes.append(feature_type_dataframes)
  return dataframes

def split_dataframes(dataframes,test_boolean=False,test_size=100000000, upsampling = False,drop_1=False):
  """Split every feature frame into train / valid / test partitions.

  ``dataframes`` is a list of lists of DataFrames (one inner list per feature
  type). Each frame is passed to ``train_test_valid_split`` together with
  ``upsampling`` and ``drop_1``. When ``test_boolean`` is true, every resulting
  partition is cut down to its first ``test_size`` rows.

  Returns a list with the same nesting in which each frame is replaced by a
  dict with the keys ``"train"``, ``"valid"`` and ``"test"``.
  """
  def _maybe_truncate(partition):
    # Optional size reduction for quick experiments.
    if test_boolean:
      return partition[0:test_size]
    return partition

  result = []
  for feature_group in dataframes:
    group_splits = []
    for frame in feature_group:
      parts = train_test_valid_split(frame, upsampling=upsampling, drop_1=drop_1)
      train, valid, test = [_maybe_truncate(part) for part in parts]
      group_splits.append({"train": train, "valid": valid, "test":test})
    result.append(group_splits)
  return result

def trans_to_numpy_split(split_dataframes):
  """Convert already-split frames into feature and label numpy arrays, in place.

  ``split_dataframes`` is the nested list produced by ``split_dataframes()``:
  each innermost element is a dict with ``"train"``, ``"valid"`` and ``"test"``
  DataFrames. The layout of a frame is recognised by the column count of its
  ``"train"`` partition:

  * 2 columns   -- features are the lists stored in the ``Review`` column
  * 301 columns -- features are all columns up to and including ``'299'``
  * 562 columns -- features are all columns up to and including ``'yet'``

  Every recognised partition is replaced by ``{"features": ..., "label": ...}``
  with the label taken from ``Reviewer_Score``. Frames with any other column
  count are left untouched. The (mutated) input object is returned.
  """
  def _from_review_column(frame):
    features = np.array(frame["Review"].tolist())
    label = np.array(frame["Reviewer_Score"].tolist())
    return features, label

  def _columns_up_to(last_feature_column):
    def _extract(frame):
      features = np.array(frame.loc[:, :last_feature_column].values)
      label = np.array(frame["Reviewer_Score"].values)
      return features, label
    return _extract

  # column count of the frame -> how to pull features and label out of it
  extractors = {
      2: _from_review_column,
      301: _columns_up_to('299'),
      562: _columns_up_to('yet'),
  }

  for feature_group in split_dataframes:
    for partitions in feature_group:
      extract = extractors.get(len(partitions["train"].columns))
      if extract is None:
        continue
      for part_name in ("train", "valid", "test"):
        features, label = extract(partitions[part_name])
        partitions[part_name] = {"features": features, "label": label}
  return split_dataframes

def trans_to_numpy_unsplit(dataframes, test, test_size):
  """Convert unsplit frames into feature and label numpy arrays.

  ``dataframes`` is a list of lists of DataFrames. When ``test`` is true each
  frame is first cut down to its first ``test_size`` rows. The layout of a
  frame is recognised by its column count:

  * 2 columns   -- features are the lists stored in the ``Review`` column
  * 301 columns -- features are all columns up to and including ``'299'``
  * 562 columns -- features are all columns up to and including ``'yet'``

  Returns a new nested list in which every recognised frame is replaced by
  ``{"features": ..., "label": ...}`` (label from ``Reviewer_Score``); frames
  with any other column count are passed through as (possibly truncated)
  DataFrames.
  """
  def _convert(frame):
    n_columns = len(frame.columns)
    if n_columns == 2:
      features = np.array(frame["Review"].tolist())
      label = np.array(frame["Reviewer_Score"].tolist())
    elif n_columns in (301, 562):
      last_feature_column = '299' if n_columns == 301 else 'yet'
      features = np.array(frame.loc[:, :last_feature_column].values)
      label = np.array(frame["Reviewer_Score"].values)
    else:
      # Unknown layout: hand the frame back unchanged.
      return frame
    return {"features": features, "label": label}

  return [
      [_convert(frame[:test_size] if test else frame) for frame in feature_group]
      for feature_group in dataframes
  ]

In [0]:
# Load all feature sets from Google Drive once. The result is indexed as
# dataframes[feature_type][variant], following the file lists in load_from_source();
# the cells below read index 2, which holds the BOW TF ([2][0]) and BOW TF-IDF ([2][1]) frames.
dataframes = load_from_source()

## LinearSVC Test only binary classification
We removed all "ok" samples from the train, validation and test sets.

In [0]:
# Binary LinearSVC on the BOW TF features ("ok" samples dropped from every split).
# Hyper-parameters are grid-searched with 4-fold cross-validation (macro F1) on the
# training split; the refitted best model is then scored on the test split.
dicts = trans_to_numpy_split(split_dataframes(dataframes, False, upsampling=True, drop_1=True))
bow_tf = dicts[2][0]
train_x, train_y = bow_tf["train"]["features"], bow_tf["train"]["label"]
valid_x, valid_y = bow_tf["valid"]["features"], bow_tf["valid"]["label"]
test_x, test_y = bow_tf["test"]["features"], bow_tf["test"]["label"]

svc = svm.LinearSVC()
param_grid = {
    'dual': [False],
    "tol": [0.000001, 0.00001, 0.0001, 0.001],
    'C': [0.01, 0.1, 1, 10],
    "multi_class": ["ovr", "crammer_singer"],
}
grid_svm = model_selection.GridSearchCV(
    svc,
    param_grid=param_grid,
    scoring="f1_macro",
    cv=4,
    n_jobs=-1,
)

print("Training now")
time1 = time.time()
grid_svm.fit(train_x, train_y)
print(time.time()-time1)

# Reporting layout taken from
# https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_digits.html
print("Best parameters set found on development set:")
print()
print(grid_svm.best_params_)
print()
print("Detailed classification report:")
print()
y_true = test_y
y_pred = grid_svm.predict(test_x)
print(metrics.classification_report(y_true, y_pred))
print()

## SVC binary classification

In [0]:
# Train the binary SVC classifier and find the optimal threshold value.
# We use the BOW TF data set; every split is cut down to its first 20000 rows.
# The classifier is trained on the train split without "ok" samples, while the
# validation/test splits keep all three classes.
# Output: one classification report per threshold in the range [0.5, 0.8],
# followed by the training time and the predict_proba time in seconds.
dicts_train = trans_to_numpy_split(split_dataframes(dataframes, True, 20000, upsampling=True, drop_1=True))
dicts_valid = trans_to_numpy_split(split_dataframes(dataframes, True, 20000))
train_x, train_y = dicts_train[2][0]["train"]["features"],dicts_train[2][0]["train"]["label"]
valid_x, valid_y, test_x, test_y = dicts_valid[2][0]["valid"]["features"],dicts_valid[2][0]["valid"]["label"], dicts_valid[2][0]["test"]["features"],dicts_valid[2][0]["test"]["label"]
svc = svm.SVC(probability=True)
t0 = time.time()
svc.fit(train_x, train_y)
t1 = time.time()
prediction_proba = svc.predict_proba(valid_x)
# Stop the clock right after predicting, so the threshold sweep and the report
# printing below are not counted as prediction time.
t2 = time.time()
predictions = []
confidence_levels = np.arange(0.5, 0.83, 0.03)
for level in confidence_levels:
  # Column 0 is the probability of class 0, the remaining column that of class 2.
  # Predict class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else fall back to class 1 ("ok").
  exceeds_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  exceeds_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(exceeds_class_2, 2, np.where(exceeds_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(valid_y, prediction, output_dict=True)
  print(report)
time_train = t1-t0
time_predict = t2-t1
print(time_train)
print(time_predict)



Report 0.5
{'0': {'precision': 0.7445170321978535, 'recall': 0.8406955299903398, 'f1-score': 0.789688595586719, 'support': 11387}, '1': {'precision': 0.32558139534883723, 'recall': 0.005345551737304314, 'f1-score': 0.010518407212622089, 'support': 5238}, '2': {'precision': 0.401218820861678, 'recall': 0.8388148148148148, 'f1-score': 0.5428051001821493, 'support': 3375}, 'accuracy': 0.6216, 'macro avg': {'precision': 0.4904390828027896, 'recall': 0.5616186321808196, 'f1-score': 0.4476707009938301, 'support': 20000}, 'weighted avg': {'precision': 0.5768662157441166, 'recall': 0.6216, 'f1-score': 0.5439623334020219, 'support': 20000}}
Report 0.53
{'0': {'precision': 0.7494042891183479, 'recall': 0.8285764468253272, 'f1-score': 0.787004212370188, 'support': 11387}, '1': {'precision': 0.3751987281399046, 'recall': 0.045055364642993506, 'f1-score': 0.08044997443327083, 'support': 5238}, '2': {'precision': 0.41085385636336824, 'recall': 0.8254814814814815, 'f1-score': 0.5486411973217802, 'sup

In [0]:
# We test the classifier with the optimal threshold of 0.77.
# Output: The classification report of the classifier on the test set,
# followed by the training time and the test-set predict_proba time in seconds.
predict_start = time.time()
prediction_proba = svc.predict_proba(test_x)
# Time the prediction inside this cell. Reusing t1 from the training cell would
# also count the validation sweep and any idle time between the two cells.
t2 = time.time()
predictions = []
confidence_levels = [0.77]
for level in confidence_levels:
  # Predict class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else fall back to class 1 ("ok").
  exceeds_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  exceeds_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(exceeds_class_2, 2, np.where(exceeds_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(test_y, prediction, output_dict=True)
  print(report)
# t0 / t1 bracket svc.fit() in the training cell above.
time_train = t1-t0
time_predict = t2-predict_start
print(time_train)
print(time_predict)

## Random Forest binary classification

In [0]:
# Train the binary Random Forest classifier and find the optimal threshold value.
# We use the BOW TF-IDF data set. The classifier is trained on the train split
# without "ok" samples, while the validation/test splits keep all three classes.
# Output: one classification report per threshold in the range [0.5, 0.8]
# (RandomForestClassifier is already imported in the imports cell at the top.)
dicts_train = trans_to_numpy_split(split_dataframes(dataframes, upsampling=True, drop_1=True))
dicts_valid = trans_to_numpy_split(split_dataframes(dataframes))
train_x, train_y = dicts_train[2][1]["train"]["features"],dicts_train[2][1]["train"]["label"]
valid_x, valid_y, test_x, test_y = dicts_valid[2][1]["valid"]["features"],dicts_valid[2][1]["valid"]["label"], dicts_valid[2][1]["test"]["features"],dicts_valid[2][1]["test"]["label"]
classification = RandomForestClassifier(random_state=0, n_jobs=-1)
classification.fit(train_x, train_y)
prediction_proba = classification.predict_proba(valid_x)
predictions = []
confidence_levels = np.arange(0.5, 0.83, 0.03)
for level in confidence_levels:
  # Vectorised threshold rule (same result as looping over every row):
  # class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else class 1 ("ok").
  exceeds_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  exceeds_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(exceeds_class_2, 2, np.where(exceeds_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(valid_y, prediction, output_dict=True)
  print(report)


Report 0.5
{'0': {'precision': 0.7653844531612468, 'recall': 0.8230265097398916, 'f1-score': 0.7931595913238266, 'support': 44097}, '1': {'precision': 0.41496598639455784, 'recall': 0.009042842318525474, 'f1-score': 0.017699970983654126, 'support': 20237}, '2': {'precision': 0.4128533658735001, 'recall': 0.9349811929070392, 'f1-score': 0.572785628629876, 'support': 13027}, 'accuracy': 0.6289474024379209, 'macro avg': {'precision': 0.5310679351431016, 'recall': 0.5890168483218187, 'f1-score': 0.46121506364578563, 'support': 77361}, 'weighted avg': {'precision': 0.6143543348063268, 'recall': 0.6289474024379209, 'f1-score': 0.5531964581063377, 'support': 77361}}
Report 0.53
{'0': {'precision': 0.7756444209039548, 'recall': 0.7970156700002268, 'f1-score': 0.786184835978481, 'support': 44097}, '1': {'precision': 0.40143027413587606, 'recall': 0.08321391510599398, 'f1-score': 0.13785199738048462, 'support': 20237}, '2': {'precision': 0.4335104473325196, 'recall': 0.9269210102095647, 'f1-scor

In [0]:
# Evaluate the Random Forest with the optimal threshold of 0.74.
# Output: The classification report of the classifier on the test set.
confidence_levels = [0.74]
prediction_proba = classification.predict_proba(test_x)
predictions = []
for level in confidence_levels:
  # class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else class 1 ("ok")
  picks_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  picks_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(picks_class_2, 2, np.where(picks_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(test_y, prediction, output_dict=True)
  print(report)


Report 0.74
{'0': {'precision': 0.8392571559123992, 'recall': 0.5831367924528302, 'f1-score': 0.6881380879106176, 'support': 44096}, '1': {'precision': 0.37742771550708537, 'recall': 0.5540841033750061, 'f1-score': 0.4490049253193449, 'support': 20237}, '2': {'precision': 0.6209957091635808, 'recall': 0.8109456555112067, 'f1-score': 0.7033720581871444, 'support': 13028}, 'accuracy': 0.6139010612582567, 'macro avg': {'precision': 0.6125601935276884, 'recall': 0.6493888504463478, 'f1-score': 0.6135050238057023, 'support': 77361}, 'weighted avg': {'precision': 0.6816900030352914, 'recall': 0.6139010612582567, 'f1-score': 0.6281483043427089, 'support': 77361}}


### Error analysis of the Random Forest binary classification
We use this code to compare the predictions and the ground-truth classes on the test set with the raw textual reviews.
Output: the predictions, the ground-truth classes from the test set, the raw textual reviews and the ground-truth classes from the raw data set (to confirm that both are in the same order).

In [0]:
# Import the raw data and split it exactly like the feature sets (same function, same
# seed), so that row k of the raw test split is expected to match row k of test_y.
# NOTE: `prediction` and `test_y` are whatever the most recently executed test cell
# produced -- run the Random Forest test cell directly before this one.
df =  pd.read_csv("/content/drive/My Drive/Feature_generated_sets/Hotel_reviews_features_selected.csv")
if 'Unnamed: 0' in df.columns:
  # keyword form: the positional axis argument was removed in pandas 2.0
  df = df.drop(columns='Unnamed: 0')
pd.set_option('display.max_colwidth', None)
train, valid, test  =  train_test_valid_split(df)


# Display the data in the range of i and j
i = 600
j = 605
# Predicted class
print("Prediction: " + str(prediction[i:j]))
# True class
print("True class :" + str(test_y[i:j]))
# Review text (first column) and its label in the raw data (second column)
print("Raw data")
print(test.iloc[i:j,0])
print(test.iloc[i:j,1])

Prediction: [0, 2, 2, 0, 1]
True class :[1 2 1 0 1]
Raw data
316204     I liked the style of the hotel the room was comfortable and uniquely designed Typically hotel rooms are very plain i e neutral colors typical layout I thought that the room looked nice The layout wallpaper and furniture arrangement was visually pleasing The staff was also friendly and helpful   I did not love the location the building was a little confusing at first 
457317                                                                                                                                                                                                                                                                                                                             Good breakfast and comfortable room  Poor staff attention
105673                                                                                                                                                                           

## Ensemble with binary classification

In [0]:
# Train the binary Ensemble classifier (soft voting over Logistic Regression,
# Random Forest and Multinomial Naive Bayes) and find the optimal threshold value.
# We use the BOW TF-IDF data set.
# Output: the training time and one classification report per threshold in the range [0.5, 0.8]
dicts_train = trans_to_numpy_split(split_dataframes(dataframes, upsampling=True, drop_1=True))
dicts_valid = trans_to_numpy_split(split_dataframes(dataframes))
tfidf_train = dicts_train[2][1]["train"]
tfidf_eval = dicts_valid[2][1]
train_x, train_y = tfidf_train["features"], tfidf_train["label"]
valid_x, valid_y = tfidf_eval["valid"]["features"], tfidf_eval["valid"]["label"]
test_x, test_y = tfidf_eval["test"]["features"], tfidf_eval["test"]["label"]

firstTime = time.time()
rf_estimator = RandomForestClassifier(random_state=0, n_jobs=-1)
lg_estimator = LogisticRegression(random_state=0, n_jobs=-1)
mnb_estimator = MultinomialNB()
estimators = [('lr', lg_estimator), ('rf', rf_estimator), ('mnb', mnb_estimator)]

# Soft voting needs predict_proba() on every estimator (an SVM would have to be
# created with probability=True; whether KNN works here was not checked).
voting_clf = VotingClassifier(estimators=estimators, voting='soft', n_jobs=-1)
voting_clf = voting_clf.fit(train_x, train_y)
print ("--- %s seconds ---" % round(time.time()-firstTime,4))

prediction_proba = voting_clf.predict_proba(valid_x)
predictions = []
confidence_levels = np.arange(0.5, 0.83, 0.03)
for level in confidence_levels:
  # class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else class 1 ("ok")
  picks_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  picks_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(picks_class_2, 2, np.where(picks_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(valid_y, prediction, output_dict=True)
  print(report)

--- 77.7416 seconds ---
Report 0.5
{'0': {'precision': 0.7613748967795211, 'recall': 0.8363607501644102, 'f1-score': 0.7971081837535255, 'support': 44097}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 20237}, '2': {'precision': 0.40541475052729853, 'recall': 0.9000537345513165, 'f1-score': 0.559025460093449, 'support': 13027}, 'accuracy': 0.6283010819405127, 'macro avg': {'precision': 0.3889298824356065, 'recall': 0.5788048282385756, 'f1-score': 0.45204454794899157, 'support': 77361}, 'weighted avg': {'precision': 0.5022645361151699, 'recall': 0.6283010819405127, 'f1-score': 0.5484999450319487, 'support': 77361}}


  _warn_prf(average, modifier, msg_start, len(result))


Report 0.53
{'0': {'precision': 0.7712648365564164, 'recall': 0.8148853663514525, 'f1-score': 0.7924752999294283, 'support': 44097}, '1': {'precision': 0.3851235625152924, 'recall': 0.07777832682709888, 'f1-score': 0.12941950337115607, 'support': 20237}, '2': {'precision': 0.42884982947944383, 'recall': 0.8784063867352422, 'f1-score': 0.5763283807605138, 'support': 13027}, 'accuracy': 0.6327606933726296, 'macro avg': {'precision': 0.5284127428503842, 'recall': 0.5903566933045978, 'f1-score': 0.4994077280203661, 'support': 77361}, 'weighted avg': {'precision': 0.6125933966841043, 'recall': 0.6327606933726296, 'f1-score': 0.582627882355157, 'support': 77361}}
Report 0.56
{'0': {'precision': 0.7842955807339864, 'recall': 0.7880127899857133, 'f1-score': 0.7861497912966755, 'support': 44097}, '1': {'precision': 0.3866035675282126, 'recall': 0.15743440233236153, 'f1-score': 0.22375166795421025, 'support': 20237}, '2': {'precision': 0.4489401144515193, 'recall': 0.855147002379673, 'f1-score':

In [0]:
# Evaluate the Ensemble with the optimal threshold of 0.71.
# Output: The classification report of the classifier on the test set.
confidence_levels = [0.71]
prediction_proba = voting_clf.predict_proba(test_x)
predictions = []
for level in confidence_levels:
  # class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else class 1 ("ok")
  picks_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  picks_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(picks_class_2, 2, np.where(picks_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(test_y, prediction, output_dict=True)
  print(report)

Report 0.71
{'0': {'precision': 0.8381532217148655, 'recall': 0.6368831640058055, 'f1-score': 0.7237864515547079, 'support': 44096}, '1': {'precision': 0.37131595125302863, 'recall': 0.5073874586154075, 'f1-score': 0.4288160367508875, 'support': 20237}, '2': {'precision': 0.5692241219677797, 'recall': 0.7078599938593798, 'f1-score': 0.6310171405111362, 'support': 13028}, 'accuracy': 0.6149610268740063, 'macro avg': {'precision': 0.592897764978558, 'recall': 0.6173768721601975, 'f1-score': 0.5945398762722439, 'support': 77361}, 'weighted avg': {'precision': 0.6707433620460242, 'recall': 0.6149610268740063, 'f1-score': 0.6310017813893718, 'support': 77361}}


## Logistic Regression binary classification

In [0]:
# Train the binary Logistic Regression classifier and find the optimal threshold value.
# We use the BOW TF-IDF data set.
# Output: the training time and one classification report per threshold,
# starting at 0.5 and increasing in steps of 0.03 while below 0.90
dicts_train = trans_to_numpy_split(split_dataframes(dataframes, upsampling=True, drop_1=True))
dicts_valid = trans_to_numpy_split(split_dataframes(dataframes))
tfidf_train = dicts_train[2][1]["train"]
tfidf_eval = dicts_valid[2][1]
train_x, train_y = tfidf_train["features"], tfidf_train["label"]
valid_x, valid_y = tfidf_eval["valid"]["features"], tfidf_eval["valid"]["label"]
test_x, test_y = tfidf_eval["test"]["features"], tfidf_eval["test"]["label"]

firstTime = time.time()
lg_estimator = LogisticRegression(random_state=0, n_jobs=-1)
lg_estimator.fit(train_x, train_y)
print ("--- %s seconds ---" % round(time.time()-firstTime,4))

prediction_proba = lg_estimator.predict_proba(valid_x)
predictions = []
confidence_levels = np.arange(0.5, 0.90, 0.03)
for level in confidence_levels:
  # class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else class 1 ("ok")
  picks_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  picks_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(picks_class_2, 2, np.where(picks_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(valid_y, prediction, output_dict=True)
  print(report)


--- 161.9474 seconds ---
Report 0.5
{'0': {'precision': 0.7230162550449883, 'recall': 0.8856158015284487, 'f1-score': 0.7960982968270631, 'support': 44097}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 20237}, '2': {'precision': 0.44189831670021845, 'recall': 0.7919705227604207, 'f1-score': 0.567273327101776, 'support': 13027}, 'accuracy': 0.6381768591409108, 'macro avg': {'precision': 0.38830485724840225, 'recall': 0.5591954414296231, 'f1-score': 0.4544572079762797, 'support': 77361}, 'weighted avg': {'precision': 0.4865430536106384, 'recall': 0.6381768591409108, 'f1-score': 0.5493131710724763, 'support': 77361}}


  _warn_prf(average, modifier, msg_start, len(result))


Report 0.53
{'0': {'precision': 0.7304290053151101, 'recall': 0.8725990430187994, 'f1-score': 0.7952096055881047, 'support': 44097}, '1': {'precision': 0.3811965811965812, 'recall': 0.05509709937243663, 'f1-score': 0.0962783870132113, 'support': 20237}, '2': {'precision': 0.46047067475638903, 'recall': 0.7690181929838029, 'f1-score': 0.5760285196791536, 'support': 13027}, 'accuracy': 0.6413050503483667, 'macro avg': {'precision': 0.5240320870893601, 'recall': 0.565571445125013, 'f1-score': 0.48917217076015657, 'support': 77361}, 'weighted avg': {'precision': 0.5936137658653211, 'recall': 0.6413050503483667, 'f1-score': 0.5754665428506011, 'support': 77361}}
Report 0.56
{'0': {'precision': 0.7382268233828909, 'recall': 0.8602852801777898, 'f1-score': 0.7945960098444781, 'support': 44097}, '1': {'precision': 0.39532744665194997, 'recall': 0.10619162919405051, 'f1-score': 0.16741323569508823, 'support': 20237}, '2': {'precision': 0.4740711885864537, 'recall': 0.7473708451677286, 'f1-score

In [0]:
# Evaluate Logistic Regression with the optimal threshold of 0.77.
# Output: The classification report of the classifier on the test set.
confidence_levels = [0.77]
prediction_proba = lg_estimator.predict_proba(test_x)
predictions = []
for level in confidence_levels:
  # class 2 if its probability exceeds the threshold, else class 0 if its
  # probability does, else class 1 ("ok")
  picks_class_2 = (prediction_proba[:, 1:] > level).any(axis=1)
  picks_class_0 = prediction_proba[:, 0] > level
  prediction = np.where(picks_class_2, 2, np.where(picks_class_0, 0, 1)).tolist()
  predictions.append(prediction)
  print("Report " + str(level))
  report = metrics.classification_report(test_y, prediction, output_dict=True)
  print(report)

Report 0.77
{'0': {'precision': 0.8002054314059525, 'recall': 0.7243514150943396, 'f1-score': 0.7603913726610484, 'support': 44096}, '1': {'precision': 0.38119887778085115, 'recall': 0.47670109205909966, 'f1-score': 0.4236342877217636, 'support': 20237}, '2': {'precision': 0.5944142362827484, 'recall': 0.553807184525637, 'f1-score': 0.5733926726535802, 'support': 13028}, 'accuracy': 0.6308475847003012, 'macro avg': {'precision': 0.5919395151565173, 'recall': 0.5849532305596922, 'f1-score': 0.5858061110121308, 'support': 77361}, 'weighted avg': {'precision': 0.6559404488465713, 'recall': 0.6308475847003012, 'f1-score': 0.6408069284111859, 'support': 77361}}
