# Multilabel Classification: Classical Model Benchmarks

In [None]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

from sklearn.model_selection import cross_val_score, train_test_split

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import (accuracy_score, multilabel_confusion_matrix, 
                             ConfusionMatrixDisplay, classification_report, 
                             f1_score, recall_score, precision_score, 
                             roc_curve, roc_auc_score, hamming_loss, jaccard_score)


In [None]:
data_folder = "../assets/data/jigsaw_data"


def _load_split(label, file_name):
    """Read ``file_name`` from ``data_folder`` as a DataFrame, printing a message before and after."""
    print(f'Loading {label} Data')
    frame = pd.read_csv(f"{data_folder}/{file_name}")
    print(f'Done Loading {label} Data')
    return frame


# One CSV per split; the load order matches the order of the progress messages.
dev = _load_split('Development', "development_data.csv")
train_orig = _load_split('Training Original', "train_original_data.csv")
train_aug = _load_split('Training Augmented', "train_aug_data.csv")
val = _load_split('Validation', "validation_data.csv")
test = _load_split('Testing', "test_data.csv")

In [None]:
# Label columns used as the multilabel targets.
toxicity_classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Columns kept from each split: id, raw and cleaned text, the label columns, and 'neutral'.
required_cols = ['id', 'comment_text', 'cleaned_comment_text', *toxicity_classes, 'neutral']

In [None]:
# Narrow the four modelling splits to the required columns (``dev`` is left as loaded).
train_orig, train_aug, val, test = (
    split[required_cols] for split in (train_orig, train_aug, val, test)
)

In [None]:
# Original training split: cleaned text is the feature, the toxicity columns are the targets.
X_train_orig, y_train_orig = train_orig["cleaned_comment_text"], train_orig[toxicity_classes]

# Show both shapes as the cell output.
(X_train_orig.shape, y_train_orig.shape)

In [None]:
# Augmented training split: cleaned text is the feature, the toxicity columns are the targets.
X_train_aug, y_train_aug = train_aug["cleaned_comment_text"], train_aug[toxicity_classes]

# Show both shapes as the cell output.
(X_train_aug.shape, y_train_aug.shape)

In [None]:
# Validation split: cleaned text is the feature, the toxicity columns are the targets.
X_val, y_val = val["cleaned_comment_text"], val[toxicity_classes]

# Show both shapes as the cell output.
(X_val.shape, y_val.shape)

In [None]:
# Test split: cleaned text is the feature, the toxicity columns are the targets.
X_test, y_test = test["cleaned_comment_text"], test[toxicity_classes]

# Show both shapes as the cell output.
(X_test.shape, y_test.shape)

In [None]:
def custom_jaccard_score(y_true, y_pred):
    """Return the mean per-row Jaccard index of two label-indicator matrices, in percent.

    For each row the index is ``sum(min(true, pred)) / sum(max(true, pred))``,
    i.e. |intersection| / |union| of the active labels.

    Rows in which neither ``y_true`` nor ``y_pred`` has any active label have an
    empty union (0/0), so their index is undefined. Those rows are left out of
    the mean. This is what the element-wise pandas computation already did
    (NaN rows are skipped by ``Series.mean``); plain NumPy inputs used to
    return NaN for the whole score instead.

    Parameters
    ----------
    y_true, y_pred : 2-D array-like with the same shape (rows are samples,
        columns are labels). Inputs are converted with ``np.asarray``, so
        pandas objects are compared positionally.

    Returns
    -------
    float
        Mean Jaccard index over the rows with a non-empty union, multiplied by
        100. NaN when no row has a non-empty union.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    intersection = np.minimum(y_true, y_pred).sum(axis=1)
    union = np.maximum(y_true, y_pred).sum(axis=1)

    # Only rows with at least one active label on either side have a defined index.
    defined = union > 0
    if not defined.any():
        return float('nan')

    return (intersection[defined] / union[defined]).mean() * 100

def _split_metrics(y_true, y_pred, split):
    """Return the prediction-based metrics for one split, keyed as '<Metric> (<split>)'."""
    # No ``labels=`` argument: for multilabel indicator targets scikit-learn
    # interprets ``labels`` as column indices, so passing the unique values of a
    # 0/1 prediction matrix would restrict scoring to the first two label columns.
    # Classes that are never predicted may trigger an UndefinedMetricWarning.
    return {
        f'Recall ({split})': recall_score(y_true, y_pred, average='weighted'),
        f'Precision ({split})': precision_score(y_true, y_pred, average='weighted'),
        f'F1-score ({split})': f1_score(y_true, y_pred, average='weighted'),
        # NOTE(review): AUC is computed from hard predictions, not decision scores or probabilities.
        f'AUC-ROC ({split})': roc_auc_score(y_true, y_pred),
        f'Jacard score ({split})': jaccard_score(y_true, y_pred, average='weighted'),
        f'Hamming Loss ({split})': hamming_loss(y_true, y_pred),
    }


def evaluate_model(X_train, y_train, X_val, y_val, X_test, y_test, y_pred_val, y_pred_test, classifier, clf):
    """Collect train/validation/test metrics for one fitted model into a dict.

    Parameters
    ----------
    X_train, X_val, X_test : feature matrices passed to ``clf.score`` (and, for
        the training one, to ``cross_val_score``).
    y_train, y_val, y_test : matching target matrices.
    y_pred_val, y_pred_test : predictions for the validation and test features,
        compared against ``y_val`` / ``y_test``.
    classifier : the base estimator; only its class name is used, as the label
        of the result row.
    clf : the fitted model that is scored and cross-validated.

    Returns
    -------
    dict
        'Classifier', 'Accuracy (train|val|test)', 'CV Accuracy', and for each
        of the 'val' and 'test' splits: Recall, Precision, F1-score, AUC-ROC,
        'Jacard score' and 'Hamming Loss'. Recall, precision, F1 and Jaccard
        use ``average='weighted'`` over all label columns.
    """
    name = classifier.__class__.__name__
    print("-----------------------------------------------")
    print("Clf: ", name)

    model_results = {
        'Classifier': name,
        'Accuracy (train)': clf.score(X_train, y_train),
        'Accuracy (val)': clf.score(X_val, y_val),
        'Accuracy (test)': clf.score(X_test, y_test),
    }

    # Validation performance
    model_results.update(_split_metrics(y_val, y_pred_val, 'val'))

    # 5-fold cross-validation on the training data; this refits the model once per fold.
    model_results['CV Accuracy'] = cross_val_score(clf, X_train, y_train, cv=5).mean()

    # Test performance
    model_results.update(_split_metrics(y_test, y_pred_test, 'test'))

    return model_results


def run_model(classifier, X_train, y_train, X_val, y_val, X_test, y_test):
    """Fit a one-vs-rest wrapper around ``classifier`` and predict on the validation and test features.

    ``y_val`` and ``y_test`` are accepted but not used here.

    Returns a tuple ``(fitted_model, validation_predictions, test_predictions)``.
    """
    ovr = OneVsRestClassifier(classifier)
    ovr.fit(X_train, y_train)

    # Validation first, then test.
    predictions = [ovr.predict(features) for features in (X_val, X_test)]

    return (ovr, *predictions)

In [None]:
def run_it(X_train, y_train, X_val, y_val, X_test, y_test):
    """Vectorize the text with TF-IDF, then fit and evaluate every benchmark classifier.

    Parameters
    ----------
    X_train, X_val, X_test : iterables of raw text documents.
    y_train, y_val, y_test : matching multilabel target matrices.

    Returns
    -------
    (model_benchmark, models_object_list)
        ``model_benchmark`` is a DataFrame with one row per classifier holding
        the metrics from ``evaluate_model``; ``models_object_list`` holds the
        fitted one-vs-rest models in the same order.
    """
    # TFIDF Vectorizer
    tfidf = TfidfVectorizer(analyzer='word', max_features=10000, ngram_range=(1,3), stop_words='english')

    # Learn the vocabulary and IDF weights from the training text only...
    X_train = tfidf.fit_transform(X_train)

    # ...and project validation/test text into that same feature space.
    # Re-fitting here would give each split its own vocabulary, so its columns
    # would no longer match the features the models were trained on.
    X_val = tfidf.transform(X_val)
    X_test = tfidf.transform(X_test)

    benchmark_columns = ['Classifier', 'Accuracy (train)', 'Accuracy (val)', 'Accuracy (test)', 'CV Accuracy',
                         'Recall (val)', 'Precision (val)', 'F1-score (val)', 'AUC-ROC (val)', 'Jacard score (val)', 'Hamming Loss (val)',
                         'Recall (test)', 'Precision (test)', 'F1-score (test)', 'AUC-ROC (test)', 'Jacard score (test)', 'Hamming Loss (test)'
                         ]

    classifier_list = [
        SGDClassifier(),
        LogisticRegression(solver='lbfgs'),
        LinearSVC(),
        MultinomialNB(),
        DecisionTreeClassifier(),
        GradientBoostingClassifier(),
        AdaBoostClassifier(),
    ]

    result_rows = []
    models_object_list = []

    for classifier in classifier_list:
        clf, y_pred_val, y_pred_test = run_model(classifier, X_train, y_train, X_val, y_val, X_test, y_test)
        result_rows.append(
            evaluate_model(X_train, y_train, X_val, y_val, X_test, y_test, y_pred_val, y_pred_test, classifier, clf)
        )
        models_object_list.append(clf)

    # Build the table once from the collected rows; DataFrame.append was removed in pandas 2.0.
    model_benchmark = pd.DataFrame(result_rows, columns=benchmark_columns)

    return model_benchmark, models_object_list

In [None]:
# Benchmark all classifiers trained on the original training split.
model_benchmark_Original, models_object_list_Original = run_it(
    X_train_orig, y_train_orig, X_val, y_val, X_test, y_test
)

In [None]:
# Display the benchmark table for the models trained on the original data.
model_benchmark_Original

In [None]:
# Single sample sentence fed to every fitted model in the next cell.
text_to_predict = "Arabs are committing genocide in Iraq, but no protests in Europe"

In [None]:
# run_it does not return its vectorizer, so fit an identically configured one
# on the same training text. Fit it once: the fit does not depend on the model,
# so repeating it on every loop iteration only repeats the same work.
tfidf = TfidfVectorizer(analyzer='word', max_features=10000, ngram_range=(1,3), stop_words='english')
tfidf.fit(X_train_orig)

# The sample text is the same for every model, so vectorize it once as well.
x = [text_to_predict]
xt = tfidf.transform(x)

# Print each fitted model followed by its predicted label vector for the sample.
for model in models_object_list_Original:
    print(model)
    print(model.predict(xt))

In [None]:
# Benchmark all classifiers trained on the augmented training split,
# evaluated on the same validation and test data.
model_benchmark_Augmented, models_object_list_Augmented = run_it(
    X_train_aug, y_train_aug, X_val, y_val, X_test, y_test
)

In [None]:
# Display order: summary columns first, then each metric's validation and
# test values side by side.
rearranged_columns = [
    'Classifier', 'Accuracy (train)', 'Accuracy (val)', 'Accuracy (test)', 'CV Accuracy',
    *(
        f'{metric} ({split})'
        for metric in ('Recall', 'Precision', 'F1-score', 'AUC-ROC', 'Jacard score', 'Hamming Loss')
        for split in ('val', 'test')
    ),
]

In [None]:
# Benchmark for the original-data models, with columns in the rearranged order.
model_benchmark_Original[rearranged_columns]

In [None]:
# Benchmark for the augmented-data models, with columns in the rearranged order.
model_benchmark_Augmented[rearranged_columns]