In [1]:
import joblib
import pickle

import time
import datetime
import numpy as np
import pandas as pd
from keras.src.saving import load_model
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer
from nltk import word_tokenize
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.src.utils import pad_sequences

from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score
from sklearn.metrics import matthews_corrcoef

In [2]:
import warnings

warnings.filterwarnings('ignore')

In [3]:
device = torch.device("cpu")
device

device(type='cpu')

### Funktionen

#### Datenvorbereitung etc.

In [4]:
def create_dataset(tokenizer, max_len, labels_data, tweets_data):
    input_ids = []
    attention_masks = []

    for tweet in tweets_data:
        encoded_dict = tokenizer.encode_plus(
            tweet,
            add_special_tokens=True,
            max_length=256,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels_data)
    dataset = TensorDataset(input_ids, attention_masks, labels)

    return dataset

In [5]:
def get_dataloader(batch_size, dataset, sampler):
    if sampler == "random":
        return DataLoader(
            dataset,
            sampler=RandomSampler(dataset),
            batch_size=batch_size
        )

    if sampler == "sequential":
        return DataLoader(
            dataset,
            sampler=SequentialSampler(dataset),
            batch_size=batch_size
        )

In [6]:
def get_max_len(tweets, tokenizer):
    max_len = 0

    for sent in tweets:
        input_ids = tokenizer.encode(sent, add_special_tokens=True)
        max_len = max(max_len, len(input_ids))
    return max_len

In [7]:
def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [8]:
def vectorize_w2v(tweets, loaded_vectorizer, vector_size=300):
    x_tokenized = tweets.map(word_tokenize)

    def w2v_vector(x_tokenized, vector_size):
        vec = np.zeros(vector_size).reshape((1, vector_size))
        count = 0
        for word in x_tokenized:
            try:
                vec += loaded_vectorizer.wv[word].reshape((1, vector_size))
                count += 1
            except KeyError:

                continue
        if count != 0:
            vec /= count
        return vec

    tweets_w2v = np.zeros((len(x_tokenized), 300))
    for i in range(len(x_tokenized)):
        tweets_w2v[i, :] = w2v_vector(x_tokenized.iloc[i], 300)

    return tweets_w2v

#### Evaluationsfunktionen

##### Metriken

In [9]:
def get_predictions_bert(test_dataloader, model):
    predictions = []
    print("Prediction start.")
    total_t0 = time.time()
    batches_completed = 0

    for batch in test_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        with torch.no_grad():
            output = model(b_input_ids,
                           token_type_ids=None,
                           attention_mask=b_input_mask)
            logits = output.logits
            logits = logits.detach().cpu().numpy()
            pred_flat = np.argmax(logits, axis=1).flatten()

            predictions.extend(list(pred_flat))

        batches_completed += 1

    print("Total Prediction took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
    return predictions

In [10]:
def add_to_eval_df(model_name, y_prediction, y_true, sample_size, df, dataset_name, threshold):
    accuracy = accuracy_score(y_true=y_true, y_pred=y_prediction)
    precision = precision_score(y_true, y_prediction)
    recall = recall_score(y_true, y_prediction)
    f1 = f1_score(y_true, y_prediction)
    mcc = matthews_corrcoef(y_true, y_prediction)

    df.loc[len(df.index)] = [model_name, dataset_name, sample_size, threshold, accuracy, precision, recall, f1, mcc]

In [11]:
def eval_model_bert(tokenizer, model, model_name, labels, tweets, batch_size, data_size, df, dataset_name, threshold,
                    max_len=None):
    if max_len is None:
        max_len = get_max_len(tweets, tokenizer)
    dataset = create_dataset(tokenizer=tokenizer, max_len=max_len, labels_data=labels,
                             tweets_data=tweets)
    dataloader = get_dataloader(batch_size=batch_size, sampler="sequential", dataset=dataset)
    predictions = get_predictions_bert(test_dataloader=dataloader, model=model)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [12]:
def eval_model_ensemble(vectorizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold):
    X_vectorized = vectorizer.transform(tweets)
    predictions = (model.predict_proba(X_vectorized)[:, 1] >= threshold)
    # predictions = model.predict(X_vectorized)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [13]:
def eval_model_bayes(vectorizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold):
    X_vectorized = vectorize_w2v(tweets, vectorizer)
    predictions = (model.predict_proba(X_vectorized)[:, 1] >= threshold)
    # predictions = model.predict(X_vectorized)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [14]:
def eval_model_svm(vectorizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold):
    X_vectorized = vectorizer.transform(tweets)
    predictions = model.predict(X_vectorized)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [15]:
def eval_model_gru(tokenizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold, max_len):
    X_data = [str(x) for x in tweets]
    tweets_seq = tokenizer.texts_to_sequences(X_data)
    tweets_padded = pad_sequences(tweets_seq, padding='post', maxlen=max_len)
    predictions = (model.predict(tweets_padded) > threshold).astype(int)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

##### Wahrscheinlichkeiten

In [192]:
def get_predictions_with_probabilities_bert(test_dataloader, model, threshold):
    predictions = []
    predictions_proba = []
    print("Prediction start.")
    total_t0 = time.time()
    batches_completed = 0

    for batch in test_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        with torch.no_grad():
            output = model(b_input_ids,
                           token_type_ids=None,
                           attention_mask=b_input_mask)
            logits = output.logits
            logits = logits.detach().cpu().numpy()
            pred_flat = np.argmax(logits, axis=1).flatten()

            pred_with_thresh = [1 if value[1] > threshold else 0 for value in output.logits.softmax(dim=-1).tolist()]
            pred_with_thresh = np.array(pred_with_thresh)

            predictions.extend(list(pred_with_thresh))
            predictions_proba.extend(output.logits.softmax(dim=-1).tolist())

        batches_completed += 1

    print("Prediction finished.")
    print("Total Prediction took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
    return predictions, predictions_proba

In [180]:
def add_to_detail_eval_df(dataframe, predictions, probabilities=None):
    if probabilities is not None:
        dataframe["zero_proba"] = pd.Series((v[0] for v in probabilities), index=dataframe.index)
        dataframe["one_proba"] = pd.Series((v[1] for v in probabilities), index=dataframe.index)
    else:
        dataframe["zero_proba"] = 0
        dataframe["one_proba"] = 0
    dataframe["pred"] = predictions

    return dataframe

In [181]:
def add_pred_and_probas_bert(model, tokenizer, data, label_name, tweet_name, batch_size, threshold):
    x_data = data[tweet_name].values
    y_data = data[label_name].values
    max_len = get_max_len(x_data, tokenizer)
    test_dataset = create_dataset(tokenizer=tokenizer, max_len=max_len, labels_data=y_data,
                                  tweets_data=x_data)
    test_dataloader = get_dataloader(batch_size=batch_size, sampler="sequential", dataset=test_dataset)

    predictions, predictions_proba = get_predictions_with_probabilities_bert(test_dataloader=test_dataloader,
                                                                             model=model, threshold=threshold)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions,
                          probabilities=predictions_proba)

In [182]:
def add_pred_and_probas_ensemble(model, vectorizer, data, tweet_name, threshold):
    x_data = data[tweet_name].values

    X_vectorized = vectorizer.transform(x_data)
    probabilities = model.predict_proba(X_vectorized)
    predictions = (probabilities[:, 1] >= threshold).astype(int)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions,
                          probabilities=probabilities)

    return data

In [183]:
def add_pred_and_probas_nb(model, vectorizer, data, tweet_name, threshold):
    x_data = data[tweet_name]

    X_vectorized = vectorize_w2v(x_data, vectorizer)
    probabilities = model.predict_proba(X_vectorized)
    predictions = (probabilities[:, 1] >= threshold).astype(int)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions,
                          probabilities=probabilities)

    return data

In [184]:
def add_pred_and_probas_svm(model, vectorizer, data, tweet_name, threshold):
    x_data = data[tweet_name].values

    X_vectorized = vectorizer.transform(x_data)
    predictions = model.predict(X_vectorized)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions)

    return data

In [185]:
def add_pred_and_probas_gru(model, tokenizer, data, tweet_name, threshold, max_len):
    x_data = data[tweet_name].values

    X_data = [str(x) for x in x_data]
    tweets_seq = tokenizer.texts_to_sequences(X_data)
    tweets_padded = pad_sequences(tweets_seq, padding='post', maxlen=max_len)

    probabilities = model.predict(tweets_padded)
    predictions = (probabilities > threshold).astype(int)

    new_array = []
    for proba in probabilities:
        complement = 1 - proba[0]
        new_array.append([complement, proba[0]])
    probabilities = pd.Series(new_array)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions,
                          probabilities=probabilities)

    return data

### 1. Modelle

In [16]:
MODELS_PATH = "../../webapp/backend/app/models"

#### 1.1 Bayes

In [17]:
nb_vectorizer = joblib.load(MODELS_PATH + "/bayes/vectorizer_w2v.joblib")
nb_model = joblib.load(MODELS_PATH + "/bayes/model_nb_w2v.joblib")
nb_modelname = "GaussianNB"

#### 1.2 Ensemble

In [18]:
ensemble_vectorizer = joblib.load(MODELS_PATH + "/ensemble/tfidf_vectorizer_for_brf.joblib")
ensemble_model = joblib.load(MODELS_PATH + "/ensemble/tfidf_balancedrandomforest.joblib")
ensemble_modelname = "BalancedRandomForest"

#### 1.3 SVM

In [19]:
svm_vectorizer = joblib.load(MODELS_PATH + "/svm/tfidf_vectorizer_svm.joblib")
svm_model = joblib.load(MODELS_PATH + "/svm/model_svc.joblib")
svm_modelname = "LinearSVC"

#### 1.4 RNN-GRU

In [20]:
max_len_gru = 40
gru_model = load_model(MODELS_PATH + "/gru/gru-model_mixed-dataset.keras")
with open(MODELS_PATH + "/gru/tokenizer_mixed-dataset.pkl", 'rb') as f:
    gru_tokenizer = pickle.load(f)
gru_modelname = "RNN-GRU"

#### 1.5 RNN-LSTM

In [21]:
# todo

#### 1.6 BERT

In [22]:
PATH_BERT_TUNED = MODELS_PATH + "/bert/bert_mixed_imran"
bert_tokenizer = BertTokenizer.from_pretrained(PATH_BERT_TUNED, local_files_only=True)
bert_model = AutoModelForSequenceClassification.from_pretrained(PATH_BERT_TUNED, local_files_only=True)
bert_modelname = "bert_tuned_uncased"

#### 1.7 RoBERTa

In [23]:
PATH_ROBERTA_TUNED = MODELS_PATH + "/roberta/roberta_hate_mixed_cleaned"
roberta_tokenizer = AutoTokenizer.from_pretrained(PATH_ROBERTA_TUNED, local_files_only=True)
roberta_model = AutoModelForSequenceClassification.from_pretrained(PATH_ROBERTA_TUNED, local_files_only=True)
roberta_modelname = "roberta_hate_mixed_cleaned"

### 2. Daten

#### 2.1 Mixed-Train (cleaned)

In [24]:
df_mixed_train_cleaned = pd.read_csv("../../data/mixed_dataset/train_cleaned.csv", index_col=0)
df_mixed_train_cleaned = df_mixed_train_cleaned[df_mixed_train_cleaned.tweet_cleaned.notna()]
df_mixed_train_cleaned = df_mixed_train_cleaned.sample(n=100)

df_mixed_train_cleaned = df_mixed_train_cleaned[["label", "tweet_cleaned"]]
df_mixed_train_cleaned.head(1)

Unnamed: 0,label,tweet_cleaned
165527,0,really go crazy guy sometimes feel sorry idiot...


#### 2.2 Mixed-Train (uncleaned)

In [25]:
df_mixed_train_uncleaned = pd.read_csv("../../data/mixed_dataset/train_cleaned.csv", index_col=0)
df_mixed_train_uncleaned = df_mixed_train_uncleaned[df_mixed_train_uncleaned.tweet.notna()]
df_mixed_train_uncleaned = df_mixed_train_uncleaned.sample(n=100)

df_mixed_train_uncleaned = df_mixed_train_uncleaned[["label", "tweet"]]
print(df_mixed_train_uncleaned.head(1))
print("Daten: \t", len(df_mixed_train_uncleaned))

       label                                              tweet
21553      0   @user i can't believe how big my sons thing i...
Daten: 	 100


#### 2.3 Mixed-Test (cleaned)

In [26]:
df_mixed_test_cleaned = pd.read_csv("../../data/mixed_dataset/test_cleaned.csv", index_col=0)
df_mixed_test_cleaned = df_mixed_test_cleaned[df_mixed_test_cleaned.tweet_cleaned.notna()]
df_mixed_test_cleaned = df_mixed_test_cleaned.sample(n=100)

df_mixed_test_cleaned = df_mixed_test_cleaned[["label", "tweet_cleaned"]]
print(df_mixed_test_cleaned.head(1))
print("Daten: \t", len(df_mixed_test_cleaned))

       label                       tweet_cleaned
87977      0  right bi girl love see tit ashamed
Daten: 	 100


#### 2.4 Mixed-Test (cleaned-RNN)

In [27]:
df_mixed_test_cleaned_rnn = pd.read_csv("../../data/mixed_dataset/test_cleaned_rnn.csv", index_col=0)
df_mixed_test_cleaned_rnn = df_mixed_test_cleaned_rnn[df_mixed_test_cleaned_rnn.tweet_cleaned.notna()]
df_mixed_test_cleaned_rnn = df_mixed_test_cleaned_rnn.sample(n=100)

df_mixed_test_cleaned_rnn = df_mixed_test_cleaned_rnn[["label", "tweet_cleaned"]]
print(df_mixed_test_cleaned_rnn.head(1))
print("Daten: \t", len(df_mixed_test_cleaned_rnn))

        label tweet_cleaned
116414      0    bitch boys
Daten: 	 100


#### 2.5 Mixed-Test (uncleaned)

In [28]:
df_mixed_test_uncleaned = pd.read_csv("../../data/mixed_dataset/test_cleaned.csv", index_col=0)
df_mixed_test_uncleaned = df_mixed_test_uncleaned[df_mixed_test_uncleaned.tweet.notna()]
df_mixed_test_uncleaned = df_mixed_test_uncleaned.sample(n=100)

df_mixed_test_uncleaned = df_mixed_test_uncleaned[["label", "tweet", ]]
print(df_mixed_test_uncleaned.head(1))
print("Daten: \t", len(df_mixed_test_uncleaned))

       label                                              tweet
50893      0  OMG oh yes I want borrow your Girl and I would...
Daten: 	 100


#### 2.6 Mixed-Test (manual-labeled, uncleaned)

In [166]:
df_mixed_manual_uncleaned = pd.read_csv("../../data/manual_labeled/manual_labeled_group.csv", index_col=0)
df_mixed_manual_uncleaned = df_mixed_manual_uncleaned[df_mixed_manual_uncleaned.tweet.notna()]

df_mixed_manual_uncleaned = df_mixed_manual_uncleaned[["label_manual", "tweet", ]]
print(df_mixed_manual_uncleaned.head(1))
print("Daten: \t", len(df_mixed_manual_uncleaned))

   label_manual                                  tweet
0             1  Post the shit or kill yourself faggot
Daten: 	 400


### 3. Allgemeine Evaluation - Metriken

In [33]:
def evaluate(label_col, tweet_col, data, dataset_name, threshold):
    evaluation_df = pd.DataFrame(
        columns=["model", "dataset_name", "sample_size", "threshold", "accuracy", "precision", "recall", "f1_score",
                 "mcc"])
    eval_model_ensemble(vectorizer=ensemble_vectorizer,
                        model=ensemble_model,
                        model_name=ensemble_modelname,
                        labels=data[label_col],
                        tweets=data[tweet_col],
                        data_size=len(data),
                        df=evaluation_df,
                        dataset_name=dataset_name,
                        threshold=threshold)

    eval_model_bayes(vectorizer=nb_vectorizer,
                     model=nb_model,
                     model_name=nb_modelname,
                     labels=data[label_col],
                     tweets=data[tweet_col],
                     data_size=len(data),
                     df=evaluation_df,
                     dataset_name=dataset_name,
                     threshold=threshold)

    eval_model_svm(vectorizer=svm_vectorizer,
                   model=svm_model,
                   model_name=svm_modelname,
                   labels=data[label_col],
                   tweets=data[tweet_col],
                   data_size=len(data),
                   df=evaluation_df,
                   dataset_name=dataset_name,
                   threshold="None")

    # LSTM todo

    eval_model_gru(tokenizer=gru_tokenizer,
                   model=gru_model,
                   model_name=gru_modelname,
                   labels=data[label_col],
                   tweets=data[tweet_col],
                   data_size=len(data),
                   df=evaluation_df,
                   dataset_name=dataset_name,
                   threshold=threshold,
                   max_len=40)

    eval_model_bert(tokenizer=bert_tokenizer,
                    model=bert_model,
                    model_name=bert_modelname,
                    labels=data[label_col].values,
                    tweets=data[tweet_col].values,
                    batch_size=16,
                    data_size=len(data),
                    df=evaluation_df,
                    dataset_name=dataset_name,
                    threshold=threshold)

    eval_model_bert(tokenizer=roberta_tokenizer,
                    model=roberta_model,
                    model_name=roberta_modelname,
                    labels=data[label_col].values,
                    tweets=data[tweet_col].values,
                    batch_size=16,
                    data_size=len(data),
                    df=evaluation_df,
                    dataset_name=dataset_name,
                    threshold=threshold)

    return evaluation_df

#### 3.1 Alle Datensätze - Schwellwert 0.5

##### 3.1.1 Mixed-Train (cleaned) - df_mixed_train_cleaned

In [34]:
evaluation_mixed_train_cleaned = evaluate(label_col="label",
                                          tweet_col="tweet_cleaned",
                                          data=df_mixed_train_cleaned,
                                          dataset_name="df_mixed_train_cleaned",
                                          threshold=0.5,
                                          )

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:13 (h:mm:ss)


In [35]:
evaluation_mixed_train_cleaned.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
4,bert_tuned_uncased,df_mixed_train_cleaned,100,0.5,0.94,0.764706,0.866667,0.8125,0.779108
5,roberta_hate_mixed_cleaned,df_mixed_train_cleaned,100,0.5,0.93,0.722222,0.866667,0.787879,0.750825
0,BalancedRandomForest,df_mixed_train_cleaned,100,0.5,0.78,0.393939,0.866667,0.541667,0.479453
3,RNN-GRU,df_mixed_train_cleaned,100,0.5,0.77,0.382353,0.866667,0.530612,0.467047
2,LinearSVC,df_mixed_train_cleaned,100,,0.71,0.333333,0.933333,0.491228,0.436915
1,GaussianNB,df_mixed_train_cleaned,100,0.5,0.59,0.229167,0.733333,0.349206,0.213013


##### 3.1.2 Mixed-Train (uncleaned) - df_mixed_train_uncleaned

In [36]:
evaluation_mixed_train_uncleaned = evaluate(label_col="label",
                                            tweet_col="tweet",
                                            data=df_mixed_train_uncleaned,
                                            dataset_name="df_mixed_train_uncleaned",
                                            threshold=0.5)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)


In [37]:
evaluation_mixed_train_uncleaned.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
3,RNN-GRU,df_mixed_train_uncleaned,100,0.5,0.93,0.75,0.947368,0.837209,0.802172
4,bert_tuned_uncased,df_mixed_train_uncleaned,100,0.5,0.88,0.64,0.842105,0.727273,0.662266
5,roberta_hate_mixed_cleaned,df_mixed_train_uncleaned,100,0.5,0.88,0.652174,0.789474,0.714286,0.64388
2,LinearSVC,df_mixed_train_uncleaned,100,,0.84,0.555556,0.789474,0.652174,0.566702
0,BalancedRandomForest,df_mixed_train_uncleaned,100,0.5,0.85,0.583333,0.736842,0.651163,0.56343
1,GaussianNB,df_mixed_train_uncleaned,100,0.5,0.7,0.351351,0.684211,0.464286,0.315198


##### 3.1.3 Mixed-Test (cleaned) - df_mixed_test_cleaned

In [38]:
evaluation_mixed_test_cleaned = evaluate(label_col="label",
                                         tweet_col="tweet_cleaned",
                                         data=df_mixed_test_cleaned,
                                         dataset_name="df_mixed_test_cleaned",
                                         threshold=0.5)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)


In [39]:
evaluation_mixed_test_cleaned.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
4,bert_tuned_uncased,df_mixed_test_cleaned,100,0.5,0.84,0.55,0.611111,0.578947,0.481536
5,roberta_hate_mixed_cleaned,df_mixed_test_cleaned,100,0.5,0.85,0.6,0.5,0.545455,0.459243
3,RNN-GRU,df_mixed_test_cleaned,100,0.5,0.71,0.342857,0.666667,0.45283,0.311058
0,BalancedRandomForest,df_mixed_test_cleaned,100,0.5,0.73,0.354839,0.611111,0.44898,0.305036
2,LinearSVC,df_mixed_test_cleaned,100,,0.68,0.325,0.722222,0.448276,0.308162
1,GaussianNB,df_mixed_test_cleaned,100,0.5,0.55,0.271186,0.888889,0.415584,0.284722


##### 3.1.4 Mixed-Test (cleaned-RNN) - df_mixed_test_cleaned_rnn

In [40]:
evaluation_mixed_test_cleaned_rnn = evaluate(label_col="label",
                                             tweet_col="tweet_cleaned",
                                             data=df_mixed_test_cleaned_rnn,
                                             dataset_name="df_mixed_test_cleaned_rnn",
                                             threshold=0.5)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)


In [41]:
evaluation_mixed_test_cleaned_rnn.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
0,BalancedRandomForest,df_mixed_test_cleaned_rnn,100,0.5,0.88,0.625,0.833333,0.714286,0.650902
2,LinearSVC,df_mixed_test_cleaned_rnn,100,,0.86,0.571429,0.888889,0.695652,0.635363
4,bert_tuned_uncased,df_mixed_test_cleaned_rnn,100,0.5,0.85,0.555556,0.833333,0.666667,0.594499
5,roberta_hate_mixed_cleaned,df_mixed_test_cleaned_rnn,100,0.5,0.85,0.56,0.777778,0.651163,0.571057
3,RNN-GRU,df_mixed_test_cleaned_rnn,100,0.5,0.8,0.466667,0.777778,0.583333,0.488479
1,GaussianNB,df_mixed_test_cleaned_rnn,100,0.5,0.63,0.297872,0.777778,0.430769,0.288921


##### 3.1.5 Mixed-Test (uncleaned) - df_mixed_test_uncleaned

In [42]:
evaluation_mixed_test_uncleaned = evaluate(label_col="label",
                                           tweet_col="tweet",
                                           data=df_mixed_test_uncleaned,
                                           dataset_name="df_mixed_test_uncleaned",
                                           threshold=0.5)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)


In [43]:
evaluation_mixed_test_uncleaned.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
4,bert_tuned_uncased,df_mixed_test_uncleaned,100,0.5,0.79,0.4,0.8,0.533333,0.458349
5,roberta_hate_mixed_cleaned,df_mixed_test_uncleaned,100,0.5,0.78,0.37931,0.733333,0.5,0.410429
0,BalancedRandomForest,df_mixed_test_uncleaned,100,0.5,0.78,0.36,0.6,0.45,0.33955
2,LinearSVC,df_mixed_test_uncleaned,100,,0.72,0.314286,0.733333,0.44,0.337615
3,RNN-GRU,df_mixed_test_uncleaned,100,0.5,0.75,0.321429,0.6,0.418605,0.299392
1,GaussianNB,df_mixed_test_uncleaned,100,0.5,0.65,0.205882,0.466667,0.285714,0.112328


##### 3.1.6 Mixed-Test (manual-labeled, uncleaned) - df_mixed_manual_uncleaned

In [44]:
evaluation_mixed_manual_uncleaned = evaluate(label_col="label_manual",
                                             tweet_col="tweet",
                                             data=df_mixed_manual_uncleaned,
                                             dataset_name="df_mixed_manual_uncleaned",
                                             threshold=0.5)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Prediction start.
Total Prediction took 0:00:55 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:55 (h:mm:ss)


In [45]:
evaluation_mixed_manual_uncleaned.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
5,roberta_hate_mixed_cleaned,df_mixed_manual_uncleaned,400,0.5,0.885,0.914439,0.850746,0.881443,0.771967
2,LinearSVC,df_mixed_manual_uncleaned,400,,0.8325,0.828431,0.840796,0.834568,0.665041
4,bert_tuned_uncased,df_mixed_manual_uncleaned,400,0.5,0.82,0.824121,0.81592,0.82,0.640041
3,RNN-GRU,df_mixed_manual_uncleaned,400,0.5,0.815,0.862857,0.751244,0.803191,0.635618
0,BalancedRandomForest,df_mixed_manual_uncleaned,400,0.5,0.7975,0.837079,0.741294,0.78628,0.599194
1,GaussianNB,df_mixed_manual_uncleaned,400,0.5,0.6925,0.682243,0.726368,0.703614,0.385601


#### 3.2 Alle Datensätze - Schwellwert 0.35

##### 3.2.1 Mixed-Train (cleaned) - df_mixed_train_cleaned

In [46]:
evaluation_mixed_train_cleaned2 = evaluate(label_col="label",
                                           tweet_col="tweet_cleaned",
                                           data=df_mixed_train_cleaned,
                                           dataset_name="df_mixed_train_cleaned",
                                           threshold=0.35,
                                           )

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)


In [47]:
evaluation_mixed_train_cleaned2.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
4,bert_tuned_uncased,df_mixed_train_cleaned,100,0.35,0.94,0.764706,0.866667,0.8125,0.779108
5,roberta_hate_mixed_cleaned,df_mixed_train_cleaned,100,0.35,0.93,0.722222,0.866667,0.787879,0.750825
0,BalancedRandomForest,df_mixed_train_cleaned,100,0.35,0.71,0.333333,0.933333,0.491228,0.436915
2,LinearSVC,df_mixed_train_cleaned,100,,0.71,0.333333,0.933333,0.491228,0.436915
3,RNN-GRU,df_mixed_train_cleaned,100,0.35,0.72,0.333333,0.866667,0.481481,0.410538
1,GaussianNB,df_mixed_train_cleaned,100,0.35,0.58,0.22449,0.733333,0.34375,0.204482


##### 3.2.2 Mixed-Train (uncleaned) - df_mixed_train_uncleaned

In [48]:
evaluation_mixed_train_uncleaned2 = evaluate(label_col="label",
                                             tweet_col="tweet",
                                             data=df_mixed_train_uncleaned,
                                             dataset_name="df_mixed_train_uncleaned",
                                             threshold=0.35)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)


In [49]:
evaluation_mixed_train_uncleaned2.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
3,RNN-GRU,df_mixed_train_uncleaned,100,0.35,0.89,0.642857,0.947368,0.765957,0.719871
4,bert_tuned_uncased,df_mixed_train_uncleaned,100,0.35,0.88,0.64,0.842105,0.727273,0.662266
5,roberta_hate_mixed_cleaned,df_mixed_train_uncleaned,100,0.35,0.88,0.652174,0.789474,0.714286,0.64388
2,LinearSVC,df_mixed_train_uncleaned,100,,0.84,0.555556,0.789474,0.652174,0.566702
0,BalancedRandomForest,df_mixed_train_uncleaned,100,0.35,0.82,0.516129,0.842105,0.64,0.55722
1,GaussianNB,df_mixed_train_uncleaned,100,0.35,0.7,0.351351,0.684211,0.464286,0.315198


##### 3.2.3 Mixed-Test (cleaned) - df_mixed_test_cleaned

In [50]:
evaluation_mixed_test_cleaned2 = evaluate(label_col="label",
                                          tweet_col="tweet_cleaned",
                                          data=df_mixed_test_cleaned,
                                          dataset_name="df_mixed_test_cleaned",
                                          threshold=0.35)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)


In [51]:
evaluation_mixed_test_cleaned2.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
4,bert_tuned_uncased,df_mixed_test_cleaned,100,0.35,0.84,0.55,0.611111,0.578947,0.481536
5,roberta_hate_mixed_cleaned,df_mixed_test_cleaned,100,0.35,0.85,0.6,0.5,0.545455,0.459243
3,RNN-GRU,df_mixed_test_cleaned,100,0.35,0.69,0.333333,0.722222,0.45614,0.319125
2,LinearSVC,df_mixed_test_cleaned,100,,0.68,0.325,0.722222,0.448276,0.308162
0,BalancedRandomForest,df_mixed_test_cleaned,100,0.35,0.59,0.27451,0.777778,0.405797,0.250969
1,GaussianNB,df_mixed_test_cleaned,100,0.35,0.53,0.262295,0.888889,0.405063,0.267894


##### 3.2.4 Mixed-Test (cleaned-RNN) - df_mixed_test_cleaned_rnn

In [52]:
evaluation_mixed_test_cleaned_rnn2 = evaluate(label_col="label",
                                              tweet_col="tweet_cleaned",
                                              data=df_mixed_test_cleaned_rnn,
                                              dataset_name="df_mixed_test_cleaned_rnn",
                                              threshold=0.35)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)


In [53]:
evaluation_mixed_test_cleaned_rnn2.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
2,LinearSVC,df_mixed_test_cleaned_rnn,100,,0.86,0.571429,0.888889,0.695652,0.635363
4,bert_tuned_uncased,df_mixed_test_cleaned_rnn,100,0.35,0.85,0.555556,0.833333,0.666667,0.594499
5,roberta_hate_mixed_cleaned,df_mixed_test_cleaned_rnn,100,0.35,0.85,0.56,0.777778,0.651163,0.571057
0,BalancedRandomForest,df_mixed_test_cleaned_rnn,100,0.35,0.81,0.485714,0.944444,0.641509,0.583915
3,RNN-GRU,df_mixed_test_cleaned_rnn,100,0.35,0.78,0.441176,0.833333,0.576923,0.487931
1,GaussianNB,df_mixed_test_cleaned_rnn,100,0.35,0.62,0.3,0.833333,0.441176,0.312348


##### 3.2.5 Mixed-Test (uncleaned) - df_mixed_test_uncleaned

In [54]:
evaluation_mixed_test_uncleaned2 = evaluate(label_col="label",
                                            tweet_col="tweet",
                                            data=df_mixed_test_uncleaned,
                                            dataset_name="df_mixed_test_uncleaned",
                                            threshold=0.35)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Prediction start.
Total Prediction took 0:00:14 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:15 (h:mm:ss)


In [55]:
evaluation_mixed_test_uncleaned2.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
4,bert_tuned_uncased,df_mixed_test_uncleaned,100,0.35,0.79,0.4,0.8,0.533333,0.458349
5,roberta_hate_mixed_cleaned,df_mixed_test_uncleaned,100,0.35,0.78,0.37931,0.733333,0.5,0.410429
3,RNN-GRU,df_mixed_test_uncleaned,100,0.35,0.75,0.34375,0.733333,0.468085,0.372226
2,LinearSVC,df_mixed_test_uncleaned,100,,0.72,0.314286,0.733333,0.44,0.337615
0,BalancedRandomForest,df_mixed_test_uncleaned,100,0.35,0.66,0.268293,0.733333,0.392857,0.276165
1,GaussianNB,df_mixed_test_uncleaned,100,0.35,0.61,0.184211,0.466667,0.264151,0.075007


##### 3.2.6 Mixed-Test (manual-labeled, uncleaned) - df_mixed_manual_uncleaned

In [56]:
evaluation_mixed_manual_uncleaned2 = evaluate(label_col="label_manual",
                                              tweet_col="tweet",
                                              data=df_mixed_manual_uncleaned,
                                              dataset_name="df_mixed_manual_uncleaned",
                                              threshold=0.35)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Prediction start.
Total Prediction took 0:00:52 (h:mm:ss)
Prediction start.
Total Prediction took 0:00:56 (h:mm:ss)


In [57]:
evaluation_mixed_manual_uncleaned2.sort_values(by=["f1_score"], ascending=False)

Unnamed: 0,model,dataset_name,sample_size,threshold,accuracy,precision,recall,f1_score,mcc
5,roberta_hate_mixed_cleaned,df_mixed_manual_uncleaned,400,0.35,0.885,0.914439,0.850746,0.881443,0.771967
2,LinearSVC,df_mixed_manual_uncleaned,400,,0.8325,0.828431,0.840796,0.834568,0.665041
4,bert_tuned_uncased,df_mixed_manual_uncleaned,400,0.35,0.82,0.824121,0.81592,0.82,0.640041
0,BalancedRandomForest,df_mixed_manual_uncleaned,400,0.35,0.8025,0.777273,0.850746,0.812352,0.607553
3,RNN-GRU,df_mixed_manual_uncleaned,400,0.35,0.79,0.806283,0.766169,0.785714,0.580821
1,GaussianNB,df_mixed_manual_uncleaned,400,0.35,0.6875,0.669643,0.746269,0.705882,0.37713


### 4. Detaillierte Evaluation - Tweetbetrachtung

Fragen:
- gibt es Tweets, die von keinem Modell korrekt klassifiziert werden können? bzw. nur von wenigen? was haben diese Tweets gemeinsam?
- gibt es Tweets, die von allen Modellen korrekt klassifiziert werden können? bzw. von vielen? was haben diese Tweets gemeinsam?
-


#### 4.1 manuell gelabelte Tweets - Schwellwert 0.5

In [195]:
merged_false = df_mixed_manual_uncleaned.copy()

In [196]:
add_pred_and_probas_ensemble(model=ensemble_model,
                             vectorizer=ensemble_vectorizer,
                             data=merged_false,
                             tweet_name="tweet",
                             threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_ens',
                             'one_proba': '1_ens',
                             'pred': 'pred_ens'}, inplace=True)

In [197]:
add_pred_and_probas_nb(model=nb_model,
                       vectorizer=nb_vectorizer,
                       data=merged_false,
                       tweet_name="tweet",
                       threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_nb',
                             'one_proba': '1_nb',
                             'pred': 'pred_nb'}, inplace=True)

In [198]:
add_pred_and_probas_svm(model=svm_model,
                        vectorizer=svm_vectorizer,
                        data=merged_false,
                        tweet_name="tweet",
                        threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_svm',
                             'one_proba': '1_svm',
                             'pred': 'pred_svm'}, inplace=True)

In [199]:
# LSTM todo

In [200]:
add_pred_and_probas_gru(model=gru_model,
                        tokenizer=gru_tokenizer,
                        data=merged_false,
                        tweet_name="tweet",
                        threshold=0.5,
                        max_len=40)
merged_false.rename(columns={'zero_proba': '0_gru',
                             'one_proba': '1_gru',
                             'pred': 'pred_gru'}, inplace=True)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


In [201]:
add_pred_and_probas_bert(model=bert_model,
                         tokenizer=bert_tokenizer,
                         data=merged_false,
                         label_name="label_manual",
                         tweet_name="tweet",
                         batch_size=16,
                         threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_bert',
                             'one_proba': '1_bert',
                             'pred': 'pred_bert'}, inplace=True)

Prediction start.
Prediction finished.
Total Prediction took 0:00:59 (h:mm:ss)


In [202]:
add_pred_and_probas_bert(model=roberta_model,
                         tokenizer=roberta_tokenizer,
                         data=merged_false,
                         label_name="label_manual",
                         tweet_name="tweet",
                         batch_size=16,
                         threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_rob',
                             'one_proba': '1_rob',
                             'pred': 'pred_rob'}, inplace=True)

Prediction start.
Prediction finished.
Total Prediction took 0:01:03 (h:mm:ss)


In [203]:
merged_false

Unnamed: 0,label_manual,tweet,0_ens,1_ens,pred_ens,0_nb,1_nb,pred_nb,0_svm,1_svm,pred_svm,0_gru,1_gru,pred_gru,0_bert,1_bert,pred_bert,0_rob,1_rob,pred_rob
0,1,Post the shit or kill yourself faggot,0.06,0.94,1,2.778007e-13,1.000000,1,0,0,1,0.011137,0.988863,1,0.180920,0.819080,1,0.192266,0.807734,1
2,1,i hate jews so much i wish the holocaust actua...,0.16,0.84,1,9.101217e-02,0.908988,1,0,0,1,0.195016,0.804984,1,0.512623,0.487377,0,0.023317,0.976683,1
3,1,Poor kid would have been better off with his h...,0.48,0.52,1,1.087948e-02,0.989121,1,0,0,1,0.036294,0.963706,1,0.157057,0.842943,1,0.221822,0.778178,1
10,1,"Close your fucking legs, hoe. Use birth contro...",0.60,0.40,0,7.472763e-08,1.000000,1,0,0,0,0.006292,0.993708,1,0.713035,0.286965,0,0.158761,0.841239,1
11,1,@user @user Please take away their license to ...,0.70,0.30,0,9.999939e-01,0.000006,0,0,0,1,0.997577,0.002423,0,0.865542,0.134458,0,0.961045,0.038955,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1,wow men are fucking trash and disgusting,0.19,0.81,1,6.262303e-09,1.000000,1,0,0,1,0.903850,0.096150,0,0.523429,0.476571,0,0.428509,0.571491,1
96,1,I hope your an anal only slut. No one should h...,0.17,0.83,1,4.681266e-07,1.000000,1,0,0,1,0.012478,0.987522,1,0.265263,0.734737,1,0.052255,0.947745,1
97,1,do you have autism or something? why do you ta...,0.04,0.96,1,8.632649e-08,1.000000,1,0,0,1,0.019523,0.980477,1,0.475986,0.524014,1,0.047550,0.952450,1
98,1,Stupid Asian Bitch You're an idiot and you sho...,0.20,0.80,1,6.193825e-06,0.999994,1,0,0,1,0.011486,0.988514,1,0.222132,0.777868,1,0.057266,0.942734,1


In [216]:
all_false_merged = merged_false[
    (merged_false.pred_ens != merged_false.label_manual) &
    (merged_false.pred_nb != merged_false.label_manual) &
    (merged_false.pred_svm != merged_false.label_manual) &
    (merged_false.pred_gru != merged_false.label_manual) &
    (merged_false.pred_bert != merged_false.label_manual) &
    (merged_false.pred_rob != merged_false.label_manual)]

len(all_false_merged)

8

#### 4.2 manuell gelabelte Tweets - Schwellwert 0.35

In [207]:
merged_false2 = df_mixed_manual_uncleaned.copy()

In [208]:
add_pred_and_probas_ensemble(model=ensemble_model,
                             vectorizer=ensemble_vectorizer,
                             data=merged_false2,
                             tweet_name="tweet",
                             threshold=0.35)
merged_false2.rename(columns={'zero_proba': '0_ens',
                             'one_proba': '1_ens',
                             'pred': 'pred_ens'}, inplace=True)

In [209]:
add_pred_and_probas_nb(model=nb_model,
                       vectorizer=nb_vectorizer,
                       data=merged_false2,
                       tweet_name="tweet",
                       threshold=0.35)
merged_false2.rename(columns={'zero_proba': '0_nb',
                             'one_proba': '1_nb',
                             'pred': 'pred_nb'}, inplace=True)

In [210]:
add_pred_and_probas_svm(model=svm_model,
                        vectorizer=svm_vectorizer,
                        data=merged_false2,
                        tweet_name="tweet",
                        threshold=0.35)
merged_false2.rename(columns={'zero_proba': '0_svm',
                             'one_proba': '1_svm',
                             'pred': 'pred_svm'}, inplace=True)

In [211]:
# LSTM todo

In [212]:
add_pred_and_probas_gru(model=gru_model,
                        tokenizer=gru_tokenizer,
                        data=merged_false2,
                        tweet_name="tweet",
                        threshold=0.35,
                        max_len=40)
merged_false2.rename(columns={'zero_proba': '0_gru',
                             'one_proba': '1_gru',
                             'pred': 'pred_gru'}, inplace=True)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


In [213]:
add_pred_and_probas_bert(model=bert_model,
                         tokenizer=bert_tokenizer,
                         data=merged_false2,
                         label_name="label_manual",
                         tweet_name="tweet",
                         batch_size=16,
                         threshold=0.35)
merged_false2.rename(columns={'zero_proba': '0_bert',
                             'one_proba': '1_bert',
                             'pred': 'pred_bert'}, inplace=True)

Prediction start.
Prediction finished.
Total Prediction took 0:01:00 (h:mm:ss)


In [214]:
add_pred_and_probas_bert(model=roberta_model,
                         tokenizer=roberta_tokenizer,
                         data=merged_false2,
                         label_name="label_manual",
                         tweet_name="tweet",
                         batch_size=16,
                         threshold=0.35)
merged_false2.rename(columns={'zero_proba': '0_rob',
                             'one_proba': '1_rob',
                             'pred': 'pred_rob'}, inplace=True)

Prediction start.
Prediction finished.
Total Prediction took 0:00:56 (h:mm:ss)


In [215]:
merged_false2

Unnamed: 0,label_manual,tweet,0_ens,1_ens,pred_ens,0_nb,1_nb,pred_nb,0_svm,1_svm,pred_svm,0_gru,1_gru,pred_gru,0_bert,1_bert,pred_bert,0_rob,1_rob,pred_rob
0,1,Post the shit or kill yourself faggot,0.06,0.94,1,2.778007e-13,1.000000,1,0,0,1,0.011137,0.988863,1,0.180920,0.819080,1,0.192266,0.807734,1
2,1,i hate jews so much i wish the holocaust actua...,0.16,0.84,1,9.101217e-02,0.908988,1,0,0,1,0.195016,0.804984,1,0.512623,0.487377,1,0.023317,0.976683,1
3,1,Poor kid would have been better off with his h...,0.48,0.52,1,1.087948e-02,0.989121,1,0,0,1,0.036294,0.963706,1,0.157057,0.842943,1,0.221822,0.778178,1
10,1,"Close your fucking legs, hoe. Use birth contro...",0.60,0.40,1,7.472763e-08,1.000000,1,0,0,0,0.006292,0.993708,1,0.713035,0.286965,0,0.158761,0.841239,1
11,1,@user @user Please take away their license to ...,0.70,0.30,0,9.999939e-01,0.000006,0,0,0,1,0.997577,0.002423,0,0.865542,0.134458,0,0.961045,0.038955,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1,wow men are fucking trash and disgusting,0.19,0.81,1,6.262303e-09,1.000000,1,0,0,1,0.903850,0.096150,0,0.523429,0.476571,1,0.428509,0.571491,1
96,1,I hope your an anal only slut. No one should h...,0.17,0.83,1,4.681266e-07,1.000000,1,0,0,1,0.012478,0.987522,1,0.265263,0.734737,1,0.052255,0.947745,1
97,1,do you have autism or something? why do you ta...,0.04,0.96,1,8.632649e-08,1.000000,1,0,0,1,0.019523,0.980477,1,0.475986,0.524014,1,0.047550,0.952450,1
98,1,Stupid Asian Bitch You're an idiot and you sho...,0.20,0.80,1,6.193825e-06,0.999994,1,0,0,1,0.011486,0.988514,1,0.222132,0.777868,1,0.057266,0.942734,1


In [217]:
all_false_merged2 = merged_false2[
    (merged_false2.pred_ens != merged_false2.label_manual) &
    (merged_false2.pred_nb != merged_false2.label_manual) &
    (merged_false2.pred_svm != merged_false2.label_manual) &
    (merged_false2.pred_gru != merged_false2.label_manual) &
    (merged_false2.pred_bert != merged_false2.label_manual) &
    (merged_false2.pred_rob != merged_false2.label_manual)]

len(all_false_merged2)

8