In [None]:
import joblib
import pickle

import time
import datetime
import numpy as np
import pandas as pd
from keras.src.saving import load_model
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer
from nltk import word_tokenize
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.src.utils import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score
from sklearn.metrics import matthews_corrcoef

In [None]:
import warnings

warnings.filterwarnings('ignore')

In [None]:
device = torch.device("cpu")
device

### Funktionen

#### Datenvorbereitung etc.

In [None]:
def create_dataset(tokenizer, max_len, labels_data, tweets_data):
    """Tokenise tweets and bundle ids, attention masks and labels into a TensorDataset.

    Args:
        tokenizer: Tokenizer exposing ``encode_plus`` (Hugging Face style).
        max_len: Length every tweet is padded or truncated to. ``None`` falls
            back to 256, the value that used to be hard-coded here.
        labels_data: One label per tweet; handed to ``torch.tensor``.
        tweets_data: Iterable of tweet strings.

    Returns:
        ``TensorDataset`` of ``(input_ids, attention_masks, labels)``.
    """
    if max_len is None:
        max_len = 256
    # Never pad beyond the limit the tokenizer itself reports (if it reports one).
    model_limit = getattr(tokenizer, "model_max_length", None)
    if isinstance(model_limit, int) and 0 < model_limit < max_len:
        max_len = model_limit

    input_ids = []
    attention_masks = []

    for tweet in tweets_data:
        encoded_dict = tokenizer.encode_plus(
            tweet,
            add_special_tokens=True,
            max_length=max_len,
            # Explicit padding/truncation: every row gets the same length, so
            # the torch.cat calls below cannot fail on an over-long tweet.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels_data)

    return TensorDataset(input_ids, attention_masks, labels)

In [None]:
def get_dataloader(batch_size, dataset, sampler):
    """Wrap ``dataset`` in a DataLoader using the named sampling strategy.

    Args:
        batch_size: Number of samples per batch.
        dataset: Dataset to iterate over.
        sampler: ``"random"`` (RandomSampler) or ``"sequential"`` (SequentialSampler).

    Returns:
        The configured ``DataLoader``.

    Raises:
        ValueError: If ``sampler`` is neither ``"random"`` nor ``"sequential"``
            (previously this silently returned ``None``).
    """
    sampler_classes = {
        "random": RandomSampler,
        "sequential": SequentialSampler,
    }
    if sampler not in sampler_classes:
        raise ValueError(
            "Unknown sampler {!r}; expected one of {}".format(sampler, sorted(sampler_classes))
        )

    return DataLoader(
        dataset,
        sampler=sampler_classes[sampler](dataset),
        batch_size=batch_size
    )

In [None]:
def get_max_len(tweets, tokenizer):
    """Return the largest token count (special tokens included) over all tweets.

    Yields 0 when ``tweets`` is empty.
    """
    token_counts = (
        len(tokenizer.encode(text, add_special_tokens=True))
        for text in tweets
    )
    return max(token_counts, default=0)

In [None]:
def format_time(elapsed):
    """Render a duration in seconds as ``h:mm:ss``, rounded to whole seconds."""
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
def vectorize_w2v(tweets, loaded_vectorizer, vector_size=300):
    """Turn each tweet into the mean of its word vectors.

    Args:
        tweets: Tweet strings (pandas Series or any sequence accepted by ``pd.Series``).
        loaded_vectorizer: Object whose ``wv[word]`` yields a vector of length
            ``vector_size`` and raises ``KeyError`` for unknown words.
        vector_size: Dimensionality of the word vectors. It now also sizes the
            result matrix (that was hard-coded to 300 before).

    Returns:
        ``np.ndarray`` of shape ``(len(tweets), vector_size)``. Tweets without
        any known word stay all-zero.
    """
    tokenized = pd.Series(tweets).map(word_tokenize)

    tweets_w2v = np.zeros((len(tokenized), vector_size))
    for row, tokens in enumerate(tokenized):
        total = np.zeros(vector_size)
        known_words = 0
        for word in tokens:
            try:
                total += np.asarray(loaded_vectorizer.wv[word]).reshape(vector_size)
            except KeyError:
                # Out-of-vocabulary words do not contribute to the mean.
                continue
            known_words += 1
        if known_words:
            tweets_w2v[row, :] = total / known_words

    return tweets_w2v

In [None]:
def tokenize_and_vectorize_for_lstm(tokenizer, glove_embeddings, tweets, vector_size=200, max_seq_len=50):
    """Convert tweets into a float32 array of per-token embedding vectors.

    Tweets are mapped to index sequences, post-padded to ``max_seq_len``, and
    every index is replaced by the ``glove_embeddings`` vector of its word.
    Indices without a word, and words without an embedding, become zero vectors.

    Returns:
        ``np.ndarray`` (float32) with one ``(max_seq_len, vector_size)`` matrix per tweet.
    """
    padded_ids = pad_sequences(tokenizer.texts_to_sequences(tweets), padding='post', maxlen=max_seq_len)

    def embed_sequence(token_ids):
        rows = []
        for token_id in token_ids:
            word = tokenizer.index_word.get(token_id, '')
            rows.append(glove_embeddings.get(word, np.zeros(vector_size)))

        # Guarantee exactly max_seq_len rows.
        missing = max_seq_len - len(rows)
        if missing > 0:
            rows.extend([np.zeros(vector_size)] * missing)
        else:
            rows = rows[:max_seq_len]

        return np.array(rows, dtype=np.float32)

    embedded = [embed_sequence(token_ids) for token_ids in padded_ids]
    return np.array(embedded, dtype=np.float32)

#### Evaluationsfunktionen

##### Metriken

In [None]:
def get_predictions_bert(test_dataloader, model):
    """Run ``model`` over every batch and return the argmax class per sample.

    Each batch must provide input ids at position 0 and the attention mask at
    position 1. Progress and total runtime are printed.
    """
    print("Prediction start.")
    started_at = time.time()
    predicted_classes = []

    for batch in test_dataloader:
        ids, mask = batch[0].to(device), batch[1].to(device)
        with torch.no_grad():
            result = model(ids,
                           token_type_ids=None,
                           attention_mask=mask)
            scores = result.logits.detach().cpu().numpy()
        best_class = np.argmax(scores, axis=1).flatten()
        predicted_classes.extend(list(best_class))

    print("Total Prediction took {:} (h:mm:ss)".format(format_time(time.time() - started_at)))
    return predicted_classes

In [None]:
def add_to_eval_df(model_name, y_prediction, y_true, sample_size, df, dataset_name, threshold):
    """Append one result row (run description plus five metrics) to ``df`` in place.

    The metric columns are filled in the order accuracy, precision, recall,
    F1 and Matthews correlation coefficient.
    """
    metric_functions = (accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef)
    metric_values = [metric(y_true, y_prediction) for metric in metric_functions]

    next_row = len(df.index)
    df.loc[next_row] = [model_name, dataset_name, sample_size, threshold, *metric_values]

In [None]:
def eval_model_bert(tokenizer, model, model_name, labels, tweets, batch_size, data_size, df, dataset_name, threshold,
                    max_len=None):
    """Evaluate a transformer classifier and append its metrics to ``df``.

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Sequence-classification model.
        model_name: Name written to the result row.
        labels: True labels, one per tweet.
        tweets: Tweet strings.
        batch_size: Batch size used for prediction.
        data_size: Sample size written to the result row.
        df: Result DataFrame that receives the new row (modified in place).
        dataset_name: Dataset name written to the result row.
        threshold: Decision threshold. A numeric value is applied to the
            class-1 probability, so the recorded threshold is the one actually
            used. Any non-numeric value (e.g. ``"None"``) keeps the plain
            argmax prediction.
        max_len: Padding length; derived from the data when ``None``.
    """
    if max_len is None:
        max_len = get_max_len(tweets, tokenizer)
    dataset = create_dataset(tokenizer=tokenizer, max_len=max_len, labels_data=labels,
                             tweets_data=tweets)
    dataloader = get_dataloader(batch_size=batch_size, sampler="sequential", dataset=dataset)

    # bool is an int subclass; treat it as "no threshold" rather than 0/1.
    numeric_threshold = isinstance(threshold, (int, float)) and not isinstance(threshold, bool)
    if numeric_threshold:
        # Previously the threshold was only logged while predictions were always
        # argmax, which made e.g. the 0.35 runs identical to the 0.5 runs.
        predictions, _ = get_predictions_with_probabilities_bert(test_dataloader=dataloader,
                                                                 model=model,
                                                                 threshold=threshold)
    else:
        predictions = get_predictions_bert(test_dataloader=dataloader, model=model)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [None]:
def eval_model_ensemble(vectorizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold):
    """Evaluate a ``predict_proba`` model on vectorizer-transformed tweets.

    A tweet is predicted positive when its class-1 probability is at least
    ``threshold``; the resulting metrics are appended to ``df``.
    """
    features = vectorizer.transform(tweets)
    positive_probability = model.predict_proba(features)[:, 1]
    is_positive = positive_probability >= threshold

    add_to_eval_df(
        model_name=model_name,
        y_prediction=is_positive,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [None]:
def eval_model_bayes(vectorizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold):
    """Evaluate the Naive Bayes model and append its metrics to ``df``.

    Args:
        vectorizer: Either a word2vec-style model (has a ``wv`` attribute) or a
            fitted vectorizer exposing ``transform`` (e.g. TF-IDF).
        model: Classifier exposing ``predict_proba``.
        model_name: Name written to the result row.
        labels: True labels.
        tweets: Tweet strings.
        data_size: Sample size written to the result row.
        df: Result DataFrame, modified in place.
        dataset_name: Dataset name written to the result row.
        threshold: Minimum class-1 probability for a positive prediction.
    """
    # The original always took the word2vec path. A ``transform``-style
    # vectorizer has no ``wv`` and failed there with AttributeError.
    if hasattr(vectorizer, "wv"):
        X_vectorized = vectorize_w2v(tweets, vectorizer)
    else:
        X_vectorized = vectorizer.transform(tweets)
    predictions = (model.predict_proba(X_vectorized)[:, 1] >= threshold)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predictions,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [None]:
def eval_model_svm(vectorizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold):
    """Evaluate a model via ``predict`` on vectorizer-transformed tweets.

    ``threshold`` is not applied to the predictions; it is only written to the
    result row.
    """
    predicted_labels = model.predict(vectorizer.transform(tweets))

    add_to_eval_df(
        model_name=model_name,
        y_prediction=predicted_labels,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [None]:
def eval_model_gru(tokenizer, model, model_name, labels, tweets, data_size, df, dataset_name, threshold, max_len):
    """Evaluate the GRU model on post-padded index sequences.

    Tweets are cast to ``str``, tokenised and padded to ``max_len``. A model
    output above ``threshold`` counts as class 1. Metrics are appended to ``df``.
    """
    texts = [str(tweet) for tweet in tweets]
    padded = pad_sequences(tokenizer.texts_to_sequences(texts), padding='post', maxlen=max_len)
    scores = model.predict(padded)
    hard_labels = (scores > threshold).astype(int)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=hard_labels,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

In [None]:
def eval_model_lstm(tokenizer, vectorizer, model, labels, tweets, threshold, model_name, data_size, df, dataset_name):
    """Evaluate the LSTM model on embedded tweets.

    ``vectorizer`` is passed on as the embedding lookup
    (``glove_embeddings``). A model output above ``threshold`` counts as
    class 1. Metrics are appended to ``df``.
    """
    embedded_tweets = tokenize_and_vectorize_for_lstm(
        tokenizer=tokenizer,
        glove_embeddings=vectorizer,
        tweets=tweets,
    )
    scores = model.predict(embedded_tweets)
    hard_labels = (scores > threshold).astype(int)

    add_to_eval_df(
        model_name=model_name,
        y_prediction=hard_labels,
        y_true=labels,
        sample_size=data_size,
        df=df,
        dataset_name=dataset_name,
        threshold=threshold
    )

##### Wahrscheinlichkeiten

In [None]:
def get_predictions_with_probabilities_bert(test_dataloader, model, threshold):
    """Predict thresholded classes and softmax probabilities for every sample.

    Args:
        test_dataloader: Iterable of batches with input ids at position 0 and
            the attention mask at position 1.
        model: Model whose output exposes ``logits``.
        threshold: A sample is labelled 1 when its class-1 probability is
            strictly greater than this value, otherwise 0.

    Returns:
        ``(predictions, predictions_proba)``: a list of 0/1 labels and a list
        of per-class probability lists, both in dataloader order.
    """
    predictions = []
    predictions_proba = []
    print("Prediction start.")
    total_t0 = time.time()

    for batch in test_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        with torch.no_grad():
            output = model(b_input_ids,
                           token_type_ids=None,
                           attention_mask=b_input_mask)
            # Softmax once per batch; the unused argmax/numpy detour was dropped.
            batch_proba = output.logits.softmax(dim=-1).tolist()

        predictions.extend(1 if row[1] > threshold else 0 for row in batch_proba)
        predictions_proba.extend(batch_proba)

    print("Prediction finished.")
    print("Total Prediction took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
    return predictions, predictions_proba

In [None]:
def add_to_detail_eval_df(dataframe, predictions, probabilities=None):
    """Add ``zero_proba``, ``one_proba`` and ``pred`` columns to ``dataframe`` in place.

    Without ``probabilities`` both probability columns are filled with 0.
    Otherwise entries 0 and 1 of every probability row are used. The same
    (mutated) frame is returned.
    """
    if probabilities is None:
        dataframe["zero_proba"] = 0
        dataframe["one_proba"] = 0
    else:
        for column, class_index in (("zero_proba", 0), ("one_proba", 1)):
            values = [row[class_index] for row in probabilities]
            dataframe[column] = pd.Series(values, index=dataframe.index)

    dataframe["pred"] = predictions
    return dataframe

In [None]:
def add_pred_and_probas_bert(model, tokenizer, data, label_name, tweet_name, batch_size, threshold):
    """Add transformer predictions and class probabilities to ``data`` in place.

    Args:
        model: Sequence-classification model.
        tokenizer: Tokenizer matching ``model``.
        data: DataFrame holding the tweets and labels; it receives the columns
            ``zero_proba``, ``one_proba`` and ``pred``.
        label_name: Name of the label column.
        tweet_name: Name of the tweet column.
        batch_size: Batch size used for prediction.
        threshold: Class-1 probability threshold for the hard prediction.

    Returns:
        The mutated ``data`` frame, like the other ``add_pred_and_probas_*``
        helpers (this one used to return ``None``).
    """
    x_data = data[tweet_name].values
    y_data = data[label_name].values
    max_len = get_max_len(x_data, tokenizer)
    test_dataset = create_dataset(tokenizer=tokenizer, max_len=max_len, labels_data=y_data,
                                  tweets_data=x_data)
    test_dataloader = get_dataloader(batch_size=batch_size, sampler="sequential", dataset=test_dataset)

    predictions, predictions_proba = get_predictions_with_probabilities_bert(test_dataloader=test_dataloader,
                                                                             model=model, threshold=threshold)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions,
                          probabilities=predictions_proba)

    return data

In [None]:
def add_pred_and_probas_ensemble(model, vectorizer, data, tweet_name, threshold):
    """Add probabilities and thresholded predictions of a ``predict_proba`` model to ``data``.

    The tweets are transformed with ``vectorizer``. The prediction is 1 when
    the class-1 probability is at least ``threshold``. Returns the mutated
    ``data`` frame.
    """
    features = vectorizer.transform(data[tweet_name].values)
    class_probabilities = model.predict_proba(features)
    hard_labels = (class_probabilities[:, 1] >= threshold).astype(int)

    add_to_detail_eval_df(dataframe=data,
                          predictions=hard_labels,
                          probabilities=class_probabilities)
    return data

In [None]:
def add_pred_and_probas_nb(model, vectorizer, data, tweet_name, threshold):
    """Add Naive Bayes probabilities and thresholded predictions to ``data`` in place.

    Args:
        model: Classifier exposing ``predict_proba``.
        vectorizer: Either a word2vec-style model (has a ``wv`` attribute) or a
            fitted vectorizer exposing ``transform`` (e.g. TF-IDF).
        data: DataFrame holding the tweets; it receives ``zero_proba``,
            ``one_proba`` and ``pred``.
        tweet_name: Name of the tweet column.
        threshold: Minimum class-1 probability for a positive prediction.

    Returns:
        The mutated ``data`` frame.
    """
    x_data = data[tweet_name]

    # The original always took the word2vec path. A ``transform``-style
    # vectorizer has no ``wv`` and failed there with AttributeError.
    if hasattr(vectorizer, "wv"):
        X_vectorized = vectorize_w2v(x_data, vectorizer)
    else:
        X_vectorized = vectorizer.transform(x_data.values)
    probabilities = model.predict_proba(X_vectorized)
    predictions = (probabilities[:, 1] >= threshold).astype(int)

    add_to_detail_eval_df(dataframe=data,
                          predictions=predictions,
                          probabilities=probabilities)

    return data

In [None]:
def add_pred_and_probas_svm(model, vectorizer, data, tweet_name, threshold):
    """Add the model's ``predict`` output to ``data`` and return the frame.

    No probabilities are passed on, so the probability columns get their
    default fill. ``threshold`` is accepted for signature parity with the
    other helpers but is not used.
    """
    features = vectorizer.transform(data[tweet_name].values)
    add_to_detail_eval_df(dataframe=data,
                          predictions=model.predict(features))
    return data

In [None]:
def add_pred_and_probas_gru(model, tokenizer, data, tweet_name, threshold, max_len):
    """Add GRU predictions and two-class probabilities to ``data`` and return it.

    The first model output per tweet is used as the class-1 probability and
    its complement as the class-0 probability. Predictions are 1 where the
    output exceeds ``threshold``.
    """
    texts = [str(tweet) for tweet in data[tweet_name].values]
    padded = pad_sequences(tokenizer.texts_to_sequences(texts), padding='post', maxlen=max_len)

    scores = model.predict(padded)
    hard_labels = (scores > threshold).astype(int)

    # Expand each score into a [P(class 0), P(class 1)] pair.
    class_pairs = pd.Series([[1 - score[0], score[0]] for score in scores])

    add_to_detail_eval_df(dataframe=data,
                          predictions=hard_labels,
                          probabilities=class_pairs)
    return data

In [None]:
def add_pred_and_probas_lstm(model, tokenizer, vectorizer, data, tweet_name, threshold):
    """Add LSTM predictions and two-class probabilities to ``data`` and return it.

    ``vectorizer`` is passed on as the embedding lookup. The first model
    output per tweet is used as the class-1 probability and its complement as
    the class-0 probability. Predictions are 1 where the output exceeds
    ``threshold``.
    """
    embedded_tweets = tokenize_and_vectorize_for_lstm(
        tokenizer=tokenizer,
        glove_embeddings=vectorizer,
        tweets=data[tweet_name].values,
    )

    scores = model.predict(embedded_tweets)
    hard_labels = (scores > threshold).astype(int)

    # Expand each score into a [P(class 0), P(class 1)] pair.
    class_pairs = pd.Series([[1 - score[0], score[0]] for score in scores])

    add_to_detail_eval_df(dataframe=data,
                          predictions=hard_labels,
                          probabilities=class_pairs)
    return data

### 1. Modelle

In [None]:
MODELS_PATH = "../../webapp/backend/app/models"

#### 1.1 Bayes

In [None]:
nb_vectorizer = joblib.load(MODELS_PATH + "/nb/vectorizer_nb_tfidf.joblib")
nb_model = joblib.load(MODELS_PATH + "/nb/model_nb_tfidf_comp.joblib")
nb_modelname = "ComplementNB"

#### 1.2 Ensemble

In [None]:
ensemble_vectorizer = joblib.load(MODELS_PATH + "/ensemble/tfidf_vectorizer_for_brf.joblib")
ensemble_model = joblib.load(MODELS_PATH + "/ensemble/tfidf_balancedrandomforest.joblib")
ensemble_modelname = "BalancedRandomForest"

#### 1.3 SVM

In [None]:
svm_vectorizer = joblib.load(MODELS_PATH + "/svc/tfidf_vectorizer_svm.joblib")
svm_model = joblib.load(MODELS_PATH + "/svc/model_svc.joblib")
svm_modelname = "LinearSVC"

#### 1.4 RNN-GRU

In [None]:
# Load the GRU model and its pickled tokenizer.
# NOTE(review): max_len_gru is presumably the padding length the GRU expects;
# the calls further down pass a literal max_len=40 instead of this name.
max_len_gru = 40
gru_model = load_model(MODELS_PATH + "/gru/gru-model_mixed-dataset.keras")
# SECURITY: pickle.load can execute arbitrary code from the file. Only load
# tokenizer files that come from a trusted source.
with open(MODELS_PATH + "/gru/tokenizer_mixed-dataset.pkl", 'rb') as f:
    gru_tokenizer = pickle.load(f)
gru_modelname = "RNN-GRU"

#### 1.5 RNN-LSTM

In [None]:
# Load the LSTM model, its JSON tokenizer and the GloVe embedding table.
# NOTE(review): these paths are relative to the notebook directory, while all
# other models are loaded from MODELS_PATH. Confirm this is intended.
lstm_model = load_model("models/lstm/model_lstm_17.keras")
with open("models/lstm/tokenizer_lstm.json", "r", encoding="utf-8") as f:
    tokenizer_data = f.read()
    lstm_tokenizer = tokenizer_from_json(tokenizer_data)
# word -> float32 vector, parsed line by line from the GloVe text file.
lstm_glove_embeddings = {}
with open("models/lstm/glove.6B.200d.txt", encoding='utf-8') as f:
    for line in f:
        # First whitespace-separated field is the word, the rest are the vector components.
        values = line.split()
        word = values[0]
        vector = np.asarray(values[1:], dtype='float32')
        lstm_glove_embeddings[word] = vector
lstm_modelname = "RNN-LSTM"

#### 1.6 BERT

In [None]:
PATH_BERT_TUNED = MODELS_PATH + "/bert/bert_mixed_imran"
bert_tokenizer = BertTokenizer.from_pretrained(PATH_BERT_TUNED, local_files_only=True)
bert_model = AutoModelForSequenceClassification.from_pretrained(PATH_BERT_TUNED, local_files_only=True)
bert_modelname = "bert_tuned_uncased"

#### 1.7 RoBERTa

In [None]:
PATH_ROBERTA_TUNED = MODELS_PATH + "/roberta/roberta_hate_mixed_cleaned"
roberta_tokenizer = AutoTokenizer.from_pretrained(PATH_ROBERTA_TUNED, local_files_only=True)
roberta_model = AutoModelForSequenceClassification.from_pretrained(PATH_ROBERTA_TUNED, local_files_only=True)
roberta_modelname = "roberta_hate_mixed_cleaned"

### 2. Daten

#### 2.1 Mixed-Train (cleaned)

In [None]:
# Cleaned mixed training split: drop rows without a cleaned tweet, then draw 100 rows.
df_mixed_train_cleaned = pd.read_csv("../../data/mixed_dataset/train_cleaned.csv", index_col=0)
df_mixed_train_cleaned = df_mixed_train_cleaned[df_mixed_train_cleaned.tweet_cleaned.notna()]
# Fixed seed so Restart & Run All evaluates the same 100 tweets every time.
df_mixed_train_cleaned = df_mixed_train_cleaned.sample(n=100, random_state=42)

df_mixed_train_cleaned = df_mixed_train_cleaned[["label", "tweet_cleaned"]]
df_mixed_train_cleaned.head(1)

#### 2.2 Mixed-Train (uncleaned)

In [None]:
# Uncleaned mixed training tweets: same CSV as the cleaned split, but using the raw `tweet` column.
df_mixed_train_uncleaned = pd.read_csv("../../data/mixed_dataset/train_cleaned.csv", index_col=0)
df_mixed_train_uncleaned = df_mixed_train_uncleaned[df_mixed_train_uncleaned.tweet.notna()]
# Fixed seed so Restart & Run All evaluates the same 100 tweets every time.
df_mixed_train_uncleaned = df_mixed_train_uncleaned.sample(n=100, random_state=42)

df_mixed_train_uncleaned = df_mixed_train_uncleaned[["label", "tweet"]]
print(df_mixed_train_uncleaned.head(1))
print("Daten: \t", len(df_mixed_train_uncleaned))

#### 2.3 Mixed-Test (cleaned)

In [None]:
# Cleaned mixed test split: drop rows without a cleaned tweet, then draw 100 rows.
df_mixed_test_cleaned = pd.read_csv("../../data/mixed_dataset/test_cleaned.csv", index_col=0)
df_mixed_test_cleaned = df_mixed_test_cleaned[df_mixed_test_cleaned.tweet_cleaned.notna()]
# Fixed seed so Restart & Run All evaluates the same 100 tweets every time.
df_mixed_test_cleaned = df_mixed_test_cleaned.sample(n=100, random_state=42)

df_mixed_test_cleaned = df_mixed_test_cleaned[["label", "tweet_cleaned"]]
print(df_mixed_test_cleaned.head(1))
print("Daten: \t", len(df_mixed_test_cleaned))

#### 2.4 Mixed-Test (cleaned-RNN)

In [None]:
# RNN-specific cleaned mixed test split: drop rows without a cleaned tweet, then draw 100 rows.
df_mixed_test_cleaned_rnn = pd.read_csv("../../data/mixed_dataset/test_cleaned_rnn.csv", index_col=0)
df_mixed_test_cleaned_rnn = df_mixed_test_cleaned_rnn[df_mixed_test_cleaned_rnn.tweet_cleaned.notna()]
# Fixed seed so Restart & Run All evaluates the same 100 tweets every time.
df_mixed_test_cleaned_rnn = df_mixed_test_cleaned_rnn.sample(n=100, random_state=42)

df_mixed_test_cleaned_rnn = df_mixed_test_cleaned_rnn[["label", "tweet_cleaned"]]
print(df_mixed_test_cleaned_rnn.head(1))
print("Daten: \t", len(df_mixed_test_cleaned_rnn))

#### 2.5 Mixed-Test (uncleaned)

In [None]:
# Uncleaned mixed test tweets: same CSV as the cleaned split, but using the raw `tweet` column.
df_mixed_test_uncleaned = pd.read_csv("../../data/mixed_dataset/test_cleaned.csv", index_col=0)
df_mixed_test_uncleaned = df_mixed_test_uncleaned[df_mixed_test_uncleaned.tweet.notna()]
# Fixed seed so Restart & Run All evaluates the same 100 tweets every time.
df_mixed_test_uncleaned = df_mixed_test_uncleaned.sample(n=100, random_state=42)

df_mixed_test_uncleaned = df_mixed_test_uncleaned[["label", "tweet", ]]
print(df_mixed_test_uncleaned.head(1))
print("Daten: \t", len(df_mixed_test_uncleaned))

#### 2.6 Mixed-Test (manual-labeled, uncleaned)

In [None]:
# Manually labelled tweets: keep every row that has a tweet (no sampling here)
# and reduce the frame to the manual label and the raw tweet text.
df_mixed_manual_uncleaned = pd.read_csv("../../data/manual_labeled/manual_labeled_group.csv", index_col=0)
df_mixed_manual_uncleaned = df_mixed_manual_uncleaned[df_mixed_manual_uncleaned.tweet.notna()]

df_mixed_manual_uncleaned = df_mixed_manual_uncleaned[["label_manual", "tweet", ]]
print(df_mixed_manual_uncleaned.head(1))
print("Daten: \t", len(df_mixed_manual_uncleaned))

### 3. Allgemeine Evaluation - Metriken

In [None]:
def evaluate(label_col, tweet_col, data, dataset_name, threshold):
    """Score every loaded model on one dataset and return the metrics as a DataFrame.

    The models, vectorizers and tokenizers are taken from the notebook's
    module-level variables (``ensemble_*``, ``nb_*``, ``svm_*``, ``lstm_*``,
    ``gru_*``, ``bert_*``, ``roberta_*``).

    Args:
        label_col: Name of the label column in ``data``.
        tweet_col: Name of the tweet column in ``data``.
        data: DataFrame holding labels and tweets.
        dataset_name: Name written to every result row.
        threshold: Decision threshold passed to every model except the SVM,
            whose row records the string ``"None"``.

    Returns:
        A new DataFrame with one row per model, in the order ensemble, Naive
        Bayes, SVM, LSTM, GRU, BERT, RoBERTa.
    """
    evaluation_df = pd.DataFrame(
        columns=["model", "dataset_name", "sample_size", "threshold", "accuracy", "precision", "recall", "f1_score",
                 "mcc"])
    labels = data[label_col]
    tweets = data[tweet_col]
    # Arguments shared by every eval_model_* call.
    common = dict(data_size=len(data), df=evaluation_df, dataset_name=dataset_name)

    eval_model_ensemble(vectorizer=ensemble_vectorizer,
                        model=ensemble_model,
                        model_name=ensemble_modelname,
                        labels=labels,
                        tweets=tweets,
                        threshold=threshold,
                        **common)

    eval_model_bayes(vectorizer=nb_vectorizer,
                     model=nb_model,
                     model_name=nb_modelname,
                     labels=labels,
                     tweets=tweets,
                     threshold=threshold,
                     **common)

    # The SVM is evaluated via predict(), so no threshold applies to it.
    eval_model_svm(vectorizer=svm_vectorizer,
                   model=svm_model,
                   model_name=svm_modelname,
                   labels=labels,
                   tweets=tweets,
                   threshold="None",
                   **common)

    eval_model_lstm(tokenizer=lstm_tokenizer,
                    vectorizer=lstm_glove_embeddings,
                    model=lstm_model,
                    model_name=lstm_modelname,
                    labels=labels,
                    tweets=tweets,
                    threshold=threshold,
                    **common)

    # Use the length configured next to the GRU model instead of a second literal 40.
    eval_model_gru(tokenizer=gru_tokenizer,
                   model=gru_model,
                   model_name=gru_modelname,
                   labels=labels,
                   tweets=tweets,
                   threshold=threshold,
                   max_len=max_len_gru,
                   **common)

    # The transformer helpers expect plain arrays rather than Series.
    for transformer_tokenizer, transformer_model, transformer_name in (
            (bert_tokenizer, bert_model, bert_modelname),
            (roberta_tokenizer, roberta_model, roberta_modelname)):
        eval_model_bert(tokenizer=transformer_tokenizer,
                        model=transformer_model,
                        model_name=transformer_name,
                        labels=labels.values,
                        tweets=tweets.values,
                        batch_size=16,
                        threshold=threshold,
                        **common)

    return evaluation_df

#### 3.1 Alle Datensätze - Schwellwert 0.5

##### 3.1.1 Mixed-Train (cleaned) - df_mixed_train_cleaned

In [None]:
evaluation_mixed_train_cleaned = evaluate(label_col="label",
                                          tweet_col="tweet_cleaned",
                                          data=df_mixed_train_cleaned,
                                          dataset_name="df_mixed_train_cleaned",
                                          threshold=0.5,
                                          )

In [None]:
evaluation_mixed_train_cleaned.sort_values(by=["f1_score"], ascending=False)

##### 3.1.2 Mixed-Train (uncleaned) - df_mixed_train_uncleaned

In [None]:
evaluation_mixed_train_uncleaned = evaluate(label_col="label",
                                            tweet_col="tweet",
                                            data=df_mixed_train_uncleaned,
                                            dataset_name="df_mixed_train_uncleaned",
                                            threshold=0.5)

In [None]:
evaluation_mixed_train_uncleaned.sort_values(by=["f1_score"], ascending=False)

##### 3.1.3 Mixed-Test (cleaned) - df_mixed_test_cleaned

In [None]:
evaluation_mixed_test_cleaned = evaluate(label_col="label",
                                         tweet_col="tweet_cleaned",
                                         data=df_mixed_test_cleaned,
                                         dataset_name="df_mixed_test_cleaned",
                                         threshold=0.5)

In [None]:
evaluation_mixed_test_cleaned.sort_values(by=["f1_score"], ascending=False)

##### 3.1.4 Mixed-Test (cleaned-RNN) - df_mixed_test_cleaned_rnn

In [None]:
evaluation_mixed_test_cleaned_rnn = evaluate(label_col="label",
                                             tweet_col="tweet_cleaned",
                                             data=df_mixed_test_cleaned_rnn,
                                             dataset_name="df_mixed_test_cleaned_rnn",
                                             threshold=0.5)

In [None]:
evaluation_mixed_test_cleaned_rnn.sort_values(by=["f1_score"], ascending=False)

##### 3.1.5 Mixed-Test (uncleaned) - df_mixed_test_uncleaned

In [None]:
evaluation_mixed_test_uncleaned = evaluate(label_col="label",
                                           tweet_col="tweet",
                                           data=df_mixed_test_uncleaned,
                                           dataset_name="df_mixed_test_uncleaned",
                                           threshold=0.5)

In [None]:
evaluation_mixed_test_uncleaned.sort_values(by=["f1_score"], ascending=False)

##### 3.1.6 Mixed-Test (manual-labeled, uncleaned) - df_mixed_manual_uncleaned

In [None]:
evaluation_mixed_manual_uncleaned = evaluate(label_col="label_manual",
                                             tweet_col="tweet",
                                             data=df_mixed_manual_uncleaned,
                                             dataset_name="df_mixed_manual_uncleaned",
                                             threshold=0.5)

In [None]:
evaluation_mixed_manual_uncleaned.sort_values(by=["f1_score"], ascending=False)

#### 3.2 Alle Datensätze - Schwellwert 0.35

##### 3.2.1 Mixed-Train (cleaned) - df_mixed_train_cleaned

In [None]:
evaluation_mixed_train_cleaned2 = evaluate(label_col="label",
                                           tweet_col="tweet_cleaned",
                                           data=df_mixed_train_cleaned,
                                           dataset_name="df_mixed_train_cleaned",
                                           threshold=0.35,
                                           )

In [None]:
evaluation_mixed_train_cleaned2.sort_values(by=["f1_score"], ascending=False)

##### 3.2.2 Mixed-Train (uncleaned) - df_mixed_train_uncleaned

In [None]:
evaluation_mixed_train_uncleaned2 = evaluate(label_col="label",
                                             tweet_col="tweet",
                                             data=df_mixed_train_uncleaned,
                                             dataset_name="df_mixed_train_uncleaned",
                                             threshold=0.35)

In [None]:
evaluation_mixed_train_uncleaned2.sort_values(by=["f1_score"], ascending=False)

##### 3.2.3 Mixed-Test (cleaned) - df_mixed_test_cleaned

In [None]:
evaluation_mixed_test_cleaned2 = evaluate(label_col="label",
                                          tweet_col="tweet_cleaned",
                                          data=df_mixed_test_cleaned,
                                          dataset_name="df_mixed_test_cleaned",
                                          threshold=0.35)

In [None]:
evaluation_mixed_test_cleaned2.sort_values(by=["f1_score"], ascending=False)

##### 3.2.4 Mixed-Test (cleaned-RNN) - df_mixed_test_cleaned_rnn

In [None]:
evaluation_mixed_test_cleaned_rnn2 = evaluate(label_col="label",
                                              tweet_col="tweet_cleaned",
                                              data=df_mixed_test_cleaned_rnn,
                                              dataset_name="df_mixed_test_cleaned_rnn",
                                              threshold=0.35)

In [None]:
evaluation_mixed_test_cleaned_rnn2.sort_values(by=["f1_score"], ascending=False)

##### 3.2.5 Mixed-Test (uncleaned) - df_mixed_test_uncleaned

In [None]:
evaluation_mixed_test_uncleaned2 = evaluate(label_col="label",
                                            tweet_col="tweet",
                                            data=df_mixed_test_uncleaned,
                                            dataset_name="df_mixed_test_uncleaned",
                                            threshold=0.35)

In [None]:
evaluation_mixed_test_uncleaned2.sort_values(by=["f1_score"], ascending=False)

##### 3.2.6 Mixed-Test (manual-labeled, uncleaned) - df_mixed_manual_uncleaned

In [None]:
evaluation_mixed_manual_uncleaned2 = evaluate(label_col="label_manual",
                                              tweet_col="tweet",
                                              data=df_mixed_manual_uncleaned,
                                              dataset_name="df_mixed_manual_uncleaned",
                                              threshold=0.35)

In [None]:
evaluation_mixed_manual_uncleaned2.sort_values(by=["f1_score"], ascending=False)

### 4. Detaillierte Evaluation - Tweetbetrachtung

Fragen:
- Gibt es Tweets, die von keinem Modell oder nur von wenigen Modellen korrekt klassifiziert werden? Was haben diese Tweets gemeinsam?
- Gibt es Tweets, die von allen oder von vielen Modellen korrekt klassifiziert werden? Was haben diese Tweets gemeinsam?


#### 4.1 manuell gelabelte Tweets - Schwellwert 0.5

In [None]:
merged_false = df_mixed_manual_uncleaned.copy()

In [None]:
add_pred_and_probas_ensemble(model=ensemble_model,
                             vectorizer=ensemble_vectorizer,
                             data=merged_false,
                             tweet_name="tweet",
                             threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_ens',
                             'one_proba': '1_ens',
                             'pred': 'pred_ens'}, inplace=True)

In [None]:
# Called the same way as the ensemble model => correct!
add_pred_and_probas_ensemble(model=nb_model,
                             vectorizer=nb_vectorizer,
                             data=merged_false,
                             tweet_name="tweet",
                             threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_nb',
                             'one_proba': '1_nb',
                             'pred': 'pred_nb'}, inplace=True)

In [None]:
add_pred_and_probas_svm(model=svm_model,
                        vectorizer=svm_vectorizer,
                        data=merged_false,
                        tweet_name="tweet",
                        threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_svm',
                             'one_proba': '1_svm',
                             'pred': 'pred_svm'}, inplace=True)

In [None]:
add_pred_and_probas_lstm(model=lstm_model,
                         tokenizer=lstm_tokenizer,
                         vectorizer=lstm_glove_embeddings,
                         data=merged_false,
                         tweet_name="tweet",
                         threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_lstm',
                             'one_proba': '1_lstm',
                             'pred': 'pred_lstm'}, inplace=True)

In [None]:
add_pred_and_probas_gru(model=gru_model,
                        tokenizer=gru_tokenizer,
                        data=merged_false,
                        tweet_name="tweet",
                        threshold=0.5,
                        max_len=40)
merged_false.rename(columns={'zero_proba': '0_gru',
                             'one_proba': '1_gru',
                             'pred': 'pred_gru'}, inplace=True)

In [None]:
add_pred_and_probas_bert(model=bert_model,
                         tokenizer=bert_tokenizer,
                         data=merged_false,
                         label_name="label_manual",
                         tweet_name="tweet",
                         batch_size=16,
                         threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_bert',
                             'one_proba': '1_bert',
                             'pred': 'pred_bert'}, inplace=True)

In [None]:
add_pred_and_probas_bert(model=roberta_model,
                         tokenizer=roberta_tokenizer,
                         data=merged_false,
                         label_name="label_manual",
                         tweet_name="tweet",
                         batch_size=16,
                         threshold=0.5)
merged_false.rename(columns={'zero_proba': '0_rob',
                             'one_proba': '1_rob',
                             'pred': 'pred_rob'}, inplace=True)

In [None]:
# Show merged_false as it stands after the prediction and rename cells above.
merged_false

In [None]:
# Rows of merged_false that every model got wrong: a row is kept only if each
# model's prediction differs from the manual label.
# pred_lstm is part of the check as well: the LSTM predictions are stored in
# merged_false next to the other models', so leaving them out would also keep
# tweets that the LSTM classified correctly.
all_false_merged = merged_false[
    (merged_false.pred_ens != merged_false.label_manual) &
    (merged_false.pred_nb != merged_false.label_manual) &
    (merged_false.pred_svm != merged_false.label_manual) &
    (merged_false.pred_lstm != merged_false.label_manual) &
    (merged_false.pred_gru != merged_false.label_manual) &
    (merged_false.pred_bert != merged_false.label_manual) &
    (merged_false.pred_rob != merged_false.label_manual)]

# Number of tweets misclassified by all seven models.
len(all_false_merged)

#### 4.2 manuell gelabelte Tweets - Schwellwert 0.35

In [None]:
# Separate working copy for the threshold-0.35 run, so the columns added below
# do not end up in df_mixed_manual_uncleaned (deep=True is the pandas default,
# spelled out here).
merged_false2 = df_mixed_manual_uncleaned.copy(deep=True)

In [None]:
# Ensemble run on merged_false2 with threshold 0.35. Afterwards the generic
# 'zero_proba' / 'one_proba' / 'pred' columns (presumably produced by the
# helper - confirm against its definition) get ensemble-specific names.
add_pred_and_probas_ensemble(
    data=merged_false2,
    tweet_name="tweet",
    model=ensemble_model,
    vectorizer=ensemble_vectorizer,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_ens',
        'one_proba': '1_ens',
        'pred': 'pred_ens',
    },
)

In [None]:
# Called the same way as the ensemble helper => correct!
# Naive Bayes run on merged_false2 with threshold 0.35. Afterwards the generic
# 'zero_proba' / 'one_proba' / 'pred' columns (presumably produced by the
# helper - confirm against its definition) get NB-specific names.
add_pred_and_probas_nb(
    data=merged_false2,
    tweet_name="tweet",
    model=nb_model,
    vectorizer=nb_vectorizer,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_nb',
        'one_proba': '1_nb',
        'pred': 'pred_nb',
    },
)

In [None]:
# SVM run on merged_false2 with threshold 0.35. Afterwards the generic
# 'zero_proba' / 'one_proba' / 'pred' columns (presumably produced by the
# helper - confirm against its definition) get SVM-specific names.
add_pred_and_probas_svm(
    data=merged_false2,
    tweet_name="tweet",
    model=svm_model,
    vectorizer=svm_vectorizer,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_svm',
        'one_proba': '1_svm',
        'pred': 'pred_svm',
    },
)

In [None]:
# LSTM run on merged_false2 with threshold 0.35. Afterwards the generic
# 'zero_proba' / 'one_proba' / 'pred' columns (presumably produced by the
# helper - confirm against its definition) get LSTM-specific names.
add_pred_and_probas_lstm(
    data=merged_false2,
    tweet_name="tweet",
    model=lstm_model,
    tokenizer=lstm_tokenizer,
    vectorizer=lstm_glove_embeddings,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_lstm',
        'one_proba': '1_lstm',
        'pred': 'pred_lstm',
    },
)

In [None]:
# GRU run on merged_false2 with threshold 0.35 and max_len=40. Afterwards the
# generic 'zero_proba' / 'one_proba' / 'pred' columns (presumably produced by
# the helper - confirm against its definition) get GRU-specific names.
add_pred_and_probas_gru(
    data=merged_false2,
    tweet_name="tweet",
    model=gru_model,
    tokenizer=gru_tokenizer,
    max_len=40,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_gru',
        'one_proba': '1_gru',
        'pred': 'pred_gru',
    },
)

In [None]:
# BERT run on merged_false2 (batch size 16, threshold 0.35); the helper is
# also told which column holds the manual labels. Afterwards the generic
# 'zero_proba' / 'one_proba' / 'pred' columns (presumably produced by the
# helper - confirm against its definition) get BERT-specific names.
add_pred_and_probas_bert(
    data=merged_false2,
    tweet_name="tweet",
    label_name="label_manual",
    model=bert_model,
    tokenizer=bert_tokenizer,
    batch_size=16,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_bert',
        'one_proba': '1_bert',
        'pred': 'pred_bert',
    },
)

In [None]:
# RoBERTa run on merged_false2; it goes through the same helper as BERT, only
# with the RoBERTa model and tokenizer (batch size 16, threshold 0.35).
# Afterwards the generic 'zero_proba' / 'one_proba' / 'pred' columns
# (presumably produced by the helper - confirm against its definition) get
# RoBERTa-specific names.
add_pred_and_probas_bert(
    data=merged_false2,
    tweet_name="tweet",
    label_name="label_manual",
    model=roberta_model,
    tokenizer=roberta_tokenizer,
    batch_size=16,
    threshold=0.35,
)
merged_false2.rename(
    inplace=True,
    columns={
        'zero_proba': '0_rob',
        'one_proba': '1_rob',
        'pred': 'pred_rob',
    },
)

In [None]:
# Show merged_false2 as it stands after the threshold-0.35 prediction and
# rename cells above.
merged_false2

In [None]:
# Rows of merged_false2 (threshold 0.35) that every model got wrong: a row is
# kept only if each model's prediction differs from the manual label.
# pred_lstm is part of the check as well: the LSTM predictions are stored in
# merged_false2 next to the other models', so leaving them out would also keep
# tweets that the LSTM classified correctly.
all_false_merged2 = merged_false2[
    (merged_false2.pred_ens != merged_false2.label_manual) &
    (merged_false2.pred_nb != merged_false2.label_manual) &
    (merged_false2.pred_svm != merged_false2.label_manual) &
    (merged_false2.pred_lstm != merged_false2.label_manual) &
    (merged_false2.pred_gru != merged_false2.label_manual) &
    (merged_false2.pred_bert != merged_false2.label_manual) &
    (merged_false2.pred_rob != merged_false2.label_manual)]

# Number of tweets misclassified by all seven models at threshold 0.35.
len(all_false_merged2)