## Imports

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# # Run if you run on colab
# %pip install transformers
# %pip install spacy
# !python -m spacy download en_core_web_sm
# !python -m spacy download en_core_web_lg

In [None]:
from transformers import AutoTokenizer, AutoModel, BertModel
from torch import nn
import torch
from torch.optim import Adam
from tqdm import tqdm
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score
import torch.nn.functional as F

from sklearn.preprocessing import QuantileTransformer

In [None]:
# Algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.compose import ColumnTransformer

from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import Pipeline

from xgboost import XGBClassifier

from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    f1_score,
)
from sklearn.metrics import balanced_accuracy_score

import string

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [None]:
import spacy, re

nlp = spacy.load("en_core_web_sm")

def remove_mentions_and_hashtags(text):
    """Delete every ``@mention`` and ``#hashtag`` token from *text*.

    Mentions are removed first, then hashtags; the surrounding whitespace is
    left untouched.
    """
    for pattern in (r"@\w+", r"#\w+"):
        text = re.sub(pattern, "", text)
    return text


def remove_numbers(text):
    """Return *text* with every run of digits deleted."""
    return re.sub(r"\d+", "", text)


def remove_punctuation(text):
    """Return *text* with all ``string.punctuation`` characters deleted."""
    # A translation table with an empty mapping and a delete-set strips the
    # characters in a single pass.
    return text.translate(str.maketrans("", "", string.punctuation))


def remove_stopwords(text, to_lemmatize:bool = False):
    """Drop the tokens spaCy flags as stop words and re-join the rest with spaces.

    Args:
        text: Raw string, parsed with the module-level ``nlp`` pipeline.
        to_lemmatize: When true, each kept token contributes its lemma;
            otherwise its original surface text.

    Returns:
        The kept tokens joined by single spaces.
    """
    kept_tokens = [
        token.lemma_ if to_lemmatize else token.text
        for token in nlp(text)
        if not token.is_stop
    ]
    return " ".join(kept_tokens)


def clean_text(text,to_lemmatize:bool = True):
    """Normalize a raw string for vectorization.

    Steps, in order: lowercase, delete ASCII punctuation, replace every
    remaining non-word character with a space, drop stop words (lemmatizing
    the kept tokens when ``to_lemmatize`` is true), and collapse whitespace
    runs into a single space. URL, mention/hashtag and digit removal are not
    applied by this function.

    Args:
        text: The string to clean.
        to_lemmatize: Forwarded to ``remove_stopwords``.

    Returns:
        The cleaned string.
    """
    normalized = remove_punctuation(text.lower())
    normalized = re.sub(r"\W", " ", normalized)
    normalized = remove_stopwords(normalized, to_lemmatize)
    return re.sub(r"\s+", " ", normalized, flags=re.I)


def lemmatize(text):
    """Return *text* with each spaCy token replaced by its lemma, space-joined."""
    return " ".join([token.lemma_ for token in nlp(text)])

## Utility functions

In [None]:
def print_metrics(y_pred, y_test, title: str = "Confusion Matrix"):
    """Print a plain-text classification report for one set of predictions.

    Note the argument order: predictions come first, ground truth second.
    Precision, recall and F1 are support-weighted averages over the classes.

    Args:
        y_pred: Predicted labels.
        y_test: True labels, aligned with ``y_pred``.
        title: Heading printed above the metrics.
    """
    from sklearn.metrics import matthews_corrcoef

    print(f"Reports for {title}")
    print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred)}")
    print(f"Precision: {precision_score(y_test, y_pred,average='weighted')}")
    print(f"Recall: {recall_score(y_test, y_pred,average='weighted')}")
    print(f"F1: {f1_score(y_test, y_pred,average='weighted')}")
    print(f"Matthews corr: {matthews_corrcoef(y_test, y_pred)}")
    print(classification_report(y_test, y_pred))

In [None]:
def split_dataset(
    data,
    holdout_size: float = 0.05,
    test_share: float = 0.4,
    random_state: int = 42,
    target_column: str = "target",
):
    """Split *data* into stratified train, validation and test frames.

    The split is done in two stratified steps: first ``holdout_size`` of the
    rows are set aside, then that holdout is divided so that ``test_share`` of
    it becomes the test set and the remainder the validation set. With the
    defaults this yields roughly 95% train, 3% validation and 2% test.

    Args:
        data: DataFrame containing ``target_column``.
        holdout_size: Fraction of ``data`` reserved for validation + test.
        test_share: Fraction of the holdout that goes to the test set.
        random_state: Seed used for both splits.
        target_column: Column whose class distribution is preserved.

    Returns:
        Tuple ``(df_train, df_val, df_test)``.
    """
    # First cut: train vs. everything held out for validation and test.
    df_train, holdout = train_test_split(
        data,
        test_size=holdout_size,
        random_state=random_state,
        stratify=data[target_column],
    )

    # Second cut: divide the holdout between validation and test.
    df_val, df_test = train_test_split(
        holdout,
        test_size=test_share,
        random_state=random_state,
        stratify=holdout[target_column],
    )

    # Print dimensions to verify
    print(
        f"Train size: {len(df_train)}, Validation size: {len(df_val)}, Test size: {len(df_test)}"
    )

    return df_train, df_val, df_test

### Data Labeling

In [None]:
df_to_label = pd.read_csv("all_data.csv")
df_to_label.drop(columns="Unnamed: 0", inplace=True)
df_to_label

In [None]:
df_to_label["label_gpt1"] = df_to_label["label_gpt1"].str.replace("'", "")
df_to_label["label_gpt2"] = df_to_label["label_gpt2"].str.replace("'", "")

In [None]:
df_filtered_same_label = df_to_label[
    (df_to_label["label_gpt1"] == df_to_label["label_gpt2"])
    & (df_to_label["label_gpt2"] == df_to_label["label_alex"])
]
print(df_filtered_same_label)

In [None]:
# Conditions for at least two columns being equal
condition1 = df_to_label["label_gpt1"] == df_to_label["label_gpt2"]
condition2 = df_to_label["label_gpt1"] == df_to_label["label_alex"]
condition3 = df_to_label["label_gpt2"] == df_to_label["label_alex"]

In [None]:
df_filter_at_least_two = df_to_label[condition1 | condition2 | condition3]

In [None]:
df_filter_at_least_two

In [None]:
# Majority vote over the three labelers: keep label_gpt1 whenever it agrees
# with at least one other labeler, otherwise fall back to label_gpt2.
# Work on an explicit copy so the new column is not written into a filtered
# view of `df_to_label` (which triggers pandas' SettingWithCopyWarning).
df_filter_at_least_two = df_filter_at_least_two.copy()
gpt1_has_support = (
    df_filter_at_least_two["label_gpt1"] == df_filter_at_least_two["label_gpt2"]
) | (df_filter_at_least_two["label_gpt1"] == df_filter_at_least_two["label_alex"])
df_filter_at_least_two["label_naics"] = df_filter_at_least_two["label_gpt1"].where(
    gpt1_has_support, df_filter_at_least_two["label_gpt2"]
)

In [None]:
df_filter_at_least_two=df_filter_at_least_two[["label", "description", "label_naics"]]

In [None]:
df_filter_at_least_two.to_csv("same_label_business_2.csv")

In [None]:
df_filtered_same_label = df_filtered_same_label[["label", "description", "label_gpt1"]]

In [None]:
df_filter_at_least_two

In [None]:
df_filter_at_least_two

In [None]:
label_dict = pd.Series(
    df_filter_at_least_two["label_naics"].values, index=df_filter_at_least_two["label"]
).to_dict()

In [None]:
df = pd.read_csv("cleaned_dataset.csv")

In [None]:
# NOTE(review): this assignment had no right-hand side (a syntax error that
# stops the whole file from parsing). Reconstructed from the mapping the
# notebook applies to `naics_label` elsewhere — confirm it is the intended one.
df["naics_label"] = df["main_business_category"].apply(
    lambda x: label_dict.get(x, "Not Found")
)

In [None]:
import json
map_company_label = pd.Series(
    df["naics_label"].values, index=df["main_business_category_clean"]
).to_dict()

with open("map_company_naics.json", "w") as f:
    json.dump(map_company_label, f)


map_company_label_comercial = pd.Series(
    df["main_business_category_clean"].values, index=df["commercial_name_clean"]
).to_dict()

with open("map_company_comercial.json", "w") as f:
    json.dump(map_company_label_comercial, f)

In [None]:
naics_keywrod = pd.read_csv(
    "/Users/marianluca/Projects/HackingBigNumbers/Tournament/marian/naics_summary_keywords.csv"
)

In [None]:
naics_keywrod["keywords"] = naics_keywrod["keywords"].str.lower()

In [None]:
naics_keywrod

In [None]:
# NAICS label -> lower-cased keyword string.
map_naics_keyword = pd.Series(
    naics_keywrod["keywords"].values, index=naics_keywrod["naics_label"]
).to_dict()

# Persist the mapping built just above (not the commercial-name mapping).
with open("map_naics_keywords.json", "w") as f:
    json.dump(map_naics_keyword, f)

In [None]:
map_company_description = pd.Series(
    df["commercial_name_clean"].values, index=df["naics_label"].values
).to_dict()

In [None]:
df

In [None]:
df

In [None]:
df

## Test data


In [None]:
test_data = pd.read_csv("/Users/marianluca/Projects/HackingBigNumbers/Tournament/data.csv")

In [None]:
test_data.drop("Unnamed: 0",inplace=True,axis=1)


In [None]:
test_data

In [None]:
label_dict = pd.Series(
    df_filter_at_least_two["label_naics"].values, index=df_filter_at_least_two["label"]
).to_dict()

In [None]:
map_business_to_naics = pd.Series(
    test_data["answer"].values, index=test_data["round_5"]
).to_dict()

In [None]:
def separate_number_and_text(input_string):
    """Split a string such as ``"621 Ambulatory ..."`` at its first space.

    Args:
        input_string: Text of the form ``"<code> <description>"``.

    Returns:
        Tuple ``(number, text)``: everything before the first space and
        everything after it. When there is no space, the whole input is
        returned as ``number`` and ``text`` is the empty string.
    """
    # partition never raises on a missing separator, unlike indexing the
    # result of split(" ", 1).
    number, _, text = input_string.partition(" ")
    return number, text


# Example usage:
input_string = "621 Ambulatory Health Care Services"
number, text = separate_number_and_text(input_string)
print("Number:", number)
print("Text:", text)

In [None]:
import json
def create_dataframe_from_json(directory):
    """Load every ``*.json`` file in *directory* into one DataFrame.

    Each file is parsed with ``json.load`` and contributes one entry to the
    list handed to ``pd.DataFrame``. Files are read in sorted filename order
    so the row order is reproducible, and are decoded as UTF-8.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        DataFrame built from the parsed contents; empty when the directory
        holds no JSON files.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(directory, filename)
        with open(file_path, "r", encoding="utf-8") as file:
            records.append(json.load(file))

    return pd.DataFrame(records)

In [None]:
test_data = create_dataframe_from_json(
    "/Users/marianluca/Projects/HackingBigNumbers/Tournament/companies"
)

In [None]:
test_data

In [None]:
# test_data = test_data[["naics_code", "label_naics"]] = test_data["answer"].str.split(
    # " ", 1)

test_data[["naics_code", "naics_label"]] = test_data["answer"].str.split(
    " ", n=1, expand=True
)

In [None]:
test_data.rename(
    columns={
        "round_1": "commercial_name",
        "round_2": "business_tags",
        "round_3": "short_description",
        "round_4": "description",
        "round_5": "main_business_category",
    },
    inplace=True
)

In [None]:
test_data

In [None]:
true_bussines_to_naics = pd.Series(
    test_data["naics_label"].values, index=test_data["main_business_category"]
).to_dict()

In [None]:
len(true_bussines_to_naics)

In [None]:
label_dict

In [None]:
common_keys = set(label_dict.keys()) & set(true_bussines_to_naics.keys())

In [None]:
len(common_keys)

In [None]:
true_bussines_to_naics

In [None]:
# Count the shared keys whose label already matches the ground truth; for
# the others, overwrite label_dict with the ground-truth label.
acc = 0
for key in common_keys:
    expected_label = true_bussines_to_naics[key]
    if expected_label == label_dict[key]:
        acc += 1
        continue
    label_dict[key] = expected_label

In [None]:
print(acc)

In [None]:
label_dict.update(true_bussines_to_naics)

In [None]:
len(label_dict)

In [None]:
map_business_to_naics

In [None]:
import json

with open("map_business_to_naics.json", "w") as f:
    json.dump(label_dict, f)

In [None]:
df_filtered_same_label.rename(columns={"label_gpt1":"label_naics"},inplace=True)
df_filtered_same_label.to_csv("same_label_business_to_naics.csv")

#### Dicts

In [None]:
df_naics = pd.read_csv(
    "/Users/marianluca/Projects/HackingBigNumbers/Tournament/marian/naics_summary.csv"
)

In [None]:
map_naics_to_code = pd.Series(
    df_naics["naics_code"].values,
    index=df_naics["naics_label"],
).to_dict()

## Traditional algorithms

### Data Colab

In [None]:
# current_directory = os.getcwd()

# parent_directory = os.path.dirname(current_directory)

# DATA_PATH = os.path.join(parent_directory, "data")
# DATA_TOURNAMENT = os.path.join(DATA_PATH, "tournament_hints_data.parquet")

### Data


In [None]:
# Locate the "data" folder that sits next to the current working directory
# (i.e. inside the working directory's parent).
current_directory = os.getcwd()

parent_directory = os.path.dirname(current_directory)

DATA_PATH = os.path.join(parent_directory,"data")
DATA_TOURNAMENT = os.path.join(DATA_PATH,"tournament_hints_data.parquet")

# NOTE(review): DATA_NAICS resolves to the same parquet file as
# DATA_TOURNAMENT — presumably a different file name was intended; confirm.
DATA_NAICS = os.path.join(DATA_PATH, "tournament_hints_data.parquet")

print("Current Directory:", current_directory)
print("Parent Directory:", parent_directory)

In [None]:
df = pd.read_parquet(DATA_TOURNAMENT)
df

In [None]:
df = df.drop_duplicates(subset=["commercial_name"])
df

In [None]:
df["label_naics"]= df["main_business_category"].apply(lambda x: label_dict.get(x, "Not Found"))

In [None]:
df = df.reset_index(drop=True)

In [None]:
# columns_round = set(df.columns)
# columns_round.remove("main_business_category")
# print(columns_round)

In [None]:
# from sklearn.preprocessing import LabelEncoder

# # Creating an instance of LabelEncoder
# encoder = LabelEncoder()

# # Fitting the encoder
# encoder.fit(df["main_business_category"].unique())

# # Transforming the data
# df["target"] = encoder.transform(df["main_business_category"])
# # print("Encoded data:", encoded_data)

# # Inverse transforming the data
# # decoded_data = encoder.inverse_transform(encoded_data)
# # print("Decoded data:", decoded_data)

In [None]:
def remove_rare_classes(df, target_column, more_than: int = 1):
    """Keep only the rows whose class occurs more than ``more_than`` times.

    Also prints how many distinct classes existed, remain, and were dropped.

    Args:
        df (DataFrame): The dataset to be filtered.
        target_column (str): The column name of the target variable.
        more_than (int): Exclusive lower bound on a class's row count; a
            class with exactly ``more_than`` rows is removed.

    Returns:
        DataFrame: An independent copy of the rows that survive the filter,
        so callers can add columns to it without touching ``df``.
    """
    # Calculate the count of each class in the target column
    class_counts = df[target_column].value_counts()
    frequent_classes = class_counts[class_counts > more_than].index

    # Copy so later column assignments on the result do not raise pandas'
    # SettingWithCopyWarning.
    filtered_df = df[df[target_column].isin(frequent_classes)].copy()

    total_classes = len(df[target_column].unique())
    remaining_classes = len(filtered_df[target_column].unique())
    print(
        f"From a total of {total_classes} it remains {remaining_classes} classes. So {total_classes - remaining_classes} was deleted"
    )

    return filtered_df


# Example usage:
df_filtered = remove_rare_classes(df, "main_business_category", 0)

In [None]:
df_filtered["label_naics"] = df_filtered["main_business_category"].apply(
    lambda x: label_dict.get(x, "Not Found")
)

In [None]:
df = df[df["label_naics"]!= "Not Found"]

In [None]:
df

In [None]:
df["naics_label"] = df["main_business_category"].apply(
    lambda x: label_dict.get(x, "Not Found")
)

In [None]:
df

In [None]:
df.to_csv("cleaned_dataset.csv")

In [None]:
# df.drop("label_naics", inplace=True,axis=1)

In [None]:
df

In [None]:
df_filtered[df_filtered["label_naics"] == "Not Found"]

In [None]:
df_filtered = df_filtered[df_filtered["label_naics"] != "Not Found"]

In [None]:
df_filtered["target"] = df_filtered["label_naics"].apply(
    lambda x: map_naics_to_code.get(x, "Not Found")
)


In [None]:
test_data.to_csv("test_data1.csv")

In [None]:
df_filtered = df_filtered[df_filtered["target"] != "Not Found"]

In [None]:
df_filtered["target"] = df_filtered["target"].astype(int)

In [None]:
len(df_filtered)

In [None]:
df_filtered

In [None]:
df_filtered = remove_rare_classes(df_filtered, "target", 50)

In [None]:
df_filtered["label_naics"].value_counts()

In [None]:
top_10_values = df_filtered["label_naics"].value_counts().head(10).index

# Create a new DataFrame that only contains rows with top 10 'label_naics' values
df_top_10 = df_filtered[df_filtered["label_naics"].isin(top_10_values)]

In [None]:
df_train, df_val, df_test = split_dataset(df_top_10)

In [None]:
# NOTE(review): no other visible cell creates or references a "Tweet" column;
# this presumably should read one of the description columns — confirm, since
# a missing column raises KeyError here.
df_filtered["clean_text"] = df_filtered["Tweet"].apply(lambda x: clean_text(x))

In [None]:
df

In [None]:
# Add a "<column>_clean" companion for each text column, produced by
# clean_text with its default settings.
for text_column in (
    "commercial_name",
    "business_tags",
    "short_description",
    "description",
    "main_business_category",
):
    df[f"{text_column}_clean"] = df[text_column].apply(lambda x: clean_text(x))

In [None]:
# read_csv is a pandas module-level function, not a DataFrame method.
pd.read_csv("cleaned_dataset.csv")

In [None]:
df = pd.read_csv("cleaned_data.csv")

In [None]:
df

In [None]:
df_train, df_val, df_test = split_dataset(df)

### Algorithms


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import f1_score, precision_score, recall_score


def apply_tfidf(X_train, X_test):
    """Vectorize two text collections with one TF-IDF vocabulary.

    The vectorizer is fitted on the training texts only and then reused to
    transform the test texts.

    Args:
        X_train: Training text data.
        X_test: Testing text data.

    Returns:
        tuple: ``(X_train_transformed, X_test_transformed)``.
    """
    print("Apply TF-IDF")
    vectorizer = TfidfVectorizer()
    train_matrix = vectorizer.fit_transform(X_train)
    return train_matrix, vectorizer.transform(X_test)


def _fit_and_predict_proba(model, X_train, y_train, X_test):
    """Fit *model* and return its class probabilities for *X_test*.

    If the estimator raises ``TypeError`` on the sparse TF-IDF matrices
    (dense-only estimators such as GaussianNB do), retry with dense arrays.
    """
    try:
        model.fit(X_train, y_train)
        return model.predict_proba(X_test)
    except TypeError:
        if not hasattr(X_train, "toarray"):
            raise
        model.fit(X_train.toarray(), y_train)
        return model.predict_proba(X_test.toarray())


def _proba_to_labels(model, y_proba, threshold):
    """Turn a probability matrix into labels drawn from ``model.classes_``."""
    classes = model.classes_
    if len(classes) == 2:
        # Binary: the second column is the "positive" class; apply threshold.
        return np.where(y_proba[:, 1] >= threshold, classes[1], classes[0])
    # Multi-class: pick the most probable class; the threshold does not apply.
    return classes[np.argmax(y_proba, axis=1)]


def evaluate_models(df_train, df_test, models, threshold=0.5):
    """Train each model on TF-IDF features and rank them by macro F1.

    Args:
        df_train (DataFrame): Training DataFrame with 'clean_text' and 'target'.
        df_test (DataFrame): Testing DataFrame with 'clean_text' and 'target'.
        models (dict): Model names mapped to classifier instances; each must
            implement ``fit`` and ``predict_proba``.
        threshold (float): Probability cut-off for the second class. Used
            only when a model was fitted on exactly two classes; otherwise
            the most probable class is predicted.

    Returns:
        tuple: Best model, its name, and its macro F1 score. The first model
        is always a candidate, so the model is ``None`` only when ``models``
        is empty.
    """
    X_train, y_train = df_train["clean_text"], df_train["target"]
    X_test, y_test = df_test["clean_text"], df_test["target"]

    X_train, X_test = apply_tfidf(X_train, X_test)

    best_model, best_model_name, best_f1_score = None, None, 0

    for name, model in models.items():
        print(f"Start training {name}")
        y_proba = _fit_and_predict_proba(model, X_train, y_train, X_test)
        y_pred = _proba_to_labels(model, y_proba, threshold)
        current_f1_score = f1_score(y_test, y_pred, average="macro")

        if best_model is None or current_f1_score > best_f1_score:
            best_f1_score = current_f1_score
            best_model = model
            best_model_name = name

        print_metrics(y_pred, y_test, name + " - Text")
        print(f"Test set F1-score: {current_f1_score}")
        print(f"Test set Precision: {precision_score(y_test, y_pred, average='macro')}")
        print(f"Test set Recall: {recall_score(y_test, y_pred, average='macro')}")
        print("\n\n")
        print(
            "-----------------------------------------------------------------------------------------------------------------------"
        )

    return best_model, best_model_name, best_f1_score

In [None]:
pipeline_classifiers = {
    # "XGBClassifier": XGBClassifier(scale_pos_weight=1, use_label_encoder=False, eval_metric='mlogloss'),
    "GaussianNB":GaussianNB(),
    "RandomForestClassifier": RandomForestClassifier(),
    "SVC": SVC(class_weight="balanced", probability=True),
}

In [None]:
# `columns_round` is otherwise only assigned inside a commented-out cell, so
# build it here: every column of `df` except the category column.
columns_round = set(df.columns) - {"main_business_category"}
print(columns_round)

In [None]:
df_val

In [None]:
# Evaluate the classifiers using a single text column as the only feature.
col_name = "short_description"

# Copy the column subsets so adding "clean_text" below does not write into a
# slice of df_train / df_test (pandas' SettingWithCopyWarning).
df_tmp_train = df_train[[col_name, "target"]].copy()
df_tmp_test = df_test[[col_name, "target"]].copy()

df_tmp_train["clean_text"] = df_tmp_train[col_name].apply(
    lambda x: clean_text(x, to_lemmatize=True)
)
df_tmp_test["clean_text"] = df_tmp_test[col_name].apply(
    lambda x: clean_text(x, to_lemmatize=True)
)

evaluate_models(df_tmp_train, df_tmp_test, pipeline_classifiers)

In [None]:
df_tmp_train

## BERT

In [None]:
import torch.nn as nn
from transformers import BertTokenizer, BertModel


class BERTClass(nn.Module):
    """BERT encoder followed by dropout and one linear classification layer.

    Args:
        bert_model: Name or path passed to ``BertModel.from_pretrained``.
        num_classes: Number of output logits.
        droput_rate: Dropout probability applied to the pooled output
            (parameter name kept as-is for backward compatibility).
    """

    def __init__(
        self,
        bert_model: str = "bert-base-uncased",
        num_classes: int = 2,
        droput_rate: float = 0.3,
    ):
        super().__init__()
        self.bert_model = BertModel.from_pretrained(bert_model, return_dict=True)
        self.dropout = nn.Dropout(droput_rate)
        # Size the head from the loaded checkpoint instead of assuming 768,
        # so checkpoints with another hidden size also work.
        self.linear = nn.Linear(self.bert_model.config.hidden_size, num_classes)
        # Not used in forward(); kept so the attribute still exists.
        self.relu = nn.ReLU()

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return raw logits, one row per input sequence.

        Args:
            input_ids: Token ids.
            attention_mask: Mask marking real tokens versus padding.
            token_type_ids: Optional segment ids; passed through to the
                encoder, ``None`` when omitted.
        """
        output = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        output_dropout = self.dropout(output.pooler_output)
        return self.linear(output_dropout)

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Dataset that tokenizes a text column once, at construction time.

    Each item is a pair ``(encoding, label)`` where ``encoding`` is whatever
    the tokenizer returned for that row and ``label`` is a NumPy scalar array.

    Args:
        data: Frame holding the text and label columns.
        tokenizer: Callable invoked as ``tokenizer(text, padding="max_length",
            max_length=..., truncation=True, return_tensors="pt")``.
        max_length: Length every sequence is padded or truncated to.
        text_column: Name of the column with the raw text.
        target_column: Name of the column with the labels.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        tokenizer,
        max_length: int = 128,
        text_column: str = "text",
        target_column: str = "target",
    ):
        self.labels = data[target_column].to_list()
        self.max_length = max_length
        # Tokenize the frame passed in as `data` (not a module-level frame)
        # so texts and labels always come from the same rows.
        self.texts = [
            tokenizer(
                text,
                padding="max_length",
                max_length=self.max_length,
                truncation=True,
                return_tensors="pt",
            )
            for text in data[text_column]
        ]

    def get_max_len(self, df=None):
        """Return the largest attention-mask sum over all samples (0 if empty).

        ``df`` is accepted for backward compatibility and ignored.
        """
        return max(
            (int(encoding["attention_mask"].sum()) for encoding in self.texts),
            default=0,
        )

    def classes(self):
        """Return the list of labels, one per sample."""
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch the label(s) at idx as a NumPy array
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch the tokenizer output(s) at idx
        return self.texts[idx]

    def __getitem__(self, idx):
        return self.get_batch_texts(idx), self.get_batch_labels(idx)

In [None]:
# Train
def train(
    model,
    train_data,
    val_data,
    learning_rate,
    epochs,
    tokenizer=None,
    batch_size: int = 16,
    max_length: int = 128,
):
    """Fine-tune *model* with Adam and cross-entropy, printing per-epoch stats.

    Args:
        model: Module called as ``model(input_ids, attention_mask,
            token_type_ids)`` and returning logits.
        train_data: Frame handed to ``Dataset`` for training.
        val_data: Frame handed to ``Dataset`` for validation.
        learning_rate: Adam learning rate.
        epochs: Number of passes over the training data.
        tokenizer: Tokenizer handed to ``Dataset``. Defaults to the
            ``"bert-base-uncased"`` ``BertTokenizer``.
        batch_size: Batch size for both loaders.
        max_length: Padding/truncation length handed to ``Dataset``.

    NOTE(review): ``nn.CrossEntropyLoss`` expects targets that are class
    indices in ``[0, num_classes)`` — confirm the ``target`` column is
    encoded that way before training.
    """
    if tokenizer is None:
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    train_set = Dataset(train_data, tokenizer, max_length)
    val_set = Dataset(val_data, tokenizer, max_length)

    train_dataloader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_set, batch_size=batch_size, shuffle=False
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = Adam(model.parameters(), lr=learning_rate)

    def _run_batch(batch, labels):
        """Forward one batch; return (loss tensor, number of correct rows)."""
        labels = labels.to(device).long()
        # Each sample was tokenized with return_tensors="pt", which adds a
        # size-1 dimension at position 1; drop it.
        input_ids = batch["input_ids"].squeeze(1).to(device)
        attention_mask = batch["attention_mask"].squeeze(1).to(device)
        token_type_ids = None
        if "token_type_ids" in batch:
            token_type_ids = batch["token_type_ids"].squeeze(1).to(device)
        logits = model(input_ids, attention_mask, token_type_ids)
        loss = criterion(logits, labels)
        correct = (logits.argmax(dim=1) == labels).sum().item()
        return loss, correct

    # Loss is already averaged within a batch, so average it over batches;
    # accuracy is a count of rows, so average it over samples.
    train_batches = max(len(train_dataloader), 1)
    val_batches = max(len(val_dataloader), 1)
    train_samples = max(len(train_data), 1)
    val_samples = max(len(val_data), 1)

    for epoch_num in range(epochs):

        total_acc_train = 0
        total_loss_train = 0.0

        model.train()

        for train_input, train_label in tqdm(train_dataloader):
            batch_loss, correct = _run_batch(train_input, train_label)
            total_loss_train += batch_loss.item()
            total_acc_train += correct

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        total_acc_val = 0
        total_loss_val = 0.0

        # Switch off dropout for validation.
        model.eval()
        with torch.no_grad():
            for val_input, val_label in val_dataloader:
                batch_loss, correct = _run_batch(val_input, val_label)
                total_loss_val += batch_loss.item()
                total_acc_val += correct

        print(
            f"Epochs: {epoch_num + 1}"
            f" | Train Loss: {total_loss_train / train_batches: .3f}"
            f" | Train Accuracy: {total_acc_train / train_samples: .3f}"
            f" | Val Loss: {total_loss_val / val_batches: .3f}"
            f" | Val Accuracy: {total_acc_val / val_samples: .3f}"
        )

# Evaluate


def evaluate(
    model,
    test_data,
    model_name: str = "Model name",
    tokenizer=None,
    threshold: float = 0.3,
    batch_size: int = 2,
    max_length: int = 128,
):
    """Run *model* on *test_data*, then print metrics and plot a confusion matrix.

    When the model emits two logits, class 1 is predicted whenever its softmax
    probability exceeds ``threshold`` and precision/recall/F1 use binary
    averaging. With any other number of logits the most probable class is
    predicted and the scores are support-weighted averages. Accuracy is
    computed from the same predictions as every other metric.

    Args:
        model: Module called as ``model(input_ids, attention_mask,
            token_type_ids)`` and returning logits.
        test_data: Frame handed to ``Dataset``.
        model_name: Title of the confusion-matrix plot.
        tokenizer: Tokenizer handed to ``Dataset``. Defaults to the
            ``"bert-base-uncased"`` ``BertTokenizer``.
        threshold: Probability cut-off for class 1 (two-logit models only).
        batch_size: Batch size of the evaluation loader.
        max_length: Padding/truncation length handed to ``Dataset``.
    """
    if tokenizer is None:
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    test_set = Dataset(test_data, tokenizer, max_length)
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    y_true = []
    y_pred = []
    num_classes = 2

    model.eval()
    with torch.no_grad():

        for test_input, test_label in test_dataloader:
            # Each sample was tokenized with return_tensors="pt", which adds
            # a size-1 dimension at position 1; drop it.
            input_id = test_input["input_ids"].squeeze(1).to(device)
            mask = test_input["attention_mask"].squeeze(1).to(device)
            token_type_ids = None
            if "token_type_ids" in test_input:
                token_type_ids = test_input["token_type_ids"].squeeze(1).to(device)

            output = model(input_id, mask, token_type_ids)
            probabilities = F.softmax(output, dim=1)
            num_classes = probabilities.shape[1]

            if num_classes == 2:
                preds = (probabilities[:, 1] > threshold).long()
            else:
                preds = probabilities.argmax(dim=1)

            y_true.extend(test_label.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())

    print(classification_report(y_true, y_pred))

    # plot confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    sns.heatmap(cm, annot=True, fmt="g")
    plt.title(model_name)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

    average = "binary" if num_classes == 2 else "weighted"
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)
    f1 = f1_score(y_true, y_pred, average=average)
    print(f"Test Accuracy: {accuracy: .3f}")
    print(f"Test Precision: {precision: .3f}")
    print(f"Test Recall: {recall: .3f}")
    print(f"Test F1: {f1: .3f}")