# Model Evaluation

## Imports

In [1]:
from sklearn.metrics import f1_score, precision_score, accuracy_score, recall_score
from src.model import DistilBertForClassification
from transformers import DistilBertTokenizer
from pathlib import Path
from tqdm import tqdm
import pandas as pd
import torch

# Register tqdm's pandas integration so that Series/DataFrame objects gain
# `progress_apply`, which the inference cell uses to show a progress bar.
tqdm.pandas()

## Functions

In [2]:
def get_predictions(model, tokenizer, input_text, classes):
    """Return the predicted class index for a single piece of text.

    Args:
        model: Classifier invoked as ``model(input_ids, attention_mask)``. Its
            output is reduced with ``argmax`` over dimension 1.
        tokenizer: Callable that accepts the text plus the keyword arguments
            used below and returns a mapping holding ``"input_ids"`` and
            ``"attention_mask"`` tensors.
        input_text: Text to classify. It must tokenize to a single row,
            because the result is unwrapped with ``.item()``.
        classes: Currently unused; kept so existing callers keep working.

    Returns:
        int: Index of the highest-scoring class (an index, not a class name).
    """
    inputs = tokenizer(
        input_text, return_tensors="pt", padding=True, truncation=True, max_length=512
    )
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Follow the model onto whatever device its parameters live on, so the call
    # does not fail with a device mismatch once the model is moved off the CPU.
    # Plain callables without parameters are left untouched.
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        input_ids = input_ids.to(first_param.device)
        attention_mask = attention_mask.to(first_param.device)

    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        pred = model(input_ids, attention_mask)

    return torch.argmax(pred, dim=1).item()

## Initial Variables

In [None]:
# Locations of the trained weights and the test split, relative to this notebook.
model_path = Path("..") / "data" / "models" / "Amazon_Product_Classifier.pth"
dataset_path = Path("..") / "data" / "datasets" / "test.csv"

# Pick the accelerator in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

(model_path, dataset_path, device)

### Classes

In [4]:
# Product category names. The list has 22 entries, matching the number of
# classes the model is built with.
# NOTE(review): presumably the position of each name is the label index the
# model was trained with -- confirm against the training code.
classes = [
    "All Electronics",
    "Amazon Fashion",
    "Amazon Home",
    "Arts, Crafts & Sewing",
    "Automotive",
    "Books",
    "Camera & Photo",
    "Cell Phones & Accessories",
    "Computers",
    "Digital Music",
    "Grocery",
    "Health & Personal Care",
    "Home Audio & Theater",
    "Industrial & Scientific",
    "Movies & TV",
    "Musical Instruments",
    "Office Products",
    "Pet Supplies",
    "Sports & Outdoors",
    "Tools & Home Improvement",
    "Toys & Games",
    "Video Games",
]

### Dataset Load

In [None]:
# Load the test split that the model is evaluated on.
df = pd.read_csv(dataset_path)

# Fail fast if a column the later cells rely on is missing, instead of hitting
# a KeyError only after the slow inference pass has already run.
required_columns = {"feature_concat", "main_cat"}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, f"{dataset_path} is missing columns: {sorted(missing_columns)}"

df

### Define Model and Tokenizer

In [None]:
# Size the classification head from the category list rather than a hard-coded
# count, so the two cannot drift apart.
model = DistilBertForClassification(num_classes=len(classes))
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# `device` is already a torch.device, so it is passed as map_location directly.
# NOTE(review): torch.load relies on pickle-based deserialization; only load
# checkpoints that come from a trusted source.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)

# Switch to evaluation mode before running inference.
model.eval()


## Generate Predictions

In [None]:
def _predict_row(text):
    """Classify one `feature_concat` value with the loaded model and tokenizer."""
    return get_predictions(model, tokenizer, text, classes)


# Run the model over every row, with a tqdm progress bar, and store the result
# in a new `predictions` column.
df["predictions"] = df["feature_concat"].progress_apply(_predict_row)
df

### Calculate Metrics

In [None]:
# Ground-truth labels and model predictions for the test split.
# NOTE(review): get_predictions returns integer class indices, so `main_cat`
# is presumably integer-encoded as well -- confirm against the dataset.
y_true = df["main_cat"]
y_pred = df["predictions"]

# Weighted averaging weights each class's score by its support.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average="weighted")
recall = recall_score(y_true, y_pred, average="weighted")
f1 = f1_score(y_true, y_pred, average="weighted")

# `.2f` prints two decimal places; the previous `2f` spec only set a minimum
# width of 2 and therefore printed six decimals.
print(
    f"Accuracy: {accuracy*100:.2f}%, Precision: {precision*100:.2f}%, Recall: {recall*100:.2f}%, F1: {f1*100:.2f}%\n\n"
)