In [15]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from projAll import batch_size, filtered_labels_at_least_5_list, CustomBertModel, create_dataset, test

In [16]:
# Shared inference configuration: label vocabulary, tokenizer and target device.
# `batch_size` is used exactly as imported from projAll; no re-binding is needed.
labels_list = filtered_labels_at_least_5_list
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Label string -> integer class index, assigned by position in labels_list.
label_mapping = {label: idx for idx, label in enumerate(labels_list)}
# Integer class index -> label string, used to decode predicted indices.
reverse_label_mapping = {idx: label for label, idx in label_mapping.items()}
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [18]:
# Checkpoint file whose weights are evaluated by the cells below.
model_path = "model_2024-12-16_14-53-35.pth"


In [22]:
# Run the saved checkpoint over the baseline CSV and write its predictions
# next to the baseline's own predictions for a side-by-side comparison.
baseline_csv = "baseline.csv"
output_csv = "baselineVSmodel_predictions.csv"

baseline_df = pd.read_csv(baseline_csv)

# Build the model with one output per known label, then restore the weights.
_model = CustomBertModel(num_labels=len(filtered_labels_at_least_5_list))

# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
_model.load_state_dict(state_dict)
_model.to(device)
_model.eval()

# SequentialSampler keeps the batches in row order, so the i-th prediction
# lines up with the i-th row of baseline_df when assigned as a column below.
dataset = create_dataset(baseline_df, tokenizer, label_mapping)
data_loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=batch_size)

# Only the first value returned by `test` is used; each entry is decoded from
# a class index back to its label string.
predictions, _ = test(_model, data_loader, device)
predictions = [reverse_label_mapping[pred] for pred in predictions]

# Name the column after the checkpoint so outputs of different runs stay distinguishable.
prediction_column_name = f"{model_path} prediction"
baseline_df[prediction_column_name] = predictions

# Copy the baseline's predictions under an explicit name for the export.
baseline_df['baseline Prediction'] = baseline_df['Prediction']

# Save the predictions to CSV
baseline_df[['Artifact Id', 'baseline Prediction', prediction_column_name]].to_csv(output_csv, index=False)
print(f"Predictions saved to {output_csv}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

F1 Score (Weighted): 0.5568
Accuracy: 62.50%
Predictions saved to baselineVSmodel_predictions.csv


In [21]:
# Score the baseline and the fine-tuned model against the ground-truth labels.


def _strip_label_prefix(labels):
    """Return `labels` as a list with 'd3f:' removed from every string entry.

    Non-string entries are passed through unchanged. Labels that do not
    contain 'd3f:' are returned as-is, so applying this to an already
    normalized column is a no-op.
    """
    return [
        label.replace('d3f:', '') if isinstance(label, str) else label
        for label in labels
    ]


# Normalize all three label series the same way so that they are compared in
# one label namespace; previously the baseline predictions were left as-is.
true_labels = _strip_label_prefix(baseline_df['Artifact Id'].tolist())
baseline_predictions = _strip_label_prefix(baseline_df['baseline Prediction'].tolist())
model_predictions = _strip_label_prefix(baseline_df[prediction_column_name].tolist())

# Weighted F1: per-class F1 averaged with weights proportional to class support.
f1_baseline = f1_score(true_labels, baseline_predictions, average='weighted')
f1_model = f1_score(true_labels, model_predictions, average='weighted')

# Accuracy for both models, expressed as a percentage.
accuracy_baseline = accuracy_score(true_labels, baseline_predictions) * 100
accuracy_model = accuracy_score(true_labels, model_predictions) * 100

# Print out the results
print(f"Baseline Model F1 Score (Weighted): {f1_baseline:.4f}")
print(f"Baseline Model Accuracy: {accuracy_baseline:.2f}%")
print(f"Model F1 Score (Weighted): {f1_model:.4f}")
print(f"Model Accuracy: {accuracy_model:.2f}%")

Baseline Model F1 Score (Weighted): 0.1667
Baseline Model Accuracy: 18.75%
Model F1 Score (Weighted): 0.5568
Model Accuracy: 62.50%
