In [1]:
import sys
sys.path.append("..")  # Go to parent directory
from generating_data.generate_loaders import (
    create_Tang_multi_loaders,
    create_GM_test_multi_loaders
    )
from models.CNNs import (multiTrainer, CNN_1convlayer_k3_1afflayer)

In [2]:
# Build the train / validation / test loaders from the pickled Tang move data.
Tang_train_loader, Tang_val_loader, Tang_test_loader = create_Tang_multi_loaders(
    '../data/pkl/moves/Tang_moves.pkl'
)

# Fresh network plus a trainer wired to the train and validation loaders;
# the test loader is held back for the evaluation cell further down.
model = CNN_1convlayer_k3_1afflayer()
trainer = multiTrainer(
    net=model,
    train_loader=Tang_train_loader,
    val_loader=Tang_val_loader,
)

In [None]:
import torch.nn as nn
# Run training and keep both the final model and the best one the trainer
# reports, together with the per-epoch loss / accuracy histories.
# NOTE(review): `momentum` is passed together with optim_name='adamw'; whether
# multiTrainer.train uses it for that optimizer is not visible here -- confirm.
# NOTE(review): `step_size` / `learning_rate_decay` presumably configure a
# step-wise LR schedule -- verify against multiTrainer.train.
(
    completed_model,
    best_model,
    train_losses,
    val_losses,
    train_acc,
    val_acc,
) = trainer.train(
    optim_name='adamw',
    loss_function=nn.CrossEntropyLoss(),
    num_epochs=10,
    learning_rate=1e-3,
    momentum=0.9,
    step_size=5,
    learning_rate_decay=0.9,
    acc_frequency=1,
    verbose=True,
)

100%|██████████| 1186/1186 [00:37<00:00, 31.98it/s]


Epoch 1/10: Train Loss = 1.4683, Train Acc = 0.3781, Val Loss = 1.3715, Val Acc = 0.4194


100%|██████████| 1186/1186 [00:34<00:00, 34.41it/s]


Epoch 2/10: Train Loss = 1.3439, Train Acc = 0.4307, Val Loss = 1.3257, Val Acc = 0.4383


 42%|████▏     | 496/1186 [00:14<00:19, 34.64it/s]

In [None]:
import torch
import tqdm
from sklearn.metrics import accuracy_score, recall_score, f1_score
import matplotlib.pyplot as plt
import pandas as pd

def _plot_prediction_counts(classes, correct_counts, incorrect_counts):
    """Show a grouped bar chart of correct vs. incorrect predictions per class."""
    bar_width = 0.35
    positions = list(range(len(classes)))

    _fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(positions, correct_counts, bar_width, label='Correct')
    ax.bar([p + bar_width for p in positions], incorrect_counts, bar_width, label='Incorrect')

    ax.set_xlabel('Piece Type')
    ax.set_ylabel('Count')
    ax.set_title('Correct vs Incorrect Predictions by Piece Type')
    # Centre each tick between the two bars of its group.
    ax.set_xticks([p + bar_width / 2 for p in positions])
    ax.set_xticklabels(classes)
    ax.legend()
    plt.show()


def evaluate_per_piece_accuracy(model, test_loader, device):
    """Evaluate a 6-class piece classifier and summarise per-class metrics.

    The model is put in eval mode and run over ``test_loader`` without
    gradients. For every class (P, N, B, R, Q, K -> labels 0..5) the function
    prints the per-class accuracy, plots correct vs. incorrect counts, prints
    a summary table and returns it.

    Notes:
        * Per-class "Accuracy (%)" is ``correct / total`` over samples whose
          TRUE label is that class, which is arithmetically the class recall,
          so it coincides with the "Recall (%)" column.
        * The accuracy in the 'Weighted' row is the plain overall accuracy
          (``accuracy_score``); only recall and F1 use ``average='weighted'``.
        * The model is not moved here; it must already live on ``device``.

    Args:
        model: module called as ``model(X)``; its output is arg-maxed over
            dimension 1 to obtain the predicted class.
        test_loader: iterable of batches where ``batch[0]`` is the input and
            ``batch[1]`` the integer class labels.
        device: device the batch tensors are moved to before the forward pass.

    Returns:
        pandas.DataFrame with columns 'Piece Type', 'Accuracy (%)',
        'Recall (%)', 'F1 Score (%)': one row per class (index 0..5) plus a
        final row with index label 'Weighted'. Classes without samples, and
        every row when the loader is empty, are reported as 0.

    Raises:
        ValueError: if a label lies outside ``0..5``.
    """
    classes = ['P', 'N', 'B', 'R', 'Q', 'K']
    num_classes = len(classes)

    model.eval()  # Set the model to evaluation mode
    label_batches = []
    pred_batches = []

    with torch.no_grad():
        for data in tqdm.tqdm(test_loader):
            X, y = data[0].to(device), data[1].to(device)

            # Forward pass
            outputs = model(X)
            _, predicted = torch.max(outputs, 1)

            # One device-to-host copy per batch; counting happens afterwards on
            # the concatenated tensors instead of per sample inside the loop.
            label_batches.append(y.reshape(-1).long().cpu())
            pred_batches.append(predicted.reshape(-1).long().cpu())

    if label_batches:
        labels = torch.cat(label_batches)
        preds = torch.cat(pred_batches)
    else:
        labels = torch.empty(0, dtype=torch.long)
        preds = torch.empty(0, dtype=torch.long)
    has_samples = labels.numel() > 0

    # Fail loudly on labels that do not map to a piece; a negative label would
    # otherwise be silently credited to another class by Python list indexing.
    if has_samples and (int(labels.min()) < 0 or int(labels.max()) >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes - 1}], "
            f"got range [{int(labels.min())}, {int(labels.max())}]"
        )

    # Vectorised per-class counts, keyed by the true label.
    class_total = torch.bincount(labels, minlength=num_classes).tolist()
    class_correct = torch.bincount(labels[preds == labels], minlength=num_classes).tolist()

    all_labels = labels.numpy()
    all_preds = preds.numpy()

    # A single sklearn pass per metric yields all per-class scores at once.
    # zero_division=0 keeps the value sklearn already uses for undefined scores
    # but suppresses the warning for classes that are never predicted.
    if has_samples:
        label_ids = list(range(num_classes))
        per_class_recall = recall_score(
            all_labels, all_preds, labels=label_ids, average=None, zero_division=0
        )
        per_class_f1 = f1_score(
            all_labels, all_preds, labels=label_ids, average=None, zero_division=0
        )
    else:
        per_class_recall = [0.0] * num_classes
        per_class_f1 = [0.0] * num_classes

    # Calculate metrics for each piece
    correct_counts = []
    incorrect_counts = []
    accuracies = []
    recalls = []
    f1_scores = []

    for i, piece in enumerate(classes):
        if class_total[i] > 0:
            accuracy = 100 * class_correct[i] / class_total[i]
            correct_counts.append(class_correct[i])
            incorrect_counts.append(class_total[i] - class_correct[i])
            accuracies.append(accuracy)
            recalls.append(float(per_class_recall[i]) * 100)
            f1_scores.append(float(per_class_f1[i]) * 100)
            print(f"{piece}: Accuracy: {accuracy:.2f}%")
        else:
            print(f"{piece}: No samples available.")
            correct_counts.append(0)
            incorrect_counts.append(0)
            accuracies.append(0)
            recalls.append(0)
            f1_scores.append(0)

    _plot_prediction_counts(classes, correct_counts, incorrect_counts)

    # Per-class summary table
    df = pd.DataFrame({
        'Piece Type': classes,
        'Accuracy (%)': accuracies,
        'Recall (%)': recalls,
        'F1 Score (%)': f1_scores
    })

    # Dataset-level metrics; defined as 0 when there is nothing to score.
    if has_samples:
        weighted_accuracy = accuracy_score(all_labels, all_preds) * 100
        weighted_recall = recall_score(
            all_labels, all_preds, average='weighted', zero_division=0
        ) * 100
        weighted_f1 = f1_score(
            all_labels, all_preds, average='weighted', zero_division=0
        ) * 100
    else:
        weighted_accuracy = weighted_recall = weighted_f1 = 0.0

    # Appended as an extra row labelled 'Weighted' after the six class rows.
    df.loc['Weighted'] = ['Weighted', weighted_accuracy, weighted_recall, weighted_f1]

    print("\nOverall Accuracy Metrics:")
    print(df)

    return df

# Evaluate on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# `best_model` is produced by the training cell above, so that cell must have
# run to completion before this one is executed.
df_metrics = evaluate_per_piece_accuracy(best_model, Tang_test_loader, device)