In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report

In [2]:
# Data Augmentations 
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [None]:
# Model Architecture 
class ClassicalCNN(nn.Module):
    def __init__(self):
        super().__init__()
        # 1st Convo layer
        self.conv1 = nn.Conv2d(1, 2, kernel_size=5)
        # Input: Takes one Greyscale image as Input
        # Output:  2 feature maps
        # Kernel: 5x5

        # 2nd Convo layer
        self.conv2 = nn.Conv2d(2, 16, kernel_size=5) # 2nd Convo layer
        # Input: 2 input channel
        # Output:  16 feature maps
        # Kernel: 5x5

        # regularization : Drupout Layer
        self.dropout = nn.Dropout2d()
        
        # 1st Fully connected layer:
        # Flat Convolutional features (400) ---> 64 features
        # converts convolutional features to dense representation
        # For 32x32 CIFAR images, after convolutions and pooling we get 16x5x5=400 features
        self.fc1 = nn.Linear(400, 64)  # Dense layer

        # 2nd Fully connected layer:
        # 64 features ----> 2 features
        self.fc2 = nn.Linear(64, 2)  # Output to 2 classes
        
        # 3rd Fully Connected Layer:
        # 2 features ----> 1 featuture
        self.fc3 = nn.Linear(2, 1)

        # 4th Fully Connected Layer:
        # 1 feature ------> 2 features
        self.fc4 = nn.Linear(1, 2)

    def forward(self, x):
        # 1st Layer
        x = F.relu(self.conv1(x)) # Relu Function
        x = F.max_pool2d(x, 2) # Max Pooling

        # 2nd Convv Layer 
        x = F.relu(self.conv2(x)) # ReLU activation
        x = F.max_pool2d(x, 2)    # Max Pooling

        # dropout for regularization
        x = self.dropout(x)
        
        # Flatten layer
        # Flatten the 3D tensor to 1D for dense layers
        x = x.view(x.shape[0], -1)  # Shape becomes [batch_size, 400]
        
        # 1st Fully connected layer:
        x = F.relu(self.fc1(x)) # ReLU

        # 2nd Fully connected layer:
        x = self.fc2(x) # No activation

        # 3rd Fully Connected Layer:
        x = F.relu(self.fc3(x)) # ReLU

        # 4th Fully Connected Layer:
        x = self.fc4(x)
        return F.log_softmax(x, dim=1) # softmax Function

In [None]:
from torchinfo import summary
demo_Model = ClassicalCNN()
summary(demo_Model, input_size=(1, 1, 32, 32))  # CIFAR-10 images: 32×32

Layer (type:depth-idx)                   Output Shape              Param #
ClassicalCNN                             [1, 2]                    --
├─Conv2d: 1-1                            [1, 2, 28, 28]            52
├─Conv2d: 1-2                            [1, 16, 10, 10]           816
├─Dropout2d: 1-3                         [1, 16, 5, 5]             --
├─Linear: 1-4                            [1, 64]                   25,664
├─Linear: 1-5                            [1, 2]                    130
├─Linear: 1-6                            [1, 1]                    3
├─Linear: 1-7                            [1, 2]                    4
Total params: 26,669
Trainable params: 26,669
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.15
Input size (MB): 0.00
Forward/backward pass size (MB): 0.03
Params size (MB): 0.11
Estimated Total Size (MB): 0.14

In [None]:
# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

In [None]:
def generate_combinations(digits=10, length=2):
    """
    Generate combinations of CIFAR-10
    """
    result = []    
    if length == 2:
        for i in range(digits):
            for j in range(i + 1, digits):
                result.append([i, j])
    else:
        def backtrack(current_combination, start_index):
            if len(current_combination) == length:
                result.append(current_combination[:])
                return
            for i in range(start_index, digits):
                current_combination.append(i)
                backtrack(current_combination, i + 1)
                current_combination.pop()
        
        backtrack([], 0)
    
    return result
a = generate_combinations()
print(f"Number of combinations: {len(a)}")


In [None]:
def evaluate(class_indices):
    print(f"\n\n=== Evaluating classes {class_indices} ===")  
# All Possible classes       
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Select Class     
    selected_classes = [class_names[idx] for idx in class_indices]
    print(f"Selected classes: {selected_classes}")

# Training    
# Filter data
    filtered_train_data = [(image, label) for image, label in trainset if label in class_indices]
    filtered_train_images, filtered_train_labels = zip(*filtered_train_data)    
# Map labels to binary 
    filtered_train_labels = [0 if label == class_indices[0] else 1 for label in filtered_train_labels]

# Testing 
# Filter data
    filtered_test_data = [(image, label) for image, label in testset if label in class_indices]
    filtered_test_images, filtered_test_labels = zip(*filtered_test_data)    
# Map labels to binary 
    filtered_test_labels = [0 if label == class_indices[0] else 1 for label in filtered_test_labels]
    
    
# Create datasets and loaders
    train_dataset = torch.utils.data.TensorDataset(
        torch.stack(filtered_train_images),
        torch.tensor(filtered_train_labels)
        )
    test_dataset = torch.utils.data.TensorDataset(
        torch.stack(filtered_test_images),
        torch.tensor(filtered_test_labels)
    )
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# Use the best available device to run 
# MPS for Mac, CUDA for NVIDIA GPU, or CPU    
    device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # Initialize the CNN model
    model = ClassicalCNN().to(device)

    criterion = nn.NLLLoss() # loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer
    
    num_epochs = 10 # Number of Epochs

    # Training of Model 
    model.train()
    for epoch in range(num_epochs):
        total_loss = []
        for batch_idx, (data, target) in enumerate(train_loader):
            # Move data to device
            data, target = data.to(device), target.to(device)
            # Zero gradients before backward pass
            optimizer.zero_grad(set_to_none=True) 

            output = model(data) # Forward pass
            loss = criterion(output, target) # Calculate loss
            loss.backward() # Backward pass   
            optimizer.step() # Update weights
            total_loss.append(loss.item()) # Track loss
        

            # Track Progress:
            """
            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}] Batch [{batch_idx+1}/{len(train_loader)}] '
                  f'Loss: {loss.item():.4f}')
    
            """ 
        # average loss for the epoch    
        avg_loss = sum(total_loss) / len(total_loss)
        # print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')

    # print('Finished Training')
    
    def evaluate_model(model, test_loader, device): # Evaluate Model 
        model.eval()
        true_labels = []
        pred_labels = []
    
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1) ## Get highest probability class
                true_labels.extend(labels.cpu().numpy())
                pred_labels.extend(predicted.cpu().numpy())
    
        
        return classification_report(true_labels, pred_labels, target_names=selected_classes)
        
    # Evaluate and print model performance on test set    
    print("\nTest Set Performance:")
    a = evaluate_model(model, test_loader, device)
    print(a)

    # Evaluate and print model performance on training set
    print("\nTraining Set Performance:")
    b = evaluate_model(model, train_loader, device)
    print(b)
    
    # Return both performance reports
    return (a, b)

In [None]:
claaisifcation_report_list = []
for i in a:
    claaisifcation_report_list.append(evaluate(i))



=== Evaluating classes [0, 1] ===
Selected classes: ['airplane', 'automobile']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

    airplane       0.50      1.00      0.67      1000
  automobile       0.00      0.00      0.00      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    airplane       0.50      1.00      0.67      5000
  automobile       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [0, 2] ===
Selected classes: ['airplane', 'bird']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

    airplane       0.84      0.73      0.78      1000
        bird       0.76      0.86      0.81      1000

    accuracy                           0.80      2000
   macro avg       0.80      0.80      0.80      2000
weighted avg       0.80      0.80      0.80      2000


Training Set Performance:
              precision    recall  f1-score   support

    airplane       0.86      0.77      0.81      5000
        bird       0.79      0.87      0.83      5000

    accuracy                           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    airplane       0.50      1.00      0.67      5000
        deer       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [0, 5] ===
Selected classes: ['airplane', 'dog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

    airplane       0.50      1.00      0.67      1000
         dog       0.00      0.00      0.00      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    airplane       0.50      1.00      0.67      5000
         dog       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [0, 6] ===
Selected classes: ['airplane', 'frog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

    airplane       0.90      0.78      0.83      1000
        frog       0.80      0.91      0.85      1000

    accuracy                           0.84      2000
   macro avg       0.85      0.84      0.84      2000
weighted avg       0.85      0.84      0.84      2000


Training Set Performance:
              precision    recall  f1-score   support

    airplane       0.92      0.79      0.85      5000
        frog       0.81      0.93      0.87      5000

    accuracy                           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    airplane       0.00      0.00      0.00      5000
        ship       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [0, 9] ===
Selected classes: ['airplane', 'truck']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

    airplane       0.89      0.77      0.82      1000
       truck       0.80      0.91      0.85      1000

    accuracy                           0.84      2000
   macro avg       0.84      0.84      0.84      2000
weighted avg       0.84      0.84      0.84      2000


Training Set Performance:
              precision    recall  f1-score   support

    airplane       0.92      0.78      0.85      5000
       truck       0.81      0.93      0.87      5000

    accuracy                          

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

  automobile       0.50      1.00      0.67      5000
        deer       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [1, 5] ===
Selected classes: ['automobile', 'dog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

  automobile       0.96      0.81      0.88      1000
         dog       0.84      0.96      0.90      1000

    accuracy                           0.89      2000
   macro avg       0.90      0.89      0.89      2000
weighted avg       0.90      0.89      0.89      2000


Training Set Performance:
              precision    recall  f1-score   support

  automobile       0.96      0.82      0.88      5000
         dog       0.84      0.97      0.90      5000

    accuracy                          

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

  automobile       0.00      0.00      0.00      5000
       truck       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [2, 3] ===
Selected classes: ['bird', 'cat']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        bird       0.50      1.00      0.67      1000
         cat       0.00      0.00      0.00      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        bird       0.50      1.00      0.67      5000
         cat       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [2, 4] ===
Selected classes: ['bird', 'deer']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        bird       0.00      0.00      0.00      1000
        deer       0.50      1.00      0.67      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        bird       0.00      0.00      0.00      5000
        deer       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [2, 5] ===
Selected classes: ['bird', 'dog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        bird       0.50      1.00      0.67      1000
         dog       0.00      0.00      0.00      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        bird       0.50      1.00      0.67      5000
         dog       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [2, 6] ===
Selected classes: ['bird', 'frog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        bird       0.73      0.80      0.76      1000
        frog       0.78      0.71      0.74      1000

    accuracy                           0.75      2000
   macro avg       0.76      0.75      0.75      2000
weighted avg       0.76      0.75      0.75      2000


Training Set Performance:
              precision    recall  f1-score   support

        bird       0.77      0.83      0.80      5000
        frog       0.81      0.75      0.78      5000

    accuracy                           0.79

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        bird       0.00      0.00      0.00      5000
       horse       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [2, 8] ===
Selected classes: ['bird', 'ship']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        bird       0.00      0.00      0.00      1000
        ship       0.50      1.00      0.67      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        bird       0.00      0.00      0.00      5000
        ship       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [2, 9] ===
Selected classes: ['bird', 'truck']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        bird       0.92      0.84      0.87      1000
       truck       0.85      0.92      0.89      1000

    accuracy                           0.88      2000
   macro avg       0.88      0.88      0.88      2000
weighted avg       0.88      0.88      0.88      2000


Training Set Performance:
              precision    recall  f1-score   support

        bird       0.93      0.85      0.89      5000
       truck       0.86      0.94      0.90      5000

    accuracy                           0.8

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         cat       0.50      1.00      0.67      5000
        deer       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [3, 5] ===
Selected classes: ['cat', 'dog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

         cat       0.61      0.73      0.67      1000
         dog       0.67      0.54      0.60      1000

    accuracy                           0.64      2000
   macro avg       0.64      0.64      0.63      2000
weighted avg       0.64      0.64      0.63      2000


Training Set Performance:
              precision    recall  f1-score   support

         cat       0.64      0.77      0.70      5000
         dog       0.71      0.57      0.63      5000

    accuracy                           0.67  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         cat       0.50      1.00      0.67      5000
       horse       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [3, 8] ===
Selected classes: ['cat', 'ship']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

         cat       0.00      0.00      0.00      1000
        ship       0.50      1.00      0.67      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         cat       0.00      0.00      0.00      5000
        ship       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [3, 9] ===
Selected classes: ['cat', 'truck']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

         cat       0.88      0.84      0.86      1000
       truck       0.85      0.88      0.87      1000

    accuracy                           0.86      2000
   macro avg       0.86      0.86      0.86      2000
weighted avg       0.86      0.86      0.86      2000


Training Set Performance:
              precision    recall  f1-score   support

         cat       0.90      0.87      0.88      5000
       truck       0.87      0.90      0.89      5000

    accuracy                           0.89

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        deer       0.50      1.00      0.67      5000
         dog       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [4, 6] ===
Selected classes: ['deer', 'frog']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        deer       0.72      0.86      0.79      1000
        frog       0.83      0.67      0.74      1000

    accuracy                           0.77      2000
   macro avg       0.78      0.77      0.76      2000
weighted avg       0.78      0.77      0.76      2000


Training Set Performance:
              precision    recall  f1-score   support

        deer       0.73      0.90      0.81      5000
        frog       0.87      0.66      0.76      5000

    accuracy                           0.78

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        deer       0.50      1.00      0.67      5000
       horse       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [4, 8] ===
Selected classes: ['deer', 'ship']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        deer       0.80      0.92      0.85      1000
        ship       0.91      0.76      0.83      1000

    accuracy                           0.84      2000
   macro avg       0.85      0.84      0.84      2000
weighted avg       0.85      0.84      0.84      2000


Training Set Performance:
              precision    recall  f1-score   support

        deer       0.81      0.92      0.86      5000
        ship       0.90      0.78      0.84      5000

    accuracy                           0.85

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         dog       0.00      0.00      0.00      5000
       horse       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [5, 8] ===
Selected classes: ['dog', 'ship']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

         dog       0.92      0.89      0.90      1000
        ship       0.89      0.92      0.91      1000

    accuracy                           0.91      2000
   macro avg       0.91      0.91      0.91      2000
weighted avg       0.91      0.91      0.91      2000


Training Set Performance:
              precision    recall  f1-score   support

         dog       0.92      0.92      0.92      5000
        ship       0.92      0.92      0.92      5000

    accuracy                           0.92 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        frog       0.50      1.00      0.67      5000
       horse       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [6, 8] ===
Selected classes: ['frog', 'ship']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        frog       0.00      0.00      0.00      1000
        ship       0.50      1.00      0.67      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000


Training Set Performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        frog       0.00      0.00      0.00      5000
        ship       0.50      1.00      0.67      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [6, 9] ===
Selected classes: ['frog', 'truck']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

        frog       0.80      0.92      0.85      1000
       truck       0.91      0.76      0.83      1000

    accuracy                           0.84      2000
   macro avg       0.85      0.84      0.84      2000
weighted avg       0.85      0.84      0.84      2000


Training Set Performance:
              precision    recall  f1-score   support

        frog       0.84      0.95      0.89      5000
       truck       0.94      0.81      0.87      5000

    accuracy                           0.8

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       horse       0.50      1.00      0.67      5000
        ship       0.00      0.00      0.00      5000

    accuracy                           0.50     10000
   macro avg       0.25      0.50      0.33     10000
weighted avg       0.25      0.50      0.33     10000



=== Evaluating classes [7, 9] ===
Selected classes: ['horse', 'truck']
Using device: mps

Test Set Performance:
              precision    recall  f1-score   support

       horse       0.85      0.88      0.87      1000
       truck       0.88      0.84      0.86      1000

    accuracy                           0.86      2000
   macro avg       0.86      0.86      0.86      2000
weighted avg       0.86      0.86      0.86      2000


Training Set Performance:
              precision    recall  f1-score   support

       horse       0.86      0.89      0.87      5000
       truck       0.89      0.85      0.87      5000

    accuracy                           0.

In [4]:
claaisifcation_report_list

[('              precision    recall  f1-score   support\n\n    airplane       0.50      1.00      0.67      1000\n  automobile       0.00      0.00      0.00      1000\n\n    accuracy                           0.50      2000\n   macro avg       0.25      0.50      0.33      2000\nweighted avg       0.25      0.50      0.33      2000\n',
  '              precision    recall  f1-score   support\n\n    airplane       0.50      1.00      0.67      5000\n  automobile       0.00      0.00      0.00      5000\n\n    accuracy                           0.50     10000\n   macro avg       0.25      0.50      0.33     10000\nweighted avg       0.25      0.50      0.33     10000\n'),
 ('              precision    recall  f1-score   support\n\n    airplane       0.84      0.73      0.78      1000\n        bird       0.76      0.86      0.81      1000\n\n    accuracy                           0.80      2000\n   macro avg       0.80      0.80      0.80      2000\nweighted avg       0.80      0.80    