In [1]:
import pickle
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_pil_image
from torch.utils.data import Dataset, DataLoader

def unpickle(file):
    """Load and return the object pickled in ``file``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so string keys written by Python 2 come back as ``bytes`` (e.g. ``b'data'``).

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on
    archives from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Folder holding the extracted CIFAR-10 python batch files
data_dir = 'data/cifar-10-batches-py'

# Read the five training batches: data_batch_1 ... data_batch_5
train_batches = [
    unpickle(os.path.join(data_dir, f'data_batch_{batch_no}'))
    for batch_no in range(1, 6)
]

# Per-batch pixel arrays, and one flat list with every label
data_list = [batch[b'data'] for batch in train_batches]
labels_list = [label for batch in train_batches for label in batch[b'labels']]

# Stack the batches row-wise into single numpy arrays
data_array = np.vstack(data_list)
labels_array = np.array(labels_list)

# Labels become int64 class indices
labels_tensor = torch.tensor(labels_array, dtype=torch.long)

# Pixels: float32, scaled by 1/255, then viewed as
# (num_samples, channels, height, width) = (-1, 3, 32, 32)
data_tensor = (
    torch.tensor(data_array, dtype=torch.float32)
    .div(255.0)
    .view(-1, 3, 32, 32)
)

class CIFAR10Dataset(Dataset):
    """Dataset wrapper around an image tensor and a matching label tensor.

    Items are returned as ``(image, label)``. The image is first converted
    to a PIL image and then, when a transform was supplied, passed through it.
    """

    def __init__(self, data_tensor, labels_tensor, transform=None):
        """Store the image tensor, the label tensor and an optional transform."""
        self.data, self.labels = data_tensor, labels_tensor
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the length of the stored image tensor."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        raw_image, target = self.data[idx], self.labels[idx]

        # The transform pipeline is applied to a PIL image, not the raw tensor
        image = to_pil_image(raw_image)

        if not self.transform:
            # No transform configured: hand back the PIL image unchanged
            return image, target
        return self.transform(image), target


# Per-channel statistics used for input normalization
# (labelled "CIFAR-10 normalization" in this notebook)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random augmentation, then tensor conversion + normalization
train_augmentations = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
transform = transforms.Compose(
    train_augmentations
    + [transforms.ToTensor(), transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)]
)

# Evaluation pipeline: no augmentation, only tensor conversion + normalization
test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)]
)

# Read the single test batch file
test_file_path = 'data/cifar-10-batches-py/test_batch'
test_batch_data = unpickle(test_file_path)

# Pull pixels and labels out as numpy arrays
test_data_array = np.array(test_batch_data[b'data'])
test_labels_array = np.array(test_batch_data[b'labels'])

# Labels become int64 class indices
test_labels_tensor = torch.tensor(test_labels_array, dtype=torch.long)

# Pixels: float32, scaled by 1/255, then viewed as (-1, 3, 32, 32),
# the same treatment the training tensor receives
test_data_tensor = (
    torch.tensor(test_data_array, dtype=torch.float32)
    .div(255.0)
    .view(-1, 3, 32, 32)
)
# Wrap the tensors in the custom dataset: augmented pipeline for training,
# plain normalization pipeline for testing
train_dataset = CIFAR10Dataset(
    data_tensor,
    labels_tensor,
    transform=transform,
)
test_dataset = CIFAR10Dataset(
    test_data_tensor,
    test_labels_tensor,
    transform=test_transform,
)

# Batch both datasets; only the training loader shuffles and uses worker processes
batch_size = 32
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [3]:
from torchvision.models import convnext_base  # Importing ConvNeXt Base model
import torch.optim as optim
import time
import torch.nn as nn

# Build ConvNeXt-Base with torchvision's default weights and adapt it to CIFAR-10
model = convnext_base(weights='DEFAULT')

# Swap the first feature module for a 3x3, stride-1 convolution and resize the
# final classifier layer to 10 outputs.
# NOTE(review): this replaces the whole `features[0]` module, so anything else the
# original stem contained (and its loaded weights) is discarded — confirm intended.
model.features[0] = nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, bias=False)
model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, 10)  # CIFAR-10 has 10 classes

# Move the model to its device BEFORE building the optimizer, as the torch.optim
# documentation recommends, so the optimizer is created over the on-device parameters.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.00009, weight_decay=0.015)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Every 50 batches, report the mean loss over the batches seen so far this epoch
        if (i + 1) % 50 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / (i+1):.4f}')

    # Report wall-clock time per epoch (start_time was previously recorded but never used)
    print(f'Epoch [{epoch+1}/{num_epochs}] finished in {time.time() - start_time:.1f}s')

Epoch [1/3], Loss: 2.1040
Epoch [1/3], Loss: 1.9875
Epoch [1/3], Loss: 1.8964
Epoch [1/3], Loss: 1.8115
Epoch [1/3], Loss: 1.7343
Epoch [1/3], Loss: 1.6545
Epoch [1/3], Loss: 1.5915
Epoch [1/3], Loss: 1.5257
Epoch [1/3], Loss: 1.4609
Epoch [1/3], Loss: 1.4075
Epoch [1/3], Loss: 1.3561
Epoch [1/3], Loss: 1.3046
Epoch [1/3], Loss: 1.2629
Epoch [1/3], Loss: 1.2230
Epoch [1/3], Loss: 1.1848
Epoch [1/3], Loss: 1.1499
Epoch [1/3], Loss: 1.1177
Epoch [1/3], Loss: 1.0885
Epoch [1/3], Loss: 1.0614
Epoch [1/3], Loss: 1.0354
Epoch [1/3], Loss: 1.0124
Epoch [1/3], Loss: 0.9884
Epoch [1/3], Loss: 0.9663
Epoch [1/3], Loss: 0.9450
Epoch [1/3], Loss: 0.9256
Epoch [1/3], Loss: 0.9076
Epoch [1/3], Loss: 0.8906
Epoch [1/3], Loss: 0.8733
Epoch [1/3], Loss: 0.8576
Epoch [1/3], Loss: 0.8418
Epoch [1/3], Loss: 0.8274
Epoch [2/3], Loss: 0.3825
Epoch [2/3], Loss: 0.4006
Epoch [2/3], Loss: 0.3755
Epoch [2/3], Loss: 0.3619
Epoch [2/3], Loss: 0.3542
Epoch [2/3], Loss: 0.3486
Epoch [2/3], Loss: 0.3438
Epoch [2/3],

In [4]:
from sklearn.metrics import classification_report, accuracy_score

# Score the model on the test loader and summarise per-class metrics
model.eval()
y_true, y_pred = [], []

# Label names handed to the report, in the order given to `target_names`
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        # Index of the largest logit along dim 1 is the predicted class
        predicted = outputs.max(dim=1).indices

        # Accumulate this batch's ground truth and predictions as plain Python ints
        y_true.extend(labels.cpu().tolist())
        y_pred.extend(predicted.cpu().tolist())

# Per-class precision / recall / F1
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=class_names))

# Overall accuracy across the collected predictions
overall_accuracy = accuracy_score(y_true, y_pred)
print(f'Overall Accuracy: {overall_accuracy * 100:.2f}%')

Classification Report:
              precision    recall  f1-score   support

    airplane       0.98      0.96      0.97      1000
  automobile       0.96      0.98      0.97      1000
        bird       0.97      0.95      0.96      1000
         cat       0.91      0.91      0.91      1000
        deer       0.96      0.97      0.96      1000
         dog       0.91      0.93      0.92      1000
        frog       0.97      0.98      0.98      1000
       horse       0.99      0.96      0.98      1000
        ship       0.98      0.98      0.98      1000
       truck       0.97      0.96      0.97      1000

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

Overall Accuracy: 96.04%


In [6]:
# Persist only the model's state dict (parameters/buffers), not the full module object
checkpoint_path = 'cifar10_convnext.pth'
torch.save(model.state_dict(), checkpoint_path)
print("Model has been saved to cifar10_convnext.pth")

Model has been saved to cifar10_convnext.pth
