In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 31.7 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 47.1 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 48.6 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 10.0 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninsta

In [None]:
# Imports
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler
from numpy.lib.function_base import append
import numpy as np
from transformers import ConvNextFeatureExtractor, ConvNextForImageClassification
import torch.optim as optim
import time

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [None]:
# Reproducibility: seed PyTorch's and NumPy's global generators with one shared value.
random_seed = 42
torch_rng = torch.manual_seed(random_seed)
np.random.seed(random_seed)

# Human-readable names for the ten class labels, in label order.
classes = (
    'Airplane',
    'Car',
    'Bird',
    'Cat',
    'Deer',
    'Dog',
    'Frog',
    'Horse',
    'Ship',
    'Truck',
)

# Randomly split the CIFAR-10 training set into training and validation subsets.
num_train = 45000  # images kept for training; everything after that is validation

# Loaded without a transform: this copy is only needed for its length and raw pixels.
# (The transformed train/validation datasets are created further down.)
train_data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)

indices = list(range(len(train_data)))
np.random.shuffle(indices)
train_indices = indices[:num_train]
val_indices = indices[num_train:]
train_sample = SubsetRandomSampler(train_indices)
val_sample = SubsetRandomSampler(val_indices)

# Per-channel mean and std, computed from the training subset only so that no
# validation statistics leak into the normalisation constants.
# `train_data.data` is the raw uint8 image array with channels on the last axis,
# so the subset can be sliced out directly instead of decoding images one by one.
img_arr = train_data.data[train_indices]

# Reduce over every axis except the channel axis, then rescale from 0-255 to 0-1.
train_mean = np.mean(img_arr, axis=(0, 1, 2)) / 255
train_std = np.std(img_arr, axis=(0, 1, 2)) / 255
print(train_mean, train_std)

# Preprocessing pipelines. Training applies exactly one randomly chosen option per
# image; evaluation uses the same resize/crop/normalise steps with no randomness.
_augmentation_options = [
    transforms.RandomRotation(degrees=45),
    transforms.GaussianBlur(kernel_size=3),
    transforms.RandomHorizontalFlip(p=1),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),  # from ConvNeXt
    transforms.RandomVerticalFlip(p=0),  # never flips: stands in for "no augmentation"
]
# Selection weights, aligned position-by-position with the options above.
_augmentation_weights = [0.05, 0.05, 0.05, 0.05, 0.8]

train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomChoice(transforms=_augmentation_options, p=_augmentation_weights),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std),
])

test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std),
])

# Re-create the datasets with their transforms attached, then wrap them in loaders.
# Training and validation both read the CIFAR-10 *train* split; their samplers
# decide which indices each loader actually visits.
_cifar_kwargs = dict(root='./data', download=True)
train_data = torchvision.datasets.CIFAR10(train=True, transform=train_transform, **_cifar_kwargs)
val_data = torchvision.datasets.CIFAR10(train=True, transform=test_transform, **_cifar_kwargs)

batch_size = 8
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=train_sample,
    num_workers=2,
)
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=1,
    sampler=val_sample,
    num_workers=2,
)

# Held-out test split, read in its stored order.
test_data = torchvision.datasets.CIFAR10(train=False, transform=test_transform, **_cifar_kwargs)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

# Load the pretrained ConvNeXt-Tiny checkpoint and its matching feature extractor.
feature_extractor = ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
model = ConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")

# Optional: uncomment to freeze the pretrained weights. Because the new head is
# created afterwards, it would remain trainable.
# for param in model.parameters():
#     param.requires_grad = False

# Swap the pretrained classification head for a freshly initialised linear layer
# with one output per class. The input width is read from the existing head
# (768 for this checkpoint) instead of being hard-coded.
# (An earlier nn.Sequential(...) assignment was overwritten on the very next line
# and therefore had no effect; it has been dropped.)
num_classes = len(classes)
model.classifier = nn.Linear(model.classifier.in_features, num_classes)

# Keep the model's label bookkeeping consistent with the new head so that the
# label count and label names travel with the model object.
model.num_labels = num_classes
model.config.id2label = dict(enumerate(classes))
model.config.label2id = {name: idx for idx, name in enumerate(classes)}

# Move the model onto the selected device and report which one is in use.
model = model.to(device)
print(device)

# Training objective and optimiser: cross-entropy loss, SGD with momentum.
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
[0.49147618 0.48220086 0.44667191] [0.24713163 0.24367339 0.26168631]
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


Downloading:   0%|          | 0.00/266 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/68.0k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/109M [00:00<?, ?B/s]

cuda:0


In [None]:
# Switch the model to evaluation mode. eval() returns the module itself, so as the
# cell's last expression it also displays the model's layer structure.
model.eval()

ConvNextForImageClassification(
  (convnext): ConvNextModel(
    (embeddings): ConvNextEmbeddings(
      (patch_embeddings): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (layernorm): ConvNextLayerNorm()
    )
    (encoder): ConvNextEncoder(
      (stages): ModuleList(
        (0): ConvNextStage(
          (downsampling_layer): Identity()
          (layers): Sequential(
            (0): ConvNextLayer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextLayerNorm()
              (pwconv1): Linear(in_features=96, out_features=384, bias=True)
              (act): GELUActivation()
              (pwconv2): Linear(in_features=384, out_features=96, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextLayer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextLayerNorm()
              

In [None]:
# Draw training batches through `train_sample` so that the images held out for
# validation are never used for weight updates. The sampler already visits its
# indices in random order, so `shuffle` is not passed (the two are mutually exclusive).
trainloader = torch.utils.data.DataLoader(train_data, batch_size=4, sampler=train_sample, num_workers=2)
testloader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=2)

# Pull a single batch as a quick sanity check of the input pipeline.
# The built-in next() is used because loader iterators follow the standard
# iterator protocol and do not expose a `.next()` method on current PyTorch.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [None]:
num_epochs = 10    # full passes over the training loader
log_every = 2000   # mini-batches between progress reports

# An earlier cell calls model.eval(); switch back to training mode explicitly so
# that any mode-dependent layers behave correctly while the weights are updated.
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    start_time = time.time()  # per-epoch clock; reported times are since epoch start

    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        output = model(inputs).logits
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the loss and report its mean over the last `log_every` batches.
        running_loss += loss.item()
        if i % log_every == log_every - 1:
            time_taken = time.time() - start_time  # only measured when it is reported
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
            print('Time:', time_taken)
            running_loss = 0.0

# Leave the model in evaluation mode for the inference cells that follow.
model.eval()
print('Finished Training of ConvNeXt')

[1,  2000] loss: 0.516
Time: 138.62709283828735
[1,  4000] loss: 0.253
Time: 270.75112295150757
[1,  6000] loss: 0.186
Time: 402.91266679763794
[1,  8000] loss: 0.158
Time: 535.0360262393951
[1, 10000] loss: 0.147
Time: 667.0143790245056
[1, 12000] loss: 0.158
Time: 798.7789626121521
[2,  2000] loss: 0.099
Time: 132.17781734466553
[2,  4000] loss: 0.088
Time: 264.0385434627533
[2,  6000] loss: 0.092
Time: 396.3565695285797
[2,  8000] loss: 0.097
Time: 529.0886716842651
[2, 10000] loss: 0.076
Time: 661.8425807952881
[2, 12000] loss: 0.087
Time: 794.4775125980377
[3,  2000] loss: 0.042
Time: 132.7688548564911
[3,  4000] loss: 0.048
Time: 265.61485147476196
[3,  6000] loss: 0.051
Time: 398.3078861236572
[3,  8000] loss: 0.056
Time: 530.8547399044037
[3, 10000] loss: 0.044
Time: 663.4202258586884
[3, 12000] loss: 0.055
Time: 796.051361322403
[4,  2000] loss: 0.027
Time: 132.7461395263672
[4,  4000] loss: 0.029
Time: 265.34492683410645
[4,  6000] loss: 0.033
Time: 397.9591727256775
[4,  800

In [None]:
# Overall accuracy on the test set.
model.eval()  # make the inference mode explicit rather than relying on earlier cells
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images).logits
        # The predicted class is the index of the largest logit in each row.
        # (Under no_grad there is no need to go through the legacy `.data` attribute.)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %.2f %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 97.38 %


In [None]:
# Per-class accuracy on the test set: count correct and total predictions per class.
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

model.eval()  # make the inference mode explicit rather than relying on earlier cells
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        predictions = model(images).logits.argmax(dim=1)
        # Convert each batch to plain Python ints once, instead of comparing and
        # indexing with device tensors element by element.
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            classname = classes[label]
            total_pred[classname] += 1
            if label == prediction:
                correct_pred[classname] += 1

# Report the accuracy for each class.
for classname, correct_count in correct_pred.items():
    if total_pred[classname] == 0:
        # Avoid a ZeroDivisionError if a class never appears in the evaluated data.
        print(f'Accuracy for class: {classname:5s} is n/a (no samples)')
        continue
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: Airplane is 98.6 %
Accuracy for class: Car   is 97.2 %
Accuracy for class: Bird  is 96.8 %
Accuracy for class: Cat   is 96.9 %
Accuracy for class: Deer  is 97.3 %
Accuracy for class: Dog   is 92.7 %
Accuracy for class: Frog  is 99.4 %
Accuracy for class: Horse is 98.4 %
Accuracy for class: Ship  is 98.3 %
Accuracy for class: Truck is 98.2 %


In [None]:
# Persist only the model's state dict (its parameters and buffers).
torch.save(obj=model.state_dict(), f="Model_ConvNeXt_97_State_Dict")

In [None]:
# Serialise the entire model object rather than just its state dict.
# NOTE(review): whole-object saves are pickle-based, so loading presumably needs
# the same class definitions importable — confirm before sharing this file.
torch.save(obj=model, f="Model_ConvNeXt_97")