In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import v2
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
from tqdm import tqdm

train_transforms=v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32,scale=True)
])
test_transforms=v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32,scale=True)
])
trainSet=CIFAR100('torCifar',train=True,transform=train_transforms)
testSet=CIFAR100('torCifar',train=False,transform=test_transforms)

In [2]:
len(trainSet),len(testSet)

(50000, 10000)

In [3]:
img,label=trainSet[0]
img.shape,label

(torch.Size([3, 32, 32]), 19)

In [4]:
with open('torCifar/cifar-100-python/meta','rb') as f:
    import pickle
    a=pickle.load(f)
a['fine_label_names'][:5]

['apple', 'aquarium_fish', 'baby', 'bear', 'beaver']

In [5]:
train_dataloader=DataLoader(trainSet, batch_size=100,shuffle=True,num_workers=4)
test_dataloader=DataLoader(testSet, batch_size=100,num_workers=4)

In [6]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(   # takes in a 3x32x32
            nn.Conv2d(
                in_channels=3,              
                out_channels=16,            
                kernel_size=5,              
                stride=1,                   
                padding=2,                  
            ),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        ) # outputs a 16x16x16
        self.conv2 = nn.Sequential(  # takes in a 16x16x16
            nn.Conv2d(16, 32, 5, 1, 2),     
            nn.ReLU(),                      
            nn.MaxPool2d(2),                
        ) # outputs a 32x8x8
        self.do=nn.Dropout(.5)
        self.out = nn.Linear(32 * 8 * 8, 100)
    def forward(self, x):
        x = self.conv1(x) # 16x14x14
        x = self.conv2(x) # 32x8x8
        x = x.view(x.size(0), -1) # flatten this into a single vector of size 32*7*7=1568
        x = self.do(x)
        output = self.out(x) # combine into 10 output logits
        return output

In [7]:
torch.randn(3,5).softmax(dim=1)

tensor([[0.2583, 0.0222, 0.3310, 0.3569, 0.0316],
        [0.0818, 0.2010, 0.0261, 0.6551, 0.0361],
        [0.0959, 0.0171, 0.0678, 0.7742, 0.0450]])

In [8]:
from sklearn.preprocessing import LabelBinarizer
lb=LabelBinarizer()
lb.fit(list(range(100)))

In [9]:
model=CNN().to('cuda')

In [10]:
EPOCHS=200

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=.001)
for epoch in tqdm(range(EPOCHS)):
    totalloss=0
    for batch, (X,y) in enumerate(train_dataloader):
        X=X.to('cuda')
        y=torch.Tensor(lb.transform(y)).to('cuda')
        #y=torch.Tensor(y).to('cuda')

        predictions=model(X)
        loss=criterion(predictions,y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        totalloss+=loss.item()

    if epoch%20==0:
        totalloss/=len(train_dataloader)
        print('Loss/train',totalloss,epoch)
        test_loss=0
        
        with torch.no_grad():
            for X,y in test_dataloader:
                X=X.to('cuda')
                y=y.to('cuda')
                pred=model(X)
                test_loss+=criterion(pred,y).item()
            test_loss/=len(test_dataloader)
        print('Loss/test',test_loss,epoch)

  0%|                                                   | 0/200 [00:00<?, ?it/s]

Loss/train 3.7927056279182434 0


  0%|▏                                          | 1/200 [00:02<08:04,  2.43s/it]

Loss/test 3.4046794533729554 0


 10%|████▏                                     | 20/200 [00:37<05:36,  1.87s/it]

Loss/train 2.035033353328705 20


 10%|████▍                                     | 21/200 [00:40<06:03,  2.03s/it]

Loss/test 2.7906190085411073 20


 20%|████████▍                                 | 40/200 [01:15<04:59,  1.87s/it]

Loss/train 1.7984405541419983 40


 20%|████████▌                                 | 41/200 [01:17<05:18,  2.00s/it]

Loss/test 2.9202412033081053 40


 30%|████████████▌                             | 60/200 [01:51<04:03,  1.74s/it]

Loss/train 1.7021466665267944 60


 30%|████████████▊                             | 61/200 [01:53<04:19,  1.87s/it]

Loss/test 3.011856987476349 60


 40%|████████████████▊                         | 80/200 [02:26<03:23,  1.69s/it]

Loss/train 1.6263661983013153 80


 40%|█████████████████                         | 81/200 [02:28<03:40,  1.86s/it]

Loss/test 3.080763099193573 80


 50%|████████████████████▌                    | 100/200 [03:00<02:53,  1.73s/it]

Loss/train 1.5497346887588501 100


 50%|████████████████████▋                    | 101/200 [03:02<03:03,  1.85s/it]

Loss/test 3.1135146164894105 100


 60%|████████████████████████▌                | 120/200 [03:37<02:26,  1.83s/it]

Loss/train 1.5098372254371644 120


 60%|████████████████████████▊                | 121/200 [03:40<02:35,  1.97s/it]

Loss/test 3.236201276779175 120


 70%|████████████████████████████▋            | 140/200 [04:15<01:51,  1.86s/it]

Loss/train 1.4783199009895325 140


 70%|████████████████████████████▉            | 141/200 [04:18<02:00,  2.04s/it]

Loss/test 3.306302285194397 140


 80%|████████████████████████████████▊        | 160/200 [04:53<01:15,  1.89s/it]

Loss/train 1.46180916929245 160


 80%|█████████████████████████████████        | 161/200 [04:56<01:21,  2.10s/it]

Loss/test 3.312635805606842 160


 90%|████████████████████████████████████▉    | 180/200 [05:32<00:37,  1.89s/it]

Loss/train 1.444932514667511 180


 90%|█████████████████████████████████████    | 181/200 [05:34<00:38,  2.05s/it]

Loss/test 3.445985727310181 180


100%|█████████████████████████████████████████| 200/200 [06:10<00:00,  1.85s/it]
