In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# LeNet-5 style convolutional classifier
class LeNet5(nn.Module):
    """Two 5x5 conv layers followed by three fully connected layers.

    ``fc1`` expects 16 * 5 * 5 flattened features, which is what a
    3-channel 32x32 input yields after the two unpadded 5x5 convolutions and
    the two 2x2 max-pools in ``forward`` (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 3 input channels -> 6 -> 16 feature maps.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # Classifier head: 400 -> 120 -> 84 -> 10 outputs.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 un-normalised class scores for each sample in ``x``."""
        relu = nn.functional.relu
        pool = nn.functional.max_pool2d

        # Each conv stage is conv -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2):
            x = pool(relu(conv(x)), 2)

        # Collapse (C, H, W) into one feature vector per sample.
        x = x.view(x.size(0), -1)

        for hidden in (self.fc1, self.fc2):
            x = relu(hidden(x))
        # The last layer has no activation: it outputs raw scores.
        return self.fc3(x)




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# Basic (two-convolution) residual block
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 conv + batch-norm layers.

    The block input is added to the second batch-norm output before the final
    ReLU. When ``stride != 1`` or the channel count changes, the input first
    goes through a strided 1x1 conv + batch-norm (``downsample``) so that both
    operands of the addition have the same shape; otherwise ``downsample`` is
    an empty ``nn.Sequential`` and returns its input unchanged.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by both 3x3 convolutions.
        stride: stride of the first 3x3 conv and of the 1x1 shortcut conv.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # One in-place ReLU module is shared by both activation sites.
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channels)

        shortcut_layers = []
        if stride != 1 or in_channels != out_channels:
            # Shape changes on the main path, so project the input to match.
            shortcut_layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        self.downsample = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Apply conv-BN-ReLU-conv-BN, add the shortcut, then apply ReLU."""
        shortcut = self.downsample(x)
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        main += shortcut
        return self.relu(main)

# Generic four-stage ResNet
class ResNet(nn.Module):
    """ResNet backbone with a 3x3 stride-1 stem and four residual stages.

    Args:
        block: residual block class, called as
            ``block(in_channels, out_channels, stride)``.
        num_blocks: sequence whose first four entries give the number of
            blocks in stages 1-4.
        num_classes: size of the final linear layer's output.

    The stages are 64, 128, 256 and 512 channels wide; stages 2-4 open with a
    stride-2 block. The classifier takes 512 input features.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; advanced by _make_layer as stages are built.
        self.in_channels = 64

        # Stem: a single 3x3 stride-1 conv.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Global average pool to 1x1, then the linear classifier.
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage: a leading block with ``stride``, then stride-1 blocks.

        Updates ``self.in_channels`` to ``out_channels`` so the next stage
        knows its input width.
        """
        head = block(self.in_channels, out_channels, stride)
        self.in_channels = out_channels
        tail = [block(out_channels, out_channels, stride=1) for _ in range(1, num_blocks)]
        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Return one row of ``num_classes`` scores per input sample."""
        feats = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = self.avg_pool(feats)
        return self.fc(pooled.view(pooled.size(0), -1))

# Data pipeline: convert CIFAR-10 images to tensors, then shift/scale every
# channel with mean 0.5 and std 0.5. The augmentation steps are left disabled.
transform = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Build the model (two blocks per stage: ResNet-18-like) and move it to the
# GPU when one is available.
net = ResNet(BasicBlock, [2, 2, 2, 2])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
# Adam has no `momentum` keyword; passing one raises TypeError before any
# training happens. Its first-moment decay is the first entry of `betas`,
# which already defaults to 0.9. The other cells in this notebook train with
# optim.SGD(..., momentum=0.9); switch to that for a like-for-like comparison.
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Training loop: report the mean loss over every window of 200 mini-batches.
num_epochs = 5
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    for i, data in enumerate(tqdm(trainloader, position=0, leave=True), 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print(f"[{epoch + 1}, {i + 1}] Loss: {running_loss / 200:.3f}")
            running_loss = 0.0

print("Finished Training")

# Evaluation: top-1 accuracy over the whole test split, in eval mode and
# without building the autograd graph.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in tqdm(testloader, position=0, leave=True):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        # Index of the largest score along the class dimension.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 49132145.85it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


 26%|██▌       | 203/782 [00:18<00:29, 19.46it/s]

[1, 200] Loss: 1.741


 51%|█████▏    | 402/782 [00:29<00:21, 17.45it/s]

[1, 400] Loss: 1.370


 77%|███████▋  | 602/782 [00:39<00:10, 17.94it/s]

[1, 600] Loss: 1.222


100%|██████████| 782/782 [00:49<00:00, 15.69it/s]
 26%|██▌       | 203/782 [00:11<00:31, 18.64it/s]

[2, 200] Loss: 0.931


 52%|█████▏    | 403/782 [00:22<00:20, 18.76it/s]

[2, 400] Loss: 0.876


 77%|███████▋  | 603/782 [00:33<00:09, 18.41it/s]

[2, 600] Loss: 0.854


100%|██████████| 782/782 [00:43<00:00, 18.18it/s]
 26%|██▌       | 202/782 [00:11<00:32, 17.82it/s]

[3, 200] Loss: 0.632


 51%|█████▏    | 402/782 [00:22<00:21, 18.06it/s]

[3, 400] Loss: 0.625


 77%|███████▋  | 602/782 [00:34<00:10, 17.85it/s]

[3, 600] Loss: 0.615


100%|██████████| 782/782 [00:44<00:00, 17.62it/s]
 26%|██▌       | 203/782 [00:11<00:32, 18.08it/s]

[4, 200] Loss: 0.419


 52%|█████▏    | 403/782 [00:22<00:21, 17.96it/s]

[4, 400] Loss: 0.420


 77%|███████▋  | 603/782 [00:34<00:09, 17.97it/s]

[4, 600] Loss: 0.434


100%|██████████| 782/782 [00:44<00:00, 17.64it/s]
 26%|██▌       | 203/782 [00:11<00:31, 18.16it/s]

[5, 200] Loss: 0.251


 52%|█████▏    | 403/782 [00:23<00:21, 17.86it/s]

[5, 400] Loss: 0.264


 77%|███████▋  | 603/782 [00:34<00:09, 18.34it/s]

[5, 600] Loss: 0.294


100%|██████████| 782/782 [00:44<00:00, 17.51it/s]


Finished Training


100%|██████████| 157/157 [00:03<00:00, 41.89it/s]

Accuracy on the test set: 76.28%





In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm


class BasicBlock(nn.Module):
    """Residual block with two 3x3 conv + batch-norm layers.

    ``expansion`` is the ratio between the shortcut's output channels and
    ``planes``; it is 1 for this block.

    Args:
        in_planes: channels of the incoming feature map.
        planes: channels produced by both 3x3 convolutions.
        stride: stride of the first 3x3 conv and of the 1x1 shortcut conv.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Empty Sequential returns its input unchanged; a 1x1 conv + BN
        # projection is used only when the main path changes the tensor shape.
        projection = []
        if stride != 1 or in_planes != out_planes:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Apply conv-BN-ReLU-conv-BN, add the shortcut, then apply ReLU."""
        relu = nn.functional.relu
        out = relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return relu(out)

class ResNet34(nn.Module):
    """ResNet with ``BasicBlock`` stages of depth 3, 4, 6 and 3.

    The stem is a single 3x3 stride-1 convolution. The stages are 64, 128,
    256 and 512 channels wide, and stages 2-4 open with a stride-2 block.
    An adaptive average pool and a 512-input linear layer form the classifier.

    Args:
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Running channel count; advanced by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(BasicBlock, 64, 3, stride=1)
        self.layer2 = self._make_layer(BasicBlock, 128, 4, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 6, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: a leading block with ``stride``, then stride-1 blocks.

        ``self.in_planes`` is left at ``planes * block.expansion`` so the next
        stage knows its input width.
        """
        stage = [block(self.in_planes, planes, stride)]
        self.in_planes = planes * block.expansion
        # Remaining blocks keep the spatial size and channel count.
        for _ in range(num_blocks - 1):
            stage.append(block(self.in_planes, planes, 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return one row of ``num_classes`` scores per input sample."""
        feats = nn.functional.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = self.avgpool(feats)
        return self.fc(feats.view(feats.size(0), -1))

# Data pipeline: convert CIFAR-10 images to tensors, then shift/scale every
# channel with mean 0.5 and std 0.5. The augmentation steps are left disabled.
transform = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training split is shuffled.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Build ResNet34 (stage depths 3/4/6/3) and move it to the GPU when available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = ResNet34(num_classes=10)
net.to(device)

# Cross-entropy loss, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training loop: report the mean loss over every window of 200 mini-batches.
num_epochs = 5
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    progress = tqdm(trainloader, position=0, leave=True)
    for i, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            print(f"[{epoch + 1}, {i + 1}] Loss: {running_loss / 200:.3f}")
            running_loss = 0.0

print("Finished Training")

# Evaluation: top-1 accuracy over the whole test split.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(testloader, position=0, leave=True):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        # Index of the largest score along the class dimension.
        predicted = torch.max(outputs, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")



Files already downloaded and verified
Files already downloaded and verified


 26%|██▌       | 201/782 [00:26<00:54, 10.61it/s]

[1, 200] Loss: 1.779


 51%|█████▏    | 401/782 [00:45<00:35, 10.65it/s]

[1, 400] Loss: 1.437


 77%|███████▋  | 601/782 [01:04<00:17, 10.44it/s]

[1, 600] Loss: 1.251


100%|██████████| 782/782 [01:21<00:00,  9.56it/s]
 26%|██▌       | 201/782 [00:19<00:56, 10.31it/s]

[2, 200] Loss: 0.956


 51%|█████▏    | 402/782 [00:38<00:36, 10.55it/s]

[2, 400] Loss: 0.928


 77%|███████▋  | 602/782 [00:57<00:17, 10.40it/s]

[2, 600] Loss: 0.858


100%|██████████| 782/782 [01:15<00:00, 10.41it/s]
 26%|██▌       | 201/782 [00:19<00:59,  9.85it/s]

[3, 200] Loss: 0.629


 51%|█████▏    | 401/782 [00:38<00:36, 10.48it/s]

[3, 400] Loss: 0.631


 77%|███████▋  | 601/782 [00:57<00:17, 10.16it/s]

[3, 600] Loss: 0.631


100%|██████████| 782/782 [01:14<00:00, 10.44it/s]
 26%|██▌       | 201/782 [00:19<00:57, 10.11it/s]

[4, 200] Loss: 0.406


 51%|█████▏    | 402/782 [00:38<00:35, 10.57it/s]

[4, 400] Loss: 0.433


 77%|███████▋  | 602/782 [00:57<00:17, 10.47it/s]

[4, 600] Loss: 0.454


100%|██████████| 782/782 [01:15<00:00, 10.41it/s]
 26%|██▌       | 201/782 [00:19<00:55, 10.49it/s]

[5, 200] Loss: 0.261


 51%|█████▏    | 401/782 [00:38<00:36, 10.57it/s]

[5, 400] Loss: 0.291


 77%|███████▋  | 602/782 [00:57<00:16, 10.61it/s]

[5, 600] Loss: 0.314


100%|██████████| 782/782 [01:14<00:00, 10.43it/s]


Finished Training


100%|██████████| 157/157 [00:05<00:00, 28.09it/s]

Accuracy on the test set: 74.51%





In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# Bottleneck residual block (1x1 -> 3x3 -> 1x1)
class Bottleneck(nn.Module):
    """Three-convolution residual block whose output is ``expansion * planes`` wide.

    The main path is a 1x1 conv to ``planes`` channels, a 3x3 conv (which
    carries the stride), and a 1x1 conv to ``expansion * planes`` channels,
    each followed by batch-norm. The shortcut is added before the final ReLU.

    Args:
        in_planes: channels of the incoming feature map.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 conv and of the 1x1 shortcut conv.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Empty Sequential returns its input unchanged; a 1x1 conv + BN
        # projection is used only when the main path changes the tensor shape.
        projection = []
        if stride != 1 or in_planes != out_planes:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Run the three conv-BN stages, add the shortcut, then apply ReLU."""
        relu = nn.functional.relu
        out = relu(self.bn1(self.conv1(x)))
        out = relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        return relu(out)

# ResNet-50 layout with a 3x3 stride-1 stem
class ResNet50(nn.Module):
    """ResNet with ``Bottleneck`` stages of depth 3, 4, 6 and 3.

    The stem is a single 3x3 stride-1 convolution. Stage widths (``planes``)
    are 64, 128, 256 and 512, and stages 2-4 open with a stride-2 block. The
    classifier is an adaptive average pool followed by a linear layer with
    ``512 * Bottleneck.expansion`` input features.

    Args:
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Running channel count; advanced by _make_layer as stages are built.
        self.in_planes = 64

        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Residual stages
        self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1)
        self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2)
        self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2)
        self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2)

        # Classifier
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512 * Bottleneck.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: a leading block with ``stride``, then stride-1 blocks.

        ``self.in_planes`` is left at ``planes * block.expansion`` so the next
        stage knows its input width.
        """
        stage = [block(self.in_planes, planes, stride)]
        self.in_planes = planes * block.expansion
        # Remaining blocks keep the spatial size and channel count.
        for _ in range(num_blocks - 1):
            stage.append(block(self.in_planes, planes, 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return one row of ``num_classes`` scores per input sample."""
        feats = nn.functional.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = self.avgpool(feats)
        return self.fc(feats.view(feats.size(0), -1))



# Data pipeline: convert CIFAR-10 images to tensors, then shift/scale every
# channel with mean 0.5 and std 0.5. The augmentation steps are left disabled.
transform = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training split is shuffled.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Build ResNet50 (bottleneck stages 3/4/6/3) and move it to the GPU when available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = ResNet50(num_classes=10)
net.to(device)

# Cross-entropy loss, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training loop: report the mean loss over every window of 200 mini-batches.
num_epochs = 5
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    progress = tqdm(trainloader, position=0, leave=True)
    for i, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            print(f"[{epoch + 1}, {i + 1}] Loss: {running_loss / 200:.3f}")
            running_loss = 0.0

print("Finished Training")

# Evaluation: top-1 accuracy over the whole test split.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(testloader, position=0, leave=True):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        # Index of the largest score along the class dimension.
        predicted = torch.max(outputs, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 98843701.99it/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


 26%|██▌       | 200/782 [00:48<01:57,  4.96it/s]

[1, 200] Loss: 2.069


 51%|█████     | 400/782 [01:29<01:19,  4.83it/s]

[1, 400] Loss: 1.702


 77%|███████▋  | 600/782 [02:10<00:38,  4.76it/s]

[1, 600] Loss: 1.546


100%|██████████| 782/782 [02:49<00:00,  4.62it/s]
 26%|██▌       | 200/782 [00:43<02:05,  4.64it/s]

[2, 200] Loss: 1.340


 51%|█████     | 400/782 [01:26<01:25,  4.45it/s]

[2, 400] Loss: 1.275


 77%|███████▋  | 600/782 [02:10<00:39,  4.60it/s]

[2, 600] Loss: 1.198


100%|██████████| 782/782 [02:50<00:00,  4.59it/s]
 26%|██▌       | 200/782 [00:43<02:06,  4.59it/s]

[3, 200] Loss: 0.989


 51%|█████     | 400/782 [01:27<01:23,  4.57it/s]

[3, 400] Loss: 1.005


 77%|███████▋  | 600/782 [02:11<00:39,  4.56it/s]

[3, 600] Loss: 0.958


100%|██████████| 782/782 [02:51<00:00,  4.56it/s]
 26%|██▌       | 200/782 [00:43<02:06,  4.60it/s]

[4, 200] Loss: 0.752


 51%|█████     | 400/782 [01:27<01:23,  4.60it/s]

[4, 400] Loss: 0.783


 77%|███████▋  | 600/782 [02:11<00:39,  4.60it/s]

[4, 600] Loss: 0.786


100%|██████████| 782/782 [02:51<00:00,  4.57it/s]
 26%|██▌       | 200/782 [00:44<02:07,  4.57it/s]

[5, 200] Loss: 0.555


 51%|█████     | 400/782 [01:27<01:23,  4.57it/s]

[5, 400] Loss: 0.591


 77%|███████▋  | 600/782 [02:11<00:39,  4.60it/s]

[5, 600] Loss: 0.596


100%|██████████| 782/782 [02:51<00:00,  4.56it/s]


Finished Training


100%|██████████| 157/157 [00:10<00:00, 15.55it/s]

Accuracy on the test set: 72.66%





In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# Bottleneck residual block (1x1 -> 3x3 -> 1x1)
class Bottleneck(nn.Module):
    """Three-convolution residual block whose output is ``expansion * planes`` wide.

    The main path is a 1x1 conv to ``planes`` channels, a 3x3 conv (which
    carries the stride), and a 1x1 conv to ``expansion * planes`` channels,
    each followed by batch-norm. The shortcut is added before the final ReLU.

    Args:
        in_planes: channels of the incoming feature map.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 conv and of the 1x1 shortcut conv.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Empty Sequential returns its input unchanged; a 1x1 conv + BN
        # projection is used only when the main path changes the tensor shape.
        projection = []
        if stride != 1 or in_planes != out_planes:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Run the three conv-BN stages, add the shortcut, then apply ReLU."""
        relu = nn.functional.relu
        out = relu(self.bn1(self.conv1(x)))
        out = relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        return relu(out)

# ResNet-101 layout with a 3x3 stride-1 stem
class ResNet101(nn.Module):
    """ResNet with ``Bottleneck`` stages of depth 3, 4, 23 and 3.

    The stem is a single 3x3 stride-1 convolution. Stage widths (``planes``)
    are 64, 128, 256 and 512, and stages 2-4 open with a stride-2 block. The
    classifier is an adaptive average pool followed by a linear layer with
    ``512 * Bottleneck.expansion`` input features.

    Args:
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Running channel count; advanced by _make_layer as stages are built.
        self.in_planes = 64

        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Residual stages
        self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1)
        self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2)
        self.layer3 = self._make_layer(Bottleneck, 256, 23, stride=2)
        self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2)

        # Classifier
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512 * Bottleneck.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: a leading block with ``stride``, then stride-1 blocks.

        ``self.in_planes`` is left at ``planes * block.expansion`` so the next
        stage knows its input width.
        """
        stage = [block(self.in_planes, planes, stride)]
        self.in_planes = planes * block.expansion
        # Remaining blocks keep the spatial size and channel count.
        for _ in range(num_blocks - 1):
            stage.append(block(self.in_planes, planes, 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return one row of ``num_classes`` scores per input sample."""
        feats = nn.functional.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = self.avgpool(feats)
        return self.fc(feats.view(feats.size(0), -1))


# Data pipeline: convert CIFAR-10 images to tensors, then shift/scale every
# channel with mean 0.5 and std 0.5. The augmentation steps are left disabled.
transform = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training split is shuffled.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Build ResNet101 (bottleneck stages 3/4/23/3) and move it to the GPU when available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = ResNet101(num_classes=10)
net.to(device)

# Cross-entropy loss, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training loop: report the mean loss over every window of 200 mini-batches.
num_epochs = 5
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    progress = tqdm(trainloader, position=0, leave=True)
    for i, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            print(f"[{epoch + 1}, {i + 1}] Loss: {running_loss / 200:.3f}")
            running_loss = 0.0

print("Finished Training")

# Evaluation: top-1 accuracy over the whole test split.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(testloader, position=0, leave=True):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        # Index of the largest score along the class dimension.
        predicted = torch.max(outputs, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")



Files already downloaded and verified
Files already downloaded and verified


 26%|██▌       | 200/782 [01:07<03:19,  2.92it/s]

[1, 200] Loss: 2.197


 51%|█████     | 400/782 [02:17<02:15,  2.83it/s]

[1, 400] Loss: 1.777


 77%|███████▋  | 600/782 [03:29<01:05,  2.79it/s]

[1, 600] Loss: 1.604


100%|██████████| 782/782 [04:34<00:00,  2.84it/s]
 26%|██▌       | 200/782 [01:12<03:29,  2.78it/s]

[2, 200] Loss: 1.430


 51%|█████     | 400/782 [02:24<02:18,  2.76it/s]

[2, 400] Loss: 1.344


 77%|███████▋  | 600/782 [03:36<01:06,  2.74it/s]

[2, 600] Loss: 1.289


100%|██████████| 782/782 [04:42<00:00,  2.77it/s]
 26%|██▌       | 200/782 [01:12<03:28,  2.79it/s]

[3, 200] Loss: 1.116


 51%|█████     | 400/782 [02:24<02:17,  2.78it/s]

[3, 400] Loss: 1.089


 77%|███████▋  | 600/782 [03:36<01:06,  2.75it/s]

[3, 600] Loss: 1.058


100%|██████████| 782/782 [04:41<00:00,  2.77it/s]
 26%|██▌       | 200/782 [01:12<03:31,  2.76it/s]

[4, 200] Loss: 0.909


 51%|█████     | 400/782 [02:24<02:17,  2.78it/s]

[4, 400] Loss: 0.897


 77%|███████▋  | 600/782 [03:36<01:05,  2.79it/s]

[4, 600] Loss: 0.878


100%|██████████| 782/782 [04:42<00:00,  2.77it/s]
 26%|██▌       | 200/782 [01:12<03:33,  2.72it/s]

[5, 200] Loss: 0.728


 51%|█████     | 400/782 [02:24<02:17,  2.78it/s]

[5, 400] Loss: 0.744


 77%|███████▋  | 600/782 [03:36<01:05,  2.78it/s]

[5, 600] Loss: 0.754


100%|██████████| 782/782 [04:42<00:00,  2.77it/s]


Finished Training


100%|██████████| 157/157 [00:16<00:00,  9.34it/s]

Accuracy on the test set: 68.53%



