<a href="https://colab.research.google.com/github/mehdii190/neural-network/blob/main/src/resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


num_epochs = 30
batch_size = 100
learning_rate = 0.001




transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()
])


train_dataset = torchvision.datasets.CIFAR10(root = "../../data/",
                                             train = True,
                                             transform = transform,
                                             download = True)


test_dataset = torchvision.datasets.CIFAR10(root = "../../data/",
                                             train = False,
                                             transform = transforms.ToTensor())


train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = False)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../../data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 41203692.63it/s]


Extracting ../../data/cifar-10-python.tar.gz to ../../data/


In [3]:
def conv3x3(in_channles, out_channles, stride = 1):
  return nn.Conv2d(in_channles, out_channles, kernel_size= 3 , stride= stride, padding= 1 ,bias = False)


class ResidualBlock(nn.Module):
  def __init__(self, in_channles, out_channles, stride = 1 , downsampling = None):
    super(ResidualBlock, self).__init__()
    self.conv1 = conv3x3(in_channles, out_channles, stride)
    self.bn1 = nn.BatchNorm2d(out_channles)
    self.relu = nn.ReLU(inplace= True)
    self.conv2 = conv3x3(in_channles, out_channles)
    self.bn2 = nn.BatchNorm2d(out_channles)
    self.downsampling = downsampling
  def forward(self,x):
    residual = x.clone()
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    if self.downsampling:
      residual = self.downsampling(x)
    out+= residual
    out = self.relu(out)
    return out

In [12]:
class ResNet(nn.Module):
  def __init__(self,block,layers,num_classes = 10):
    """
    layers will be a list = [2,2,2] 
    """
    super(ResNet,self).__init__()
    self.in_channles = 16
    self.conv = conv3x3(3,16)
    self.bn = nn.BatchNorm2d(16)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self.make_layer(block,16,layers[0],stride = 1)
    self.layer2 = self.make_layer(block,32,layers[1],stride = 2)
    self.layer3 = self.make_layer(block,64,layers[2],stride = 2)
    self.avg_pool = nn.AvgPool2d(8)
    self.fc = nn.Linear(64,num_classes)

  def make_layer(self, block, out_channles , blocks, stride = 1 ):
    downsampling = None
    if self.in_channles != out_channles or stride != 1 :
      downsampling = nn.Sequential(conv3x3(self.in_channles,out_channles, stride = stride),
                                   nn.BatchNorm2d(out_channles))
    layers = []
    layers.append(block(self.in_channles,out_channles, stride, downsampling ))
    self.in_channles = out_channles
    for i in range(1,blocks):
      layers.append(block(out_channles,out_channles))
    return nn.Sequential(*layers)



  def forward(self,x):
    out = self.conv(x)
    out = self.bn(out)
    out = self.relu(out)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.avg_pool(out)
    out = out.view(out.size(0),-1)
    out = self.fc(out)
    return out



In [21]:
model = ResNet(ResidualBlock,[2,2,2]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)


In [23]:
decay = 0
model.train()
for epoch in range(num_epochs):

  if epoch + 1 % 20==0:
    decay+=1
    optimizer.param_groups[0]["lr"] = learning_rate * (0.5**decay)
    print("the new learning rate is {}".format(optimizer.param_groups[0]["lr"]))

  for i , (images,labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    loss = criterion(outputs,labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i+1 % 100 == 0:
      print("epoch [{}/{}] , step [{}/{}] , loss {:.4f}"
      .format(epoch+1,num_epochs,i+1,len(train_loader),loss.item()))


RuntimeError: ignored