# Section 1
## Learning PyTorch for the first time

In this section, we learn how to use pretrained models for ImageNet classification.

In [2]:
import torch

In [3]:
# Ask PyTorch whether CUDA is usable on this machine.
torch.cuda.is_available()

False

In [4]:
from torchvision import models

In [5]:
# List every name exposed by torchvision.models to see what is on offer.
dir(models)

['AlexNet',
 'DenseNet',
 'GoogLeNet',
 'GoogLeNetOutputs',
 'Inception3',
 'InceptionOutputs',
 'MNASNet',
 'MobileNetV2',
 'ResNet',
 'ShuffleNetV2',
 'SqueezeNet',
 'VGG',
 '_GoogLeNetOutputs',
 '_InceptionOutputs',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_utils',
 'alexnet',
 'densenet',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'detection',
 'googlenet',
 'inception',
 'inception_v3',
 'mnasnet',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet',
 'mobilenet_v2',
 'quantization',
 'resnet',
 'resnet101',
 'resnet152',
 'resnet18',
 'resnet34',
 'resnet50',
 'resnext101_32x8d',
 'resnext50_32x4d',
 'segmentation',
 'shufflenet_v2_x0_5',
 'shufflenet_v2_x1_0',
 'shufflenet_v2_x1_5',
 'shufflenet_v2_x2_0',
 'shufflenetv2',
 'squeezenet',
 'squeezenet1_0',
 'squeezenet1_1',
 'utils',
 'vgg',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vg

In [6]:
# Instantiate the AlexNet class directly; no checkpoint is loaded in this cell.
alexnet = models.AlexNet()

In [7]:
# Echo the module to inspect its layer structure.
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [8]:
# Build the ResNet-101 architecture with pretrained=False; the weights are
# restored from a local checkpoint file in the following cell.
resnet = models.resnet101(pretrained=False)
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
# Restore the ResNet-101 weights from a local checkpoint.
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on
# this CPU-only machine (torch.cuda.is_available() returned False above).
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
# NOTE(review): the absolute path is machine-specific, and the same file is
# written by the torch.save cell at the end of Section 1; confirm where the
# checkpoint originally came from so the notebook can be re-run from scratch.
resnet.load_state_dict(
    torch.load("/Users/ramanshsharma/pytorch_practice/resnet.pth", map_location="cpu")
)

<All keys matched successfully>

In [10]:
from torchvision import transforms

# Compose chains the transforms below so that they run in order on an image:
#   Resize(256)      - resize the PIL image (single int argument)
#   CenterCrop(254)  - crop the centre; the result below is 3 x 254 x 254
#   ToTensor()       - convert the PIL image to a tensor
#   Normalize(...)   - per-channel (x - mean) / std with the three values given
# NOTE(review): ImageNet evaluation pipelines conventionally crop to 224, not
# 254 - confirm the value is intentional.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(254),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [11]:
from PIL import Image

# Force 3-channel RGB: the Normalize step in `preprocess` is configured with
# three per-channel means/stds, so a grayscale, palette or RGBA file would
# fail there. For a file that is already RGB the pixel data is unchanged.
img = Image.open("dog.jpeg").convert("RGB")
# img.show()  # uncomment to preview the picture in an external viewer

In [12]:
# Run the PIL image through the resize / crop / to-tensor / normalize pipeline.
img_p = preprocess(img)
img_p.shape

torch.Size([3, 254, 254])

In [13]:
# Prepend an axis of size 1 at position 0: the leading dimension refers to
# the number of examples in the data, and here there is a single image.
img_r = img_p.unsqueeze(0)
img_r.shape

torch.Size([1, 3, 254, 254])

In [14]:
# Switch to evaluation mode before predicting: dropout layers are disabled and
# batch-norm layers normalize with their stored running statistics instead of
# the statistics of the current batch (they are still applied).
resnet.eval()

# Only a forward pass is needed, so do not record the autograd graph.
with torch.no_grad():
    out = resnet(img_r)
out.shape

torch.Size([1, 1000])

In [15]:
# Read the class names, one per line, trimming surrounding whitespace.
with open('data_labels.txt') as f:
    labels = list(map(str.strip, f))
len(labels)

1000

In [16]:
# Largest score along the class axis, together with the position it occurs at.
values, idx = out.max(dim=1)
# Only one image is in the batch, so keep its (scalar) class index.
idx = idx[0]
idx

tensor(208)

In [17]:
# Softmax over the class axis, first (only) image, scaled to percentages.
confidence = out.softmax(dim=1)[0] * 100
whatami, me = labels[idx], confidence[idx]
print(f"This is {whatami} with {torch.round(me)}% confidence.")

This is Labrador retriever with 65.0% confidence.


In [18]:
# .item() returns the unrounded percentage as a plain Python number.
confidence[idx].item()

64.54376983642578

In [50]:
# Order all class scores from highest to lowest; only the indices are kept.
# NOTE(review): this rebinds `idx` from the scalar top-1 index to a (1, 1000)
# index tensor, so the earlier cells that use labels[idx] / confidence[idx]
# no longer work if they are re-run after this cell.
_, idx = torch.sort(out, descending=True)
idx.shape

torch.Size([1, 1000])

In [57]:
# Label and softmax percentage for the five best-ranked classes of the image.
[
    f"{labels[i]}, {confidence[i]}%"
    for i in idx[0, :5]
]

['Labrador retriever, 64.54376983642578%',
 'golden retriever, 25.38818359375%',
 'kuvasz, 2.5867414474487305%',
 'Great Pyrenees, 1.2494858503341675%',
 'clumber, clumber spaniel, 0.8665345907211304%']

In [75]:
# Write the model's state_dict to disk.
# NOTE(review): this is the same absolute, machine-specific path that the
# load_state_dict cell reads from, so running it overwrites that checkpoint.
torch.save(resnet.state_dict(), "/Users/ramanshsharma/pytorch_practice/resnet.pth")

# Section 2
## Learning how to use a GAN
In this section, we learn how to use Generative Adversarial Networks to produce realistic-looking images.

In [19]:
# IT IS ASSUMED I HAVE NO IDEA OF THE CODE BELOW
from torch import nn
import torch

class ResNetBlock(nn.Module): # <1>
    """Residual block whose branch output is added back onto its input.

    The branch is two (reflection pad -> 3x3 conv -> instance norm) stages
    with a ReLU between them.

    Args:
        dim: channel count; the block maps `dim` channels to `dim` channels.
    """

    def __init__(self, dim):
        super().__init__()
        self.conv_block = self.build_conv_block(dim)

    def build_conv_block(self, dim):
        """Return the nn.Sequential that forms the residual branch."""
        layers = [
            # Padding by 1 followed by an unpadded 3x3 conv keeps H and W.
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=True),
            nn.InstanceNorm2d(dim),
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=True),
            nn.InstanceNorm2d(dim),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return `x` plus the residual branch evaluated on `x`."""
        residual = self.conv_block(x)
        return x + residual # <2>
    

class ResNetGenerator(nn.Module):
    """Image-to-image generator built as one nn.Sequential.

    Layout: 7x7 conv stem -> `n_downsampling` stride-2 convs (channels double
    each time) -> `n_blocks` ResNetBlocks -> `n_downsampling` stride-2
    transposed convs (channels halve each time) -> 7x7 conv -> Tanh.

    Args:
        input_nc: number of channels of the input image.
        output_nc: number of channels of the generated image.
        ngf: number of filters produced by the first convolution.
        n_blocks: number of ResNetBlocks in the middle (must be >= 0).
        n_downsampling: number of down-/up-sampling stages (must be >= 0).
            Defaults to 2, the value that was previously hard-coded, so the
            default layer order and state_dict keys are unchanged.
    """

    def __init__(self, input_nc=3, output_nc=3, ngf=64, n_blocks=9,
                 n_downsampling=2): # <3>
        assert n_blocks >= 0
        assert n_downsampling >= 0
        super().__init__()

        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf

        # Stem: reflection padding of 3 plus an unpadded 7x7 conv keeps H and W.
        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=True),
            nn.InstanceNorm2d(ngf),
            nn.ReLU(True)
        ]

        # Downsampling: each stage uses stride 2 and doubles the channel count.
        for i in range(n_downsampling):
            mult = 2 ** i
            model.extend([
                nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
                          stride=2, padding=1, bias=True),
                nn.InstanceNorm2d(ngf * mult * 2),
                nn.ReLU(True)
            ])

        # Channel multiplier after all downsampling stages. This must be a
        # power of two (2 ** n); the former `2 * n` only matched for n <= 2.
        mult = 2 ** n_downsampling

        for _ in range(n_blocks):
            model.append(ResNetBlock(ngf * mult))

        # Upsampling mirrors the downsampling path and halves the channels.
        for i in range(n_downsampling):
            mult = 2 ** (n_downsampling - i)
            model.extend([
                nn.ConvTranspose2d(ngf * mult, ngf * mult // 2,
                                   kernel_size=3, stride=2,
                                   padding=1, output_padding=1,
                                   bias=True),
                nn.InstanceNorm2d(ngf * mult // 2),
                nn.ReLU(True)
            ])

        # Head: back to `output_nc` channels; Tanh bounds the output to [-1, 1].
        model.append(nn.ReflectionPad2d(3))
        model.append(nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0))
        model.append(nn.Tanh())

        self.model = nn.Sequential(*model)

    def forward(self, input): # <3>
        """Run `input` through the sequential model and return the result."""
        return self.model(input)

In [20]:
# Build the generator with its default arguments; its weights are loaded from
# a checkpoint file in a later cell.
net = ResNetGenerator()

In [21]:
# Echo the module to inspect its layer structure.
net

ResNetGenerator(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1))
    (2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (9): ReLU(inplace=True)
    (10): ResNetBlock(
      (conv_block): Sequential(
        (0): ReflectionPad2d((1, 1, 1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
        (2): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        (3): ReLU(inplace=True)
        (4): ReflectionPad2d((1, 1, 1, 1))
     

In [22]:
# Load the saved generator parameters (horse-to-zebra, judging by the file
# name) into `net`.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
# NOTE(review): absolute, machine-specific path.
param_path = "/Users/ramanshsharma/pytorch_practice/horse2zebra_0.4.0.pth"
loaded_param = torch.load(param_path, map_location="cpu")
net.load_state_dict(loaded_param)

<All keys matched successfully>

In [23]:
# Put the generator in evaluation mode; eval() returns the module itself,
# which is why the layer structure is echoed below.
net.eval()

ResNetGenerator(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1))
    (2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (9): ReLU(inplace=True)
    (10): ResNetBlock(
      (conv_block): Sequential(
        (0): ReflectionPad2d((1, 1, 1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
        (2): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        (3): ReLU(inplace=True)
        (4): ReflectionPad2d((1, 1, 1, 1))
     

In [24]:
# Input pipeline for the generator: resize, then convert to a tensor
# (no crop and no normalization here).
# NOTE(review): this rebinds `preprocess`, replacing the Section 1 pipeline of
# the same name; re-running Section 1 cells afterwards would use this one.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor()
])

In [25]:
# Force 3-channel RGB: `net` was built with the default input_nc=3, so its
# first convolution rejects grayscale or RGBA input. Already-RGB files are
# unaffected.
# NOTE(review): absolute, machine-specific path.
img = Image.open('/Users/ramanshsharma/pytorch_practice/horse.jpg').convert("RGB")
# img.show()  # uncomment to preview the picture in an external viewer
img_t = preprocess(img)
# Prepend the extra dimension for the number of examples (a batch of one).
img_r = img_t.unsqueeze(0)
img_r.shape

torch.Size([1, 3, 256, 314])

In [26]:
# Only a forward pass is needed, so do not record the autograd graph.
with torch.no_grad():
    out = net(img_r)

In [27]:
# The generator ends in Tanh, so its values lie in [-1, 1]; shift and scale
# them to [0, 1] before converting to a PIL image.
# detach() replaces the legacy `.data` attribute, and squeeze(0) removes only
# the batch axis instead of every size-1 axis.
out_t = (out.detach().squeeze(0) + 1.0) / 2.0
out_img = transforms.ToPILImage()(out_t)
out_img.show()

# Section 3
## Learning natural language
In this section, we apply ideas from natural language processing by building an image-captioning system.

This section could only be run from the terminal, and I chose not to download such a large repository just to execute it once.

# Section 4
## Torch Hub

In [28]:
from torch import hub
# hub does not require GitHub repos to be cloned,
# yet allows importing models if the repo has a hubconf.py file.
# NOTE(review): 'master' is a moving branch name, so a later re-run may fetch
# different code - consider pinning a release tag for reproducibility.

resnet18 = hub.load('pytorch/vision:master', # name of account, repo, branch
                    'resnet18', # name of entry point function
                    pretrained=True)

Downloading: "https://github.com/pytorch/vision/archive/master.zip" to /Users/ramanshsharma/.cache/torch/hub/master.zip
Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /Users/ramanshsharma/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))


