# Introduction to PyTorch

https://pytorch.org/tutorials/beginner/introyt.html

**Introduction** \|\| [Tensors](tensors_deeper_tutorial.html) \|\|
[Autograd](autogradyt_tutorial.html) \|\| [Building
Models](modelsyt_tutorial.html) \|\| [TensorBoard
Support](tensorboardyt_tutorial.html) \|\| [Training
Models](trainingyt.html) \|\| [Model Understanding](captumyt.html)

Introduction to PyTorch
=======================

Follow along with the video below or on
[YouTube](https://www.youtube.com/watch?v=IC0_FRiX-sw).



In [None]:
# necessary imports

import os
import sys
import numpy as np
import cv2
import pandas as pd

import torch                     # for all things PyTorch
import torch.nn as nn            # for torch.nn.Module, the parent object for PyTorch models
import torch.nn.functional as F  # for the activation function

%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt

## Tensors

In [None]:
# A 5 x 3 tensor in which every element is zero.
# No dtype is requested, so the default floating-point type (float32) is used.

z = torch.zeros(5, 3)

print(z)
print(f"datatype is {z.dtype}")

In [None]:
# A 5 x 3 tensor of ones, stored as 16-bit integers instead of the default dtype.

i = torch.ones(5, 3, dtype=torch.int16)

print(i)

In [None]:
# Learning weights are usually initialized randomly; seeding the PRNG makes
# the "random" values reproducible from run to run.

torch.manual_seed(1729)
r1 = torch.rand(2, 2)
print('A random tensor:', r1, sep='\n')

# drawing again without re-seeding continues the sequence -> new values
r2 = torch.rand(2, 2)
print('\nA different random tensor:', r2, sep='\n')

# re-seeding restarts the sequence, so r3 repeats the values of r1
torch.manual_seed(1729)
r3 = torch.rand(2, 2)
print('\nShould match r1:', r3, sep='\n')

In [None]:
# Element-wise arithmetic on tensors

ones = torch.ones(2, 3)
print(ones)

# multiplying by a scalar scales every element
twos = ones * 2
print(twos)

# both operands are 2 x 3, so they can be added element by element;
# the result has the same shape as the inputs
threes = ones + twos
print(threes)
print(threes.shape)

# two tensors whose shapes (2 x 3 and 3 x 2) do not match
r1, r2 = torch.rand(2, 3), torch.rand(3, 2)

# uncomment this line to get a runtime error because of different shapes
# r3 = r1 + r2

In [None]:
# A small sample of the mathematical operations available on tensors

# torch.rand draws from [0, 1); shifting and scaling maps that to [-1, 1)
r = (torch.rand(2, 2) - 0.5) * 2
print('A random matrix, r:')
print(r)

# Common mathematical operations are supported:
print('\nAbsolute value of r:')
print(torch.abs(r))

# ...as are trigonometric functions:
print('\nInverse sine of r:')
print(torch.asin(r))

# ...and linear algebra operations like determinant and singular value decomposition
print('\nDeterminant of r:')
print(torch.det(r))
print('\nSingular value decomposition of r:')
# torch.svd is deprecated; torch.linalg.svd is its replacement
# (it returns Vh, the conjugate transpose of the V that torch.svd returned)
print(torch.linalg.svd(r))

# ...and statistical and aggregate operations:
# torch.std_mean returns the pair in the order (std, mean), so label it that way
print('\nStandard deviation and average of r:')
print(torch.std_mean(r))
print('\nMaximum value of r:')
print(torch.max(r))

## Express models in PyTorch

In [None]:
# LeNet-5 is one of the earliest convolutional neural nets, and one of the drivers of the explosion in Deep Learning. It was built to read small images of handwritten numbers (the MNIST dataset), and correctly classify which digit was represented in the image.

# Here’s the abridged version of how it works:
# Layer C1 is a convolutional layer, meaning that it scans the input image for features it learned during training. It outputs a map of where it saw each of its learned features in the image. This “activation map” is downsampled in layer S2.
# Layer C3 is another convolutional layer, this time scanning C1’s activation map for combinations of features. It also puts out an activation map describing the spatial locations of these feature combinations, which is downsampled in layer S4.

# Finally, the fully-connected layers at the end, F5, F6, and OUTPUT, are a classifier that takes the final activation map, and classifies it into one of ten bins representing the 10 digits.

class LeNet(nn.Module):  # inherits from nn.Module
    """LeNet-style network: two convolution layers and three linear layers.

    The layer sizes fix the expected input: a single channel, and a spatial
    size that leaves a 5 x 5 map after the second pooling step (for example
    32 x 32 -> 28 -> 14 -> 10 -> 5). The output has 10 values per sample.
    """

    def __init__(self):
        # initialize through the parent class
        super().__init__()

        # convolutions with 5x5 square kernels:
        # 1 input channel (black & white) -> 6 channels -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # affine layers (y = Wx + b); 16 * 5 * 5 is the flattened size of the
        # 16 feature maps of 5 x 5 that remain after the second pooling
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run a batch through the network and return the raw 10-way scores."""
        # convolution -> ReLU -> max pooling over a (2, 2) window
        activated = F.relu(self.conv1(x))
        pooled = F.max_pool2d(activated, kernel_size=(2, 2))

        # a square window can also be given as a single number
        activated = F.relu(self.conv2(pooled))
        pooled = F.max_pool2d(activated, kernel_size=2)

        # collapse everything except the batch dimension
        flat = pooled.view(-1, self.num_flat_features(pooled))

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample (all dims except the first)."""
        per_sample_shape = torch.Size(x.size()[1:])
        return per_sample_shape.numel()

In [None]:
# Instantiate the network and push one sample batch through it.

# print the module to see how it describes itself
net = LeNet()
print(net)

# a batch holding a single 1-channel 32 x 32 image of random values
input = torch.rand(1, 1, 32, 32)
print('\n Image batch shape:', input.shape, sep='\n')

# call the module object itself rather than forward()
output = net(input)
print('\n Raw output:', output, sep='\n')

# the output keeps the batch dimension
print(output.shape)

## Datasets and Dataloaders

In [None]:
# necessary imports

import os
import sys
import numpy as np
import cv2
import pandas as pd

import torch                     # for all things PyTorch
import torch.nn as nn            # for torch.nn.Module, the parent object for PyTorch models
import torch.nn.functional as F  # for the activation function

%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt

In [None]:
# Preprocessing pipeline applied to every incoming image:
#   1. transforms.ToTensor() converts an image loaded by Pillow into a PyTorch tensor
#   2. transforms.Normalize() shifts and scales each channel as (x - mean) / std,
#      which gives zero mean and unit standard deviation when the constants are
#      the dataset's own per-channel statistics
#      (presumably the CIFAR-10 values here - TODO confirm)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2470, 0.2435, 0.2616),
    ),
])

In [None]:
# CIFAR10 training split, stored under ./data (downloaded if requested and
# missing) and preprocessed with the `transform` pipeline

trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

In [None]:
# Hand the dataset to a DataLoader, which serves it in shuffled batches of 4
# using 2 worker processes

trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [None]:
# It’s good practice to visualize the batches your DataLoader serves

import matplotlib.pyplot as plt
import numpy as np

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def imshow(img, mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616)):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: image tensor with the channel axis first (it is transposed to
            channel-last for plotting).
        mean: per-channel mean that was passed to transforms.Normalize.
        std: per-channel standard deviation that was passed to
            transforms.Normalize.

    The defaults mirror the constants of the `transform` defined in this
    section; dividing by 2 and adding 0.5 would only be correct for
    Normalize((0.5, ...), (0.5, ...)).
    """
    npimg = img.numpy()

    # reshape the statistics so they broadcast over the two spatial axes
    mean_arr = np.asarray(mean, dtype=npimg.dtype).reshape(-1, 1, 1)
    std_arr = np.asarray(std, dtype=npimg.dtype).reshape(-1, 1, 1)

    # invert (x - mean) / std; clip away rounding overshoot so that
    # matplotlib receives values inside its valid [0, 1] float range
    npimg = np.clip(npimg * std_arr + mean_arr, 0.0, 1.0)

    plt.imshow(np.transpose(npimg, (1, 2, 0)))


# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))

# print one class name per image in the batch (whatever the batch size is)
print(' '.join('%5s' % classes[label] for label in labels))

## Training a PyTorch Model

In [None]:
# Let’s put all the pieces together, and train a model

#%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Training and test datasets. Running this cell downloads CIFAR10 into ./data
# if it is not there yet. (It may take a minute.)

# every channel is normalized with mean 0.5 and standard deviation 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# training split, served in shuffled batches of 4
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

# test split, served in batches of 4 without shuffling
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# class names; the integer labels are used as indices into this tuple
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# Sanity-check what the DataLoader serves by plotting one batch

import matplotlib.pyplot as plt
import numpy as np


def imshow(img):
    """Plot an image tensor that was normalized with mean 0.5 and std 0.5."""
    # undo the normalization: x * 0.5 + 0.5
    restored = img * 0.5 + 0.5
    # move the first axis to the end, the layout plt.imshow is given here
    channel_last = restored.numpy().transpose(1, 2, 0)
    plt.imshow(channel_last)


# pull a single batch of training images and their labels
dataiter = iter(trainloader)
images, labels = next(dataiter)

# tile the batch into one image grid and display it
imshow(torchvision.utils.make_grid(images))

# class names of the four images in the batch
print(' '.join(('%5s' % classes[labels[j]]) for j in range(4)))

In [None]:
# The model we’ll train: a variant of LeNet - discussed earlier in this video -
# whose first convolution accepts 3-channel (color) images.

class Net(nn.Module):
    """LeNet variant for 3-channel input that produces 10 scores per sample.

    The flattening step assumes 16 feature maps of 5 x 5 after the second
    pooling, which is what a 32 x 32 input yields (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # one 2 x 2 / stride-2 max-pooling module, reused after both convolutions
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # flatten to (batch, 400) for the linear layers
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


net = Net()

In [None]:
# Two ingredients are still missing: a loss function and an optimizer

# Loss function: measures how far the model's prediction is from the ideal
# output (cross-entropy here).
criterion = nn.CrossEntropyLoss()

# Optimizer: drives the learning. This one implements stochastic gradient
# descent (with momentum) over all parameters of `net`.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
# The training loop ties everything together. Running this cell will likely
# take a few minutes.

for epoch in range(2):  # two full passes over the training data

    # loss accumulated since the last progress report
    running_loss = 0.0

    # the loader serves one (inputs, labels) batch per iteration
    for i, (inputs, labels) in enumerate(trainloader):
        # clear the gradients left over from the previous batch
        optimizer.zero_grad()

        # forward pass: prediction and its loss
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # backward pass: compute the gradients of the loss
        loss.backward()

        # update the learning weights in the direction expected to reduce the loss
        optimizer.step()

        # progress report: mean loss over every block of 2000 mini-batches
        running_loss += loss.item()
        if (i + 1) % 2000 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Check how well the model generalizes by predicting on data it was not trained on

correct = 0
total = 0

# gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        # the index of the largest score along dim 1 is the predicted class
        _, predicted = outputs.max(dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

## Custom frames RAFT

In [None]:
# Make the modules in RAFT/core importable (path is relative to the working
# directory). Guarded so that re-running the cell does not add duplicates.

if 'RAFT/core' not in sys.path:
    sys.path.append('RAFT/core')

#### Helper functions

In [None]:
# helper functions

from collections import OrderedDict
from raft import RAFT
from utils import flow_viz
from utils.utils import InputPadder


# numpy image -> batched float tensor on the target device
def process_img(img, device):
    """Convert a channel-last numpy image into a batched channel-first float tensor.

    Args:
        img: 3-D numpy array; its last axis is moved to the front.
        device: device the resulting tensor is moved to.

    Returns:
        A float32 tensor with a leading batch axis of size 1.
    """
    channel_first = torch.from_numpy(img).permute(2, 0, 1)
    batched = channel_first.float().unsqueeze(0)
    return batched.to(device)


def _match_module_prefix(state_dict, wrapped):
    """Return `state_dict` with keys that fit a (non-)DataParallel model.

    nn.DataParallel registers the wrapped network as `module`, so its
    parameter names start with "module.". Add or strip that prefix whenever
    the checkpoint and the model disagree; otherwise return it unchanged.
    """
    prefix = "module."
    has_prefix = bool(state_dict) and all(key.startswith(prefix) for key in state_dict)

    if wrapped and not has_prefix:
        return OrderedDict((prefix + key, value) for key, value in state_dict.items())
    if has_prefix and not wrapped:
        return OrderedDict((key[len(prefix):], value) for key, value in state_dict.items())
    return state_dict


def load_model(weights_path, args):
    """Build a RAFT model, load pretrained weights and move it to the device.

    Args:
        weights_path: path of the checkpoint file given to torch.load.
        args: options object passed to the RAFT constructor.

    Returns:
        The model with weights loaded, on "cuda" if available, else "cpu".
        It is wrapped in torch.nn.DataParallel when at least one CUDA device
        is present.

    Raises:
        RuntimeError: if the checkpoint cannot be read or does not fit the
            model; the original exception is chained as the cause.
    """
    model = RAFT(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("current device is")
    print(device)

    try:
        # NOTE: torch.load unpickles the file - only load checkpoints you trust
        pretrained_weights = torch.load(weights_path, map_location=device)
    except Exception as e:
        raise RuntimeError(f"Fehler beim laden der Gewichte: {e}") from e

    print(torch.cuda.device_count())

    wrapped = torch.cuda.device_count() >= 1
    if wrapped:
        model = torch.nn.DataParallel(model)

    # Without this, a checkpoint saved from a DataParallel model cannot be
    # loaded on a CPU-only machine (and the other way round).
    pretrained_weights = _match_module_prefix(pretrained_weights, wrapped)

    try:
        model.load_state_dict(pretrained_weights)
    except Exception as e:
        raise RuntimeError(f"Fehler beim setzen der Gewichte: {e}") from e

    model.to(device)

    return model


# run one forward pass of a flow model on a pair of frames
def inference(model, frame1, frame2, device, pad_mode='sintel', iters=12,
              flow_init=None, upsample=True, test_mode=True):
    """Estimate optical flow between two frames without tracking gradients.

    Args:
        model: callable flow model; switched to eval mode before use.
        frame1, frame2: numpy images, converted with process_img.
        device: device the frames are moved to.
        pad_mode: mode passed to InputPadder.
        iters, flow_init, upsample, test_mode: forwarded to the model unchanged.

    Returns:
        With test_mode=True the pair (flow_low, flow_up) unpacked from the
        model output - per the original author's note, the initial flow at
        1/8 resolution and the upsampled flow. Otherwise the model output is
        returned as is - per the original note, the flow estimate of every
        iteration. The padding added here is not removed from the result.
    """
    model.eval()

    with torch.no_grad():
        # preprocess both frames the same way
        image1 = process_img(frame1, device)
        image2 = process_img(frame2, device)

        # pad the inputs (the original note says RAFT needs image sizes
        # divisible by 8)
        padder = InputPadder(image1.shape, mode=pad_mode)
        image1, image2 = padder.pad(image1, image2)

        # the call is the same in both modes; only the result handling differs
        prediction = model(image1,
                           image2,
                           iters=iters,
                           flow_init=flow_init,
                           upsample=upsample,
                           test_mode=test_mode)

    if not test_mode:
        return prediction

    flow_low, flow_up = prediction
    return flow_low, flow_up


def get_viz(flo):
    """Turn the first flow field of a batch into an image via flow_viz.

    Takes element 0 of `flo`, moves its first axis to the end, copies it to
    the CPU as a numpy array and hands it to flow_viz.flow_to_image.
    """
    first = flo[0]
    channel_last = first.permute(1, 2, 0)
    as_array = channel_last.cpu().numpy()
    return flow_viz.flow_to_image(as_array)


# lightweight options object to pass to RAFT
class Args():
    """Plain attribute container for the options handed to the RAFT constructor.

    The constructor arguments are stored unchanged as attributes of the same
    name; how they are interpreted is up to the code that receives the object.

    Membership tests (`'name' in args`) report whether an attribute of that
    name has been set on the instance. With only the empty-iterator hack below,
    every such test was False, so a consumer that fills in defaults with
    `if 'x' not in args: args.x = ...` would overwrite values set here
    (NOTE(review): RAFT appears to do this for some options - confirm against
    RAFT/core/raft.py).
    """

    def __init__(self, model='', path='', small=False, mixed_precision=True, alternate_corr=False):
        self.model = model
        self.path = path
        self.small = small
        self.mixed_precision = mixed_precision
        self.alternate_corr = alternate_corr

    def __contains__(self, name):
        # only attributes set on this instance count, not methods or dunders
        return isinstance(name, str) and name in vars(self)

    # Kept for backward compatibility: the object can still be iterated and
    # yields nothing (the original hack for pretending to be iterable).
    def __iter__(self):
        return self

    def __next__(self):
        raise StopIteration

#### Get Data

In [None]:
# get the images

def _read_rgb(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    cv2.imread does not raise for a missing or unreadable file; it returns
    None, which would otherwise only fail later inside cvtColor with an
    unrelated-looking error.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        raise FileNotFoundError(f"could not read image file: {path}")
    # OpenCV loads images as BGR; matplotlib expects RGB
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


# NOTE(review): machine-specific absolute path - adjust to your checkout
demo_path = '/home/max/Dokumente/CV_projectsFork/RAFT/custom_demo_frames'
frame1 = _read_rgb(os.path.join(demo_path, 'm_baseFrameGray.jpg'))
frame2 = _read_rgb(os.path.join(demo_path, 'm_nextFrameGray.jpg'))

# show both frames side by side
_, ax = plt.subplots(1, 2, figsize=(15, 8))
ax[0].imshow(frame1)
ax[1].imshow(frame2);

#### Get Model

In [None]:
# load model
# Builds the network with default Args() and loads the checkpoint file
# raft-sintel.pth; the path is relative to the current working directory.
model = load_model("RAFT/models/raft-sintel.pth", args=Args())

In [None]:
# estimation on a custom pair of frames

# Use the device the model's weights live on instead of hard-coding 'cuda':
# load_model() falls back to the CPU when CUDA is unavailable, and the input
# frames must be on the same device as the model.
device = next(model.parameters()).device

## OPTIONAL (use KITTI only model)
# del model
# model = load_model("RAFT/models/raft-kitti.pth", args=Args())
# flow_iters = inference(model, frame1, frame2, device=device, pad_mode='kitti', iters=20, test_mode=False)
# flow_iters = inference(model, frame1, frame2, device=device, pad_mode='kitti', iters=10, test_mode=False)
flow_iters = inference(model, frame1, frame2, device=device, pad_mode='kitti', iters=9, test_mode=False)
# flow_iters = inference(model, frame1, frame2, device=device, pad_mode='kitti', iters=5, test_mode=False)

# visualize the first and the last element of the returned flow sequence
f, (ax0, ax1) = plt.subplots(1,2, figsize=(15,10))

ax0.imshow(get_viz(flow_iters[0]))
ax0.set_title('first flow iteration')
ax1.imshow(get_viz(flow_iters[-1]))
ax1.set_title('final flow iteration');