In [1]:
import logging

import pandas as pd
import numpy as np
import torch as t

from gulpio2 import GulpDirectory
from pathlib import Path
from moviepy.editor import ImageSequenceClip, clips_array

## Torch intro (already know all of this)

A basic understanding of creating and manipulating tensors, as well as converting them between the torch and numpy libraries.

In [2]:
# model = t.load('../models/trn_rgb.ckpt')

In [3]:
# model['state_dict']

In [4]:
# Build a tensor directly from a nested Python list (2 rows x 2 columns).
data = [[1,2],[3,4]]

x_data = t.tensor(data)

x_data

tensor([[1, 2],
        [3, 4]])

In [5]:
# Same values routed through NumPy: wrap the list in an ndarray, then hand that to torch.
np_array = np.array(data)
x_np = t.from_numpy(np_array)

x_np

tensor([[1, 2],
        [3, 4]])

In [6]:
# All-ones tensor with the same shape as x_data.
x_ones = t.ones_like(x_data)

x_ones

tensor([[1, 1],
        [1, 1]])

In [7]:
# x_data holds integers, so a float dtype is requested explicitly for the random values.
x_rand = t.rand_like(x_data, dtype=t.float)

x_rand

tensor([[0.9567, 0.1242],
        [0.5955, 0.9702]])

In [8]:
# One shape tuple drives all three factory functions.
shape = (2, 3)

# Random, all-ones and all-zeros tensors, created in that order.
t_rand, t_ones, t_zero = (factory(shape) for factory in (t.rand, t.ones, t.zeros))

t_rand, t_ones, t_zero

(tensor([[0.2603, 0.0285, 0.3682],
         [0.2309, 0.1206, 0.9349]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[0., 0., 0.],
         [0., 0., 0.]]))

In [9]:
# Inspect a tensor's basic attributes: its shape, element dtype and the device it lives on.
tensor = t.rand(3,4)

tensor.shape, tensor.dtype, tensor.device

(torch.Size([3, 4]), torch.float32, device(type='cpu'))

In [10]:
# Move the tensor to the GPU when CUDA is available; otherwise it is left untouched.
if t.cuda.is_available():
    tensor = tensor.to('cuda')
    
tensor.device

device(type='cuda', index=0)

In [11]:
# Standard NumPy-style indexing and slicing:
# start from a 4x4 tensor of ones and overwrite every row's column 1 with zero.

tensor = t.ones(4,4)

tensor[:,1] = 0

tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [12]:
# Four (4,1) columns holding the constants 1..4, joined side by side along dim 1.
columns = [t.ones(4, 1) * value for value in (1, 2, 3, 4)]
tensor = t.cat(columns, dim=1)

tensor

tensor([[1., 2., 3., 4.],
        [1., 2., 3., 4.],
        [1., 2., 3., 4.],
        [1., 2., 3., 4.]])

In [13]:
# Two (4,1) column vectors filled with a constant each.
t1 = t.full((4, 1), 2.0)
t2 = t.full((4, 1), 4.0)

# Element-wise product of the two columns.
t1.mul(t2)

tensor([[8.],
        [8.],
        [8.],
        [8.]])

In [14]:
# t1 and t2 are both (4,1); transpose t2 so matmul sees (4,1) @ (1,4) and yields a (4,4) result.

t_x = t1 @ t2.T

t_x

tensor([[8., 8., 8., 8.],
        [8., 8., 8., 8.],
        [8., 8., 8., 8.],
        [8., 8., 8., 8.]])

In [15]:
# Operations with a trailing underscore work in place: they save memory,
# but they are destructive because the tensor's previous values are overwritten.

t_x.add_(5)

tensor([[13., 13., 13., 13.],
        [13., 13., 13., 13.],
        [13., 13., 13., 13.],
        [13., 13., 13., 13.]])

In [16]:
# tensor -> numpy: obtain a NumPy array from the tensor

n_x = t_x.numpy()

n_x

array([[13., 13., 13., 13.],
       [13., 13., 13., 13.],
       [13., 13., 13., 13.],
       [13., 13., 13., 13.]], dtype=float32)

In [17]:
# An in-place change to the tensor is reflected in the NumPy array obtained from it
# (compare n_x in the next cell with its earlier value).

t_x.sub_(5)

tensor([[8., 8., 8., 8.],
        [8., 8., 8., 8.],
        [8., 8., 8., 8.],
        [8., 8., 8., 8.]])

In [18]:
# n_x was never reassigned, yet it now shows the result of the in-place subtraction on t_x.
n_x

array([[8., 8., 8., 8.],
       [8., 8., 8., 8.],
       [8., 8., 8., 8.],
       [8., 8., 8., 8.]], dtype=float32)

In [19]:
# numpy -> tensor: the array's float64 dtype carries over to the tensor

n = np.ones([4,4])

t_n = t.from_numpy(n)
t_n

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=torch.float64)

In [20]:
# Cast the float64 tensor to torch's float32; t_n itself is not reassigned.
t_n.float()

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [21]:
# Print the tensor before the in-place update ...
x = t.ones(2,2)
print(x)

# ... then add 5 in place and view the updated values through NumPy.
x.add_(5)
n = x.numpy()
print(n)

tensor([[1., 1.],
        [1., 1.]])
[[6. 6.]
 [6. 6.]]


## Intro to `TORCH.AUTOGRAD`

- Forward and backward propagation through a pretrained model, using a single frame from an EPIC video as the input tensor

In [22]:
import torchvision

# NOTE(review): gulp_root is assigned here but not used by any of the visible cells — confirm whether it is still needed.
gulp_root = Path.home()
# Open the gulped RGB data; the path is relative to the notebook's working directory.
rgb_train = GulpDirectory('../datasets/epic/gulp/rgb_p01/')
# Look up the entry stored under 'P01_01_90' and unpack it into rgb_frames and rgb_meta.
rgb_frames, rgb_meta = rgb_train['P01_01_90']

In [23]:
# Stack the frames into a single ndarray before handing them to torch.
# t.tensor() on a sequence of per-frame arrays (presumably what GulpDirectory
# returns — TODO confirm) converts element by element, which PyTorch itself
# warns is extremely slow. np.stack always produces a fresh array, so the
# resulting tensor does not alias the loader's buffers, and the cell can be
# re-run after rgb_frames has already become a (CPU) tensor.
rgb_frames = t.from_numpy(np.stack(rgb_frames))
rgb_frames.shape


torch.Size([46, 256, 456, 3])

In [24]:
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm the installed version before switching.
model = torchvision.models.resnet18(pretrained=True)
# rgb_frames has shape [46, 256, 456, 3], so a single frame is laid out
# height x width x channel, while the model is fed a (1, 3, 256, 456) batch.
# permute moves the channel axis to the front; reshape would keep the memory
# order and scatter each pixel's RGB values across the three "channels".
# unsqueeze(0) then adds the batch dimension.
data = rgb_frames[0].permute(2, 0, 1).unsqueeze(0)
# data = t.rand(1,3,256,456)
# labels = t.randint(256,(1,1000))
# Random target with one value per model output (1000 of them).
labels = t.rand(1,1000)

In [25]:
# Forward pass: the frame tensor is cast to float before being fed to the model.
pred = model(data.float())

In [26]:
# Reduce the prediction error to a single scalar so that backward() can be called on it.
loss = (pred - labels).sum()

# Backward pass: run autograd from the scalar loss.
loss.backward()

In [27]:
# SGD with momentum over every model parameter; note it is created after backward() has already run.
optim = t.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [28]:
# Apply one optimizer update to the model parameters.
optim.step()

## Creating a neural network

In [29]:
import torch.nn as nn
import torch.nn.functional as F

In [30]:
class Net(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three linear layers.

    ``fc1`` expects ``16 * 6 * 6`` input features, which is what a
    single-channel 32x32 image produces after the two 3x3 convolutions and
    2x2 poolings. The network outputs 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: input batch; it is passed straight to ``conv1``, which takes
                one input channel.

        Returns:
            Tensor with one row of 10 values per sample.
        """
        # Max pooling over a (2,2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # For a square window a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse everything except the batch dimension for the linear layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of every dimension of ``x`` except the first
        (batch) one; it is 1 when ``x`` has no further dimensions.
        """
        num_features = 1
        # Use a dedicated loop name so the tensor argument `x` is not rebound.
        for dim in x.size()[1:]:  # all dimensions except the batch dimension
            num_features *= dim
        return num_features

In [31]:
# Instantiate the network; displaying it prints the registered layers.
net = Net()

net

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [32]:
# Learnable parameters: a weight and a bias for each of the five layers, so 10 tensors in total.
# params[0] is the first one registered, conv1's weight.
params = list(net.parameters())
len(params), params[0].size()

(10, torch.Size([6, 1, 3, 3]))

In [33]:
# Random input: a batch of one single-channel 32x32 image.
data = t.randn(1,1,32,32)

out = net(data)

# Clear any existing gradients, then backpropagate a random gradient.
# `out` is not a scalar, so backward() is given a tensor matching its (1,10) shape.
net.zero_grad()
out.backward(t.randn(1,10))

In [36]:
# Random tensor of shape (3, 1920, 1080).
test = t.randn(3,1920,1080)

In [37]:
# Display the tensor; the recorded output abbreviates it with ellipses.
test

tensor([[[-0.1397,  0.5502,  1.6300,  ...,  0.9398, -0.0719, -2.2812],
         [-0.2084, -0.4688,  0.9950,  ...,  1.1666, -1.1199,  0.6802],
         [-1.1255,  1.5349,  0.4463,  ...,  0.1691,  0.4356, -0.1499],
         ...,
         [-0.1423, -1.9227, -1.1938,  ..., -0.3782, -0.7625,  0.3316],
         [-0.4834, -0.2956,  0.9604,  ...,  1.8837, -0.0243,  1.7543],
         [ 0.6476, -1.7784,  1.1157,  ...,  0.2569,  0.1327, -0.1933]],

        [[ 1.2829,  1.1765,  0.1958,  ..., -1.5732,  0.1448, -1.2092],
         [-0.6557, -0.3134,  0.1925,  ...,  2.2321, -0.1551,  0.8474],
         [-0.8401,  0.7711, -1.9645,  ..., -0.9020, -1.7128,  2.1805],
         ...,
         [-0.5909,  1.0970,  0.9628,  ..., -0.8758,  1.2320, -1.2602],
         [-1.3636,  0.3095,  1.3150,  ...,  0.4650,  0.4415,  0.6542],
         [ 0.8892,  1.1601, -0.4193,  ..., -0.4005, -0.2716,  2.7571]],

        [[-0.2225, -0.3776,  1.8823,  ..., -0.1462,  0.1571,  0.1435],
         [-0.3925, -0.2903, -0.0522,  ..., -0