In [32]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib
import torchvision
from torchvision import transforms, datasets

import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

import random
matplotlib.use('Agg')

from models import *

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [37]:
# ResNet-18 from torchvision with pretrained weights requested.
resnet18 = torchvision.models.resnet18(pretrained=True)

# Re-wrap every top-level child of the network except the last one, so the
# result acts as a feature extractor rather than a classifier.
# (Original author's note: inputs size 224, outputs size 512.)
_resnet_children = list(resnet18.children())
resnet_backbone = nn.Sequential(*_resnet_children[:-1])

class Encoder(nn.Module):
    """Variational encoder built on the module-level ``resnet_backbone``.

    The backbone output is flattened per sample and passed through two
    linear heads that produce the mean and the log-variance of a diagonal
    Gaussian; a latent sample is then drawn from it with the
    reparameterization trick.

    Args:
        image_channels: Accepted for signature compatibility; not used by
            this class.
        h_dim: Number of features per sample after the backbone output is
            flattened. Must match what the backbone actually produces for
            the chosen input size, otherwise the linear heads raise a shape
            error.
        z_dim: Size of the latent vector.
    """

    def __init__(self, image_channels=3, h_dim=2048, z_dim=100): # hardcoding h_dim based on how flat final step is
        super(Encoder, self).__init__()
        self.h_dim = h_dim
        # NOTE: this is the shared module-level backbone object, so every
        # Encoder instance references (and would train) the same weights.
        self.resnet = resnet_backbone

        self.fc1 = nn.Linear(h_dim, z_dim)  # head producing mu
        self.fc2 = nn.Linear(h_dim, z_dim)  # head producing logvar

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + std * eps`` with ``eps ~ N(0, I)``.

        Args:
            mu: Mean tensor.
            logvar: Log-variance tensor, broadcastable against ``mu``.

        Returns:
            A tensor shaped like ``mu``, on the same device as ``mu``.
        """
        std = torch.exp(0.5 * logvar)
        # randn_like matches mu's shape, dtype and device, so this works on
        # CPU as well as GPU (the noise used to be forced onto CUDA).
        eps = torch.randn_like(mu)
        return mu + std * eps

    def forward(self, x):
        """Encode a batch of inputs.

        Args:
            x: Input batch whose first axis is the batch axis.

        Returns:
            Tuple ``(z, mu, logvar)``, each of shape ``(batch, z_dim)``.
        """
        features = self.resnet(x)
        # Flatten everything except the batch axis. Reshaping to a bare
        # (h_dim,) vector discarded the batch axis and failed for any batch
        # size other than one.
        h = features.view(features.size(0), -1)
        mu, logvar = self.fc1(h), self.fc2(h)
        z = self.reparameterize(mu, logvar)
        return z, mu, logvar

In [38]:
# Build the image dataset for a single video clip.

SZ = 256  # size passed to both the resize and the center crop below

# Preprocessing pipeline applied to every image: resize, center-crop to SZ,
# then convert to a tensor.
_preprocessing_steps = [
    transforms.Resize(SZ),
    transforms.CenterCrop(SZ),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocessing_steps)

vid_id = "DJI_0007_lr"  # alternative clip: "DJI_0006"

# ImageFolder expects the images to sit inside a sub-folder of the root
# directory, so a larger dataset can be assembled from the same
# hierarchical folder layout.
_video_root = '../../videos/' + vid_id
dataset = datasets.ImageFolder(root=_video_root, transform=transform)

# Number of images found (shown as the cell output).
len(dataset.imgs)

11722

In [43]:
# Take the image tensor of the first dataset entry, add a leading batch axis
# and move it to the GPU.
sample = dataset[0][0].unsqueeze(0).cuda()
sample.shape

# Instantiate the encoder with its default sizes and move it to the GPU;
# the trailing expression is what the cell displays.
encoder = Encoder()
encoder.cuda()

ValueError: too many values to unpack (expected 2)

(tensor([[[0.5333, 0.5412, 0.5529,  ..., 0.4000, 0.4000, 0.4078],
          [0.5373, 0.5412, 0.5529,  ..., 0.4039, 0.4078, 0.4118],
          [0.5373, 0.5412, 0.5529,  ..., 0.4078, 0.4157, 0.4196],
          ...,
          [0.5569, 0.5294, 0.5608,  ..., 0.5686, 0.5608, 0.5647],
          [0.5412, 0.5490, 0.6431,  ..., 0.5922, 0.5882, 0.5804],
          [0.5373, 0.6314, 0.7176,  ..., 0.5922, 0.5961, 0.5882]],
 
         [[0.5059, 0.5137, 0.5255,  ..., 0.3765, 0.3804, 0.3882],
          [0.5059, 0.5137, 0.5255,  ..., 0.3765, 0.3804, 0.3882],
          [0.5020, 0.5137, 0.5255,  ..., 0.3804, 0.3882, 0.3922],
          ...,
          [0.6118, 0.6627, 0.6353,  ..., 0.4157, 0.4078, 0.4039],
          [0.6471, 0.6471, 0.6392,  ..., 0.4196, 0.4118, 0.4078],
          [0.6588, 0.6549, 0.6431,  ..., 0.4235, 0.4275, 0.4157]],
 
         [[0.4392, 0.4510, 0.4627,  ..., 0.3059, 0.3059, 0.3137],
          [0.4392, 0.4510, 0.4627,  ..., 0.3059, 0.3098, 0.3137],
          [0.4392, 0.4510, 0.4627,  ...,

In [42]:
# Display the encoder's module tree (the cell output below is its repr).
encoder

Encoder(
  (resnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (r