In [None]:
# Reload imported modules automatically before each cell runs, so edits to
# helper files such as JoeyVAE598.py are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Leave this
# img_size is the side length (pixels) of the square image crops;
# num_epochs is the number of passes made by the training loop.
img_size = 224
num_epochs = 10

# TUNE THESE
# latent_size and hidden_size are forwarded to the VAE constructor,
# learning_rate to the Adam optimizer inside train_vae, and batch_size to
# SmallDataLoader.
latent_size = 50
hidden_size = 100
learning_rate = 3e-4
batch_size = 16

In [None]:
# Mount Google Drive so the project folder is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Absolute path of the shared project folder on the mounted Drive (the mount
# point is /content/drive).  A relative path stops resolving once the
# os.chdir below has run, so re-running this cell used to fail; the absolute
# form keeps the cell idempotent.
GOOGLE_DRIVE_PATH = os.path.join('/content', 'drive', 'My Drive', 'EECS 545 Project')
print(os.listdir(GOOGLE_DRIVE_PATH))
# Work from inside the project folder so that relative paths used later
# ("Weather", 'WeatherVAE.pt', the JoeyVAE598 module) resolve against it.
os.chdir(GOOGLE_DRIVE_PATH)

['metadata.json', 'Image.zip', '545 Project Ideas.gdoc', 'data_scene_flow', 'Copy of Data_Loading.ipynb', '.ipynb_checkpoints', 'Image.zip (Unzipped Files)', 'Images', 'Untitled Diagram.drawio', 'Weather', 'Extra_Weather_Data', 'More_Weather_Data_60k', 'DataSplit.ipynb', 'Unet + triplet loss.ipynb', 'Data_Loading.ipynb', 'saved_model', 'Untitled', '__pycache__', 'Joey Unet and Content Loss.ipynb', 'Test version(Yuanbin)-UNet Triplet loss.ipynb', 'JoeyVAE598.py', 'ExampleInterpolation.png', 'JoeyTransferLearning.ipynb', 'newest Unet and Triplet Loss.ipynb', 'WeatherVAE.pt', 'Tune parameters.ipynb', 'Baselines.ipynb', 'WeatherVAE.ipynb']


In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as dset

import matplotlib.pyplot as plt
%matplotlib inline


# for plotting
# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['font.size'] = 16
plt.rcParams['image.interpolation'] = 'nearest'  # draw pixels without smoothing
# plt.rcParams['image.cmap'] = 'gray'

In [None]:
# torch.cuda.is_available is a function and must be *called*: the bare function
# object is always truthy, so without the parentheses this check could never
# report a missing GPU.
if torch.cuda.is_available():
  print('Good to go!')
else:
  print('Please set GPU via Edit -> Notebook Settings.')

Good to go!


In [None]:
import numpy as np
from torchvision import datasets, models, transforms
from PIL import Image

class SmallDataLoader:
  """Hand-rolled batch loader for the weather image folders.

  Images are expected at ``<data_loc>/<split>/<class name>/<file>`` where
  ``<split>`` is "Train", "Val" or "Test" and ``<class name>`` is one of the
  values of ``label_keys``.  File names and integer labels are collected once
  in ``__init__``; images are opened and transformed lazily, one at a time,
  whenever a batch is requested.

  Only full batches are served: for each split just the first ``N_*`` items
  (the largest multiple of the batch size) are used.  ``get_batch``,
  ``get_val`` and ``get_test`` return ``("EOE", None)`` once their split is
  exhausted and rewind it, so the next call starts a new pass.  The training
  order is shuffled once at construction; call ``shuffle_train`` to reshuffle.
  """

  # Sentinel returned in place of an image batch at the end of a pass.
  END_OF_EPOCH = "EOE"

  def __init__(self, BatchSize=16, TestPercent=0.1, data_loc="Weather", img_size=224, dev="cuda"):
    """Index the image files of every split and build the transforms.

    Inputs:
    - BatchSize: number of images per batch.
    - TestPercent: accepted for backward compatibility; not used.
    - data_loc: dataset root, resolved against the current working directory.
    - img_size: side length of the square crops produced by the transforms.
    - dev: device on which the batch tensors are allocated.
    """
    self.train_names = []
    self.train_labels = []
    self.val_names = []
    self.val_labels = []
    self.test_names = []
    self.test_labels = []

    self.label_keys = {0: "cloudy", 1: "foggy", 2: "rain", 3: "snow", 4: "sunny"}
    self.data_loc = os.path.join(os.getcwd(), data_loc)
    self.batch_size = BatchSize
    self.img_size = img_size
    self.dev = dev

    # Collect file names and labels per split.  The listing goes through
    # self.data_loc so it uses the very same root that get_ind opens files from.
    split_storage = {
        "Train": (self.train_names, self.train_labels),
        "Val": (self.val_names, self.val_labels),
        "Test": (self.test_names, self.test_labels),
    }
    for split, (names, labels) in split_storage.items():
      for label, class_name in self.label_keys.items():
        files = os.listdir(os.path.join(self.data_loc, split, class_name))
        names.extend(files)
        labels.extend(len(files) * [label])

    # Number of usable items per split: the largest multiple of the batch size.
    self.N_train = len(self.train_names) - len(self.train_names) % self.batch_size
    self.N_val = len(self.val_names) - len(self.val_names) % self.batch_size
    self.N_test = len(self.test_names) - len(self.test_names) % self.batch_size
    # Shuffle data first
    self.shuffle_train()

    # Read cursors into each split.
    self.iter_no = 0
    self.val_no = 0
    self.test_no = 0

    # Random crop + flip for training; deterministic centre crop for evaluation.
    eval_transform = transforms.Compose([
        transforms.CenterCrop(self.img_size),
        transforms.ToTensor()
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    self.transform_dict = {
        "Train": transforms.Compose([
            transforms.RandomResizedCrop(self.img_size),
            transforms.RandomHorizontalFlip(),
            # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
            # transforms.GaussianBlur(5),
            transforms.ToTensor()
            # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]),
        "Val": eval_transform,
        "Test": eval_transform,
    }

  # index into file names, labels
  def get_ind(self, index, dset="Train"):
    """Load one image of split ``dset`` and return ``(tensor on self.dev, label)``.

    Raises ValueError when ``dset`` is not "Train", "Val" or "Test".
    """
    if dset == "Train":
      names, labels = self.train_names, self.train_labels
    elif dset == "Val":
      names, labels = self.val_names, self.val_labels
    elif dset == "Test":
      names, labels = self.test_names, self.test_labels
    else:
      raise ValueError("Unknown split {!r}; expected 'Train', 'Val' or 'Test'".format(dset))

    fname = names[index]
    label = labels[index]
    label_name = self.label_keys[label]
    img_loc = os.path.join(self.data_loc, dset, label_name, fname)
    # The context manager closes the file handle as soon as the pixels have
    # been converted, instead of leaving it to the garbage collector.
    with Image.open(img_loc) as raw_image:
      input_image = raw_image.convert('RGB')
    input_tensor = self.transform_dict[dset](input_image)
    return input_tensor.to(device=self.dev), label

  def _load_batch(self, start, dset):
    """Assemble the batch of split ``dset`` that begins at position ``start``."""
    batch_img = torch.zeros((self.batch_size, 3, self.img_size, self.img_size), device=self.dev, dtype=torch.float32)
    batch_lab = torch.zeros((self.batch_size), device=self.dev, dtype=torch.long)
    for i in range(self.batch_size):
      input_tensor, label = self.get_ind(start + i, dset=dset)
      batch_img[i] = input_tensor
      batch_lab[i] = label
    return batch_img, batch_lab

  # For training
  def get_batch(self):
    """Return the next training batch, or ``("EOE", None)`` at the end of the epoch."""
    # End of Epoch.  Strict ">" so that the final full batch, which ends
    # exactly at N_train, is still served (">=" silently skipped it).
    if self.iter_no + self.batch_size > self.N_train:
      self.iter_no = 0
      return self.END_OF_EPOCH, None

    batch = self._load_batch(self.iter_no, "Train")
    self.iter_no += self.batch_size
    return batch

  def get_val(self):
    """Return the next validation batch, or ``("EOE", None)`` when exhausted."""
    if self.val_no + self.batch_size > self.N_val:
      self.val_no = 0
      return self.END_OF_EPOCH, None

    batch = self._load_batch(self.val_no, "Val")
    self.val_no += self.batch_size
    return batch

  def get_test(self):
    """Return the next test batch, or ``("EOE", None)`` when exhausted."""
    if self.test_no + self.batch_size > self.N_test:
      self.test_no = 0
      return self.END_OF_EPOCH, None

    batch = self._load_batch(self.test_no, "Test")
    self.test_no += self.batch_size
    return batch

  def shuffle_train(self):
    """Randomly reorder the training items and rewind the training cursor.

    Only ``N_train`` indices are drawn, so the training lists are cut down to
    a multiple of the batch size as a side effect.
    """
    inds = np.random.choice(self.N_train, size=self.N_train, replace=False)
    self.train_names = [self.train_names[i] for i in inds]
    self.train_labels = [self.train_labels[i] for i in inds]
    self.iter_no = 0


## Train a model

Now that we have our VAE defined and loss function ready, let's train our model! Our training script is provided in `a6_helper.py`, and we have pre-defined an Adam optimizer, learning rate, and number of epochs for you to use.

Training for 10 epochs should take ~2 minutes and your loss should be less than 120.

In [None]:
from JoeyVAE598 import loss_function

def train_vae(epoch, model, train_loader, cond=False, num_class=5, lr=0.002):
    """
    Train a VAE or CVAE for one epoch.

    Inputs:
    - epoch: Current epoch number (only used in the progress message)
    - model: VAE model object; called as model(data), or as
      model(data, one_hot_labels) when cond is True, and expected to return
      (reconstruction, mu, logvar)
    - train_loader: loader whose get_batch() returns (images, labels), or
      ("EOE", None) once the epoch is over (e.g. SmallDataLoader)
    - cond: Boolean value representing whether we're training a VAE or
      Conditional VAE
    - num_class: Number of classes used for the one-hot encoding (cond only)
    - lr: Learning rate of the Adam optimizer

    Returns:
    - Mean loss per batch over the epoch as a float, or None when the loader
      produced no batch.

    NOTE: a new Adam optimizer is created on every call, so its moment
    estimates start from scratch at each epoch.
    """
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_loss = 0.0
    num_batches = 0
    loss = None
    while True:
        data, labels = train_loader.get_batch()
        # The end-of-epoch marker is a plain string; test the type first
        # instead of comparing an image tensor against a string.
        if isinstance(data, str) and data == "EOE":
            break
        if cond:
            # One-hot labels on the same device/dtype as the images.  The
            # original referenced the undefined names `one_hot` and
            # `num_classes`, so cond=True raised NameError.
            one_hot_vec = F.one_hot(labels, num_class).to(device=data.device, dtype=data.dtype)
            recon_batch, mu, logvar = model(data, one_hot_vec)
        else:
            recon_batch, mu, logvar = model(data)
        optimizer.zero_grad()
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        num_batches += 1

    if loss is None:
        # Empty epoch: there is no loss to report.
        print('Train Epoch: {} \tno batches were produced'.format(epoch))
        return None
    # As before, the message reports the loss of the final batch.
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))
    return train_loss / num_batches

In [None]:
from JoeyVAE598 import VAE

# Loader over the image folders, using the tuned batch size.
dl = SmallDataLoader(BatchSize=batch_size, img_size=img_size)
device = 'cuda'

# NOTE(review): input_size is img_size ** 2, i.e. the pixel count of a single
# channel; how VAE uses it is defined in JoeyVAE598 — confirm.
input_size = dl.img_size ** 2
model = VAE(input_size, latent_size=latent_size, hidden_size=hidden_size)
model.cuda()

# Check latent size
# Push a random batch of 5 images through the encoder and print the output shape.
with torch.no_grad():
  z = torch.randn(5, 3, img_size, img_size).to(device='cuda')
  print(model.encoder(z).shape)

# Train and save
# The file is overwritten after every epoch, so it always holds the latest model.
for epoch in range(0, num_epochs):
  print(epoch)
  train_vae(epoch, model, dl, lr=learning_rate)
  torch.save(model, 'WeatherVAE.pt')

# Load
# NOTE(review): the whole module object is pickled rather than a state_dict.
# Recent PyTorch releases default torch.load to weights_only=True, which
# presumably rejects such a file — confirm against the installed version.
model = torch.load('WeatherVAE.pt')
model.eval()

torch.Size([5, 100])


VAE(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
    )
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Sequential(
      (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
    )
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
    )
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=Fal

## Visualize results

After training our VAE network, we're able to take advantage of its power to generate new training examples. This process simply involves the decoder: we initialize latent vectors $z$ by sampling them at random, and generate new examples by passing these latent vectors into the decoder.

Run the cell below to generate new images! You should be able to visually recognize many of the generated weather images, although some may be a bit blurry or badly formed. Our next model will see improvement in these results.

In [None]:
# Draw 5 latent vectors from a standard normal and decode them into images.
z = torch.randn(5, latent_size).to(device='cuda')
import matplotlib.gridspec as gridspec
model.eval()
samples = model.decoder(z).data.cpu().numpy()

# One row of five tightly packed panels without axes.
fig = plt.figure(figsize=(5, 1))
gspec = gridspec.GridSpec(1, 5)
gspec.update(wspace=0.05, hspace=0.05)
for i, sample in enumerate(samples):
  ax = plt.subplot(gspec[i])
  plt.axis('off')
  ax.set_xticklabels([])
  ax.set_yticklabels([])
  ax.set_aspect('equal')
  # Move the first axis of each sample to the end before plotting
  # (presumably channels-first decoder output — confirm).
  plt.imshow(np.transpose(sample, axes=[1, 2, 0]))

## Latent Space Interpolation

As a final visual test of our trained VAE model, we can perform interpolation in latent space. We generate random latent vectors $z_0$ and $z_1$, and linearly interpolate between them; we run each interpolated vector through the trained decoder to produce an image.

Each row of the figure below interpolates between two random vectors. For the most part the model should exhibit smooth transitions along each row, demonstrating that the model has learned something nontrivial about the underlying spatial structure of the images it is modeling.

In [None]:
def show_images(images, title="ExampleInterpolation.png"):
    """Draw a batch of images on a square grid and save the figure.

    Inputs:
    - images: array whose first axis indexes the images; each image is
      transposed with axes [1, 2, 0] before plotting (presumably
      channels-first input — confirm against the decoder output).
    - title: path of the image file the finished figure is written to.

    Nothing is drawn or saved when `images` is empty.
    """
    # Imported here so the function does not depend on another cell having
    # imported gridspec first.
    import matplotlib.gridspec as gridspec

    num_images = images.shape[0]
    if num_images == 0:
        return

    # Smallest square grid that can hold every image.
    sqrtn = int(np.ceil(np.sqrt(num_images)))

    fig = plt.figure(figsize=(sqrtn, sqrtn))
    gs = gridspec.GridSpec(sqrtn, sqrtn)
    gs.update(wspace=0.05, hspace=0.05)

    for i, img in enumerate(images):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(np.transpose(img, axes=[1, 2, 0]))

    # Save once, after every panel is drawn.  Saving inside the loop rewrote
    # the same file for each image and produced the same final result.
    plt.savefig(title)

S = 12  # the figure is an S x S grid: S latent pairs, S interpolation steps each
device = 'cuda'
# Endpoints: S random pairs of latent vectors (z0[k], z1[k]).
z0 = torch.randn(S,latent_size , device=device)
z1 = torch.randn(S, latent_size, device=device)
# S evenly spaced weights in [0, 1], shaped (S, 1, 1) so they broadcast against
# the (S, latent_size) endpoints.
w = torch.linspace(0, 1, S, device=device).view(S, 1, 1)
# The blend broadcasts to (S, S, latent_size) indexed [weight, pair]; the
# transpose puts the pair index first, so every run of S consecutive rows after
# the reshape sweeps one pair from z1 (w = 0) to z0 (w = 1).
z = (w * z0 + (1 - w) * z1).transpose(0, 1).reshape(S * S, latent_size)

samples = model.decoder(z).data.cpu().numpy()
show_images(samples)

Now try using an SVM to classify the weather label of each image from its VAE latent encoding.

In [None]:
from JoeyVAE598 import reparametrize

# Fresh loader with batch size 1, so each get_batch / get_val call yields a single image.
dl = SmallDataLoader(BatchSize=1, img_size=224, dev="cuda")
model.eval()

def encode_train_batch(model, data):
  """Return latent samples for ``data``.

  The encoder output is fed to the model's ``mu_layer`` and ``logvar_layer``,
  and the two results are combined by ``reparametrize`` into the latent code.
  """
  features = model.encoder(data)
  return reparametrize(model.mu_layer(features), model.logvar_layer(features))

def get_train_encodings(model, dl, N=50):
  """Encode the training split, drawing N latent samples per image.

  Inputs:
  - model: trained VAE; its parameters decide which device the images are
    moved to before encoding.
  - dl: loader whose get_batch() returns (images, labels), or ("EOE", None)
    once the split is exhausted.
  - N: number of latent samples drawn for every image.

  Returns:
  - X: list of latent vectors (numpy arrays), N per training image.
  - Y: list with the label that belongs to each entry of X.
  """
  X = []
  Y = []
  B = dl.batch_size
  # Encode on whatever device the model lives on instead of assuming 'cuda:0'.
  first_param = next(model.parameters(), None)
  i = 0
  while True:
    data, labels = dl.get_batch()
    # The end-of-split marker is a string; check the type first rather than
    # comparing an image tensor with a string.
    if isinstance(data, str) and data == "EOE":
      break
    i += B
    if i % 100 == 0:
      print(i)
    if first_param is not None:
      data = data.to(device=first_param.device)
    rows = data.shape[0]
    # N stacked copies of the batch: row k of the result is image k % rows.
    data = torch.cat(N * [data])
    z = encode_train_batch(model, data).detach().cpu().numpy()
    labels_np = labels.detach().cpu().numpy()
    # Keep every sample with the label of the image it came from.  The old
    # code paired labels[0] with the first N rows, which is only right for
    # batch size 1.
    for ind in range(z.shape[0]):
      X.append(z[ind])
      Y.append(labels_np[ind % rows])
  return X, Y

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score

def get_val_encodings(model, dl):
  """Encode the validation split using the posterior mean of every image.

  Inputs:
  - model: trained VAE; its parameters decide which device the images are
    moved to before encoding.
  - dl: loader whose get_val() returns (images, labels), or ("EOE", None)
    once the split is exhausted.

  Returns:
  - X: list with one latent mean vector (numpy array) per validation image.
  - Y: list with the matching labels.
  """
  X = []
  Y = []
  B = dl.batch_size
  # Encode on whatever device the model lives on instead of assuming 'cuda:0'.
  first_param = next(model.parameters(), None)
  i = 0
  while True:
    data, labels = dl.get_val()
    # The end-of-split marker is a string; check the type first rather than
    # comparing an image tensor with a string.
    if isinstance(data, str) and data == "EOE":
      break
    i += B
    if i % 100 == 0:
      print(i)
    if first_param is not None:
      data = data.to(device=first_param.device)
    # No sampling here: the mean of the posterior is used as the encoding.
    mu = model.mu_layer(model.encoder(data)).detach().cpu().numpy()
    labels_np = labels.detach().cpu().numpy()
    for ind in range(mu.shape[0]):
      X.append(mu[ind])
      Y.append(labels_np[ind])
  return X, Y

In [None]:
# Rebuild the batch-size-1 loader (this also reshuffles the training order) and
# keep the model in evaluation mode for the SVM experiments.
dl = SmallDataLoader(BatchSize=1, img_size=224, dev="cuda")
model.eval()

def test_svm(model, dl, N_samples):
  """Fit an SVM on sampled training encodings and score it on validation data.

  ``N_samples`` latent samples are drawn per training image through
  ``get_train_encodings``; the validation encodings come from
  ``get_val_encodings``.  Returns the value computed by ``accuracy_score``.
  """
  # Gradients are not needed while the model is only used as an encoder.
  with torch.no_grad():
    print("Getting train set")
    train_x, train_y = get_train_encodings(model, dl, N=N_samples)

    print("Fitting")
    classifier = svm.SVC()
    classifier.fit(train_x, train_y)

    print("Getting val set")
    val_x, val_y = get_val_encodings(model, dl)
    predictions = np.asarray(classifier.predict(val_x))
    return accuracy_score(predictions, np.asarray(val_y))

# Sweep the number of latent samples drawn per training image and record the
# validation accuracy of the SVM fitted for each setting.
N_list = [1, 10, 50, 100]
acc_list = []
for N_samples in N_list:
  acc = test_svm(model, dl, N_samples)
  acc_list.append(acc * 100)  # stored as a percentage
  print("N:", N_samples, " Accuracy:", acc*100)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
100
200
N: 1  Accuracy: 32.421875
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
100
200
N: 10  Accuracy: 29.296875
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
100
200
N: 50  Accuracy: 31.640625
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100


In [None]:
# Summary of the sweep: sample counts and the matching accuracies in percent.
print(N_list)
print(acc_list)