In [2]:
# Imports
import torch
import re
from google.colab import drive
import numpy as np
from tqdm.notebook import tqdm
import shutil
import sys
from __future__ import print_function
from __future__ import division
from PIL import Image
from torchvision import transforms
import torch.nn as nn
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import random
import json
from tqdm.notebook import tqdm
# Mount Google Drive inside the Colab VM so files stored on Drive are reachable
# under /content/gdrive.
drive.mount('/content/gdrive')
# Drive folder that holds the ShapeStacks archive unpacked in a later cell.
root_path = '/content/gdrive/MyDrive/Data/'
# Make the shapestacks-master folder on Drive importable from this notebook.
sys.path.append('/content/gdrive/MyDrive/shapestacks-master')

Mounted at /content/gdrive


In [3]:
# extract files to local drive (11 minutes)
# Unpacks shapestacks-rgb.tar.gz from the Drive data folder into
# /content/shapestacks-images on the VM's local disk.
# NOTE(review): this runs unconditionally, so re-running the cell repeats the
# whole extraction even when the target directory already exists.
shutil.unpack_archive(os.path.join(root_path, "shapestacks-rgb.tar.gz"), '/content/shapestacks-images')

In [4]:
# Given the filepath to an image, will return the perspective number
def get_perspective(filename):
  """Extract the camera index from a path containing ``c=unique-cam_<N>``.

  Args:
    filename: Path or file name that embeds a ``c=unique-cam_<N>`` token,
      where ``<N>`` is one or two digits.

  Returns:
    The camera index ``<N>`` as an ``int``.

  Raises:
    AttributeError: If ``filename`` contains no such token (the regex search
      returns ``None``).
  """
  cam_token = re.search(r'c=unique-cam_\d{1,2}', filename).group(0)
  # Everything after the last underscore is the one- or two-digit index.
  digits = cam_token.split('_')[-1]
  return int(digits)

# Given an environment name, will return the ground truth stability
def is_stable(filename):
  """Return whether the ShapeStacks scenario named by ``filename`` is stable.

  The scenario name embeds ``vcom=<N>`` and ``vpsf=<N>`` (one or two digits
  each). In the ShapeStacks naming scheme these give the level at which a
  centre-of-mass / planar-surface violation was introduced, with 0 meaning
  "no violation", so a stack is stable only when both are 0.

  The previous implementation returned the negation of this (True when a
  violation was present), which contradicted the function's name.

  Args:
    filename: Environment name or path containing ``vcom=`` and ``vpsf=``.

  Returns:
    ``True`` if both ``vcom`` and ``vpsf`` are 0, otherwise ``False``.

  Raises:
    AttributeError: If either token is missing from ``filename``.
  """
  # Capture groups pull the numbers out directly instead of filtering digits.
  vcom = int(re.search(r'vcom=(\d{1,2})', filename).group(1))
  vpsf = int(re.search(r'vpsf=(\d{1,2})', filename).group(1))

  return vcom == 0 and vpsf == 0

In [7]:
# Rearrange vectors using a custom mapping so that perspectives are sequentially
# arranged around the tower
def rearrange_vector(vec):
  """Return a new 12-element list holding ``vec`` in the custom order.

  Output position ``k`` receives ``vec[new_order[k]]``.

  Args:
    vec: Indexable sequence of exactly 12 items.

  Returns:
    A new ``list`` with the 12 items of ``vec`` permuted.

  Raises:
    Exception: If ``len(vec)`` is not 12.
  """
  # Source index for each output slot.
  new_order = (2, 0, 1, 4, 3, 5, 8, 6, 7, 10, 9, 11)
  if len(vec) == len(new_order):
    return [vec[source_index] for source_index in new_order]
  raise Exception("Invalid input vector")


In [6]:
# Return a mapping of {environment name: [image names]} from the ShapeStacks
# dataset
def load_filenames():
  """Index the extracted recordings by environment directory name.

  Each sub-directory of the recordings folder is scanned for ``.png`` files.
  A directory is included only when its path contains ``'blocks'`` and it
  holds exactly 16 such files. The files are then sorted by
  ``get_perspective``, the first 12 are kept, and those are reordered with
  ``rearrange_vector``.

  Returns:
    dict mapping environment directory name to a list of 12 image paths.
  """
  root_path = '/content'
  base_dir = root_path +'/shapestacks-images/shapestacks/recordings'
  env_names = os.listdir(base_dir)

  filenames = {}
  for _, env_name in tqdm(enumerate(env_names)):
    env_dir = os.path.join(base_dir, env_name)
    wants_env = 'blocks' in env_dir

    png_paths = []
    for entry in os.listdir(env_dir):
      if entry.endswith('.png') and wants_env:
        png_paths.append(os.path.join(env_dir, entry))

    # Some folders don't have 16 images; only complete ones are kept
    if len(png_paths) == 16:
      by_perspective = sorted(png_paths, key=get_perspective)
      filenames[env_name] = rearrange_vector(by_perspective[:12])

  return filenames

In [8]:

def initialize_model():
    """Create a pretrained ResNet-18 whose ``fc`` head is replaced by a new one.

    Every parameter of the downloaded network is frozen
    (``requires_grad = False``). The ``fc`` attribute is then replaced by a
    fresh stack of three linear layers (512 -> 1024 -> 512 -> 12), which are
    therefore the only trainable parameters.

    NOTE(review): there is no activation between the linear layers, so the
    stack composes to a single affine map — confirm this is intended.

    Returns:
        The model, moved to CUDA when available and left on the CPU otherwise.
    """
    # Resnet18
    backbone = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
    # uncomment to use larger ResNets
    # NOTE(review): the new head assumes 512 input features; verify the width
    # of the chosen network's original fc layer before swapping.
    # backbone = torch.hub.load('pytorch/vision:v0.10.0', 'resnet34', pretrained=True)
    # backbone = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
    # backbone = torch.hub.load('pytorch/vision:v0.10.0', 'resnet101', pretrained=True)
    # backbone = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)

    # Exclude the existing layers from gradient descent
    for weight in backbone.parameters():
        weight.requires_grad = False

    # Swap in our new fc layers (created after freezing, so they stay trainable)
    backbone.fc = nn.Sequential(
        nn.Linear(512, 1024),
        nn.Linear(1024, 512),
        nn.Linear(512, 12),
    )

    # Move the model to the GPU if a CUDA GPU is available
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    backbone.to(target_device)
    return backbone

# Initialize the model for this run
model = initialize_model()

Downloading: "https://github.com/pytorch/vision/archive/v0.10.0.zip" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [9]:
# Load in our generated target tensors
in_file = '/content/gdrive/MyDrive/Data/target_tensors.json'
# Bind the open handle to its own name: reusing `in_file` here would leave the
# path variable pointing at a closed file object once the block exits.
with open(in_file) as tensor_file:
  target_tensors = json.load(tensor_file)

# Generate a list of (image, target_tensor) tuples
# Target_tensor is rotated so that it is relative to the image's perspective number
def process():
  """Build shuffled train/test lists of ``(image_path, target_tensor)`` pairs.

  For each environment returned by ``load_filenames()``, the first 12 entries
  of its vector in the module-level ``target_tensors`` are reordered with
  ``rearrange_vector``. The image at position ``index`` is paired with that
  vector rolled left by ``index`` (``np.roll(..., -index)``).

  An environment is kept only when the element sum of its target vector is
  non-zero and below 11.

  The 80/20 split is made per environment, not per image. Splitting the
  flattened pair list would put different views of the same tower (with
  rotations of the same target) on both sides of the split and leak test
  towers into training.

  Returns:
    ``(train, test)``: two lists of ``(image_path, torch.Tensor)`` tuples,
    each shuffled.

  Raises:
    KeyError: If an environment has no entry in ``target_tensors``.
  """
  environments = load_filenames()
  pairs_per_environment = []
  for environment_name, file_names in tqdm(list(environments.items())):
    target_tensor = np.asarray(rearrange_vector(target_tensors[environment_name][:12]))
    # Here we can apply normalisation if required for a case such as CEL
    new_tensor = target_tensor
    # Only keep non flawless towers (sum evaluated once)
    total = sum(new_tensor)
    if total != 0 and total < 11:
      # Rotate the target tensor to make it relative to each image
      pairs_per_environment.append([
        (file_name, torch.tensor(np.roll(new_tensor, -index)))
        for index, file_name in enumerate(file_names)
      ])

  # Create train and test split at environment granularity
  random.shuffle(pairs_per_environment)
  cut_point = round(len(pairs_per_environment) * 0.8)
  train = [pair for group in pairs_per_environment[:cut_point] for pair in group]
  test = [pair for group in pairs_per_environment[cut_point:] for pair in group]

  # Mix the views of different environments within each split
  random.shuffle(train)
  random.shuffle(test)
  return train, test

In [10]:
# Given a filepath to an image, this function will return a tensor, normalised
# as requested by ResNet documentation:
#   https://github.com/pytorch/examples/blob/97304e232807082c2e7b54c597615dc0ad8f6173/imagenet/main.py#L197-L198
def load_images(image_locations):
  """Load image files into one normalised batch tensor.

  Each image is resized to 256, centre-cropped to 224, converted to a tensor
  and normalised with the mean/std given below.

  Args:
    image_locations: Iterable of image file paths.

  Returns:
    A tensor with one leading batch entry per path, in input order.

  Raises:
    RuntimeError: If ``image_locations`` is empty (nothing to stack).
  """
  preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])
  batch = []
  for filename in image_locations:
    # The context manager closes the file deterministically. convert('RGB')
    # guarantees 3 channels so RGBA / grayscale / palette PNGs match the
    # 3-value mean/std of Normalize instead of failing.
    with Image.open(filename) as image:
      batch.append(preprocess(image.convert('RGB')))
  return torch.stack(batch)

In [14]:
# Define Optimizer and Loss Function
# Gather every parameter of `model` that still requires gradients, printing
# each name as it is collected, and hand only those to Adam.
params_to_update = []
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print("\t",name)
    params_to_update.append(param)

optimizer = torch.optim.Adam(params_to_update, lr=0.01)

# Euclidean distance loss function
def euclidean(a, b):
    """Return the square root of the summed squared differences of ``a`` and ``b``.

    The sum runs over every element of ``a - b``, so the result is a single
    scalar tensor for the whole input, not one distance per row.

    NOTE(review): for batched inputs the value therefore grows with batch
    size, and the gradient of ``sqrt`` is undefined when ``a == b`` exactly —
    confirm both are acceptable.

    Args:
        a: Tensor of predictions.
        b: Tensor of targets, broadcastable against ``a``.

    Returns:
        A 0-dimensional tensor.
    """
    return (a - b).pow(2).sum().sqrt()

# Note some preprocessing differences are required when changing loss functions
loss_func = euclidean #nn.MSELoss()/nn.CrossEntropyLoss()/nn.SmoothL1Loss()

	 fc.0.weight
	 fc.0.bias
	 fc.1.weight
	 fc.1.bias
	 fc.2.weight
	 fc.2.bias


In [15]:
# Utility function, given a list of 16 image filepaths, will plot them.
def render_images(images):
  """Plot the given image files on a 4x4 grid with axes hidden.

  Args:
    images: Iterable of image file paths. The grid has 16 slots, so at most
      16 paths can be shown.

  Raises:
    FileNotFoundError: If a path cannot be read (``cv2.imread`` signals this
      by returning ``None`` rather than raising).
  """
  # setting values to rows and column variables
  rows = 4
  columns = 4

  # create figure
  fig = plt.figure(figsize=(10, 10))

  for i, path in enumerate(images):
    img = cv2.imread(path)
    if img is None:
      raise FileNotFoundError(f"Could not read image: {path}")
    fig.add_subplot(rows, columns, i + 1)
    # OpenCV decodes to BGR while matplotlib expects RGB; convert so the red
    # and blue channels are not swapped in the plot.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')

  plt.show()

In [16]:
# Get training and test set
training_set, test_set = process()

# Training variables
EPOCS = 10
BATCH_SIZE = 32

# Run on whatever device the model already lives on. initialize_model() falls
# back to the CPU when CUDA is unavailable, so hard-coded .cuda() calls would
# crash there.
device = next(model.parameters()).device

for i in range(EPOCS):
  random.shuffle(training_set)
  # Only full batches are used; the trailing len(training_set) % BATCH_SIZE
  # samples are skipped in each epoch.
  for j in tqdm(range(len(training_set) // BATCH_SIZE)):

    # Get current batch and split into images and target outputs
    batch = training_set[j*BATCH_SIZE: (j + 1)*BATCH_SIZE]
    images = load_images([image for (image, _) in batch]).to(device)
    outputs = torch.stack([target for (_, target) in batch]).to(device)

    # Forward-pass
    prediction = model(images)

    # Back-prop
    loss = loss_func(prediction.float(), outputs.float())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # show learning progress after each epoch; the reported loss is that of the
  # final batch only, not an epoch average
  print(f"EPOC {i}, BATCH: {j}, LOSS: {loss.cpu().data.numpy()}")

EPOC 0, BATCH: 591, LOSS: 7.321829795837402


EPOC 1, BATCH: 591, LOSS: 7.662507057189941


EPOC 2, BATCH: 591, LOSS: 6.987581253051758


EPOC 3, BATCH: 591, LOSS: 7.150958061218262


EPOC 4, BATCH: 591, LOSS: 7.406583309173584


EPOC 5, BATCH: 591, LOSS: 6.703777313232422


EPOC 6, BATCH: 591, LOSS: 10.620667457580566


EPOC 7, BATCH: 591, LOSS: 7.904351234436035


EPOC 8, BATCH: 591, LOSS: 7.622331002376864


EPOC 9, BATCH: 591, LOSS: 8.139028174590195
