In [0]:
# Shared by all
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils # We should use this eventually.
from torch import nn, optim
from torch.nn import functional as F
import numpy as np


# For DataLoader
from PIL import Image
import numbers

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Dataset Code: `CarlaDataset.py`

In [0]:

class CarlaDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        # xcxc I'm assuming that the images live in _out.
        self.data_dir = data_dir
        self.transform = transform
        self.df = self._get_dataframe()
        self.df_as_mat = self.df.values
    
    def __len__(self):
        num_rows, _ = self.df_as_mat.shape
        return num_rows
    
    def __getitem__(self, idx):
        '''
        Generate one sample of data.
        '''
        # We're gonna do some hardcore hard-coding here.
        # First, extract our control inputs
        row = self.df_as_mat[idx, :]
        # xcxc We're... We're not exactly doing anything with our control inputs. For now.
        # We lop off the final value in -1 because of our dataframe- we 
        # interpret the indicator value of whether it's stationary or not 
        # as a boolean, and python interprets it as a number.
        control_inputs = np.array(
            [x for x in row if isinstance(x, numbers.Number)][:-1])
        is_stationary = row[-1]
        
        curr_images = self._get_image_tensor_for_row(row[0], is_stationary)
        # Get the next row
        next_delta = 4 # xcxc This is a hardcoded parameter from Klayton's data.
        next_input_id = int(row[0]) + next_delta
        num_rows_next = np.sum(self.df['input_num'] == str(next_input_id))
        if num_rows_next == 0:
            # No next: treat it as if we're stationary
            return (curr_images, curr_images, np.zeros(len(control_inputs)))
        elif is_stationary == True:
            # If it's stationary, then simply return our current images
            return (curr_images, curr_images, np.zeros(len(control_inputs)))
        else:
            next_images = self._get_image_tensor_for_row(
                str(next_input_id), is_stationary)
            return (curr_images, next_images, control_inputs)
    
    def _get_image_tensor_for_row(self, row_id, is_stationary):
        '''
        Inputs:
            row_id: String that represents the input_num
        Outputs:
            A (2 x H x W x 4) 4D matrix of the two images.
        '''
        # The row_id should be the input_num. Should also be a string.
        which_row = (self.df['input_num'] == row_id)
        where_stationary = (self.df['is_stationary'] == is_stationary)
        row = self.df[which_row & where_stationary]
        n_res, _ = row.shape
        if n_res > 1:
            # xcxc I'm assuming there's only one row per row_id.
            # This may or may not be a strictly held invariant.
            print("XCXC: THERE ARE MORE THAN 1 ROW FOR A ROW_ID")
        row = row.values[0]
        images = []
        for ele in row:
            if str(ele).split('.')[-1] == 'png':
                full_name = os.path.join(self.data_dir, '_out', ele)
                np_arr = np.asarray(Image.open(full_name))
                np_arr = self._rearrange_axes_image(np_arr)
                # Apply transform on each image independently.
                if self.transform:
                    np_arr = self.transform(np_arr)
                images.append(np_arr)
        images = np.array(images)
        return images
    
    def _rearrange_axes_image(self, img):
        H,W,_ = img.shape
        new_img = np.zeros((3,H,W))
        for i in range(3):
            new_img[i,:,:] = img[:,:,i]
        return new_img

    def _get_dataframe(self):
        control_input_df = self._get_control_input_df()
        filename_df = self._get_image_path_df()
        df = control_input_df.merge(right=filename_df,
                                    left_on='input_num',
                                    right_on='index')
        # Then, we add a column to our dataframe saying whether it's stationary or not
        num_rows, _ = df.shape
        df['is_stationary'] = np.zeros((num_rows), dtype=bool)
        # Then make a copy and set is_stationary to true...
        df_copy = df.copy()
        df_copy['is_stationary'] = np.ones((num_rows), dtype=bool)
        # then stack and return
        final_df = pd.concat([df, df_copy])
        return final_df

    def _get_control_input_df(self):
        # xcxc I'm also assuming that our columns in control_input stay static like so.
        control_input_df = pd.read_csv(os.path.join(self.data_dir, 'control_input.txt'),
                               names=['input_num', 'ctr1', 'ctr2'])
        control_input_df['input_num'] = control_input_df['input_num'].astype('str')
        return control_input_df
    
    def _get_image_path_df(self):
        # A little cryptic, but it just gets the list of all filenames
        all_files_in_out = [x[2] for x in os.walk(os.path.join(self.data_dir, '_out'))][0]
        # Then filter out by getting only the png files. We can remove this step if need be.
        all_files_in_out = [img_name for img_name in all_files_in_out if img_name.split('.')[1] == 'png']

        # We can then make a map with our data...
        filename_groupings = {}
        for fn in all_files_in_out:
            fn_number = str(int(fn.split('_')[0]))
            if fn_number not in filename_groupings:
                filename_groupings[fn_number] = []
            filename_groupings[fn_number].append(fn)
            
        # Then make a dataframe from this dictionary
        filename_df = pd.DataFrame.from_dict(
            filename_groupings, orient='index').reset_index()
        filename_df = filename_df.dropna(subset=[0,1]) # Drop if any of our images is None.
#         filename_df = filename_df[filename_df['index'].astype('int') < 494] # Drop all the ones that are after 494
        return filename_df

### Model: `CVAE.py` (xcxc To be changed later)

In [0]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')     

In [0]:
class CVAE(nn.Module):
	def __init__(self):
		super().__init__()
		d = 0.4
		self.z_size = 64
		self.hidden = 256
		ch_sz = 3
		last_conv = 4
		self.tensor = (1,last_conv,300,400)
		flat = np.prod(self.tensor)

		# channel_in, c_out, kernel_size, stride, padding
		def convbn(ci,co,ksz,s=1,pz=0):		#ReLU nonlinearity
			return nn.Sequential(
				nn.Conv2d(ci,co,ksz,stride=s,padding=pz),
				nn.ReLU(),
				nn.BatchNorm2d(co))
		def convlast(ci,co,ksz,s=1,pz=0):	#Sigmoid nonlinearity
			return nn.Sequential(
				nn.Conv2d(ci,co,ksz,stride=s,padding=pz),
				nn.Sigmoid(),
				nn.BatchNorm2d(co))
		def mlp(in_size,hidden):
			return nn.Sequential(
				nn.Dropout(d),
				nn.Linear(in_size,hidden),
				nn.ReLU())

		#Encoder NN
		self.enc = nn.Sequential(
				nn.Dropout(d),
				convbn(ch_sz,64,3,1,1),
				convbn(64,16,1,1),
				convbn(16,last_conv,1,1))
		self.m1 = mlp(flat,self.hidden)
		self.zmean = nn.Linear(self.hidden,self.z_size)
		self.zstdev = nn.Linear(self.hidden,self.z_size)

		#Decoder NN
		self.expand_z = nn.Linear(self.z_size,self.hidden)
		self.m2 = mlp(self.hidden,flat)
		self.dec = nn.Sequential(
				nn.Dropout(d),
				convbn(last_conv,16,1,1),
				convbn(16,64,1,1),
				convlast(64,ch_sz,1,1))

	def encoder(self, x):
		h_layer = torch.flatten(self.enc(x))	
		# Get shapes for decoder
		# shapes1 = self.enc(x).shape
		# shapes2 = len(h_layer)
		# pdb.set_trace()
		# add control input in the following layer
		h = self.m1(h_layer)
		return h

	def bottleneck(self, x):
		z_mean = self.zmean(x)
		z_stdev = self.zstdev(x)
		#reparam to get z latent sample
		std = torch.exp(0.5*z_stdev)
		eps = torch.randn_like(std)
		z = z_mean + eps*std
		return z, z_mean, z_stdev

	def decoder(self, z):
		#check the nonlinearities of this layer
		h = self.expand_z(z)
		h1 = self.m2(h)
		#make sure to reshape data correctly
		x = torch.reshape(h1,(self.tensor))
		out = self.dec(x)
		return out

	def forward(self, x):
		h = self.encoder(x)
		z, z_mean, z_stdev = self.bottleneck(h)
		out = self.decoder(z)
		return out, z, z_mean, z_stdev

### Training Script: `run_script.py`

In [0]:


def main():
    model = CVAE()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999),
                            eps=1e-08, weight_decay=0)
    epochs = 10
    dl = DataLoader(CarlaDataset("drive/My Drive/1) The C/2019-20/ESE546"))
    total_step = 0
    for epoch in range(epochs):
        for i, X in enumerate(dl):
            left_image_t = X[0][:, 0, :, :, :] # left/right images of t
            right_image_t = X[0][:, 1, :, :, :] # left/right images of t

            # img2 = X[1] # left/right of t+1 # xcxc do the same indexing above to get l/r of t+1
            ctrl_inputs = X[2]

            left_image_t.to(device)
            right_image_t.to(device)
            ctrl_inputs.to(device)

            left_image_t = (left_image_t/255).float()

            out, z, z_mean, z_stdev = model.forward(left_image_t) # This is 3 x H x W or something idk. we also have to multiply this by 255. Then yeet this into PIL and save it.
            loss = criterion(out, left_image_t)
            optimizer.zero_grad()
            loss.backward()
            total_step += 1

            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))

In [0]:
main()

Epoch [1/10], Step [1/1], Loss: 1.1697
Epoch [1/10], Step [2/2], Loss: 1.1502
Epoch [1/10], Step [3/3], Loss: 1.1493
Epoch [1/10], Step [4/4], Loss: 1.1731
Epoch [1/10], Step [5/5], Loss: 1.1733
Epoch [1/10], Step [6/6], Loss: 1.1496
Epoch [1/10], Step [7/7], Loss: 1.1501
Epoch [1/10], Step [8/8], Loss: 1.1733
Epoch [1/10], Step [9/9], Loss: 1.1724
Epoch [1/10], Step [10/10], Loss: 1.1496
Epoch [1/10], Step [11/11], Loss: 1.1740
Epoch [1/10], Step [12/12], Loss: 1.1736
Epoch [1/10], Step [13/13], Loss: 1.1737
Epoch [1/10], Step [14/14], Loss: 1.1741
Epoch [1/10], Step [15/15], Loss: 1.1495
Epoch [1/10], Step [16/16], Loss: 1.1753
Epoch [1/10], Step [17/17], Loss: 1.1474
Epoch [1/10], Step [18/18], Loss: 1.1738
Epoch [1/10], Step [19/19], Loss: 1.1704
Epoch [1/10], Step [20/20], Loss: 1.1424
Epoch [1/10], Step [21/21], Loss: 1.1683
Epoch [1/10], Step [22/22], Loss: 1.1673
Epoch [1/10], Step [23/23], Loss: 1.1339
Epoch [1/10], Step [24/24], Loss: 1.1672
Epoch [1/10], Step [25/25], Loss: 

KeyboardInterrupt: ignored