# RGB Image Reconstruction VAE

The code is currently based heavily on the "getting started with variational autoencoder using PyTorch" tutorial on Debugger Cafe.  
This will change over time.

In [34]:
import os
import csv
import numpy as np
import math
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from skimage import io
from PIL import Image
from tqdm import tqdm
#import argparse

import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.utils import save_image

matplotlib.style.use('ggplot')

torch.cuda.empty_cache() 
#import model

import ast

In [35]:
class ActiveVisionDataset(Dataset):
    """Image dataset described by a CSV annotation file.

    Rows of the CSV are read positionally: column 0 holds the image path
    relative to ``root_dir``, column 1 an integer label (returned as
    ``shape_label``) and column 2 a string containing a Python literal that
    is parsed with ``ast.literal_eval`` (returned as ``cam_loc``).

    :param csv_file: path of the annotation CSV
    :param root_dir: directory the image paths in column 0 are relative to
    :param transform: optional callable applied to each loaded image
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file, index_col=None)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, shape_label, cam_loc)`` for row ``index``.

        ``index`` may be a plain int or a one-element tensor.
        """
        # torch.is_tensor also accepts Tensor subclasses, unlike an exact
        # ``type(index) == torch.Tensor`` comparison.
        if torch.is_tensor(index):
            index = index.item()

        row = self.annotations.iloc[index]
        image = io.imread(os.path.join(self.root_dir, row.iloc[0]))
        shape_label = torch.tensor(int(row.iloc[1]))
        # literal_eval only evaluates Python literals, so the CSV text is
        # never executed as code.
        cam_loc = torch.tensor(ast.literal_eval(row.iloc[2]))

        if self.transform:
            image = self.transform(image)

        return image, shape_label, cam_loc

In [36]:
class Encoder(nn.Module):
    """Convolutional encoder that maps an image batch to a sampled latent vector.

    ``init_kernel``, ``kernel_size``, ``stride`` and ``padding`` are read from
    module-level globals when the layers are constructed.
    """

    def __init__(self, z_dim):
        super().__init__()
        # Settings shared by every convolution in the stack.
        conv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding)

        self.enc1 = nn.Conv2d(3, init_kernel, **conv_kwargs)
        self.enc2 = nn.Conv2d(init_kernel, init_kernel * 2, **conv_kwargs)
        self.enc3 = nn.Conv2d(init_kernel * 2, init_kernel, **conv_kwargs)
        # enc4 and enc5 are constructed (so they appear in the state dict)
        # but forward() currently does not call them.
        self.enc4 = nn.Conv2d(init_kernel * 4, init_kernel * 8, **conv_kwargs)
        self.enc5 = nn.Conv2d(init_kernel * 8, init_kernel, **conv_kwargs)
        # Dropout is not used yet; if added it must be called in forward().

        # Flattened feature size is hard-coded to init_kernel x 191 x 191.
        flat_features = init_kernel * 191 * 191
        self.mu = nn.Linear(flat_features, z_dim)
        # Despite the attribute name, forward() treats this layer's output
        # as the log-variance.
        self.sigma = nn.Linear(flat_features, z_dim)

    def reparameterize(self, mu, log_var):
        """
        Draw a latent sample as ``mu + eps * std`` with ``eps ~ N(0, 1)``.

        :param mu: mean from the encoder's latent space
        :param log_var: log variance from the encoder's latent space
        """
        std = (0.5 * log_var).exp()
        noise = torch.randn_like(std)  # same shape as std
        return mu + noise * std

    def forward(self, x):
        """Return ``(z, mu, log_var)`` for the input batch ``x``."""
        for conv in (self.enc1, self.enc2, self.enc3):
            x = F.relu(conv(x))

        # Flatten everything except the batch dimension.
        flat = x.view(x.size(0), -1)

        mu = self.mu(flat)
        log_var = self.sigma(flat)

        # Latent vector obtained through reparameterization.
        z = self.reparameterize(mu, log_var)
        return z, mu, log_var

In [37]:
class Decoder(nn.Module):
    """Transposed-convolution decoder that maps a latent vector back to an image.

    ``init_kernel``, ``kernel_size``, ``stride`` and ``padding`` are read from
    module-level globals when the layers are constructed.
    """

    def __init__(self, z_dim):
        super().__init__()
        # Settings shared by every transposed convolution.
        deconv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding)

        # Expands the latent vector to an init_kernel x 191 x 191 feature map.
        self.lin1 = nn.Linear(z_dim, init_kernel * 191 * 191)

        # dec1 and dec2 are constructed (so they appear in the state dict)
        # but forward() currently does not call them.
        self.dec1 = nn.ConvTranspose2d(init_kernel, init_kernel * 8, **deconv_kwargs)
        self.dec2 = nn.ConvTranspose2d(init_kernel * 8, init_kernel * 4, **deconv_kwargs)
        self.dec3 = nn.ConvTranspose2d(init_kernel, init_kernel * 2, **deconv_kwargs)
        self.dec4 = nn.ConvTranspose2d(init_kernel * 2, init_kernel, **deconv_kwargs)
        self.dec5 = nn.ConvTranspose2d(init_kernel, 3, **deconv_kwargs)

    def forward(self, z):
        """Return the reconstruction for latent batch ``z``, squashed to (0, 1) by a sigmoid."""
        # Un-flatten the linear output into (batch, init_kernel, 191, 191).
        feature_map = F.relu(self.lin1(z)).view(-1, init_kernel, 191, 191)

        hidden = F.relu(self.dec3(feature_map))
        hidden = F.relu(self.dec4(hidden))
        return torch.sigmoid(self.dec5(hidden))

In [38]:
class ConvVAE(nn.Module):
    """Variational autoencoder built from an ``Encoder`` and a ``Decoder``."""

    def __init__(self, z_dim):
        super().__init__()
        self.encoder = Encoder(z_dim)
        self.decoder = Decoder(z_dim)

    def forward(self, x):
        """Return ``(reconstruction, z_mu, z_logvar)`` for the input batch ``x``.

        The encoder performs the reparameterized sampling itself; the sample
        it returns is passed straight to the decoder.
        """
        latent_sample, z_mu, z_logvar = self.encoder(x)
        return self.decoder(latent_sample), z_mu, z_logvar

In [39]:
def final_loss(bce_loss, mu, logvar):
    """
    Combine the reconstruction loss with the KL-divergence term.

    KL-Divergence = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    :param bce_loss: reconstruction loss
    :param mu: the mean from the latent vector
    :param logvar: log variance from the latent vector
    :return: ``bce_loss`` plus the KL-divergence
    """
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kld = torch.sum(kl_terms) * -0.5
    return bce_loss + kld

In [40]:
def fit(model, dataloader):
    """Train ``model`` for one epoch and return the mean loss per sample.

    Uses the module-level ``optimizer``, ``criterion`` and ``device``.

    :param model: network whose forward pass returns
        ``(reconstruction, mu, logvar)``
    :param dataloader: loader whose batches carry the image tensor as their
        first element
    :return: accumulated loss divided by ``len(dataloader.dataset)``
    """
    model.train()
    running_loss = 0.0
    # len(dataloader) is the real number of batches, including a short last
    # one, and does not depend on a global dataset variable.
    for batch in tqdm(dataloader, total=len(dataloader)):
        # ActiveVisionDataset yields (image, label, cam_loc); a fixed
        # two-value unpack raises on that, so only the first field is taken.
        images = batch[0].to(device)

        optimizer.zero_grad()
        reconstruction, mu, logvar = model(images)

        bce_loss = criterion(reconstruction, images)
        loss = final_loss(bce_loss, mu, logvar)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

    return running_loss / len(dataloader.dataset)

In [41]:
def validate(model, dataloader):
    """Evaluate ``model`` for one epoch and return the mean loss per sample.

    Uses the module-level ``criterion`` and ``device``. On the final batch
    it also writes a comparison image (inputs on the first row,
    reconstructions on the second) to
    ``outputs/{parameter}{value}/imgs/output{epoch}.png``, reading
    ``parameter``, ``value`` and ``epoch`` from module-level globals.

    :param model: network whose forward pass returns
        ``(reconstruction, mu, logvar)``
    :param dataloader: loader whose batches carry the image tensor as their
        first element
    :return: accumulated loss divided by ``len(dataloader.dataset)``
    """
    model.eval()
    running_loss = 0.0
    # Derive the last batch index from the loader itself so that it is right
    # for any dataset size, not only multiples of the batch size.
    last_batch = len(dataloader) - 1
    with torch.no_grad():
        for i, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
            # ActiveVisionDataset yields (image, label, cam_loc); a fixed
            # two-value unpack raises on that, so only the first field is taken.
            images = batch[0].to(device)
            reconstruction, mu, logvar = model(images)
            bce_loss = criterion(reconstruction, images)
            loss = final_loss(bce_loss, mu, logvar)
            running_loss += loss.item()

            # save the last batch input and output of every epoch
            if i == last_batch:
                num_rows = 8
                # Slice directly instead of .view(batch_size, 3, 200, 200):
                # the tensors are already image batches, and the view fails
                # when the final batch is smaller than batch_size.
                both = torch.cat((images[:num_rows], reconstruction[:num_rows]))
                save_image(both.cpu(), f"outputs/{parameter}{value}/imgs/output{epoch}.png", nrow=num_rows)

    return running_loss / len(dataloader.dataset)

## Define Parameters (change in optimization)

In [42]:
# Learning parameters.
# Candidate values to try during optimization are listed beside each setting.

# Number of conv layers to try: 3, 4, 5

# --- training schedule ---
epochs = 25  # also: 50, 100
batch_size = 8  # also: 16, 32, 64?
lr = 1e-4  # also: 0.002, 0.003

# --- convolution settings ---
kernel_size = 4  # also: 3, 5, 6?
stride = 1  # also: 2, 3, 4?
padding = 0  # also: 1, 2, 3, 4?
init_kernel = 8  # initial number of filters; also: 4, 16, 32

# --- latent space ---
latent_dim = 96

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# Still to explore:
#   * adding/removing layers  (*** top priority)
#   * optimizer (it is set up a few cells below)
#
#   * dropout (across multiple layers, with multiple p values?)
#   * pooling: max pool or average pool?
#   * activation function (there are a lot of options)
#   * batch normalization (already looked it up)

cuda


## Run

In [43]:
# Build the full dataset, split it 2400/600 into training and validation
# subsets, and wrap each subset in a shuffling DataLoader.
dataset = ActiveVisionDataset(
    csv_file='imgs/rgbCSV.csv',
    root_dir='imgs/rgbImg/',
    transform=torchvision.transforms.ToTensor(),
)
train_data, val_data = torch.utils.data.random_split(dataset, [2400, 600])
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

In [None]:
# It's runtime!

In [None]:
# The string below is a disabled scaffold for sweeping over batch sizes; it is
# a bare string expression and has no effect.
'''
batch_sizes = [8]

for batch_size in batch_sizes:
'''
# --- data ---
dataset = ActiveVisionDataset(
    csv_file='imgs/rgbCSV.csv',
    root_dir='imgs/rgbImg/',
    transform=torchvision.transforms.ToTensor(),
)
train_data, val_data = torch.utils.data.random_split(dataset, [2400, 600])
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

# --- model, optimizer, reconstruction criterion ---
# fit() and validate() read `optimizer` and `criterion` as globals.
model = ConvVAE(latent_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss(reduction='sum')

# --- output locations ---
# The run is identified by `parameter` + `value`; every artefact goes under
# outputs/<parameter><value>/.
parameter = 'testertsne'
value = 0
run_name = parameter + str(value)
run_dir = f"outputs/{run_name}"
os.makedirs(run_dir, exist_ok=True)
os.makedirs(f"{run_dir}/imgs", exist_ok=True)

# Planned sweep (not enabled yet):
#   parameters = ['layers']
#   values = ['3,4,5,6,7']
#   for parameter in parameters:
#       for value in values:
#           # create a folder here
#           os.makedirs(parameter+value, exist_ok=True)

# validate() interpolates `value` and `epoch` into the image file name.
value = str(value)
train_loss = []
val_loss = []
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss = fit(model, train_loader)
    val_epoch_loss = validate(model, val_loader)
    train_loss.append(train_epoch_loss)
    val_loss.append(val_epoch_loss)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f"Val Loss: {val_epoch_loss:.4f}")

# --- save the trained weights ---
filepath = os.path.join(os.getcwd(), "outputs", run_name, run_name + ".pth")
torch.save(model.state_dict(), filepath)

# --- loss curves ---
plt.figure(figsize=(10, 10))
for series, series_label in ((train_loss, "Train Loss"), (val_loss, "Validation Loss")):
    plt.plot(range(1, epochs + 1), series, label=series_label)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
# Ticks every 1000, starting at the training-loss minimum rounded down to a
# multiple of 100.
tick_start = int(math.floor(min(train_loss) / 100.0)) * 100
plt.yticks(np.arange(tick_start, max(train_loss) + 1, 1000))
plt.savefig(f"{run_dir}/loss{run_name}.png")

# --- per-epoch losses for this run ---
with open(f"{run_dir}/loss{run_name}.csv", 'w', newline='') as f:
    wr = csv.writer(f)
    wr.writerow(["Train loss", "Val loss"])
    wr.writerows(zip(train_loss, val_loss))

# --- append the final losses to the cross-run comparison file ---
with open('outputs/lossCompare.csv', 'a+', newline='') as f:
    wr = csv.writer(f)
    wr.writerow([parameter, value, train_loss[-1], val_loss[-1]])

In [None]:
# Plot the per-epoch training and validation losses and save the figure next
# to the other artefacts of this run.
plt.figure(figsize=(10, 10))
epoch_axis = range(1, epochs + 1)
for series, series_label in ((train_loss, "Train Loss"), (val_loss, "Validation Loss")):
    plt.plot(epoch_axis, series, label=series_label)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
# Ticks every 1000, starting at the training-loss minimum rounded down to a
# multiple of 100.
tick_start = int(math.floor(min(train_loss) / 100.0)) * 100
plt.yticks(np.arange(tick_start, max(train_loss) + 1, 1000))
plt.savefig('outputs/' + parameter + value + '/loss' + parameter + value + '.png')

In [None]:
# Write the per-epoch losses of this run to its own CSV file.
loss_csv_path = 'outputs/' + parameter + value + '/loss' + parameter + value + '.csv'
with open(loss_csv_path, 'w', newline='') as f:
    wr = csv.writer(f)
    wr.writerow(["Train loss", "Val loss"])
    for train_value, val_value in zip(train_loss, val_loss):
        wr.writerow((train_value, val_value))

# Append this run's final losses to the cross-run comparison file.
with open('outputs/lossCompare.csv', 'a+', newline='') as f:
    final_row = [parameter, value, train_loss[-1], val_loss[-1]]
    csv.writer(f).writerow(final_row)

# This is test set time!

In [None]:
# Checkpoint to evaluate.
# NOTE(review): the training cell saves to outputs/<parameter><value>/, while
# this path points at a "batch_sizes8" run — confirm it is the intended file.
PATH = os.path.join(os.getcwd(), "outputs", "batch_sizes8", "batch_sizes8.pth")

# NOTE: this rebinds the name `ConvVAE` from the class to a model instance.
# Later cells use `ConvVAE` as the model, so the name is kept; re-run the class
# definition cell before constructing another ConvVAE.
ConvVAE = ConvVAE(latent_dim)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU also loads on a CPU-only machine.
ConvVAE.load_state_dict(torch.load(PATH, map_location=device))
ConvVAE.to(device)

In [None]:
# Similar in structure to validate(), but instead of computing a loss it
# gathers the encoder outputs so they can be stored and analysed afterwards.
def generate_latent_vectors(model, dataloader):
    """Encode every batch of ``dataloader`` and collect latent means and labels.

    :param model: module exposing an ``encoder`` that returns
        ``(z, mu, logvar)``
    :param dataloader: loader whose batches carry the image tensor first and
        the label tensor second; further fields are ignored
    :return: ``(latent, target)`` — two lists with one entry per sample:
        the latent mean as a NumPy array and the label as a NumPy scalar
    """
    model.eval()
    # Run on whatever device the model lives on instead of assuming CUDA.
    model_device = next(model.parameters()).device
    latent = []
    target = []
    with torch.no_grad():
        # len(dataloader) is the real batch count and needs no global dataset.
        for batch in tqdm(dataloader, total=len(dataloader)):
            # ActiveVisionDataset yields (image, label, cam_loc); a fixed
            # two-value unpack raises on that, so fields are taken by index.
            images, labels = batch[0], batch[1]
            _, mu, _ = model.encoder(images.to(model_device))
            latent.extend(mu.cpu().detach().numpy())
            target.extend(labels.cpu().numpy())
    return latent, target

In [None]:
# TODO: evaluate on a dedicated test set; for now the validation loader is
# reused as the source of samples.
test_latent_var, test_target = generate_latent_vectors(ConvVAE, val_loader)

In [None]:
#print(len(test_latent_var))
#print(len(test_target))
#print(test_target)

In [None]:
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
%matplotlib qt
from IPython import display
import matplotlib.cm as cmx
import matplotlib.colors as colors


import plotly.express as px
import plotly.io as pio

In [None]:
# Project the collected latent means to 2-D with t-SNE and write an
# interactive scatter plot, coloured by label, to raw.html.
latent = np.asarray(test_latent_var)
target = np.asarray(test_target)

tsne = TSNE(n_components=2, init="pca", random_state=0)
X = tsne.fit_transform(latent)

# One row per sample: the two embedding coordinates followed by the label.
data = np.column_stack((X, target))
df = pd.DataFrame(data, columns=["z1", "z2", "label"])
# Cast to str so the label is plotted as a category, not a continuous value.
df["label"] = df["label"].astype(str)

fig = px.scatter(df, x="z1", y="z2", color="label")
pio.write_html(fig, file="raw.html", auto_open=True)

# Extra helper code

In [None]:
# NOTE(review): `stop` is not defined anywhere in the visible notebook, so
# running this cell raises NameError — presumably a deliberate guard that
# halts "Run All" before the scratch cells below. Confirm intent.
stop

1. Adding/removing layers: CNN layers and fully connected layers  
2. Varying batch sizes  
3. Varying numbers of training epochs  
4. Applying different levels of drop-out  
5. Applying different types of pooling  
6. Applying different types of activation function  

In [None]:
#for data in train_loader:
#    print(im_path)
#    print("Data: ", data)

In [50]:
# Sanity check on the loader output: print the shape of the third field
# (`coords`) of every training batch.
for i, batch in enumerate(train_loader):
    images, labels, coords = batch
    print(coords.shape)

torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([8, 3])
torch.Size([

In [None]:
#print(model)

In [None]:
# Scratch experiment: create the names bruh0 ... bruh9 dynamically by writing
# into the namespace returned by vars(), then read one back.
for i in range(10):
    vars()[f"bruh{i}"] = i

print(bruh0)

In [None]:
        # Create self.enc2 ... self.enc5 dynamically. The attribute has to be
        # set with setattr(): `vars()[self.'enc'+str(i)]` is not valid Python,
        # and assigning through setattr lets nn.Module register each layer as
        # a submodule.
        # NOTE(review): every layer takes in_channels=init_kernel while
        # out_channels grows with i, so consecutive layers do not chain as
        # written — confirm the intended channel sizes.
        for i in range(2, 6):
            setattr(self, 'enc' + str(i), nn.Conv2d(
                in_channels=init_kernel, out_channels=init_kernel*i, kernel_size=kernel_size,
                stride=stride, padding=padding
            ))

In [None]:
# Scratch cell: treat the arg-max of the encoder's latent mean as a class
# prediction and compare it with the label.
# NOTE(review): this only measures something meaningful if the latent
# dimensions are meant to line up with class indices — confirm before
# relying on the number it prints.
correct = 0
total = 0
with torch.no_grad():
    for data in val_loader:
        # Batches are (image, label, cam_loc); take the fields by index and
        # move them to the device the model was moved to.
        images, labels = data[0].to(device), data[1].to(device)
        # The encoder returns (z, mu, log_var); the mean is used here because
        # it is deterministic, unlike the sampled z.
        _, outputs, _ = ConvVAE.encoder(images)
        _, predicted = torch.max(outputs, 1)
        print(outputs)
        print(predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of samples actually evaluated and avoid dividing by zero
# when the loader is empty.
if total:
    print('Accuracy of the network on the %d validation images: %d %%' % (
        total, 100 * correct / total))
else:
    print('No validation samples were evaluated.')