In [None]:
import os
import csv
import numpy as np
import math
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from skimage import io
from PIL import Image
from tqdm import tqdm
#import argparse

import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.utils import save_image

matplotlib.style.use('ggplot')

torch.cuda.empty_cache() 
#import model

import ast

In [None]:
class Encoder(nn.Module):
    """Convolutional encoder half of the VAE.

    Maps an image batch to a sampled latent vector together with the mean and
    log-variance it was sampled from. Channel counts and convolution geometry
    are taken from the notebook-level names ``init_kernel``, ``kernel_size``,
    ``stride`` and ``padding``, so those must exist before instantiation.

    :param z_dim: size of the latent vector
    """

    def __init__(self, z_dim):
        super().__init__()
        conv_cfg = dict(kernel_size=kernel_size, stride=stride, padding=padding)

        self.enc1 = nn.Conv2d(3, init_kernel, **conv_cfg)
        self.enc2 = nn.Conv2d(init_kernel, init_kernel * 2, **conv_cfg)
        self.enc3 = nn.Conv2d(init_kernel * 2, init_kernel, **conv_cfg)
        # enc4 / enc5 are registered (and therefore part of the state dict)
        # but forward() does not call them.
        self.enc4 = nn.Conv2d(init_kernel * 4, init_kernel * 8, **conv_cfg)
        self.enc5 = nn.Conv2d(init_kernel * 8, init_kernel, **conv_cfg)
        self.dropout = nn.Dropout()

        # Fully connected stack. The 119x119 feature-map size is hard-coded;
        # with 4x4 kernels, stride 1 and no padding it corresponds to
        # 128 -> 125 -> 122 -> 119 over the three convolutions used in forward().
        self.lin1 = nn.Linear(init_kernel * 119 * 119, 1000)
        self.lin2 = nn.Linear(1000, 500)
        self.lin3 = nn.Linear(500, 250)
        self.mu = nn.Linear(250, z_dim)
        self.sigma = nn.Linear(250, z_dim)  # its output is used as log-variance

        self.bn_lin1 = nn.BatchNorm1d(1000)
        self.bn_lin2 = nn.BatchNorm1d(500)
        self.bn_lin3 = nn.BatchNorm1d(250)

    def reparameterize(self, mu, log_var):
        """
        Draw a latent sample as ``mu + eps * std`` with ``eps ~ N(0, 1)``.

        :param mu: mean from the encoder's latent space
        :param log_var: log variance from the encoder's latent space
        :return: sampled latent tensor with the same shape as ``mu``
        """
        std = torch.exp(0.5 * log_var)
        noise = torch.randn_like(std)
        return mu + (noise * std)

    def forward(self, x):
        """
        Encode an image batch.

        :param x: image batch fed to the first convolution (3 input channels)
        :return: tuple ``(z, mu, log_var)``
        """
        # Convolutional feature extractor: conv -> ReLU, three times.
        for conv in (self.enc1, self.enc2, self.enc3):
            x = F.relu(conv(x))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)

        # Dense stack: linear -> ReLU -> batch norm (-> dropout on the first two).
        x = self.dropout(self.bn_lin1(F.relu(self.lin1(x))))
        x = self.dropout(self.bn_lin2(F.relu(self.lin2(x))))
        x = self.bn_lin3(F.relu(self.lin3(x)))

        mu = self.mu(x)
        log_var = self.sigma(x)
        z = self.reparameterize(mu, log_var)

        return z, mu, log_var
    
class Decoder(nn.Module):
    """Decoder half of the VAE: latent vector -> reconstructed 3-channel image.

    Mirrors the encoder with a dense stack followed by transposed
    convolutions. Reads ``init_kernel``, ``kernel_size``, ``stride`` and
    ``padding`` from the notebook namespace.

    :param z_dim: size of the latent vector
    """

    def __init__(self, z_dim):
        super().__init__()
        deconv_cfg = dict(kernel_size=kernel_size, stride=stride, padding=padding)
        flat_features = init_kernel * 119 * 119

        self.lin1 = nn.Linear(z_dim, 250)
        self.lin2 = nn.Linear(250, 500)
        self.lin3 = nn.Linear(500, 1000)
        self.lin4 = nn.Linear(1000, flat_features)

        # dec1 / dec2 are registered (so they appear in the state dict) but
        # forward() only runs dec3 -> dec4 -> dec5.
        self.dec1 = nn.ConvTranspose2d(init_kernel, init_kernel * 8, **deconv_cfg)
        self.dec2 = nn.ConvTranspose2d(init_kernel * 8, init_kernel * 4, **deconv_cfg)
        self.dec3 = nn.ConvTranspose2d(init_kernel, init_kernel * 2, **deconv_cfg)
        self.dec4 = nn.ConvTranspose2d(init_kernel * 2, init_kernel, **deconv_cfg)
        self.dec5 = nn.ConvTranspose2d(init_kernel, 3, **deconv_cfg)
        self.dropout = nn.Dropout()

        self.bn_lin1 = nn.BatchNorm1d(250)
        self.bn_lin2 = nn.BatchNorm1d(500)
        self.bn_lin3 = nn.BatchNorm1d(1000)
        self.bn_lin4 = nn.BatchNorm1d(flat_features)

    def forward(self, z):
        """
        Decode a batch of latent vectors.

        :param z: latent batch of shape ``(batch, z_dim)``
        :return: reconstruction squashed into ``[0, 1]`` by a sigmoid
        """
        # Dense stack: linear -> ReLU -> batch norm (-> dropout on the first two).
        x = self.dropout(self.bn_lin1(F.relu(self.lin1(z))))
        x = self.dropout(self.bn_lin2(F.relu(self.lin2(x))))
        x = self.bn_lin3(F.relu(self.lin3(x)))
        x = self.bn_lin4(F.relu(self.lin4(x)))

        # Back to a feature map for the transposed convolutions.
        x = x.view(-1, init_kernel, 119, 119)

        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        return torch.sigmoid(self.dec5(x))
    
    
class ConvVAE(nn.Module):
    """Variational autoencoder built from an ``Encoder`` and a ``Decoder``.

    :param z_dim: size of the latent vector shared by both halves
    """

    def __init__(self, z_dim):
        super().__init__()
        self.encoder = Encoder(z_dim)
        self.decoder = Decoder(z_dim)

    def forward(self, x):
        """
        Encode ``x``, then decode the latent sample returned by the encoder.

        :return: tuple ``(reconstruction, z_mu, z_logvar)``
        """
        # The encoder already performs the reparameterization and returns the sample.
        latent_sample, z_mu, z_logvar = self.encoder(x)
        return self.decoder(latent_sample), z_mu, z_logvar

The cell below holds what should be the optimal hyperparameters.

In [None]:
# Learning parameters.
# The trailing comments on each line are presumably the alternative values that were tried — confirm.

# Number of conv layers: 3,4,5

epochs = 50 # 50,100
batch_size = 16 #8,16,32,64?
lr = 0.001 #0.002, 0.003

# Convolution geometry; the model classes read these module-level names directly.
kernel_size = 4 #3,4,5,6?
stride = 1 #1,2,3,4?
padding = 0 #0,1,2,3,4?
init_kernel = 8 #4,8,16,32 #initial number of filters

# Passed as z_dim (latent vector size) when the models are built.
latent_dim = 96

# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
class ActiveVisionDataset (Dataset):
    """Image dataset described by an annotation CSV.

    Each CSV row is read positionally: column 0 is the image file name
    (relative to ``root_dir``), column 1 an integer shape label and column 2
    a Python-literal string (e.g. ``"[r, theta, phi]"``) holding the camera
    location.

    :param csv_file: path to the annotation CSV
    :param root_dir: directory containing the image files
    :param transform: optional callable applied to the loaded image
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file, index_col=None)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of annotated samples."""
        return len(self.annotations)

    def __getitem__(self, index):
        """
        Load one sample.

        :param index: integer position (a one-element tensor is also accepted)
        :return: tuple ``(image, shape_label, cam_loc)``
        """
        # isinstance also accepts Tensor subclasses, unlike an exact type comparison.
        if isinstance(index, torch.Tensor):
            index = index.item()

        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        shape_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        # Force float32: a location written with integer literals would otherwise
        # become an int64 tensor, which cannot be mixed with the float latent
        # vector or used as a regression target.
        cam_loc = torch.tensor(
            ast.literal_eval(self.annotations.iloc[index, 2]), dtype=torch.float32
        )

        if self.transform:
            image = self.transform(image)

        return image, shape_label, cam_loc

In [None]:
# Each split has an annotation CSV and an image folder; images are converted with ToTensor.
train_data = ActiveVisionDataset(
    csv_file='imgs/TrainSet/rgbCSV.csv',
    root_dir='imgs/TrainSet/rgbImg/',
    transform=torchvision.transforms.ToTensor(),
)
val_data = ActiveVisionDataset(
    csv_file='imgs/ValSet/rgbCSV.csv',
    root_dir='imgs/ValSet/rgbImg/',
    transform=torchvision.transforms.ToTensor(),
)

# Both loaders shuffle and share the notebook-level batch_size.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

In [None]:
# Checkpoint of the pretrained VAE whose encoder feeds the combined network.
PATH = os.path.join(os.getcwd(), "outputs", "4oT800V200BatchNorm64DropoutEpochs500", "4oT800V200BatchNorm64DropoutEpochs500.pth")

# This cell rebinds the name `ConvVAE` from the class to the loaded instance
# (later cells use `ConvVAE.encoder`). Recover the class from the instance so
# the cell can be re-run without restarting the kernel.
vae_class = ConvVAE if isinstance(ConvVAE, type) else type(ConvVAE)

ConvVAE = vae_class(latent_dim)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
ConvVAE.load_state_dict(torch.load(PATH, map_location=device))
ConvVAE.to(device)

# The VAE is only a fixed feature extractor here (it is never handed to an
# optimizer), so freeze it to avoid computing and storing its gradients.
for frozen_param in ConvVAE.parameters():
    frozen_param.requires_grad_(False)

ConvVAE.eval()

In [None]:
# Put the loaded VAE in evaluation mode (eval() was already called in the cell that loads it).
ConvVAE.eval()

In [None]:
# def generate_latent_vectors(model, dataloader):
#     model.eval()
#     latent = []
#     target = []
#     with torch.no_grad():
#         for i, data in tqdm(enumerate(dataloader), total=int(len(val_data)/dataloader.batch_size)):
#             data, labels = data
#             #if torch.cuda.is_available():
#             #    data = data.to(device)
#             z, mu, logvar = model.encoder(data.cuda())
#             latent.extend(mu)
#             target.extend(labels)
#         return latent, target

In [None]:

#train_latent_var, train_target = generate_latent_vectors(ConvVAE, train_loader)
#test_latent_var, test_target = generate_latent_vectors(ConvVAE, val_loader)

# New Stuff

In [None]:
class CombNet(nn.Module):
    """Fuse an image latent with a 3-value camera coordinate and decode both back.

    The image is first encoded by the notebook-level pretrained ``ConvVAE``
    (its ``encoder`` is called inside ``forward``; it is *not* a submodule of
    this network, so it is neither trained nor saved with it). The latent and
    the coordinate are concatenated, passed through three linear layers, and
    the result is decoded by two heads: an image head and a coordinate head.

    Reads ``init_kernel``, ``kernel_size``, ``stride`` and ``padding`` from
    the notebook namespace.

    :param z_dim: size of the latent vector produced by ``ConvVAE.encoder``
    """

    def __init__(self, z_dim):
        super(CombNet, self).__init__()
        deconv_cfg = dict(kernel_size=kernel_size, stride=stride, padding=padding)

        # Combined fully connected layers (latent + 3 coordinate values in).
        self.linear1 = nn.Linear(z_dim + 3, 150)
        self.linear2 = nn.Linear(150, 300)
        self.linear3 = nn.Linear(300, z_dim)

        # Image decoding head.
        self.lin1 = nn.Linear(z_dim, init_kernel * 119 * 119)
        self.dec3 = nn.ConvTranspose2d(init_kernel, init_kernel * 2, **deconv_cfg)
        self.dec4 = nn.ConvTranspose2d(init_kernel * 2, init_kernel, **deconv_cfg)
        self.dec5 = nn.ConvTranspose2d(init_kernel, 3, **deconv_cfg)

        # Polar-coordinate decoding head.
        self.coord1 = nn.Linear(z_dim, 50)
        self.coord2 = nn.Linear(50, 10)
        self.coord3 = nn.Linear(10, 3)

    def forward(self, img, coord):
        """
        :param img: image batch accepted by ``ConvVAE.encoder``
        :param coord: coordinate batch of shape ``(batch, 3)``
        :return: tuple ``(reconstruction_img, reconstruction_coord, z)`` where
                 ``z`` is the fused latent of shape ``(batch, z_dim)``
        """
        # The pretrained encoder is not optimised by this network, so run it
        # without autograd: backward() then neither walks through it nor
        # accumulates gradients on its parameters.
        with torch.no_grad():
            img_latent, _, _ = ConvVAE.encoder(img)

        # Fuse latent and coordinate along the feature dimension.
        z = torch.cat((img_latent, coord), dim=1)
        # NOTE(review): there is no non-linearity between these three layers,
        # so together they form a single affine map — confirm this is intended.
        z = self.linear1(z)
        z = self.linear2(z)
        z = self.linear3(z)  # back to z_dim; any width would work for the heads below

        # Image head ------------------------------------------------------
        feat = F.relu(self.lin1(z))
        feat = feat.view(-1, init_kernel, 119, 119)
        feat = F.relu(self.dec3(feat))
        feat = F.relu(self.dec4(feat))
        reconstruction_img = torch.sigmoid(self.dec5(feat))

        # Coordinate head -------------------------------------------------
        c = F.relu(self.coord1(z))
        c = F.relu(self.coord2(c))
        # Sigmoid limits every reconstructed coordinate to (0, 1).
        reconstruction_coord = torch.sigmoid(self.coord3(c))

        return reconstruction_img, reconstruction_coord, z
        
#How to do loss?
# % loss per epoch?


# Copied straight from the original. These still need some changes to work for this model.

In [None]:
def final_loss(bce_loss, mu, logvar):
    """
    Total VAE loss: reconstruction loss plus the KL-divergence term.

    KL-Divergence = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)

    :param bce_loss: reconstruction loss (already reduced to a scalar)
    :param mu: the mean from the latent vector
    :param logvar: log variance from the latent vector
    :return: ``bce_loss + KLD``
    """
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kld = -0.5 * kl_terms.sum()
    return bce_loss + kld

In [None]:
def fit(model, dataloader):
    """
    Train ``model`` for one pass over ``dataloader``.

    Uses the notebook-level ``optimizer``, ``criterion`` and ``device``.
    Every batch must unpack as ``(image, label, coordinate)``; the label is
    ignored.

    :param model: callable as ``model(img, coord)`` returning
                  ``(reconstructed_img, reconstructed_coord, z)``
    :param dataloader: loader yielding the training batches
    :return: summed loss divided by the number of samples in the dataset
    """
    model.train()
    running_loss = 0.0
    # len(dataloader) is the real number of batches (it includes a partial last
    # batch) and does not rely on a global dataset variable.
    for img, _, coord in tqdm(dataloader, total=len(dataloader)):
        # .to(device) is a no-op on CPU, so no CUDA check is needed.
        img = img.to(device)
        coord = coord.to(device)

        optimizer.zero_grad()
        reconstructed_img, reconstructed_coord, _ = model(img, coord)

        # Total loss = image reconstruction + coordinate reconstruction.
        img_loss = criterion(reconstructed_img, img)
        coord_loss = criterion(reconstructed_coord, coord)
        loss = img_loss + coord_loss

        running_loss += loss.item()
        loss.backward()
        optimizer.step()

    return running_loss / len(dataloader.dataset)

In [None]:
def validate(model, dataloader):
    """
    Evaluate ``model`` on ``dataloader`` without updating any weights.

    Uses the notebook-level ``criterion`` and ``device``. For the last batch
    it also saves an image grid (inputs on top, reconstructions below) under
    ``outputs/{parameter}{value}/imgs/`` and appends the original and
    reconstructed coordinates to the CSV writer ``wr``; ``parameter``,
    ``value``, ``epoch`` and ``wr`` are read from the notebook namespace.

    :param model: callable as ``model(img, coord)`` returning
                  ``(reconstructed_img, reconstructed_coord, z)``
    :param dataloader: loader yielding ``(image, label, coordinate)`` batches
    :return: summed loss divided by the number of samples in the dataset
    """
    model.eval()
    running_loss = 0.0
    # Index of the final batch, taken from the loader itself so it stays
    # correct for any dataset / batch size (including a partial last batch).
    last_batch = len(dataloader) - 1
    with torch.no_grad():
        for i, (img, _, coord) in enumerate(tqdm(dataloader, total=len(dataloader))):
            img = img.to(device)
            coord = coord.to(device)

            reconstructed_img, reconstructed_coord, _ = model(img, coord)

            img_loss = criterion(reconstructed_img, img)
            coord_loss = criterion(reconstructed_coord, coord)
            running_loss += (img_loss + coord_loss).item()

            # save the last batch input and output of every epoch
            if i == last_batch:
                num_rows = 8
                # Slice instead of view(): no assumption about batch or image size.
                both = torch.cat((img[:num_rows], reconstructed_img[:num_rows]))
                save_image(both.cpu(), f"outputs/{parameter}{value}/imgs/output{epoch}.png", nrow=num_rows)
                # Plain lists keep the CSV free of tensor reprs / device tags.
                wr.writerow([coord.cpu().tolist(), reconstructed_coord.cpu().tolist()])
    return running_loss / len(dataloader.dataset)

### Hyperparameters for the new model

In [None]:
# Hyperparameters for the combined network; the values repeat the earlier hyperparameter cell.
# CombNet concatenates the pretrained ConvVAE encoder output with the coordinates,
# so latent_dim has to stay equal to the value ConvVAE was built with.
epochs = 50 # 50,100
batch_size = 16 #8,16,32,64?
lr = 0.001 #0.002, 0.003

# Convolution geometry read by the model classes.
kernel_size = 4 #3,4,5,6?
stride = 1 #1,2,3,4?
padding = 0 #0,1,2,3,4?
init_kernel = 8 #4,8,16,32 #initial number of filters

latent_dim = 96

# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
# Model, optimiser and loss used by fit() / validate().
model = CombNet(latent_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss(reduction='sum')

# Run identifier: every artefact of this run goes under outputs/<parameter><value>/.
parameter = 'testMultiOut'
value = 2
# makedirs creates the run folder and its imgs/ sub-folder in one call.
os.makedirs("outputs/"+parameter+str(value)+"/imgs", exist_ok=True)
#parameters = ['layers']
#values = ['3,4,5,6,7']

#for parameter in parameters:
    #for value in values:

        #Create a folder here
        #os.makedirs(parameter+value, exist_ok=True)

# validate() and the following cells build paths as parameter+value, so value must be a string.
value = str(value)

train_loss = []
val_loss = []
# The context manager closes the reconstruction CSV even if training raises;
# `wr` stays a module-level name because validate() writes to it.
with open("outputs/"+parameter+value+"/reconstruction.csv", "w", newline='') as f:
    wr = csv.writer(f)
    wr.writerow(["Original", "Reconstructed"])

    for epoch in range(epochs):
        print(f"Epoch {epoch+1} of {epochs}")
        train_epoch_loss = fit(model, train_loader)
        val_epoch_loss = validate(model, val_loader)
        train_loss.append(train_epoch_loss)
        val_loss.append(val_epoch_loss)
        print(f"Train Loss: {train_epoch_loss:.4f}")
        print(f"Val Loss: {val_epoch_loss:.4f}")

# Persist the trained weights next to the other run artefacts.
filepath = os.path.join(os.getcwd(), "outputs", parameter+value, parameter+value+".pth")
torch.save(model.state_dict(), filepath)

# The loss plot and the loss CSV / lossCompare.csv export are produced by the
# next two cells; repeating them here appended a duplicate row to
# outputs/lossCompare.csv on every full run.

In [None]:
# Train / validation loss per epoch, saved into the run's output folder.
loss_fig, loss_ax = plt.subplots(figsize=(10, 10))
epoch_axis = range(1, epochs + 1)
loss_ax.plot(epoch_axis, train_loss, label="Train Loss")
loss_ax.plot(epoch_axis, val_loss, label="Validation Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.legend()

# Ticks every 1000, starting at the smallest train loss rounded down to a multiple of 100.
first_tick = int(math.floor(min(train_loss) / 100.0)) * 100
loss_ax.set_yticks(np.arange(first_tick, max(train_loss) + 1, 1000))
loss_fig.savefig('outputs/'+parameter+value+'/loss'+parameter+value+'.png')

In [None]:
# Per-epoch losses for this run: a header row followed by one row per epoch.
run_tag = parameter + value
with open('outputs/' + run_tag + '/loss' + run_tag + '.csv', 'w', newline='') as loss_file:
    loss_writer = csv.writer(loss_file)
    loss_writer.writerows([["Train loss", "Val loss"], *zip(train_loss, val_loss)])

# Append this run's final losses to the comparison file shared by all runs.
with open('outputs/lossCompare.csv', 'a+', newline='') as compare_file:
    csv.writer(compare_file).writerow([parameter, value, train_loss[-1], val_loss[-1]])

# Testing

In [None]:
# Reload the weights saved by the training cell into a fresh CombNet.
PATH = os.path.join(os.getcwd(), "outputs", parameter+str(value), parameter+str(value)+".pth")

TestVAE = CombNet(latent_dim)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
TestVAE.load_state_dict(torch.load(PATH, map_location=device))
TestVAE.to(device)

In [None]:
def generate_latent_vectors(model, dataloader):
    """
    Collect one latent vector and one label per sample of ``dataloader``.

    Two kinds of model are supported:

    * a model with an ``encoder`` attribute (e.g. the VAE): the encoder's
      mean vector ``mu`` is used as the latent;
    * a model without one (e.g. ``CombNet``): it is called as
      ``model(image, coord)`` and the third returned value is used.

    :param model: ``nn.Module`` to evaluate; it is switched to eval mode
    :param dataloader: iterable of ``(image, label, coord)`` batches
    :return: tuple ``(latent, target)`` — a list of 1-D numpy arrays and a
             list of labels, in iteration order
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    latent = []
    target = []
    with torch.no_grad():
        for data, label, coord in tqdm(dataloader, total=len(dataloader)):
            data = data.to(model_device)
            if hasattr(model, "encoder"):
                _, codes, _ = model.encoder(data)
            else:
                _, _, codes = model(data, coord.to(model_device))
            latent.extend(codes.cpu().numpy())
            target.extend(label.numpy())
    return latent, target

In [None]:
# Latent vectors and labels for the validation loader, as returned by generate_latent_vectors.
test_latent_var, test_target = generate_latent_vectors(TestVAE, val_loader)

In [None]:
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
%matplotlib qt
from IPython import display
import matplotlib.cm as cmx
import matplotlib.colors as colors


import plotly.express as px
import plotly.io as pio

In [None]:
# Project the collected latent vectors to 2-D with t-SNE and plot them coloured by label.
latent = np.array(test_latent_var)
target = np.array(test_target)
tsne = TSNE(n_components=2, init="pca", random_state=0)

X = tsne.fit_transform(latent)

# Build the frame column by column: stacking the labels onto the float
# embedding turned them into floats, so the legend showed "2.0" instead of "2".
df = pd.DataFrame({"z1": X[:, 0], "z2": X[:, 1], "label": target.astype(str)})

fig = px.scatter(df, x="z1", y="z2", color="label")

pio.write_html(fig, file='outputs/'+parameter+value+'/plot'+parameter+value+'.html', auto_open=True)
#pio.write_html(fig, file='raw.html', auto_open=True)
#pio.write_html(fig, file='raw.html', auto_open=True)

## Helper Code

In [None]:
# NOTE(review): `rrr` is not defined anywhere in the visible notebook, so this cell raises NameError.
# Presumably a deliberate stop so "Run All" halts before the helper cells below — confirm.
rrr

In [None]:
#print(list(train_data))
#type(train_loader)
#print(train_data.shape)

# Debug helper: print the image-tensor shape of every training batch.
for images, labels, coords in train_loader:
    print(images.shape)
#    print(labels.shape)
    #print(coords)