In [1]:
## This file has an autoencoder implementation
## Author : Avadesh Meduri
## Date : 20/04/2022

from matplotlib import pyplot as plt
import numpy as np

from torch.utils.data import Dataset
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision.io import read_image
from torch.utils.data import DataLoader, RandomSampler, Sampler
import time
from PIL import Image
from skimage.io import imread
import numba
from torchvision.transforms import ToTensor, ToPILImage, Resize

In [2]:
class BoxDataSet(Dataset):
    
    def __init__(self, fnames, rgbd = True, resize = (224,224)):
        
        self.rgbd = rgbd
        self.resize = resize
        self.y_len = [0]
        self.img_dir = []
        for i in range(len(fnames)):
            self.img_dir.append("./image_data/data" + str(fnames[i]))
            self.data = np.load("./position_data/data" + str(fnames[i]) + ".npz")
            if i == 0:
                self.y_train = torch.tensor(self.data["position"]).float()
                self.y_len.append(len(self.data["position"])-1)
                
            else:
                self.y_train = torch.vstack((self.y_train, torch.tensor(self.data["position"]).float()))
                self.y_len.append(self.y_len[-1] + len(self.data["position"]))
                        
    def __len__(self):
        return len(self.y_train)
    
    def __getitem__(self, gidx):
        
        
        b_idx = max(np.searchsorted(self.y_len, gidx)-1,0) # which dir to look into
        idx = max(gidx - self.y_len[b_idx] - 1,0) # relative idx
        
        image = ToTensor()(imread(self.img_dir[b_idx] + "/color_" + str(idx) + ".jpg"))
        if self.rgbd:
            d_image = ToTensor()(imread(self.img_dir[b_idx] + "/depth_" + str(idx) + ".jpg"))
            image = torch.vstack((image, d_image))
        else:
            image = transforms.Resize(self.resize)(image)
        
        label = self.y_train[gidx]
        
        return image.float(), label
                
    
    def get_data(self, gidx):
        
        
        b_idx = max(np.searchsorted(self.y_len, gidx)-1,0) # which dir to look into
        idx = max(gidx - self.y_len[b_idx] - 1,0) # relative idx
        
        image = ToTensor()(imread(self.img_dir[b_idx] + "/color_" + str(idx) + ".jpg"))
        if self.rgbd:
            d_image = ToTensor()(imread(self.img_dir[b_idx] + "/depth_" + str(idx) + ".jpg"))
            image = torch.vstack((image, d_image))
        else:
            image = transforms.Resize(self.resize)(image)
            
        label = self.y_train[gidx]
        
        return image.float()[None,:,:,:], label



In [16]:
class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(4, 64, 3, padding = (1,1))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 32, 3, padding = (1,1))
        self.conv3 = nn.Conv2d(32, 16, 3, padding = (1,1))
        
        self.deconv1 = nn.Conv2d(16, 32, 2, stride=2)
        self.deconv2 = nn.Conv2d(32, 64, 2, stride=2)
        self.deconv3 = nn.Conv2d(64, 4, 2, stride=2)
        
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        print(x.shape)
        x = F.relu(self.deconv1(x))
        print(x.shape)
        x = F.relu(self.deconv2(x))
        print(x.shape)
        x = self.deconv3(x)
        
        return x

In [18]:
device = torch.device("cuda")
net = Encoder()
# net.load_state_dict(torch.load("./models/cnn1"))
net = net.to(device)
lr = 1.5e-4
eps = 1000
indices = [1,2,3,4,5]
num_data = len(indices)
start = 1
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
loss = torch.nn.MSELoss() #torch.nn.MSELoss() #torch.nn.HuberLoss()
dl_arr = read_data(indices)

loading 1loading 2loading 3loading 4loading 5

In [19]:
for i in range(eps):
    for x_train_batch, y_train_batch in dl_arr[np.random.randint(num_data)]:
        x_train_gpu = x_train_batch.to(device)
        y_train_gpu = y_train_batch.to(device)
        y_pred = net(x_train_gpu)
        print(y_pred.shape)
        error = loss(y_pred, y_train_gpu) 
        print("The iteration number : " + str(i) + " The loss is :" + str(error.cpu().detach().numpy()), end='\r', flush  = True)
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
        
    if i % 10 == 0:
        torch.save(net.state_dict(), "./models/enc1")
        
torch.save(net.state_dict(), "./models/enc1")  

torch.Size([32, 16, 30, 53])
torch.Size([32, 32, 15, 26])
torch.Size([32, 64, 7, 13])
torch.Size([32, 4, 3, 6])


RuntimeError: The size of tensor a (6) must match the size of tensor b (424) at non-singleton dimension 3