In [1]:
import torchvision.models as models
import torch
from torchvision import transforms
from PIL import Image
from torch.nn.utils.rnn import *
from torch import nn
from torch.utils import data
import time
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
GPU = torch.cuda.is_available()
device = torch.device("cuda" if GPU else "cpu")
cuda = torch.cuda.is_available()
num_workers = 4 if cuda else 0
GPU

True

In [3]:
torch.cuda.empty_cache()

### Data Preprocessing

In [4]:
X = []
Y = []
dataType = "center"
numDataPointsWanted = 3000

transform=transforms.ToTensor()
root_path = "./"
with open(root_path + "labels.txt") as f:
    start = time.time()
    i = 0
    for line in f:
        [filename, radians] = line.split(' ')
        
        if filename[0:4] == "left" and dataType == "left":
            with Image.open(root_path + filename) as img:
                X.append(transform(img.convert('RGB')))
            Y.append(((float(radians) * 180.0)/np.pi))
            i += 1
            
        if filename[0:5] == "right" and dataType == "right":
            with Image.open(root_path + filename) as img:
                X.append(transform(img.convert('RGB')))
            Y.append(((float(radians) * 180.0)/np.pi))
            i += 1
                
        if filename[0:6] == "center" and dataType == "center":
            with Image.open(root_path + filename) as img:
                X.append(transform(img.convert('RGB')))
            Y.append(((float(radians) * 180.0)/np.pi))
            i += 1
            
        if i == numDataPointsWanted: break
    end = time.time()
    
print(end - start, "seconds")
print('Images: ', len(X))

58.15475082397461 seconds
Images:  3000


In [5]:
class Dataset(data.Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return self.X[index], self.Y[index]

In [6]:
all_set = Dataset(X, Y)

all_dataloader = data.DataLoader(all_set, batch_size=1, shuffle=False, num_workers=num_workers)

In [7]:
resnet50 = models.resnet152(pretrained=True).to(device)

In [8]:
resnet50 = torch.nn.Sequential(*(list(resnet50.children())[:-1]))
print(resnet50)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


### Run ResNet and Create Feature Lists

In [9]:
allfeaturesList = []

start = time.time()
for i, (x, _) in enumerate(all_dataloader):
    x = x.float().to(device)
    allfeaturesList.append((resnet50(x).squeeze(0).cpu().detach().numpy())[:, 0, 0])
    del x
    torch.cuda.empty_cache()
end = time.time()

print(end - start, "seconds")
print(len(allfeaturesList))

276.15412735939026 seconds
3000


In [13]:
import torch.nn.functional as F 
valL = []
for j in range(1, len(allfeaturesList)):
    image_1 = torch.tensor(allfeaturesList[j - 1])
    image_2 = torch.tensor(allfeaturesList[j])
    val = F.cosine_similarity(image_1, image_2, dim=0)
    valL.append(val)
    
print(valL[0])
print(np.mean(np.array(valL)))

tensor(0.9990)
0.9990827


# Feature Dataset

In [36]:
import random

BATCH_SIZE = 32
input_size = 2048
context_size = 15

newX = []
newY = []
start = time.time()
for i in range(context_size, len(allfeaturesList)):
    x = [torch.tensor(arr) for arr in allfeaturesList[i-context_size:i]]
    newX.append(torch.stack(x))
    newY.append(torch.tensor(Y[i-context_size:i]))

for i in range(len(newX)-1, 0, -1): 
      
    # Pick a random index from 0 to i  
    j = random.randint(0, i + 1)  
    
    # Swap arr[i] with the element at random index  
    newX[i], newX[j] = newX[j], newX[i]  
    newY[i], newY[j] = newY[j], newY[i]  
    
print(newX[0].shape)
print(newY[0].shape)
train_X, val_X, train_Y, val_Y = train_test_split(newX, newY, test_size=0.30)
end = time.time()
print(end - start, "seconds")

torch.Size([15, 2048])
torch.Size([15])
0.5655293464660645 seconds


In [37]:
class Dataset_Model(data.Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return self.X[index], self.Y[index]
    
train_set = Dataset_Model(train_X, train_Y)
train_dataloader = data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
val_set = Dataset_Model(val_X, val_Y)
val_dataloader = data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)

In [38]:
print(len(train_set))
print(len(val_set))

print(train_set.__getitem__(0)[0].shape, train_set.__getitem__(0)[1].shape)

2089
896
torch.Size([15, 2048]) torch.Size([15])


# RNN model

In [39]:
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNN, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.rnn = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True, bidirectional=True)
        self.fc1 = nn.Linear(hidden_dim * 2, hidden_dim * 4)
        self.fc2 = nn.Linear(hidden_dim * 4, output_size)

    def forward(self, x):
        output, hidden = self.rnn(x)
        output = self.fc1(output)
        output = self.fc2(output)
        return output, hidden

In [40]:
hidden_units = 512 #We should keep this large
f_model = RNN(input_size, 1, hidden_units, 3).to(device)
optimizer = torch.optim.Adam(f_model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)
criterion = nn.L1Loss()

print(f_model)

RNN(
  (rnn): LSTM(2048, 512, num_layers=3, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=1024, out_features=2048, bias=True)
  (fc2): Linear(in_features=2048, out_features=1, bias=True)
)


In [41]:
def train(num_epochs, model, save):
    EPOCH_TRAIN_LOSSES = []
    EPOCH_VAL_LOSSES = []
    EPOCH_TRAIN_ACC = []
    EPOCH_VAL_ACC = []
    
    for epoch in range(num_epochs):
        start = time.time()

        running_train_loss = 0.0
        running_val_loss = 0.0
        train_correct = 0.0
        val_correct = 0.0
        train_total = 0.0
        val_total = 0.0

        model.train()
        for i, (x, label) in enumerate(train_dataloader):
            x, label = x.to(device), label.to(device)
            optimizer.zero_grad()
            y, hidden = model(x.float())
            
            y = y.squeeze(2)
            loss = criterion(y, label)
            loss.backward()
            optimizer.step()

            running_train_loss += loss.item()
            predicted = y            
            train_correct += torch.sum(torch.abs(predicted - label) < 0.01).item()
            train_total += label.size(0)
            
            del x
            del label

        model.eval()
        with torch.no_grad():
            for i, (x, label) in enumerate(val_dataloader):
                x, label = x.to(device), label.to(device)
                y, _ = model(x.float())
                
                y = y.squeeze(2)
                loss = criterion(y, label)

                running_val_loss += loss.item()
                predicted = y
                val_correct += torch.sum(torch.abs(predicted - label) < 0.01).item()
                val_total += label.size(0)

        train_acc = (train_correct / (train_total*context_size))*100
        val_acc = (val_correct / (val_total*context_size))*100
        tloss = running_train_loss / len(train_dataloader)
        vloss = running_val_loss/ len(val_dataloader)

        print("Epoch", epoch, " Took", int(time.time() - start), "s")
        print("Train Acc:", train_acc, "Val Acc:", val_acc)
        print("Avg Train Loss:", tloss, "Avg Val Loss:", vloss)

        scheduler.step(tloss)

        EPOCH_TRAIN_LOSSES.append(tloss)
        EPOCH_VAL_LOSSES.append(vloss)
        EPOCH_TRAIN_ACC.append(train_acc)
        EPOCH_VAL_ACC.append(val_acc)

        if save:
            torch.save(model.state_dict(), './model_' + str(epoch + 1) + '_' + str(val_acc) + '.pt')
            
    return EPOCH_TRAIN_LOSSES, EPOCH_VAL_LOSSES, EPOCH_TRAIN_ACC, EPOCH_VAL_ACC

In [None]:
n_epochs = 9
t_l, v_l, t_a, v_a = train(n_epochs, f_model, save=False)

Epoch 0  Took 4 s
Train Acc: 0.3318972395085368 Val Acc: 0.05952380952380953
Avg Train Loss: 4.8677773511771 Avg Val Loss: 2.7064641756670818
Epoch 1  Took 4 s
Train Acc: 0.3318972395085368 Val Acc: 0.022321428571428572
Avg Train Loss: 2.724752431566065 Avg Val Loss: 2.8379004299640656
Epoch 2  Took 4 s
Train Acc: 0.44678474549226105 Val Acc: 0.1636904761904762
Avg Train Loss: 2.721789345596776 Avg Val Loss: 2.7484794003622874
Epoch 3  Took 4 s
Train Acc: 0.5712462103079624 Val Acc: 0.3794642857142857
Avg Train Loss: 2.6889385671326607 Avg Val Loss: 2.6706149705818722
Epoch 4  Took 4 s
Train Acc: 0.6925163555130046 Val Acc: 0.17113095238095238
Avg Train Loss: 2.658481164412065 Avg Val Loss: 2.67294704914093
Epoch 5  Took 4 s
Train Acc: 0.6733684378490505 Val Acc: 3.6309523809523814
Avg Train Loss: 2.6413171634529578 Avg Val Loss: 2.6680411185537065
Epoch 6  Took 4 s
Train Acc: 0.864847614488591 Val Acc: 0.044642857142857144
Avg Train Loss: 2.640387930653312 Avg Val Loss: 2.672638748373

In [None]:
import matplotlib.pyplot as plt

x = np.arange(n_epochs)

rangeVi = 2
rangeVf = 9
plt.plot(x[rangeVi:rangeVf], t_l[rangeVi:rangeVf], label="Train")
plt.plot(x[rangeVi:rangeVf], v_l[rangeVi:rangeVf], label="Validation")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.title("Epochs vs Loss")
plt.legend()
plt.savefig('RNNLossFinal_ResNet_34.png')
plt.show()

plt.plot(x, t_a, label="Train")
plt.plot(x, v_a, label="Validation")
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy")
plt.title("Epochs vs Accuracy")
plt.legend()
plt.savefig('RNNAccuracyFinal.png')
plt.show()