In [3]:
# The wildcard import stays first so the explicit imports below win any name clash,
# exactly as in the original import order.
from fastai.vision import *

import os
import random
from glob import glob

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import pyplot as plt
from numpy.linalg import inv
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
class Model(nn.Module):
    """VGG-style CNN that regresses 8 values from a 2-channel image.

    Architecture:
      * eight 3x3 conv -> batch-norm -> ReLU stages with
        64, 64, 64, 64, 128, 128, 128, 128 output channels;
      * 2x2 max-pooling after stages 2, 4 and 6 (spatial size divided by 8);
      * ``fc1`` (128*16*16 -> 1024), ReLU, ``fc2`` (1024 -> 8).

    ``fc1`` is sized for 16x16 feature maps, i.e. for 128x128 inputs.
    Sub-module names and ordering match the previous definition, so existing
    ``state_dict`` checkpoints still load.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.layer1 = self._conv_block(2, 64)
        self.layer2 = self._conv_block(64, 64, pool=True)
        self.layer3 = self._conv_block(64, 64)
        self.layer4 = self._conv_block(64, 64, pool=True)
        self.layer5 = self._conv_block(64, 128)
        self.layer6 = self._conv_block(128, 128, pool=True)
        self.layer7 = self._conv_block(128, 128)
        self.layer8 = self._conv_block(128, 128)
        self.fc1 = nn.Linear(128*16*16, 1024)
        self.fc2 = nn.Linear(1024, 8)

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Build Conv3x3(pad=1) -> BatchNorm -> ReLU, plus MaxPool(2) when ``pool`` is set."""
        modules = [nn.Conv2d(in_channels, out_channels, 3, padding=1),
                   nn.BatchNorm2d(out_channels),
                   nn.ReLU()]
        if pool:
            modules.append(nn.MaxPool2d(2))
        return nn.Sequential(*modules)

    def forward(self, x):
        """Run the network.

        Args:
            x: tensor of shape (batch, 2, 128, 128).

        Returns:
            Tensor of shape (batch, 8).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce the
                128*16*16 features expected by ``fc1``.
        """
        out = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4,
                      self.layer5, self.layer6, self.layer7, self.layer8):
            out = stage(out)
        # Flatten per sample. The previous view(-1, 128*16*16) could silently
        # change the batch size when the input resolution was wrong; now the
        # mismatch surfaces as a shape error in fc1.
        out = out.flatten(1)
        # Without a non-linearity fc1 and fc2 collapse into a single linear map.
        out = F.relu(self.fc1(out))
        return self.fc2(out)



In [5]:
# Creating a customized dataset class in pytorch
 
class CocoDdataset(Dataset):
    """In-memory dataset built from a directory of saved NumPy samples.

    Every entry of ``path`` is read with ``np.load``. Element 0 of the loaded
    array becomes the input, rescaled as ``(v - 127.5) / 127.5`` (presumably
    8-bit pixel values mapped to [-1, 1] -- confirm against the preprocessing
    step). Element 1 becomes the target, divided by 32. Both are stored as
    float64 tensors.

    Args:
        path: directory containing the sample files; a trailing separator is
            optional.

    Attributes:
        X_data: tuple of input tensors, ordered by file name.
        Y_data: tuple of target tensors, aligned with ``X_data``.
        len: number of samples.
    """
    def __init__(self,path):
        inputs = []
        targets = []
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible between runs.
        for name in sorted(os.listdir(path)):
            # NOTE(review): if the samples were saved as object arrays, recent
            # NumPy versions need allow_pickle=True here -- only enable that
            # for files you trust.
            array = np.load(os.path.join(path, name))
            inputs.append(torch.from_numpy((array[0].astype(float)-127.5)/127.5))
            targets.append(torch.from_numpy(array[1].astype(float) / 32.))
        # Accumulate in lists (linear time) and freeze once; growing a tuple
        # with `+` inside the loop copied it on every iteration.
        self.len = len(inputs)
        self.X_data = tuple(inputs)
        self.Y_data = tuple(targets)

    def __getitem__(self,index):
        """Return the ``(input, target)`` pair stored at ``index``."""
        return self.X_data[index], self.Y_data[index]

    def __len__(self):
        """Return the number of samples that were loaded."""
        return self.len
    



# Directories holding the pre-processed samples of each split.
train_path = '/home/jupyter/train2017/train2017processed/'
validation_path = '/home/jupyter/val2017/val2017processed/'
test_path = '/home/jupyter/test2017/test2017processed/'

# One dataset object per split, built in train -> validation -> test order.
TrainingData = CocoDdataset(path=train_path)
ValidationData = CocoDdataset(path=validation_path)
TestData = CocoDdataset(path=test_path)


In [None]:
# Sanity check: print the target of one validation sample and show its two channels.
batch_size = 2  # not used in this cell; the training cell assigns its own value

# Index the last sample directly. The previous loop walked the whole dataset
# only to keep the final (images, target) pair.
images, target = ValidationData[len(ValidationData) - 1]
# The sample is stored channels-last; move channels first so images[c] is one plane.
# (assumes a (height, width, 2) layout -- confirm against the preprocessing step)
images = images.permute(2,0,1).float()

print(target.float(),target.flatten(),target.view(-1,8))
for channel in range(2):
    plt.imshow(images[channel,:,:])
    plt.show()

In [6]:
# ---- Data loaders and loss -------------------------------------------------
batch_size = 64
# Shuffle the training split every epoch; SGD on a fixed sample order sees the
# same batches each epoch. Evaluation loaders keep their deterministic order.
TrainLoader = DataLoader(TrainingData, batch_size, shuffle=True)
ValidationLoader = DataLoader(ValidationData, batch_size)
TestLoader = DataLoader(TestData, batch_size)
criterion = nn.MSELoss()

# ---- Training schedule -----------------------------------------------------
# The number of epochs is derived from a fixed budget of optimizer steps.
# Sizes come from the data that was actually loaded, not a hard-coded count.
num_samples = len(TrainingData)
total_iteration = 90000
steps_per_epoch = len(TrainLoader)
if steps_per_epoch == 0:
    raise ValueError('TrainingData is empty; nothing to train on.')
epochs = total_iteration // steps_per_epoch

# ---- Model and optimizer ---------------------------------------------------
model = Model().to(device)
summary(model,(2,128,128))
optimizer = optim.SGD(model.parameters(),lr=0.005, momentum=0.9)

# ---- Training loop ---------------------------------------------------------
# Set training mode explicitly so re-running this cell after the evaluation
# cell (which calls model.eval()) behaves the same as a fresh run.
model.train()
for epoch in range(epochs):
    for i, (images, target) in enumerate(TrainLoader):
        # The dataset yields float64, channels-last tensors. Cast to float32
        # before the copy so half as many bytes are moved to the device.
        images = images.permute(0,3,1,2).float().to(device)
        # Flatten each target to 8 values to match the model output.
        target = target.float().view(-1,8).to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # Fires on the last batch of every epoch; reports that batch's loss.
        if (i+1) % len(TrainLoader) == 0:
            print('Train Epoch: [{}/{}] [{}/{} ({:.0f}%)]\tMean Squared Error: {:.6f}'.format(
                epoch+1, epochs, i+1, len(TrainLoader),
                100. * (i+1) / len(TrainLoader), loss.item()))

# ---- Checkpoint ------------------------------------------------------------
state = {'epoch': epochs, 'state_dict': model.state_dict(),
         'optimizer': optimizer.state_dict() }
torch.save(state, 'DeepHomographyEstimation.pth')



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 128, 128]           1,216
       BatchNorm2d-2         [-1, 64, 128, 128]             128
              ReLU-3         [-1, 64, 128, 128]               0
            Conv2d-4         [-1, 64, 128, 128]          36,928
       BatchNorm2d-5         [-1, 64, 128, 128]             128
              ReLU-6         [-1, 64, 128, 128]               0
         MaxPool2d-7           [-1, 64, 64, 64]               0
            Conv2d-8           [-1, 64, 64, 64]          36,928
       BatchNorm2d-9           [-1, 64, 64, 64]             128
             ReLU-10           [-1, 64, 64, 64]               0
           Conv2d-11           [-1, 64, 64, 64]          36,928
      BatchNorm2d-12           [-1, 64, 64, 64]             128
             ReLU-13           [-1, 64, 64, 64]               0
        MaxPool2d-14           [-1, 64,

In [None]:
# Evaluate on the validation split: batch-norm uses its running statistics
# (eval mode) and no gradients are recorded.
model.eval()
squared_error_sum = 0.0
element_count = 0
with torch.no_grad():
    for images, target in ValidationLoader:
        # Same preprocessing as in training: channels first, float32, 8 values per target.
        images = images.permute(0,3,1,2).float().to(device)
        target = target.float().view(-1,8).to(device)
        outputs = model(images)
        loss = criterion(outputs, target)
        print('Mean Squared Error: {:.6f}'.format(loss.item()))
        # criterion averages over the batch; weight by element count so the
        # overall figure stays correct when the last batch is smaller.
        squared_error_sum += loss.item() * target.numel()
        element_count += target.numel()

if element_count > 0:
    print('Validation Mean Squared Error: {:.6f}'.format(squared_error_sum / element_count))