In [1]:
from google.colab import drive
import numpy as np
import os
import cv2
import tensorflow as tf
import torch.nn as nn
import torch
import itertools 

from torch.utils.data import DataLoader
from torchvision import transforms
from torch.utils.data import TensorDataset, DataLoader

# Mount Google Drive; the checkpoint and dataset paths used below live under /content/gdrive/.
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [2]:
#original model
class baseline_model(nn.Module):
  """CNN with additive skip connections that maps a 3-channel image to 136 outputs.

  The shape comments below assume a (3, 1280, 720) input. For each conv/pool:
  output size = int((input_size + 2*padding - filter)/stride) + 1.
  The 136 outputs are presumably 68 (x, y) landmark pairs -- TODO confirm.
  """

  def __init__(self):
    super().__init__()

    def conv_pair(c_in, c_out, first_kernel=3):
      # Conv -> ReLU -> Conv -> BatchNorm -> ReLU; paddings keep the spatial size unchanged.
      return [
          nn.Conv2d(c_in, c_out, first_kernel, padding=first_kernel // 2),
          nn.ReLU(),
          nn.Conv2d(c_out, c_out, 3, padding=1),
          nn.BatchNorm2d(c_out),
          nn.ReLU(),
      ]

    # Stem: (3, 1280, 720) -> (16, 320, 240)
    stack = [
        nn.Conv2d(3, 16, 5, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d((4, 3)),
        nn.Dropout2d(),
    ]
    # Every later stage is exactly 7 modules long, so the second conv of each
    # stage sits at an index that is a multiple of 7 (7, 14, 21, 28, 35);
    # forward() relies on this to place the skip connections.
    stack += conv_pair(16, 32, first_kernel=5) + [nn.MaxPool2d((4, 3)), nn.Dropout()]  # (32, 80, 80)
    stack += conv_pair(32, 64) + [nn.MaxPool2d(2), nn.Dropout()]     # (64, 40, 40)
    stack += conv_pair(64, 128) + [nn.MaxPool2d(2), nn.Dropout()]    # (128, 20, 20)
    stack += conv_pair(128, 256) + [nn.MaxPool2d(2), nn.Dropout()]   # (256, 10, 10)
    # Last stage applies dropout before pooling (the reverse of the stages above).
    stack += conv_pair(256, 512) + [nn.Dropout(), nn.MaxPool2d(2)]   # (512, 5, 5)

    self.layers = nn.ModuleList(stack)
    self.linear1 = nn.Linear(512*5*5, 512)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(512, 136)

  def forward(self, x):
    """Run the conv stack, then the two-layer regression head.

    For every non-zero layer index divisible by 7 the layer's input is added
    to its output (skip connection); all other layers are applied plainly.
    The result is reshaped to rows of 512*25 features before the linear head.
    """
    h = x
    for idx, layer in enumerate(self.layers):
      if idx != 0 and idx % 7 == 0:
        h = layer(h) + h
      else:
        h = layer(h)
    flat = h.reshape(-1, 512*25)
    return self.linear2(self.relu(self.linear1(flat)))

In [3]:
# Import trained parameters for both models from the mounted drive.
path_m = "/content/gdrive/My Drive/ESTsoft Internship/model_try6.pt"
path_mns = "/content/gdrive/My Drive/ESTsoft Internship/model_new_standard_try6.pt"

# First model: build, restore weights, move to the GPU.
model = baseline_model()
model.load_state_dict(torch.load(path_m))
model.cuda()

# Second model: same architecture, weights from the "new standard" checkpoint.
model_new_standard = baseline_model()
model_new_standard.load_state_dict(torch.load(path_mns))
# Kept as the last expression so the cell still displays this model's repr.
model_new_standard.cuda()

baseline_model(
  (layers): ModuleList(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(4, 3), stride=(4, 3), padding=0, dilation=1, ceil_mode=False)
    (4): Dropout2d(p=0.5, inplace=False)
    (5): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (6): ReLU()
    (7): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): MaxPool2d(kernel_size=(4, 3), stride=(4, 3), padding=0, dilation=1, ceil_mode=False)
    (11): Dropout(p=0.5, inplace=False)
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [4]:
#load test data
def load_data(dir, num):
  """Load the first `num` frames and their annotations from a sequence directory.

  Frames are ordered by the integer in characters [5:-4] of each file name in
  `dir`/frames, annotations by the integer in the first six characters of each
  file name in `dir`/annot. The i-th frame is paired with the i-th annotation.

  Args:
    dir: sequence directory containing "frames" and "annot" sub-directories.
    num: number of frame/annotation pairs to load; values <= 0 load nothing.

  Returns:
    (img_data, annot_data): a list of transposed images (`image.T`, so an
    (H, W, 3) array read by OpenCV becomes (3, W, H)) and a list of flattened
    annotation arrays, both of length `num`.

  Raises:
    IndexError: if fewer than `num` frames or annotation files are available.
    OSError: if OpenCV cannot read one of the frame files.
  """
  img_data = []
  annot_data = []
  if num <= 0:
    return img_data, annot_data

  frames_dir = dir+"/frames"
  annot_dir = dir+"/annot"

  # List and sort each directory once; doing this inside the loop re-sorted
  # the whole directory for every single frame.
  frame_names = sorted(os.listdir(frames_dir), key=lambda x: int(x[5:-4]))
  annot_names = sorted(os.listdir(annot_dir), key=lambda x: int(x[:6]))
  if num > len(frame_names) or num > len(annot_names):
    raise IndexError(
        "requested {} samples but found only {} frames and {} annotations in {}".format(
            num, len(frame_names), len(annot_names), dir))

  for frame_name, annot_name in zip(frame_names[:num], annot_names[:num]):
    img_path = os.path.join(frames_dir, frame_name)
    image = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if image is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise OSError("could not read image: {}".format(img_path))
    img_data.append(image.T)

    annot_path = os.path.join(annot_dir, annot_name)
    # Header lines and braces of the annotation file are skipped as comments.
    annot = np.loadtxt(annot_path, comments=("version:", "n_points:", "{", "}"))
    annot_data.append(annot.flatten())
  return img_data, annot_data

In [5]:
# Get data for testing.
# Directory of one test sequence; load_data reads its "frames" and "annot" sub-directories.
test_data = "/content/gdrive/My Drive/ESTsoft Internship//300VW_Dataset_2015_12_14/test1/548"

# Load the first 100 frame/annotation pairs of that sequence.
X_test, y_test = load_data(test_data, 100)

In [7]:
# Stack the per-frame arrays into single batches and convert them to tensors.
# (The *_np names are kept even though they end up holding torch tensors.)
X_test_np = torch.tensor(np.stack(X_test))
y_test_np = torch.tensor(np.stack(y_test))

# DataLoader with default settings: one sample per batch, original order.
test_dataset = TensorDataset(X_test_np, y_test_np)
test_dataloader = DataLoader(test_dataset)

# Mean squared error between predictions and annotations.
criterion = nn.MSELoss()

In [42]:
# Capture lists filled by the evaluation cells: model outputs for the first and second batch.
first_frame_points, second_frame_points = [], []

In [43]:
#Evaluation use MSE
# Evaluate `model` on the test loader and keep its outputs for the first two batches.
model.eval()  # evaluation mode (disables dropout, batch norm uses running statistics)

with torch.no_grad():
  loss = []
  # Flags that let only the first and the second batch be captured.
  first = True
  # Must be initialised here: without it `elif second:` raises NameError on a
  # fresh kernel, or silently picks up a stale `second` left by another cell.
  second = True
  for batch_idx, (inputs, target) in enumerate(test_dataloader):
    inputs = inputs.float()
    inputs = inputs.cuda()
    target = target.type(torch.FloatTensor)
    target = target.cuda()
    out = model(inputs)
    result = criterion(out, target)
    loss.append(result.item())
    if first:
      first_frame_points.append(out)
      first = False
    elif second:
      second_frame_points.append(out)
      second = False
  # Average of the per-batch MSE values.
  print("Model MSE: %f" %(sum(loss)/len(loss)))

Model MSE: 76438.208672


In [44]:
# Evaluate `model_new_standard` on the same loader; its outputs for the first
# two batches are appended to the same capture lists as the other model's.
model_new_standard.eval()

with torch.no_grad():
  loss = []
  first, second = True, True  # capture flags for the first / second batch
  for batch_idx, (inputs, target) in enumerate(test_dataloader):
    # Cast to float32 and move to the GPU.
    inputs = inputs.float().cuda()
    target = target.type(torch.FloatTensor).cuda()

    out = model_new_standard(inputs)
    result = criterion(out, target)
    loss.append(result.item())

    if first:
      first_frame_points.append(out)
      first = False
    elif second:
      second_frame_points.append(out)
      second = False
  # Average of the per-batch MSE values.
  print("Model with Inidicator MSE: %f" % (sum(loss) / len(loss)))

Model with Inidicator MSE: 84739.324531


In [53]:
# Entry 0 of each capture list, moved to the CPU and converted to NumPy.
# (Entry 0 is the baseline model's output only if its evaluation cell ran first.)
first, second = (
    captured[0].cpu().numpy()
    for captured in (first_frame_points, second_frame_points)
)

model_frame = [first, second]

In [54]:
# Entry 1 of each capture list, moved to the CPU and converted to NumPy.
# (Entry 1 is the indicator model's output only if its evaluation cell ran second.)
first, second = (
    captured[1].cpu().numpy()
    for captured in (first_frame_points, second_frame_points)
)

indicator_frame = [first, second]

In [55]:
# Display the two captured prediction arrays stored in model_frame.
model_frame

[array([[393.63138 , 130.45793 , 396.25406 , 147.71014 , 396.5449  ,
         163.9221  , 399.53906 , 180.64304 , 402.41916 , 191.95847 ,
         409.66568 , 211.37068 , 424.10828 , 222.27478 , 438.0147  ,
         233.47778 , 456.90823 , 239.27826 , 473.14053 , 233.36992 ,
         489.58182 , 224.23956 , 502.79022 , 214.54669 , 515.7219  ,
         202.82208 , 520.0786  , 184.90468 , 522.15533 , 164.11551 ,
         526.9239  , 146.77684 , 526.1275  , 127.664085, 404.90195 ,
         115.74773 , 414.7089  , 105.039246, 424.7167  , 100.66762 ,
         434.67502 , 101.40166 , 445.51868 , 105.12319 , 469.53098 ,
         106.65715 , 478.19843 , 100.6045  , 491.10144 ,  99.399216,
         502.53134 , 104.70946 , 508.60742 , 111.66487 , 457.29575 ,
         117.403984, 458.69693 , 130.19876 , 455.39343 , 139.68977 ,
         453.6869  , 154.29796 , 443.17685 , 160.12575 , 449.02835 ,
         164.83908 , 456.56082 , 167.30655 , 462.32364 , 162.14598 ,
         468.99503 , 163.14758 , 4