In [2]:
from google.colab import drive
import numpy as np
import os
import cv2
import pandas as pd
import tensorflow as tf
import torch.nn as nn
import torch
import itertools 

from torch.utils.data import DataLoader
from torchvision import transforms
from torch.utils.data import TensorDataset, DataLoader

# Mount Google Drive under /content/gdrive/; the dataset path and the
# checkpoint paths used in later cells all live below this mount point.
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
# Load `num` frames and their annotations per video (there is not enough RAM to hold every frame and annotation).
# An int `num` selects that many frames; a float `num` selects that fraction of the frames.
def load_data(dir, num):
  """Load a sample of frames and their annotations from every video folder.

  Every sub-directory of ``dir`` must contain a ``frames`` and an ``annot``
  folder. Frames and annotations are paired by their position in the
  numerically sorted file listings of those two folders.

  Args:
    dir: Directory with one sub-directory per video (e.g. ``.../training``).
      (The name shadows the built-in ``dir``; it is kept so existing callers
      keep working.)
    num: How many frames to take from each video.
      * ``int``: a window of ``num`` consecutive frames starting at a random
        position. If the video has fewer than ``num`` frames the whole video
        is used instead of raising.
      * ``float``: ``int(total_frames * num)`` frames drawn at random without
        replacement, i.e. ``num`` is the fraction of the video to load.

  Returns:
    ``(img_data, annot_data)``: two lists filled in lock-step. ``img_data``
    holds the transposed images, ``annot_data`` the flattened annotation
    arrays. If ``num`` is neither an int nor a float, ``"invalid num"`` is
    printed and ``(0, 0)`` is returned.
  """
  img_data = []
  annot_data = []
  for i in sorted(os.listdir(dir)):
    f = os.path.join(dir, i) # ...training/001 etc
    if not os.path.isdir(f):
      continue
    frames_dir = os.path.join(f, "frames")
    annot_dir = os.path.join(f, "annot")
    total_num = len(os.listdir(frames_dir))
    if total_num == 0:
      continue

    if isinstance(num, int):
      # Clamp the window so short videos do not make np.random.choice fail,
      # and use "+ 1" so the last possible start position can be drawn too.
      window = max(0, min(num, total_num))
      start = int(np.random.choice(total_num - window + 1))
      chosen_idx = range(start, start + window)
    elif isinstance(num, float):
      # Random subset; these indices are used as-is (they were previously
      # discarded and replaced by a failing range(num) call).
      chosen_idx = np.random.choice(total_num, int(total_num*num), replace=False)
    else:
      print("invalid num")
      return 0, 0

    # File names are sorted by the number embedded in them: characters 5..-4
    # of a frame name and the first 6 characters of an annotation name.
    frame_list = sorted(os.listdir(frames_dir), key=lambda x: int(x[5:-4]))
    annot_list = sorted(os.listdir(annot_dir), key=lambda x: int(x[:6]))

    for j in chosen_idx:
      img_path = os.path.join(frames_dir, frame_list[j])
      image = cv2.imread(img_path, cv2.IMREAD_COLOR)
      # cv2.imread yields None for a file it cannot decode. Such frames, and
      # frames whose first dimension is not 720, are skipped together with
      # their annotation so both lists stay aligned.
      if image is None or image.shape[0] != 720:
        continue
      img_data.append(image.T)

      annot_path = os.path.join(annot_dir, annot_list[j])
      # Header/brace lines of the annotation file are treated as comments.
      annot = np.loadtxt(annot_path, comments=("version:", "n_points:", "{", "}"))

      annot_data.append(annot.flatten())
  return img_data, annot_data

In [4]:
# Training split of the dataset on the mounted Drive; load_data treats every
# sub-directory of this folder as one video.
root = "/content/gdrive/My Drive/ESTsoft Internship/300VW_Dataset_2015_12_14/training"
# Alternative dataset location (disabled):
#root = "/content/gdrive/Shared with me/ESTsoft Internship/300VW_Dataset_2015_12_14/training"

# Because this is an int, load_data takes this many consecutive frames per
# video. The second training loop reuses the value to decide where one video
# ends and the next one starts.
continuous = 20 #number of continuous frames
X_train, y_train = load_data(root, continuous)

In [5]:
class baseline_model(nn.Module):
  """Convolutional regressor that maps an image batch to 136 values per image.

  The convolutional trunk is kept as one flat ``nn.ModuleList`` of 40 layers
  (six stages) followed by two linear layers. The shape notes in ``__init__``
  assume a (3, 1280, 720) input, for which the trunk ends at (512, 5, 5).
  """

  def __init__(self):
    super().__init__()

    # output size = int((input_size + 2*padding - filter)/stride) + 1

    # Stage 1: (3, 1280, 720) -> (16, 320, 240)
    trunk = [
        nn.Conv2d(3, 16, 5, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d((4,3)),
        nn.Dropout2d(),
    ]

    # Stages 2-5 share one pattern:
    #   conv -> ReLU -> conv -> batch norm -> ReLU -> max pool -> dropout
    # Each row: (in channels, out channels, kernel of first conv, pool size).
    middle_stages = (
        (16, 32, 5, (4,3)),   # -> (32, 80, 80)
        (32, 64, 3, 2),       # -> (64, 40, 40)
        (64, 128, 3, 2),      # -> (128, 20, 20)
        (128, 256, 3, 2),     # -> (256, 10, 10)
    )
    for c_in, c_out, first_kernel, pool in middle_stages:
      trunk.extend([
          # "same" padding for the odd kernel sizes used here (5 -> 2, 3 -> 1)
          nn.Conv2d(c_in, c_out, first_kernel, padding=first_kernel // 2),
          nn.ReLU(),
          nn.Conv2d(c_out, c_out, 3, padding=1),
          nn.BatchNorm2d(c_out),
          nn.ReLU(),
          nn.MaxPool2d(pool),
          nn.Dropout(),
      ])

    # Stage 6: same pattern, but dropout comes before the pooling layer.
    # (256, 10, 10) -> (512, 5, 5)
    trunk.extend([
        nn.Conv2d(256, 512, 3, padding=1),
        nn.ReLU(),
        nn.Conv2d(512, 512, 3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Dropout(),
        nn.MaxPool2d(2),
    ])

    self.layers = nn.ModuleList(trunk)
    self.linear1 = nn.Linear(512*5*5, 512)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(512, 136)

  def forward(self, x):
    """Run the trunk and the two-layer head; returns a (N, 136) tensor.

    Layers whose index is a non-zero multiple of 7 get an identity skip
    connection (layer output + layer input). With the layout built in
    ``__init__`` these are indices 7, 14, 21, 28 and 35, i.e. the second
    convolution of stages 2-6, whose input and output shapes match.
    """
    feat = x
    for idx, layer in enumerate(self.layers):
      out = layer(feat)
      feat = out + feat if (idx % 7 == 0 and idx != 0) else out
    flat = feat.reshape(-1, 512*25)
    hidden = self.relu(self.linear1(flat))
    return self.linear2(hidden)

In [6]:
# Two separately initialised networks with the same architecture, both moved
# to the GPU. `model` is trained with the plain MSE loss, `model_new_standard`
# with MSE plus the indicator term.
model = baseline_model().cuda()
model_new_standard = baseline_model().cuda()

In [7]:
# Mean-squared error between the predicted and the annotated values.
criterion = nn.MSELoss()

# One Adam optimizer per network, both with the same learning rate:
# `optimizer` updates `model`, `optimizer_n` updates `model_new_standard`.
optimizer, optimizer_n = (
    torch.optim.Adam(net.parameters(), lr=1e-4)
    for net in (model, model_new_standard)
)

In [8]:
#data
# Stack the per-frame lists produced by load_data into one array each.
X_train_np = np.stack(X_train)
y_train_np = np.stack(y_train)

# torch.from_numpy wraps the stacked arrays without copying them, whereas
# torch.tensor(ndarray) would create a second full copy of the image data.
X_train_np = torch.from_numpy(X_train_np)
y_train_np = torch.from_numpy(y_train_np)

train_dataset = TensorDataset(X_train_np, y_train_np)
# Default DataLoader arguments: one sample per batch, original order. The
# second training loop counts batches to find video boundaries, so the order
# must not be shuffled.
train_dataloader = DataLoader(train_dataset)

# This import was indented, which is an IndentationError after top-level
# statements. It is kept (at column 0) in case `sys` is needed elsewhere.
import sys


In [9]:
# Train the baseline network with the plain MSE loss.
model.train()

num_epochs = 200
for epoch in range(num_epochs):
  batch_loss_c = []  # per-batch losses of the current epoch
  for batch_idx, (inputs, target) in enumerate(train_dataloader):
    # Cast both tensors to float32 and move them to the GPU.
    inputs = inputs.float().cuda()
    target = target.type(torch.FloatTensor).cuda()

    # Clear gradients left over from the previous step, then forward/backward.
    optimizer.zero_grad()
    loss = criterion(model(inputs), target)
    loss.backward()
    optimizer.step()

    batch_loss_c.append(loss.item())
  # Report the mean batch loss of the epoch.
  print(epoch, f"Epoch loss: {sum(batch_loss_c)/len(batch_loss_c)}")

0 Epoch loss: 36268.34307731346
1 Epoch loss: 6193.227738655938
2 Epoch loss: 6293.201673493562
3 Epoch loss: 5175.835874387953
4 Epoch loss: 4186.23269353796
5 Epoch loss: 2843.294541277709
6 Epoch loss: 2199.706755927757
7 Epoch loss: 1630.4369399494594
8 Epoch loss: 1510.6089583290948
9 Epoch loss: 1596.7797979213574
10 Epoch loss: 1449.8926023094743
11 Epoch loss: 1307.0319834921095
12 Epoch loss: 1173.544247037393
13 Epoch loss: 1296.734477382236
14 Epoch loss: 1018.0336188563595
15 Epoch loss: 1017.7640664524503
16 Epoch loss: 972.6858133492647
17 Epoch loss: 964.902910455068
18 Epoch loss: 995.0148858882762
19 Epoch loss: 937.6086647457547
20 Epoch loss: 883.4346124931618
21 Epoch loss: 923.908103522548
22 Epoch loss: 947.50479893861
23 Epoch loss: 906.4070312288072
24 Epoch loss: 967.5946617797568
25 Epoch loss: 909.4795393378646
26 Epoch loss: 995.8659631464217
27 Epoch loss: 873.8276263272321
28 Epoch loss: 843.5653863500665
29 Epoch loss: 839.030314733364
30 Epoch loss: 796.

In [10]:
# Persist only the learned parameters (state_dict) of the baseline network.
# NOTE(review): the dataset is read from ".../My Drive/..." (see `root`) but
# this writes below ".../Shared with me/...". Confirm that this folder exists
# under the Drive mount; presumably torch.save will not create it.
torch.save(model.state_dict(), "/content/gdrive/Shared with me/ESTsoft Internship/model.pt")

In [11]:
def _center_distances(frame):
  """Return the 68 distances of one frame's points to the frame's centre.

  ``frame`` is indexed as a flat sequence of 136 values in which point ``k``
  has its x value at ``2*k`` and its y value at ``2*k+1``. The centre is the
  mean of points 27, 33, 36 and 45 (indices 54/55, 66/67, 72/73, 90/91).
  """
  center_x = (frame[54] + frame[66] + frame[72] + frame[90]) / 4
  center_y = (frame[55] + frame[67] + frame[73] + frame[91]) / 4
  # The "+ 1" under the root keeps every distance >= 1, so the ratios formed
  # in indicator() never divide by zero.
  return [
      ((frame[2*k] - center_x)**2 + (frame[2*k+1] - center_y)**2 + 1)**0.5
      for k in range(68)
  ]


def indicator(curr, prev):
  """Measure how much the point-to-centre distances changed since ``prev``.

  Args:
    curr: Sequence of frames, each a flat sequence of 136 values
      (see ``_center_distances``).
    prev: The first return value of the previous call (one list of 68
      distances per frame), or ``None`` when there is no previous step.
      Array-likes are accepted as well; it is only indexed as ``prev[i][j]``.

  Returns:
    ``(data_for_next_step, value)`` where ``data_for_next_step`` holds the
    68 distances of every frame in ``curr`` and ``value`` is
    ``abs(r - 1)``, ``r`` being the largest ratio
    ``current distance / previous distance`` over all points and frames.
    ``value`` is ``0`` when ``prev`` is ``None`` or ``curr`` is empty.
  """
  data_for_next_step = [_center_distances(frame) for frame in curr]

  # Nothing to compare against (first step) or nothing to compare (no frames).
  # `is None` is used instead of `== None` so that array-like `prev` values
  # are not compared element-wise.
  if prev is None or len(data_for_next_step) == 0:
    return data_for_next_step, 0

  # Largest per-point growth ratio of every frame relative to `prev`.
  total_ind = [
      max(dists[j] / prev[i][j] for j in range(68))
      for i, dists in enumerate(data_for_next_step)
  ]
  return data_for_next_step, abs(max(total_ind) - 1)

In [12]:
# Train the second network with MSE plus the indicator term.
model_new_standard.train()

num_epochs = 200
for epoch in range(num_epochs):
  batch_loss = []        # per-batch losses of the current epoch
  prev_out_calcs = None  # distances of the previous frame (see indicator)
  count = 0              # number of batches processed in this epoch
  prev_loss = 100        # loss of the previous batch; gates the indicator term
  for batch_idx, (inputs, target) in enumerate(train_dataloader):
    inputs = inputs.float()
    inputs = inputs.cuda()
    target = target.type(torch.FloatTensor)
    target = target.cuda()
    out = model_new_standard(inputs)

    # A new video starts every `continuous` batches, so the previous frame
    # must be forgotten there. This reset used to sit inside the
    # `prev_loss < 100` branch and was skipped whenever the boundary batch
    # took the else branch, letting frames of different videos be compared.
    # NOTE(review): assumes one frame per batch, unshuffled, and exactly
    # `continuous` frames per video (load_data can skip frames) - confirm.
    if count % continuous == 0:
      prev_out_calcs = None

    if prev_loss < 100:
      # The indicator is computed on a detached copy, so it changes the loss
      # value (and therefore `prev_loss`) but contributes no gradient.
      result = out.clone().detach()
      prev_out_calcs, indicator_result = indicator(result, prev_out_calcs)
      loss = criterion(out, target) + indicator_result #mse + indicator
    else:
      # No distances are recorded for this frame, so the next frame has no
      # direct predecessor to be compared with.
      prev_out_calcs = None
      loss = criterion(out, target)
    prev_loss = loss.item()
    optimizer_n.zero_grad()
    loss.backward()
    optimizer_n.step()
    batch_loss.append(loss.item())
    count += 1
  print(epoch, "Epoch loss: " + str(sum(batch_loss)/len(batch_loss)))

0 Epoch loss: 35745.01816233882
1 Epoch loss: 6181.009503272728
2 Epoch loss: 5496.811677212186
3 Epoch loss: 5400.294537233423
4 Epoch loss: 4279.65853156337
5 Epoch loss: 3375.2322134865653
6 Epoch loss: 2503.4339504524514
7 Epoch loss: 2065.5625474788526
8 Epoch loss: 1704.944045582524
9 Epoch loss: 1481.9799641997727
10 Epoch loss: 1343.57186877286
11 Epoch loss: 1109.2784786471614
12 Epoch loss: 1252.2326113877473
13 Epoch loss: 1194.7757765169497
14 Epoch loss: 1079.947544059047
15 Epoch loss: 1036.402349585074
16 Epoch loss: 1066.064029983238
17 Epoch loss: 968.4496233516269
18 Epoch loss: 1050.8196352605466
19 Epoch loss: 928.1259347880329
20 Epoch loss: 864.5313400445161
21 Epoch loss: 927.432935723552
22 Epoch loss: 976.4013687504662
23 Epoch loss: 889.3174206945631
24 Epoch loss: 856.0624875174628
25 Epoch loss: 822.0733832500599
26 Epoch loss: 775.9120901390359
27 Epoch loss: 762.0341835701907
28 Epoch loss: 741.8618522873631
29 Epoch loss: 735.5620282402745
30 Epoch loss: 

In [13]:
# Persist only the learned parameters (state_dict) of the network trained
# with the indicator term.
# NOTE(review): the dataset is read from ".../My Drive/..." (see `root`) but
# this writes below ".../Shared with me/...". Confirm that this folder exists
# under the Drive mount; presumably torch.save will not create it.
torch.save(model_new_standard.state_dict(), "/content/gdrive/Shared with me/ESTsoft Internship/model_new_standard.pt")