## **Feature extraction, evaluation and feature transmission to pipeline 2**

## Import statements

In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, Dataset, DataLoader
import cv2
import numpy as np

import matplotlib.pyplot as plt

!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

import pandas as pd 
import sklearn.model_selection as skl_ms

from skimage import data, io, filters, transform

# use GPU for computations if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



## Retrieving files 

In [2]:
# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# 2. Load a file by ID and create local file.
downloaded = drive.CreateFile({'id':'1EFDLd1jc_hdEvdz1QZtAjQ5F35-oEx4A'}) # replace fileid with Id of file you want to access
downloaded.GetContentFile('Dataset_Subsystem_2.csv')

## Adding new column `frame_values` and creating new .csv file.

In [3]:
# uncomment first line if directory not made
from sklearn.feature_extraction import image
list_of_sources = [102, 159, 294, 441, 564, 576, 609, 666, 711, 723]


list_of_102_ids = ['1Uxx19om8leZvDzgfsKN90d09C1hiF175', '1krswdgQLcwhdrOtHVsNYfnl0S878bBD2', '1qofz42t8pYCLgBl8nX84mfDBR8d02nf_', '1KFSVHTJ0Dd4jG78Tz3VfrEPdGrsqx3Xy', '1B5tI3nEnj__XEtU2VfP9YXSADL5NyKqa', '1BNGJKyjsz-0LOa32Og0A7hSkdh0IxSYV']
list_of_159_ids = ['1VblUr99IV3rQWWPeUPTg0ULb9439U579', '1olc3MuWafvru4bOu4l4yNkkvj8iSWIWG', '1KAqDQzfri_g8UWl_t7Z03hFlLZU_wFKw', '149wTlot8--PUWlygTyf6NZJg68O4w2_A', '10-OAj5KXKwiyut19ZSD40-2fJey9EJmh', '15f0gAxdNXRceM7wE9Yf9X7R9vG1Yjhu3']
list_of_294_ids = ['12cfc5YYSKtnO17SkJbBNx2aFvv6pzhJV', '1GTURACRdj-quAQIMA_X5fo795NETuOqh', '1MJcc59M175xoP-rv8Xs-4wOJ3Cx3GNBx', '1kiswY1hgql3txASe184mMqHooCufEuds', '1HvD71920WanyMRD08RNPSD1I13MokCXY', '1Wepo1JUkoewLqrc9YyctCMz1CNsXJPrO']
list_of_441_ids = ['15U7EN3munkOLZN1lRCkTfGzz2wLuVkQS', '1TCKxL2ChMpWj7qhwr38LhIDsNAbgZwoc', '1uZe3JyXI6SsXE9ZoKP8kWWp_Egq7QU67', '1byKM6BW4vX2NlNLldTFpQmiqnJ0rlxnT', '1OsVKMaUyhi5oMaE5ta_y8yN6K47bQTOa', '1kans6ebx9tc4dNrf_AC2nbRC6spjndcn']
list_of_564_ids = ['1o8paJ-IZF6LVeq2RdLq0yndxlN3eRf0_', '1QqCrYRaRL8MXg9lgmNg4EZW-Ajd-ARRV', '1TnGL2RZA7C59tDi9WW0RlZKNFIfR730P', '1KHmp5oVdqwFdsxhP_2TEjKwI8sMbJzqx', '1QndPB7oRUKCu1nKOr_u8T1I-bAaIJlW1', '1rlz7Vl_fyk2wd0ZDpmyG9Cxd0TIn5gHK']
list_of_576_ids = ['1AacF2VYkbJyUrh_sJ2fAAtgX3XSXz06X', '13Q0_HZBl3O9D0M4EdEWaHl8LaDFmgoP9', '1oPNluAh73GuNBRqHeAURQloX4-2Pmh4Y', '16DBfrWhIcix1WC4mmqmB4ng1P3HpMJHf', '11BX9Fy2iOulrUErFsg6oSiKnSclQQZzF', '17ymGF8dYg9XjhB8RRRft8dZ4BrulzLIL']
list_of_609_ids = ['1fKt9ZaKjetSQQmQZjgGZZOVUzq_Je4BE', '1_geE2R4gFBxMTjIXO0HVVJ9LI1UbTrqz', '1i_B1Whv4qDmh3EtswyW2ZJGBarFv0ZKG', '1ZVdKFnKvmcrLJUS5nTZ7vZLfeOU7mJrS', '1LOnmpHV9BhAYDX4pbkG05MhI_21P83Zu', '172OoI2QZx-3Pj-3tT9-bvO36nQ8eN-4R']
list_of_666_ids = ['1PcQiiSJG2S7GJ-I2lVlj7gzNUT8NXxTL', '1G0tGeD7ck_TwvL6C290j5F-COnZmFXF_', '1bm8S18sqYtVG3je17dGl7bQy2AmSe5pa', '10joYNJGSKqMXoBlvvuPGx11fLlNJMksV', '1aEKO4Sjk2XdpxNiNeeR73sn9YAO0_zJP', '1j7PAHaXmcPS89likeRarG9l9fvUUAkUP']
list_of_711_ids = ['1zZSleaA1sLzdtEarNr5nsLDFCK6rJOsv', '1anvm6wMyK4z_ZnaVhPobGnk6GHwKAED3', '1lDUNulLt8rdFsVK20i8Ntweqt3ljJw1r', '19UR7n5U5DZ69wCxGcJBtfpirPkDH1wLu', '1zFtoJ0jlthQCgIbb9PmU7kya4DWjOOVO', '1YRvDiCtiYqdi9GKHVngsVueshZw4oRRZ']
list_of_723_ids = ['1IIukaNDu12punZdu03LnCoaCoD9O6jhy', '1UyiSFly7qvLQr3R6eZAPCAtmVeK2G6v0', '1anInYkgLbeaZcSZkfs2x6FaRO1IRhRCz', '150SuTYHVJn27TeXjro4_WLDu2egcX4vP', '1V7smEV49yAgnJC8x24fW2Rn4n0FB9VCF', '1B3Cex8HFJVRdXmdG0N6XRMq6gutInor9']
video_titles = ["open_palm.webm", "open_dorsal.webm", "fist_palm.webm", "fist_dorsal.webm", "three_fingers_palm.webm", "three_fingers_dorsal.webm"]
#rame_array = np.empty((256, 192, 3))
frame_array = []
list_of_dictionaries = [dict(zip(video_titles, list_of_102_ids)), 
                        dict(zip(video_titles, list_of_159_ids)),
                        dict(zip(video_titles, list_of_294_ids)),
                        dict(zip(video_titles, list_of_441_ids)),
                        dict(zip(video_titles, list_of_564_ids)),
                        dict(zip(video_titles, list_of_576_ids)),
                        dict(zip(video_titles, list_of_609_ids)),
                        dict(zip(video_titles, list_of_666_ids)),
                        dict(zip(video_titles, list_of_711_ids)),
                        dict(zip(video_titles, list_of_723_ids))]
placeholder_length = 0
counter = 0
for dictionary in list_of_dictionaries:
  for video_title in video_titles:
    downloaded = drive.CreateFile({'id': dictionary.get(video_title)})
    downloaded.GetContentFile(video_title)
    input_video = cv2.VideoCapture(video_title)
    print(video_title, counter, (counter - placeholder_length))
    placeholder_length = counter
    ret, frame = input_video.read()
    while ret:
      ret, frame = input_video.read()
      if not ret:
        continue
      if (counter != 10747):
        scale_percent = 40 # percent of original size
        width = int(frame.shape[1] * scale_percent / 100)
       # print(width)
       #  print(height)
        height = int(frame.shape[0] * scale_percent / 100)
        dim = (width, height)
        # print(dim)
        if ((dim[0] != 256) or  (dim[1] != 192)):
          dim = (256, 192)
        resized_frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
        resized_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
        resized_frame = cv2.cvtColor(resized_frame, cv2.COLOR_RGB2GRAY)
        frame_array.append(resized_frame)
        # print(type(resized_frame))
                              
        counter += 1
      else:
        counter += 1

# print(len(frame_array))

frame_list_to_np = np.asarray(frame_array)
print(frame_list_to_np.shape)

open_palm.webm 0 0
open_dorsal.webm 346 346
fist_palm.webm 717 371
fist_dorsal.webm 822 105
three_fingers_palm.webm 1210 388
three_fingers_dorsal.webm 1317 107
open_palm.webm 1706 389
open_dorsal.webm 1863 157
fist_palm.webm 2030 167
fist_dorsal.webm 2187 157
three_fingers_palm.webm 2379 192
three_fingers_dorsal.webm 2576 197
open_palm.webm 2830 254
open_dorsal.webm 3105 275
fist_palm.webm 3304 199
fist_dorsal.webm 3517 213
three_fingers_palm.webm 3671 154
three_fingers_dorsal.webm 3851 180
open_palm.webm 4050 199
open_dorsal.webm 4230 180
fist_palm.webm 4470 240
fist_dorsal.webm 4637 167
three_fingers_palm.webm 4853 216
three_fingers_dorsal.webm 5080 227
open_palm.webm 5276 196
open_dorsal.webm 5430 154
fist_palm.webm 5540 110
fist_dorsal.webm 5683 143
three_fingers_palm.webm 5792 109
three_fingers_dorsal.webm 5887 95
open_palm.webm 5979 92
open_dorsal.webm 6164 185
fist_palm.webm 6352 188
fist_dorsal.webm 6454 102
three_fingers_palm.webm 6559 105
three_fingers_dorsal.webm 6658 99
ope

In [None]:

downloaded1 = drive.CreateFile({'id':'1gr6QL9GtTDqMflxhKX7tdwERpRnakOrQ'}) # replace fileid with Id of file you want to access
downloaded1.GetContentFile('Video Data.xlsx')
feature_dataset = pd.read_excel('Video Data.xlsx', sep=",")
feature_dataset.info()
# print(feature_dataset['palm_ring_1_x'])
landmarks_1 = feature_dataset.iloc[0:10747, 5:85]
landmarks_2 = feature_dataset.iloc[10747:11996, 5:85]
resized_landmarks_1 = landmarks_1.apply(lambda x: x * (scale_percent / 100))
resized_landmarks_2 = landmarks_2.apply(lambda x: x * (scale_percent / 100))
resized_landmarks_2 = landmarks_2.apply(lambda x: x * ((2.1*60) * (1/300)))
resized_landmarks = resized_landmarks_1.append(resized_landmarks_2)

landmarks_list = []
all_tuples = []
for i in range(len(frame_list_to_np)):
  tuple_list = []
  landmarks = resized_landmarks.iloc[i]
  landmarks = np.array([landmarks])
  landmarks = landmarks.astype('float').reshape(-1, 2)
  for landmark in landmarks:
        landmark_tuple = (np.int(landmark[0]), np.int(landmark[1]))
        tuple_list.append(landmark_tuple)
  
  all_tuples.append(tuple_list)
  landmarks_list.append(landmarks)
landmarks_list = np.asarray(landmarks_list)
print(all_tuples[10427])
print(all_tuples[10428])
print(all_tuples[10429])
# print(landmarks_list[11800])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11996 entries, 0 to 11995
Data columns (total 85 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   ID                  11996 non-null  int64 
 1   source              11996 non-null  object
 2   frame               11996 non-null  int64 
 3   camera_facing_side  11996 non-null  object
 4   gesture             11996 non-null  object
 5   palm_root_x         11996 non-null  int64 
 6   palm_root_y         11996 non-null  int64 
 7   palm_thumb_1_x      11996 non-null  int64 
 8   palm_thumb_1_y      11996 non-null  int64 
 9   palm_thumb_2_x      11996 non-null  int64 
 10  palm_thumb_2_y      11996 non-null  int64 
 11  palm_thumb_3_x      11996 non-null  int64 
 12  palm_thumb_3_y      11996 non-null  int64 
 13  palm_index_1_x      11996 non-null  int64 
 14  palm_index_1_y      11996 non-null  int64 
 15  palm_index_2_x      11996 non-null  int64 
 16  palm_index_2_y      11

In [None]:
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# running this takes ~30 mins, use the code snippet below instead!!!!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

target_list = []
frame_counter = 0
for i in range(len(frame_list_to_np)):
  frame_counter += 1
  print('entering frame number: ' + str(frame_counter))

  target_image = np.zeros((192, 256))
  for y in range(target_image.shape[0]):
    for x in range(target_image.shape[1]):
      landmarks = all_tuples[i]
      for landmark in landmarks:
        if ((landmark[1] == y) and (landmark[0] == x) and ((y,x) != (0,0))):
          #print(landmark, (y,x))
          target_image[y,x] = 1
         # print(target_image[y,x])
  target_list.append(target_image)

target_list_to_np = np.asarray(target_list, dtype='uint8')
print(target_list_to_np.shape)
print(frame_list_to_np.shape)

#np.save('outpy.npy', target_list_to_np)

#file5 = drive.CreateFile()
# Read file and set it as a content of this instance.
#file5.SetContentFile('outpy.npy')
#file5.Upload()


entering frame number: 1
entering frame number: 2
entering frame number: 3
entering frame number: 4
entering frame number: 5
entering frame number: 6
entering frame number: 7
entering frame number: 8
entering frame number: 9
entering frame number: 10
entering frame number: 11
entering frame number: 12
entering frame number: 13
entering frame number: 14
entering frame number: 15
entering frame number: 16
entering frame number: 17
entering frame number: 18
entering frame number: 19
entering frame number: 20
entering frame number: 21
entering frame number: 22
entering frame number: 23


KeyboardInterrupt: ignored

In [None]:
# Generate encoded landmark data 
target_list_2 = []
frame_counter = 0
for i in range(len(frame_list_to_np)):
  frame_counter += 1
  print('entering frame number: ' + str(frame_counter))
  landmark_counter = 0

  target_image = np.zeros((192, 256))
  landmarks = all_tuples[i]
  for landmark in landmarks:
    if (landmark != (0,0)):
      landmark_counter += 1
      if (landmark[1] > 191):
        y = 191
      else:
        y = landmark[1]
      if (landmark[0] > 255):
        x = 254
      else:
        x = landmark[0]
          #print(landmark, (y,x))
      target_image[y,x] = 1
         # print(target_image[y,x]
  print(landmark_counter) 
  target_list_2.append(target_image)

target_list_to_np = np.asarray(target_list_2, dtype='uint8')
#print(target_list_to_np_2.shape)
print(frame_list_to_np.shape)

#np.save('outpy.npy', target_list_to_np)

#file5 = drive.CreateFile()
# Read file and set it as a content of this instance.
#file5.SetContentFile('outpy.npy')
#file5.Upload()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
entering frame number: 9498
18
entering frame number: 9499
18
entering frame number: 9500
18
entering frame number: 9501
18
entering frame number: 9502
18
entering frame number: 9503
18
entering frame number: 9504
18
entering frame number: 9505
18
entering frame number: 9506
18
entering frame number: 9507
18
entering frame number: 9508
18
entering frame number: 9509
18
entering frame number: 9510
18
entering frame number: 9511
18
entering frame number: 9512
18
entering frame number: 9513
18
entering frame number: 9514
18
entering frame number: 9515
18
entering frame number: 9516
18
entering frame number: 9517
18
entering frame number: 9518
18
entering frame number: 9519
18
entering frame number: 9520
18
entering frame number: 9521
18
entering frame number: 9522
18
entering frame number: 9523
18
entering frame number: 9524
18
entering frame number: 9525
18
entering frame number: 9526
18
entering frame number: 9527
18
enter

## Instantiating transformed dataset.

In [None]:
# Generate tiles from the dataset's frames

def gen_tiles(image, tile_size, landmarks_encoded):
  y_max, x_max = image.shape
  one_counter = 0
  zero_counter = 0
  num_total_added_zero = 0
  #print(x_max//tile_size)
  for x in range(0, x_max, tile_size):
    for y in range(0, y_max, tile_size):
      # this is the input of the network
      img_tile = image[y:(y+tile_size), x:(x+tile_size)]
      # img_tile = np.uint8(img_tile)

      # landmarks_encoded is an np.array of shape (y_max, x_max)
      # that has 0 everywhere except if (y, x) is a landmark
      # (similar to what you did in Task 3 for Assigngment 1)
      landmark_tile = landmarks_encoded[y:(y+tile_size), x:(x+tile_size)]

      # this is the desired output of the network
      if np.any(landmark_tile == 1):
        has_landmark = 1
        one_counter += 1 
        tile_array.append(img_tile)
        target_array.append(has_landmark)
      # remove some 0:s in order to prevent overfitting
      elif (np.any(landmark_tile == 0) and (zero_counter % 3 == 0)):
        has_landmark = 0
        num_total_added_zero += 1
        zero_counter += 1
        tile_array.append(img_tile)
        target_array.append(has_landmark)
      else:
        zero_counter += 1

       # print(has_landmark)

  # print(one_counter, num_total_added_zero)

tile_array = []
target_array = []
for i in range(len(target_list_to_np)):
  gen_tiles(frame_list_to_np[i], 8, target_list_to_np[i])

tile_array_as_np = np.asarray(tile_array)
# tile_targets = np.asarray(tile_data[:,1]) 
target_array_as_np = np.asarray(target_array)
# tile_array = np.uint8(tile_data_as_np[:,0])
# target_array = tile_data_as_np[:,1]
print(tile_array_as_np.shape)
# reshape_tiles = np.reshape(tile_array_as_np, (tile_array_as_np.shape[0], tile_array_as_np.shape[3], tile_array_as_np.shape[1], tile_array_as_np.shape[2]))
# print(reshape_tiles.shape)
print(target_array_as_np.shape)

(3199937, 8, 8)
(3199937,)


In [None]:
## create test and train subsamples
from torch.utils.data.sampler import SubsetRandomSampler
# dataset = HandLandmarksDataset(transform=transforms.Compose([
#                                                ToTensor()
#                                            ]))

batch_size = 1000
validation_split = 0.15
shuffle_dataset = True

dataset = TensorDataset(torch.from_numpy(tile_array_as_np), torch.tensor(target_array_as_np))

dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if (shuffle_dataset):
  np.random.shuffle(indices)

train_indices, validation_indices = indices[split:], indices[:split]

# creating PT data samplers and loaders
train_sampler = SubsetRandomSampler(train_indices)
validation_sampler = SubsetRandomSampler(validation_indices)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=validation_sampler)

## Creating the model


In [None]:
## CNN net
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=2, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        #self.layer2 = nn.Sequential(
         #   nn.Conv2d(32, 64, kernel_size=2, stride=1, padding=2),
          #  nn.ReLU(),
           # nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout(p=0.1)
        self.fc1 = nn.Linear(400, 200)
        self.fc2 = nn.Linear(200, 1)


    def forward(self, X):
        # flatten the data into a matrix with 192 x 256 = 49152 column
        # compute the linear transformation
       #  print('we made it to forward')
         #print(X)
        out = self.layer1(X)
         #print(out)
        #out = self.layer2(out)
        # print(out)
        out = out.reshape(out.size(0), -1)
        # print(out)
        out = self.drop_out(out)
        # print(out)
        out = self.fc1(out)
        # print(out)
        out = self.fc2(out)
        out = torch.sigmoid(out)
        #print(out)
        return out

In [None]:
## Dense layer net
class LinearNet1(nn.Module):
    def __init__(self):
        super(LinearNet1, self).__init__()
        self.layer_1 = nn.Linear(8*8, 256) 
        self.layer_2 = nn.Linear(256, 256)
        self.layer_out = nn.Linear(256, 1) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(256)
        self.batchnorm2 = nn.BatchNorm1d(256)


    def forward(self, inputs):
        inputs = inputs.reshape(inputs.size(0),-1)
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        return x

In [None]:
## More complex dense layer net
class LinearNet2(nn.Module):
    def __init__(self):
        super(LinearNet2, self).__init__()
        self.layer_1 = nn.Linear(8*8, 512) 
        self.layer_2 = nn.Linear(512, 512)
        self.layer_3 = nn.Linear(512, 256)
        self.layer_out = nn.Linear(256, 1) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(512)
        self.batchnorm3 = nn.BatchNorm1d(256)


    def forward(self, inputs):
        inputs = inputs.reshape(inputs.size(0),-1)
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.relu(self.layer_3(x))
        x = self.batchnorm3(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        return x

In [None]:
# Call the network

model = LinearNet1()
model.train()
num_epochs = 10
num_classes = 2
batch_size = 100
learning_rate = 0.001


# Loss and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

## Training the model

In [None]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

for e in range(1, num_epochs+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device, dtype=torch.float), y_batch.to(device, dtype=torch.float)
        optimizer.zero_grad()
        
        y_pred = model(X_batch)
        
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        

    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

# Confusion matrix
model.eval()
nb_classes=2
confusion_matrix = torch.zeros(nb_classes, nb_classes)
with torch.no_grad():
    for i, (inputs, classes) in enumerate(test_loader):
        inputs = inputs.to(device, dtype=torch.float)
        classes = classes.to(device)
        outputs = model(inputs)
        y_test_pred = torch.sigmoid(outputs)
        y_pred_tag = torch.round(y_test_pred)
        for t, p in zip(classes.view(-1), y_pred_tag.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

print(confusion_matrix)

Epoch 001: | Loss: 0.18944 | Acc: 93.853
Epoch 002: | Loss: 0.16697 | Acc: 94.278
Epoch 003: | Loss: 0.16259 | Acc: 94.345
Epoch 004: | Loss: 0.15901 | Acc: 94.422
Epoch 005: | Loss: 0.15767 | Acc: 94.458
Epoch 006: | Loss: 0.15499 | Acc: 94.495
Epoch 007: | Loss: 0.15223 | Acc: 94.558
Epoch 008: | Loss: 0.15141 | Acc: 94.582
Epoch 009: | Loss: 0.14943 | Acc: 94.628
Epoch 010: | Loss: 0.14802 | Acc: 94.661
tensor([[449452.,   2540.],
        [ 22724.,   5274.]])


## Testing on unseen data

In [None]:
# Generate tiles for the unseen image

def gen_tiles_test(image, tile_size):
  tile_array = []
  y_max, x_max = image.shape[:2]
  #print(x_max//tile_size)
  for x in range(0, x_max, tile_size):
    for y in range(0, y_max, tile_size):
      # this is the input of the network
      img_tile = image[y:(y+tile_size), x:(x+tile_size)]
      tile_array.append(img_tile)

  tile_array_as_np = np.asarray(tile_array)
  # tile_array_as_np = np.reshape(tile_array_as_np, (tile_array_as_np.shape[0], tile_array_as_np.shape[3], tile_array_as_np.shape[1], tile_array_as_np.shape[2]))
  
  return tile_array_as_np

In [None]:
# Predict the landmarks for the unseen image

downloaded = drive.CreateFile({'id':'1tFDSECRpOky3gh09npZaMu8fA0Lo2IGW'})
downloaded.GetContentFile('7_dorsal.jpg')
frame = cv2.imread('7_dorsal.jpg')

#downloaded = drive.CreateFile({'id':'1y7FULAJ2JWRfsek0oSIHhoghLCXpOC1h'})
#downloaded.GetContentFile('3_three_palm.jpg')
#frame = cv2.imread('3_three_palm.jpg')

#downloaded = drive.CreateFile({'id':'1xDBAfkW5UrqHsOSeiwNBYRHZI8DMxFAb'})
#downloaded.GetContentFile('4_fist_dorsal.jpg')
#frame = cv2.imread('4_fist_dorsal.jpg')


dim = (256, 192)
resized_frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
resized_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
resized_frame = cv2.cvtColor(resized_frame, cv2.COLOR_RGB2GRAY)

image = gen_tiles_test(resized_frame, 8)
outputs = model(torch.tensor(image, dtype=torch.float))
predicted = torch.round(torch.sigmoid(outputs))

# Check to see if landmarks are present in the image or not
c = 0
for p in torch.flatten(predicted):
 if (p == 1):
   c += 1

print("Number of landmarks present in the input image: ", str(c))

Number of landmarks present in the input image:  13


## Old, unused stuff

In [None]:
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# !!!         OLD TRAINING ALGORITHM          !!!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

total_step = len(train_loader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # images = Variable(images)
        # labels = Variable(labels)
        x, y = images.to(device=device, dtype=torch.float), labels.to(device=device, dtype=torch.float)
        # print(x)
        # Run the forward pass
        optimizer.zero_grad()

        outputs = model(x)
        loss = criterion(outputs, y)
        loss_list.append(loss.item())

        #outputs = outputs.reshape(outputs.size(0), -1)

        # Backprop and perform Adam optimisation
        loss.backward()
        optimizer.step()

        # Track the accuracy
        total = labels.size(0)
        #predicted = torch.as_tensor((outputs - 0.5) > 0, dtype=torch.int32)
        predicted = torch.round(outputs)
        #preidcted = torch.flatten(predicted)
        #predicted = (outputs>0.5).float()
        #_, predicted = torch.max(outputs, 1)
        #print(torch.flatten(predicted))
        #print(labels, predicted)
        correct = (torch.flatten(predicted) == labels).sum().item()
        acc_list.append(correct / total)
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                          (correct / total) * 100))