In [1]:
!nvidia-smi

Sat Jan 23 04:07:13 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/NLP_and_CV_Projects/CV/Model/YOWO/')

In [66]:
# Index of the demo video whose test list is evaluated below.
num_vid = 5
# Annotation list for that video; the file name embeds the video index after a fixed "000" prefix.
testlist = '/content/drive/MyDrive/NLP_and_CV_Projects/CV/data/annotations_yowo/testlist_000{}.txt'.format(num_vid)

In [67]:
testlist

'/content/drive/MyDrive/NLP_and_CV_Projects/CV/data/annotations_yowo/testlist_0005.txt'

# Import YOWO

In [68]:
import os
os.chdir('/content/drive/MyDrive/NLP_and_CV_Projects/CV/Model/YOWO/')

In [69]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.autograd import Variable

from cfg import *
from cfam import CFAMBlock
from backbones_2d import darknet
from backbones_3d import mobilenet, shufflenet, mobilenetv2, shufflenetv2, resnext, resnet



num_classes = 2 # Number of classes predicted by conv_final below (for reference, the ucf101 dataset has 24).
# If this differs from the class count of a pretrained checkpoint, the pretrained weights of the last layer must not be imported.



class YOWO(nn.Module): # model.py
    """Two-branch detector: a 2D Darknet backbone and a 3D ResNeXt-101 backbone
    whose outputs are concatenated along the channel axis, passed through a
    CFAM block and projected by a final 1x1 convolution.

    Constructing the model has side effects: it reads ``cfg/yolo.cfg``,
    ``weights/yolo.weights`` and ``weights/resnext-101-kinetics.pth`` relative
    to the current working directory, and it calls ``.cuda()`` on the 3D backbone.
    """
    def __init__(self):
        """Build both backbones, load their pretrained weights and create the fusion head."""
        super(YOWO, self).__init__()
        
        ##### 2D Backbone #####
        # if opt.backbone_2d == "darknet":
        self.backbone_2d = darknet.Darknet("cfg/yolo.cfg")
        num_ch_2d = 425 # Number of output channels for backbone_2d
        # load pretrained weights
        self.backbone_2d.load_weights("weights/yolo.weights")

        ##### 3D Backbone ##### # resnet18: lighter but no pre-trained
        # default = "resnext101":
        self.backbone_3d = resnext.resnext101()
        num_ch_3d = 2048 # Number of output channels for backbone_3d
        # load 3d pretrained weights
        self.backbone_3d = self.backbone_3d.cuda()
        # Temporarily wrap in DataParallel so the state-dict keys line up with the checkpoint,
        # because the pretrained backbone models are saved in DataParallel mode.
        self.backbone_3d = nn.DataParallel(self.backbone_3d, device_ids=None) # Because the pretrained backbone models are saved in Dataparalled mode
        pretrained_3d_backbone = torch.load('weights/resnext-101-kinetics.pth')
        backbone_3d_dict = self.backbone_3d.state_dict()
        # Checkpoint keys that do not exist in this model are silently dropped by the filter below.
        pretrained_3d_backbone_dict = {k: v for k, v in pretrained_3d_backbone['state_dict'].items() if k in backbone_3d_dict} # 1. filter out unnecessary keys
        backbone_3d_dict.update(pretrained_3d_backbone_dict) # 2. overwrite entries in the existing state dict
        self.backbone_3d.load_state_dict(backbone_3d_dict) # 3. load the new state dict
        self.backbone_3d = self.backbone_3d.module # remove the dataparallel wrapper


        ##### Attention & Final Conv #####
        self.cfam = CFAMBlock(num_ch_2d+num_ch_3d, 1024)
        self.conv_final = nn.Conv2d(1024, 5*(num_classes+4+1), kernel_size=1, bias=False) # 5: number of anchors, num_classes, 4: coordinates, 1: confidence score
        # Sample counter initialised to 0; train() passes it to the training dataset as `seen`.
        self.seen = 0



    def forward(self, input):
        """Run both backbones on a clip and return the raw output of ``conv_final``.

        ``input`` is indexed as a 5-D tensor: the slice at the last index of
        dim 2 goes to the 2D backbone and the whole tensor goes to the 3D backbone.
        The result has ``5*(num_classes+4+1)`` channels.
        """
        x_3d = input # Input clip (None, num_channels, num_frames, W, H); W and H presumably must be divisible by 32 (e.g. 480, 256) - TODO confirm
        x_2d = input[:, :, -1, :, :] # Last frame of the clip that is read

        x_2d = self.backbone_2d(x_2d)
        x_3d = self.backbone_3d(x_3d)
        # Drop dim 2 of the 3D features (only removed if it has size 1) so they can be concatenated with the 2D features.
        x_3d = torch.squeeze(x_3d, dim=2)

        # Channel-wise concatenation: 3D features first, then 2D features.
        x = torch.cat((x_3d, x_2d), dim=1)
        x = self.cfam(x)

        out = self.conv_final(x)

        return out


def get_fine_tuning_parameters(model, freeze_backbone_2d = False, freeze_backbone_3d = False):
    """Build optimizer parameter groups, one per parameter, marking frozen ones with ``lr=0.0``.

    A parameter is fine-tuned when its name (from ``model.named_parameters()``)
    contains one of the fine-tune module names as a substring; substring matching
    keeps this working when names carry a ``module.`` prefix from ``nn.DataParallel``.

    Args:
        model: module whose parameters are grouped.
        freeze_backbone_2d: if True, only ``backbone_2d.models.29`` and
            ``backbone_2d.models.30`` of the 2D backbone are fine-tuned.
        freeze_backbone_3d: if True, only ``backbone_3d.layer4`` of the 3D
            backbone is fine-tuned.

    Returns:
        A list with exactly one dict per parameter: ``{'params': p}`` for
        fine-tuned parameters and ``{'params': p, 'lr': 0.0}`` for frozen ones.

    Note:
        ``adjust_learning_rate`` in this notebook overwrites ``lr`` on every
        param group, which would undo the ``lr=0.0`` freeze if these groups are
        passed to the optimizer it manages.
    """
    ft_module_names = ['cfam', 'conv_final'] # Always fine tune 'cfam' and 'conv_final'
    if not freeze_backbone_2d:
        ft_module_names.append('backbone_2d') # Fine tune complete backbone_2d
    else:
        ft_module_names.append('backbone_2d.models.29') # Fine tune only layer 29 and 30
        ft_module_names.append('backbone_2d.models.30') # Fine tune only layer 29 and 30

    if not freeze_backbone_3d:
        ft_module_names.append('backbone_3d') # Fine tune complete backbone_3d
    else:
        ft_module_names.append('backbone_3d.layer4') # Fine tune only layer 4

    parameters = [] # one optimizer group per parameter
    for k, v in model.named_parameters():
        # Decide once per parameter. The previous per-name `else` appended a frozen
        # duplicate for every non-matching name, so parameters ended up in several groups.
        if any(ft_module in k for ft_module in ft_module_names):
            parameters.append({'params': v})
        else:
            parameters.append({'params': v, 'lr': 0.0}) # freeze layers

    return parameters

In [70]:
# model       = YOWO()
# model       = model.cuda()
# model       = nn.DataParallel(model, device_ids=None) # in multi-gpu case
# model.seen  = 0
# # print(model)

# parameters = get_fine_tuning_parameters(model)#, opt)
# print(parameters)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate/batch_size, momentum=momentum, dampening=0, weight_decay=decay*batch_size)

In [71]:
# count = 0
# weights1 = []
# for v in model.parameters():
#   if count < 5:
#     weights1.append(v)
#   count += 1
# weights2 = []
# count = 0
# for k, v in model.named_parameters():
#   if count <5:
#     weights2.append(v)
#   count += 1
# print(weights1[0]==weights2[0])

In [72]:
# count = 0
# for k, v in model.named_parameters():
#   if count < 2:
#     print('k = ', k)
#     print('v = ', v)
#   count += 1

In [73]:
# model       = YOWO()
# model       = model.cuda()
# model       = nn.DataParallel(model, device_ids=None) # in multi-gpu case
# print(model)

In [74]:
# model_state = torch.load('./backup/yowo_ucf101-24_16f_best.pth') # load pre-trained model on ucf101 dataset

In [75]:
# model.load_state_dict(model_state['state_dict']) # 

In [76]:
# from torchsummary import summary
# summary(model, (3, 16,480,256)) # channel x frames x W x H 480x256

# Setup and train function

In [77]:
from __future__ import print_function
import sys
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms

import dataset
import random
import math
import os
# from opts import parse_opts
from utils import *
from cfg import parse_cfg
from region_loss import RegionLoss

# from model import YOWO, get_fine_tuning_parameters

# # Training settings
# opt = parse_opts()
# # which dataset to use
# dataset_use   = opt.dataset

dataset_use = 'jaad'

# assert dataset_use == 'ucf101-24' or dataset_use == 'jhmdb-21', 'invalid dataset'
# # path for dataset of training and validation
# datacfg       = opt.data_cfg
# # path for cfg file
# cfgfile       = opt.cfg_file

# data_options  = read_data_cfg(datacfg)
# net_options   = parse_cfg(cfgfile)[0]

# obtain list for training and testing
# Demo
basepath = '/content/drive/MyDrive/NLP_and_CV_Projects/CV/data'
trainlist     = '/content/drive/MyDrive/NLP_and_CV_Projects/CV/data/trainlist.txt' # /content/data/trainlist.txt
# testlist      = '/content/drive/MyDrive/NLP_and_CV_Projects/CV/data/annotations_yowo/testlistdemo.txt'# /content/data/testlist.txt
testlist = testlist

backupdir     = '/content/drive/MyDrive/NLP_and_CV_Projects/CV/Model/YOWO/backup'
# number of training samples
nsamples      = file_lines(trainlist)
gpus          = '0' #data_options['gpus']  # e.g. 0   ,1,2,3
ngpus         = len(gpus.split(','))
num_workers   = 4  #?int(data_options['num_workers']) 0, 4, 10?

batch_size    = 8        #int(net_options['batch']) # 12
clip_duration = 16        #int(net_options['clip_duration']) # 16 frames
max_batches   = 200000    #int(net_options['max_batches']) # 100000
learning_rate = 0.001    #float(net_options['learning_rate']) # 0.0001
momentum      = 0.9       # float(net_options['momentum']) # 0.9
decay         = 0.0005    #float(net_options['decay']) # 0.0005
steps         = [10000,20000,30000,40000]   # [float(step) for step in net_options['steps'].split(',')] # [10000,20000,30000,40000] or [30000,40000,50000,60000]
scales        = [0.5,0.5,0.5,0.5]           #[float(scale) for scale in net_options['scales'].split(',')] # [0.5,0.5,0.5,0.5]

# loss parameters
# loss_options               = parse_cfg(cfgfile)[1]
region_loss                = RegionLoss()
# anchors                    = loss_options['anchors'].split(',')
region_loss.anchors        = [0.70458, 1.18803, 1.26654, 2.55121, 1.59382, 4.08321, 2.30548, 4.94180, 3.52332, 5.91979]
#[float(i) for i in anchors] #anchors = 0.70458, 1.18803, 1.26654, 2.55121, 1.59382, 4.08321, 2.30548, 4.94180, 3.52332, 5.91979
region_loss.num_classes    = 2   # int(loss_options['classes']) # 24
region_loss.num_anchors    = 5    #int(loss_options['num']) # 5
region_loss.anchor_step    = len(region_loss.anchors)//region_loss.num_anchors #
region_loss.object_scale   = 5. # float(loss_options['object_scale']) # 5
region_loss.noobject_scale = 1. # float(loss_options['noobject_scale']) # 1 
region_loss.class_scale    = 1. # float(loss_options['class_scale']) # 1
region_loss.coord_scale    = 100. # float(loss_options['coord_scale']) # 1
region_loss.batch          = batch_size #batch_size # 12



#Train parameters
max_epochs    = max_batches*batch_size//nsamples+1
use_cuda      = True #True
seed          = int(time.time())
# seed          = 42
eps           = 1e-5
best_fscore   = 0   # initialize best fscore
begin_epoch = 1
end_epoch = 25


# Test parameters
nms_thresh    = 0.2 # non_maximum suppression threshold 0.4
iou_thresh    = 0.3 # >=iou threshold => correctly identified 0.5
# proposals when confidence score >= 0.25

# # print('N_samples = ', nsamples)
# print('num of batches per epoch =', nsamples//batch_size+1)

# print('nms_thresh =', nms_thresh)
# print('iou_thresh =', iou_thresh)

# Make sure the checkpoint directory exists. makedirs also creates missing parents
# and, with exist_ok=True, does not fail if the directory is already there.
os.makedirs(backupdir, exist_ok=True)
    
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

# Create model
# model = YOWO(opt)
model       = YOWO()
model       = model.cuda()
model       = nn.DataParallel(model, device_ids=None) # in multi-gpu case
model.seen  = 0
# print(model)

# parameters = get_fine_tuning_parameters(model)#, opt)
# optimizer = optim.SGD(parameters, lr=learning_rate/batch_size, momentum=momentum, dampening=0, weight_decay=decay*batch_size)
optimizer = optim.SGD(model.parameters(), lr=learning_rate/batch_size, momentum=momentum, dampening=0, weight_decay=decay*batch_size)

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

resume_path = '/content/drive/MyDrive/NLP_and_CV_Projects/CV/Model/YOWO/backup/yowo_jaad_manual_tune5_coord100_ob25_e4_16f_checkpoint.pth'

# Load resume path if necessary
# if opt.resume_path:
# Resume from a checkpoint when one exists at resume_path: restores model and optimizer
# state, the best f-score so far, and the epoch / sample counters.
if os.path.exists(resume_path):
    print("===================================================================")
    print('loading checkpoint {}'.format(resume_path))
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
    checkpoint = torch.load(resume_path) # opt.resume_path
    # opt.begin_epoch = checkpoint['epoch'] + 1
    begin_epoch = checkpoint['epoch'] + 1
    best_fscore = checkpoint['fscore']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    model.seen = checkpoint['epoch'] * nsamples
    # `model` is the DataParallel wrapper, while train() reads `model.module.seen`;
    # mirror the counter onto the wrapped module so the resumed value is actually used.
    model.module.seen = model.seen
    print("Loaded model fscore: ", checkpoint['fscore'])
    print("===================================================================")

region_loss.seen  = model.seen
processed_batches = model.seen//batch_size

init_width        = 480 # int(net_options['width']) # 224 480 480
init_height       = 256 # int(net_options['height']) # 224 270 256
init_epoch        = model.seen//nsamples 

##########################

def adjust_learning_rate(optimizer, batch):
    """Apply the step learning-rate schedule for the given batch index.

    Starting from the global ``learning_rate``, the rate is multiplied by
    ``scales[k]`` (or 1 when ``scales`` is shorter than ``steps``) for each
    boundary ``steps[k]`` that ``batch`` has reached, walking the boundaries in
    order and stopping at the first one not yet reached or hit exactly.
    Every optimizer param group is set to the result divided by ``batch_size``.

    Returns:
        The scheduled learning rate before the division by ``batch_size``.
    """
    scheduled_lr = learning_rate
    for idx, boundary in enumerate(steps):
        if batch < boundary:
            # Boundaries are walked in order; stop at the first one not reached yet.
            break
        factor = scales[idx] if idx < len(scales) else 1
        scheduled_lr = scheduled_lr * factor
        if batch == boundary:
            break

    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr/batch_size
    return scheduled_lr



def train(epoch):
    """Train ``model`` for one pass over ``trainlist``.

    Works entirely on notebook-level globals (model, optimizer, region_loss,
    dataset settings) and advances the global ``processed_batches`` counter by
    one per batch. The learning rate is re-applied before every batch via
    ``adjust_learning_rate``. Returns nothing; progress is reported through
    ``logging``.

    Args:
        epoch: epoch number, used only in the log message.
    """
    global processed_batches

    def _reset_loss_meters():
        # Clear all running loss meters kept on the region loss.
        for meter in (region_loss.l_x, region_loss.l_y, region_loss.l_w, region_loss.l_h,
                      region_loss.l_conf, region_loss.l_cls, region_loss.l_total):
            meter.reset()

    epoch_start = time.time()
    cur_model = model.module  # unwrap DataParallel to reach the underlying module's `seen`
    _reset_loss_meters()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
    ])
    # The dataset is asked to shuffle itself (shuffle=True); the DataLoader is not (shuffle=False).
    train_set = dataset.listDataset(basepath, trainlist, dataset_use=dataset_use,
                                    shape=(init_width, init_height),
                                    shuffle=True,
                                    transform=to_tensor,
                                    train=True,
                                    seen=cur_model.seen,
                                    batch_size=batch_size,
                                    clip_duration=clip_duration,
                                    num_workers=num_workers)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=False, **kwargs)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('training at epoch %d, lr %f' % (epoch, lr))

    model.train()

    for data, target in train_loader:
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches += 1

        if use_cuda:
            data = data.cuda()

        optimizer.zero_grad()
        output = model(data)
        region_loss.seen += data.data.size(0)
        loss = region_loss(output, target)
        loss.backward()
        optimizer.step()

        # Every 500 processed batches, reset the meters so the running losses reflect recent progress.
        if processed_batches % 500 == 0:
            _reset_loss_meters()

    elapsed = time.time() - epoch_start
    logging('trained with %f samples/s' % (len(train_loader.dataset)/elapsed))
    print('')
    print('')


loading checkpoint /content/drive/MyDrive/NLP_and_CV_Projects/CV/Model/YOWO/backup/yowo_jaad_manual_tune5_coord100_ob25_e4_16f_checkpoint.pth
Loaded model fscore:  0


In [78]:
print('epoch of checkpoint =',checkpoint['epoch'])

epoch of checkpoint = 4


# Test function

In [79]:
def test(epoch, frame_width=320.0, frame_height=240.0):
    """Evaluate ``model`` on ``testlist``, write one detection file per frame, return metrics.

    Works on notebook-level globals (model, region_loss, dataset settings,
    ``nms_thresh``, ``iou_thresh``, ``eps``, ``num_vid``).

    For every frame a text file named ``frame_idx[i]`` is written into a
    dataset-specific directory. Each line is
    ``class_id+1 det_conf cls_conf x1 y1 x2 y2``, with corner coordinates scaled
    by ``frame_width`` / ``frame_height``.

    Args:
        epoch: epoch number; used in the log message and, for datasets other
            than 'jaad', in the output directory name.
        frame_width: factor applied to x coordinates when writing detections
            (default 320.0, the previously hard-coded value).
        frame_height: factor applied to y coordinates when writing detections
            (default 240.0, the previously hard-coded value).

    Returns:
        ``(classification_accuracy, localization_recall, precision, recall, fscore)``.
        precision/recall/fscore are the running values after the last batch and
        are 0.0 when the loader yields no batches.
    """
    def truths_length(truths):
        # Count the leading ground-truth rows. A row whose second column is 0 is treated
        # as the start of padding (presumably zero-filled rows - TODO confirm).
        # Falls back to the full length instead of returning None / indexing past the end.
        for row_idx in range(len(truths)):
            if truths[row_idx][1] == 0:
                return row_idx
        return len(truths)

    test_loader = torch.utils.data.DataLoader(
    dataset.listDataset(basepath, testlist, dataset_use=dataset_use, shape=(init_width, init_height),
                   shuffle=False,
                   transform=transforms.Compose([
                       transforms.ToTensor()
                   ]), train=False),
    batch_size=batch_size, shuffle=False, **kwargs)

    num_classes = region_loss.num_classes
    anchors     = region_loss.anchors
    num_anchors = region_loss.num_anchors
    conf_thresh_valid = 0.005      # confidence threshold passed to get_region_boxes
    proposal_conf_thresh = 0.25    # a box counts as a proposal above this confidence
    total       = 0.0
    proposals   = 0.0
    correct     = 0.0
    # Initialised up front so the return statement is valid even for an empty loader.
    precision   = 0.0
    recall      = 0.0
    fscore      = 0.0

    correct_classification = 0.0
    total_detected = 0.0

    nbatch      = file_lines(testlist) // batch_size

    # The output directory does not depend on the frame, so resolve and create it once.
    if dataset_use == 'ucf101-24':
        detection_dir = os.path.join('ucf_detections', 'detections_'+str(epoch))
    elif dataset_use == 'jaad':
        detection_dir = os.path.join('jaad_detections_demo', 'detections'+str(num_vid))
    else:
        detection_dir = os.path.join(dataset_use+'_detections', 'detections_'+str(epoch))
    os.makedirs(detection_dir, exist_ok=True)

    logging('validation at epoch %d' % (epoch))
    model.eval()

    for batch_idx, (frame_idx, data, target) in enumerate(test_loader):
        if use_cuda:
            data = data.cuda()
        with torch.no_grad():
            output = model(data).data
            all_boxes = get_region_boxes(output, conf_thresh_valid, num_classes, anchors, num_anchors, 0, 1)
            # output (batch size, anchor*(4+1+num_classes), h, w) ; h, w: grid size
            for i in range(output.size(0)): # size(0): batch size
                boxes = nms(all_boxes[i], nms_thresh)
                detection_path = os.path.join(detection_dir, frame_idx[i])

                # boxes = n_boxes * [x_center, y_center, w, h, confidence score,
                #                    then (class confidence, class id) pairs]
                with open(detection_path, 'w') as f_detect:
                    for box in boxes:
                        x1 = round(float(box[0]-box[2]/2.0) * frame_width)
                        y1 = round(float(box[1]-box[3]/2.0) * frame_height)
                        x2 = round(float(box[0]+box[2]/2.0) * frame_width)
                        y2 = round(float(box[1]+box[3]/2.0) * frame_height)

                        det_conf = float(box[4])
                        for j in range((len(box)-5)//2):
                            # float()/int() accept both plain numbers and 0-dim tensors.
                            cls_conf = float(box[5+2*j])
                            cls_id = int(box[6+2*j])
                            # Write the class id belonging to this (confidence, id) pair;
                            # previously every line reused box[6], the first pair's class.
                            f_detect.write(str(cls_id+1) + ' ' + str(det_conf) + ' ' + str(cls_conf) + ' ' + str(x1) + ' ' + str(y1) + ' ' + str(x2) + ' ' + str(y2) + '\n')

                truths = target[i].view(-1, 5)
                num_gts = truths_length(truths)

                total = total + num_gts  # total ground truths

                for box in boxes: # count proposal boxes in that frame
                    if box[4] > proposal_conf_thresh:
                        proposals = proposals+1

                for gt_idx in range(num_gts):
                    gt_row = truths[gt_idx]
                    box_gt = [gt_row[1], gt_row[2], gt_row[3], gt_row[4], 1.0, 1.0, gt_row[0]]
                    best_iou = 0
                    best_j = -1
                    for j in range(len(boxes)): # find the best box (highest iou)
                        iou = bbox_iou(box_gt, boxes[j], x1y1x2y2=False)
                        if iou > best_iou:
                            best_j = j
                            best_iou = iou

                    if best_iou > iou_thresh:
                        total_detected += 1
                        if int(boxes[best_j][6]) == box_gt[6]:
                            # correct localization AND classification
                            correct_classification += 1
                            correct = correct+1

            precision = 1.0*correct/(proposals+eps) # how accurate is the prediction
            recall = 1.0*correct/(total+eps)  # correctly localized AND classified
            fscore = 2.0*precision*recall/(precision+recall+eps)
            logging("[%d/%d] precision: %f, recall: %f, fscore: %f" % (batch_idx, nbatch, precision, recall, fscore))

    classification_accuracy = 1.0 * correct_classification / (total_detected + eps)  # accuracy of the classification if detected
    localization_recall = 1.0 * total_detected / (total + eps) # correct localization

    print("Classification accuracy: %.3f" % classification_accuracy)
    print("Localization recall: %.3f" % localization_recall)

    return classification_accuracy, localization_recall, precision, recall, fscore

# Main: Print demo results

In [80]:
test(0)

2021-01-23 04:25:55 validation at epoch 0


  cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0,1))).data #softmax_class


2021-01-23 04:26:06 [0/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:12 [1/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:19 [2/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:24 [3/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:30 [4/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:37 [5/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:45 [6/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:26:53 [7/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:27:02 [8/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:27:10 [9/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:27:18 [10/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
2021-01-23 04:27:26 [11/19] precision: 0.000000, recall: 0.000000, fscore: 0.000000
20

(0.0, 0.0, 0.0, 0.0, 0.0)

In [None]:
'''
correct = iou > iou_thresh (location; iou_thresh = 0.3 in this notebook) AND class is correct
proposals = number of boxes with conf_score > 0.25

precision = correct/proposals: if I propose a box+class with confidence > 0.25, is it correct?
recall = correct/total_ground_truths
fscore = 2*precision*recall/(precision+recall)

Classification accuracy = classify_correct/total_detected, i.e. classification accuracy given that the ground truth was detected
Localization recall = total_detected/total_ground_truths
'''