# Input Variables

In [1]:
# Name of the YAML file (without extension) loaded from the `args/` directory.
arg = "test"
# Dataset name; selects the split-file naming scheme and the data sub-directory.
dataset = "BEST"
# Task name; used as a sub-directory under `splits/` and `features/`.
task = "origami"
# NOTE(review): `lap` is not read in any of the visible cells — presumably a run/repetition id; confirm.
lap = "1"
# Split index appended to the split-file names when dataset == "EPIC-Skills".
split = "1"
# GPU indices that get exported through CUDA_VISIBLE_DEVICES.
cuda = [0]

# Imports

In [2]:
import argparse
import yaml
import torch
import time
import os

from addict import Dict

from main.util import make_dirs, AverageMeter, data_augmentation, accuracy, sec2str
from main.loss import diversity_loss, disparity_loss
from main.dataset import SkillDataSet
from main.model import RAAN
from main.trainrun import Train_Runner, earlystopping

# Args

In [3]:
# yaml args
# Read the experiment configuration inside a context manager so the file
# handle is closed as soon as parsing finishes (a bare open() leaks it).
with open(os.path.join('args', arg + '.yaml')) as args_file:
    args = Dict(yaml.safe_load(args_file))

# show args
print('\n[Arguements]\n{0}\n\n'.format(args))

# device setting
# CUDA_VISIBLE_DEVICES takes a comma-separated string, so the GPU indices are
# converted to strings first. It is exported before the first CUDA query below.
cuda = list(map(str, cuda))
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(cuda)
# Fall back to the CPU when no CUDA device is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


[Arguements]
{'data_path': '../../local/dataset/skill', 'ckpt_path': './ckpt/models', 'writer_path': './ckpt/logs', 'num_samples': 400, 'input_size': 1024, 'output_size': 256, 'attention': True, 'num_filters': 3, 'diversity_loss': True, 'disparity_loss': 'v1', 'rank_aware_loss': 'v1', 'lambda_param': 0.1, 'm1': 1.0, 'm2': 0.05, 'm3': 0.15, 'temporal_model': False, 'num_layers': 9, 'num_f_maps': 256, 'epochs': 3, 'transform': True, 'batch_size': 8, 'lr': 0.0001, 'workers': 4, 'start_epoch': 1, 'print_freq': 1, 'eval_freq': 2, 'ckpt_freq': 2, 'earlystopping': 20}




# Paths

In [4]:
# path
# Each supported dataset names its split files differently.
if dataset == "BEST":
    train_txt = "train.txt"
    val_txt = "test.txt"
elif dataset == "EPIC-Skills":
    train_txt = "train_split" + split + ".txt"
    val_txt = "test_split" + split + ".txt"
else:
    # Fail here with a clear message instead of a NameError on `train_txt` below.
    raise ValueError(
        "Unknown dataset {!r}: expected 'BEST' or 'EPIC-Skills'".format(dataset))

# Split lists live under <data_path>/<dataset>/splits/<task>/,
# features under <data_path>/<dataset>/features/<task>.
train_list = os.path.join(args.data_path, dataset, 'splits', task, train_txt)
valid_list = os.path.join(args.data_path, dataset, 'splits', task, val_txt)
feature_path = os.path.join(args.data_path, dataset, 'features', task)

# paths dict
paths = {'train_list': train_list, 'valid_list': valid_list, 'feature_path': feature_path}


# Models

In [5]:
### attention branch ###
# Two branches ('pos' and 'neg') when both the disparity loss and the
# rank-aware loss are enabled, otherwise a single 'att' branch.
if args.disparity_loss and args.rank_aware_loss:
    branch_names = ('pos', 'neg')
else:
    branch_names = ('att',)

# One attention RAAN per branch, moved to the selected device.
model_attention = {
    name: RAAN(args, uniform=False, tcn=args.temporal_model).to(device)
    for name in branch_names
}

### uniform branch ###
# The uniform RAAN is only built when the disparity loss is enabled.
model_uniform = None
if args.disparity_loss:
    model_uniform = RAAN(args, uniform=True, tcn=False).to(device)

# models dict
models = {"attention": model_attention, "uniform": model_uniform}

# Dataloader

In [6]:
# Settings shared by the training and validation loaders.
loader_kwargs = dict(
    batch_size=args.batch_size,
    num_workers=args.workers,
    pin_memory=True)

# Training loader: reads paths["train_list"], shuffled.
train_set = SkillDataSet(paths["feature_path"], paths["train_list"], ftr_tmpl='{}_{}.npz')
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **loader_kwargs)

# Validation loader: reads paths["valid_list"], not shuffled.
valid_set = SkillDataSet(paths["feature_path"], paths["valid_list"], ftr_tmpl='{}_{}.npz')
valid_loader = torch.utils.data.DataLoader(valid_set, shuffle=False, **loader_kwargs)

# dataloaders dict
dataloaders = {"train": train_loader, "valid": valid_loader}

# Iterators used to pull single batches by hand in the cells below.
train_iterator = iter(dataloaders["train"])
valid_iterator = iter(dataloaders["valid"])

# Loss

In [7]:
# Margin ranking loss; the margin comes from args.m1.
ranking_loss = torch.nn.MarginRankingLoss(margin=args.m1)

# Criterions dict: the ranking loss object plus the two project loss functions.
criterions = dict(
    ranking=ranking_loss,
    disparity=disparity_loss,
    diversity=diversity_loss,
)

# Without uniform

In [80]:
# Pick the first attention branch and switch it to training mode.
first_branch = list(models["attention"])[0]
model = models["attention"][first_branch]
model.train()

RAAN(
  (att_net): ModuleList(
    (0): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
    (1): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
    (2): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
  )
  (fc): Sequential(
    (0): Linear(in_features=1024, out_features=1, bias=True)
    (1): Tanh()
  )
)

In [81]:
# Pull the next batch from the training iterator and move both videos to the device.
vid_pos, vid_neg, vid_list = next(train_iterator)
batch_size = vid_pos.size(0)
vid_pos_gpu, vid_neg_gpu = vid_pos.to(device), vid_neg.to(device)

# Target label: a vector of ones with one entry per batch element.
target = torch.ones(batch_size).to(device)

# Forward both videos through the same model; each call returns (score, attention).
score_pos, att_pos = model(vid_pos_gpu)
score_neg, att_neg = model(vid_neg_gpu)

# Average the scores over dim=1 (all filters).
score_pos, score_neg = score_pos.mean(dim=1), score_neg.mean(dim=1)

In [89]:
# Size of the first dimension of the current batch.
vid_pos.shape[0]

8

# With uniform

In [8]:
# Put every attention branch, then the uniform model, into training mode.
for attention_model in models["attention"].values():
    attention_model.train()
models["uniform"].train()

RAAN(
  (att_net): ModuleList(
    (0): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
  )
  (fc): Sequential(
    (0): Linear(in_features=1024, out_features=1, bias=True)
    (1): Tanh()
  )
)

In [19]:
# Pull the next batch from the training iterator and move both videos to the device.
vid_pos, vid_neg, vid_list = next(train_iterator)
batch_size = vid_pos.size(0)
vid_pos_gpu, vid_neg_gpu = vid_pos.to(device), vid_neg.to(device)

# Target label: a vector of ones with one entry per batch element.
target = torch.ones(batch_size).to(device)

In [20]:
# Per-branch results, keyed by attention branch name.
all_score_pos, all_score_neg = {}, {}
score_pos, score_neg = {}, {}
att_pos, att_neg = {}, {}

### attention model ###
for branch, attention_model in models["attention"].items():
    # each forward pass returns (scores, attention)
    all_score_pos[branch], att_pos[branch] = attention_model(vid_pos_gpu)
    all_score_neg[branch], att_neg[branch] = attention_model(vid_neg_gpu)
    # average over dim=1 (all filters)
    score_pos[branch] = all_score_pos[branch].mean(dim=1)
    score_neg[branch] = all_score_neg[branch].mean(dim=1)

### uniform model ###
# The second return value of the uniform model is discarded.
uniform_model = models["uniform"]
uniform_out_pos, _ = uniform_model(vid_pos_gpu)
uniform_out_neg, _ = uniform_model(vid_neg_gpu)
# average over dim=1 (all filters)
score_pos_uniform = uniform_out_pos.mean(dim=1)
score_neg_uniform = uniform_out_neg.mean(dim=1)

In [21]:
# Inspect the raw scores (before the mean over dim=1) that each attention branch produced for vid_pos_gpu.
all_score_pos

{'pos': tensor([[-0.0236, -0.0246, -0.0240],
         [-0.2008, -0.2023, -0.2010],
         [-0.2608, -0.2578, -0.2570],
         [-0.2276, -0.2294, -0.2276],
         [-0.2608, -0.2578, -0.2570],
         [-0.0808, -0.0794, -0.0774],
         [-0.0729, -0.0706, -0.0701],
         [-0.0808, -0.0794, -0.0774]], device='cuda:0', grad_fn=<StackBackward>),
 'neg': tensor([[ 0.0058,  0.0039,  0.0035],
         [ 0.1952,  0.1918,  0.1908],
         [ 0.0279,  0.0277,  0.0304],
         [ 0.2023,  0.1994,  0.2009],
         [ 0.0279,  0.0277,  0.0304],
         [-0.1775, -0.1749, -0.1778],
         [-0.2455, -0.2464, -0.2462],
         [-0.1775, -0.1749, -0.1778]], device='cuda:0', grad_fn=<StackBackward>)}

In [22]:
# Inspect the scores of each attention branch for vid_pos_gpu after averaging over dim=1.
score_pos

{'pos': tensor([-0.0240, -0.2014, -0.2585, -0.2282, -0.2585, -0.0792, -0.0712, -0.0792],
        device='cuda:0', grad_fn=<MeanBackward1>),
 'neg': tensor([ 0.0044,  0.1926,  0.0287,  0.2009,  0.0287, -0.1768, -0.2460, -0.1768],
        device='cuda:0', grad_fn=<MeanBackward1>)}

In [23]:
# Averaged positive scores of the first attention branch.
first_branch = list(models["attention"])[0]
score_pos[first_branch]

tensor([-0.0240, -0.2014, -0.2585, -0.2282, -0.2585, -0.0792, -0.0712, -0.0792],
       device='cuda:0', grad_fn=<MeanBackward1>)

In [24]:
# Zero tensor shaped like the first branch's averaged score vector.
# torch.zeros is called without a device argument here.
first_branch = list(models["attention"])[0]
score_shape = score_pos[first_branch].shape
score_pos_all = torch.zeros(score_shape)
score_pos_all

tensor([0., 0., 0., 0., 0., 0., 0., 0.])

In [25]:
# Zero vector with one entry per batch element, built directly from batch_size.
a = torch.zeros(batch_size)
a

tensor([0., 0., 0., 0., 0., 0., 0., 0.])