 # Variables

# Import 

In [1]:
import argparse
import glob
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import yaml
from addict import Dict
from torch import nn

from main.util import make_dirs, AverageMeter, data_augmentation, accuracy, sec2str
from main.loss import diversity_loss, disparity_loss
from main.dataset import SkillDataSet
from main.model import RAAN
from main.trainrun import Train_Runner, earlystopping

In [2]:
# Run options shared by the training and evaluation cells.
opts = Dict({
    "arg": "tcn",              # name of the YAML file under args/ (without extension)
    "dataset": "BEST",         # selects the BEST / EPIC-Skills branch when paths are built
    "task": "apply_eyeliner",
    "lap": "40",               # becomes the "lap_<n>" component of the save directory
    "split": "1",              # only read for the EPIC-Skills split files
    "cuda": [0],               # GPU ids exported through CUDA_VISIBLE_DEVICES
})

# Args

In [3]:
# yaml args
# Read the experiment configuration; the context manager guarantees that the
# file handle is closed once the YAML has been parsed.
with open(os.path.join('args', opts.arg + '.yaml')) as args_file:
    args = Dict(yaml.safe_load(args_file))

# Feature dimensionality for each supported `input_feature` value.
input_size = {"1d": 1024, "2d": 512}
args.input_size = input_size[args.input_feature]

# show args
print(('\n''[Arguements]\n''{0}\n''\n'.format(args)))

# device setting
# CUDA_VISIBLE_DEVICES expects a comma-separated string, so the ids are
# converted to strings before being joined.
opts.cuda = list(map(str, opts.cuda))
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(opts.cuda)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


[Arguements]
{'data_path': '../../local/dataset/skill', 'ckpt_path': './ckpt/models', 'result_path': './ckpt/results', 'writer_path': './ckpt/logs', 'demo_path': './demo/videos', 'video_sets': 'videos', 'input_feature': '1d', 'input_samples': 400, 'spatial_attention': False, 'spatial_attention_f_maps': 512, 'temporal_attention_samples': 400, 'temporal_attention_size': 256, 'temporal_attention_filters': 3, 'temporal_model': 'TCN', 'num_layers': 9, 'num_f_maps': 512, 'diversity_loss': True, 'disparity_loss': 'v1', 'rank_aware_loss': 'v1', 'lambda_param': 0.1, 'm1': 1.0, 'm2': 0.05, 'm3': 0.15, 'epochs': 2000, 'transform': True, 'batch_size': 64, 'lr': 0.0001, 'workers': 4, 'start_epoch': 1, 'print_freq': 5, 'eval_freq': 10, 'ckpt_freq': 5, 'earlystopping': 20, 'input_size': 1024}




# Paths

In [6]:
# Pick the split list files and the save directory for the selected dataset.
# BEST dataset
if opts.dataset == "BEST":
    train_txt = "train.txt"
    val_txt = "test.txt"
    savedir = os.path.join(opts.dataset, opts.task, opts.arg, "lap_"+opts.lap)

# Epic-skills dataset
elif opts.dataset == "EPIC-Skills":
    train_txt = "train_split" + opts.split + ".txt"
    val_txt = "test_split" + opts.split + ".txt"
    savedir = os.path.join(opts.dataset, opts.task, opts.arg, opts.split, "lap_"+opts.lap)

else:
    # Fail fast: without this branch an unknown dataset name only surfaces
    # below as a NameError on train_txt.
    raise ValueError(
        "Unsupported dataset: {!r} (expected 'BEST' or 'EPIC-Skills')".format(opts.dataset))

# paths dict
# make_dirs returns six values; the last one is not needed in this notebook.
train_list, valid_list, feature_path, writedir, ckptdir, _ = make_dirs(args, opts, train_txt, val_txt, savedir)
paths = {'train_list': train_list, 'valid_list': valid_list,
         'feature_path': feature_path, 'writedir': writedir, 'ckptdir': ckptdir}

In [7]:
paths["feature_path"]

'../../local/dataset/skill/BEST/features/apply_eyeliner'

# Models

In [8]:
### attention branch ###
# Two branches (positive / negative) when both the disparity loss and the
# rank-aware loss are enabled, otherwise a single branch.
if args.disparity_loss and args.rank_aware_loss:
    branch_names = ('p_att', 'n_att')
else:
    branch_names = ('att',)

# One independent RAAN instance per branch, moved to the selected device.
model_attention = {name: RAAN(args, uniform=False).to(device) for name in branch_names}

### uniform branch ###
# Only built when the disparity loss is enabled; None otherwise.
model_uniform = RAAN(args, uniform=True).to(device) if args.disparity_loss else None

# models dict
models = {"attention": model_attention, "uniform": model_uniform}
att_branches = models["attention"].keys()

# Dataloader

In [9]:
# Settings shared by the training and validation loaders.
loader_kwargs = dict(batch_size=args.batch_size,
                     num_workers=args.workers,
                     pin_memory=True)

# Training pairs, reshuffled on every pass.
train_set = SkillDataSet(paths["feature_path"], paths["train_list"],
                         input_feature=args.input_feature)
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **loader_kwargs)

# Validation pairs, kept in file order.
valid_set = SkillDataSet(paths["feature_path"], paths["valid_list"],
                         input_feature=args.input_feature)
valid_loader = torch.utils.data.DataLoader(valid_set, shuffle=False, **loader_kwargs)

# dataloaders dict
dataloaders = {"train": train_loader, "valid": valid_loader}

# Iterators used by the cells below to pull single batches by hand.
train_iterator = iter(dataloaders["train"])
valid_iterator = iter(dataloaders["valid"])

In [11]:
len(dataloaders["train"].dataset)

1520

# Loss

In [17]:
# Losses: the ranking loss uses margin m1 from the YAML configuration.
ranking_loss = torch.nn.MarginRankingLoss(margin=args.m1)

# Lookup table of every criterion, keyed by name.
criterions = dict(ranking=ranking_loss,
                  disparity=disparity_loss,
                  diversity=diversity_loss)

# ========
# 1. Train  
# ========

# 1.1 Without uniform

In [10]:
# Take the first attention branch and switch it to training mode.
first_branch = next(iter(att_branches))
model = models["attention"][first_branch]
model.train()

RAAN(
  (temporal_pooling): AdaptiveAvgPool1d(output_size=400)
  (att_net): ModuleList(
    (0): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
    (1): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
    (2): Sequential(
      (0): Linear(in_features=1024, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=1, bias=True)
      (3): Softmax(dim=1)
    )
  )
  (fc): Sequential(
    (0): Linear(in_features=1024, out_features=1, bias=True)
    (1): Tanh()
  )
)

In [None]:
# Pull one batch of video pairs and move both sides to the device.
# NOTE(review): presumably `vid_pos` is the higher-ranked video of each pair - confirm against SkillDataSet.
vid_pos, vid_neg, vid_list = next(train_iterator)
batch_size = vid_pos.size(0)
vid_pos_gpu, vid_neg_gpu = vid_pos.to(device), vid_neg.to(device)

# Target of +1 for every pair in the batch.
target = torch.ones(batch_size).to(device)

# The model returns a score tensor and an attention tensor for each input.
score_pos, att_pos = model(vid_pos_gpu)
score_neg, att_neg = model(vid_neg_gpu)

# Average the scores over dim 1 (the attention filters, per the original comment).
score_pos, score_neg = score_pos.mean(dim=1), score_neg.mean(dim=1)

In [18]:
# Pull the next training batch and move both videos of each pair to the device.
vid_pos, vid_neg, vid_list = next(train_iterator)
batch_size = vid_pos.size(0)
vid_pos_gpu, vid_neg_gpu = vid_pos.to(device), vid_neg.to(device)

In [None]:
# NOTE(review): this definition shadows the `data_augmentation` imported from main.util.
def data_augmentation(input_var1, input_var2, device):
    """Add the same small Gaussian noise to both inputs.

    A single noise tensor of shape ``(input_var1.size(1), input_var1.size(2))``
    is drawn with mean 0 and standard deviation 0.01. It is added to both
    inputs and broadcasts over the leading dimension, so every sample in the
    batch, and both inputs, receive identical noise.

    Args:
        input_var1: tensor with at least three dimensions; dims 1 and 2 define
            the noise shape.
        input_var2: tensor broadcast-compatible with the noise tensor.
        device: device the noise is moved to before it is added.

    Returns:
        Tuple ``(input_var1 + noise, input_var2 + noise)``.
    """
    noise_shape = (input_var1.size(1), input_var1.size(2))
    # torch.autograd.Variable has been a deprecated no-op wrapper since
    # PyTorch 0.4, so the noise is kept as a plain tensor.
    noise = torch.normal(torch.zeros(noise_shape), 0.01).to(device)
    return torch.add(input_var1, noise), torch.add(input_var2, noise)

# `self` does not exist at notebook top level; use the module-level `device`.
vid_pos_gpu, vid_neg_gpu = data_augmentation(vid_pos_gpu, vid_neg_gpu, device)

In [20]:
vid_pos_gpu.size()[2]

1024

# 1.2 With uniform

In [None]:
# Put every attention branch, then the uniform branch, into training mode.
# NOTE: models["uniform"] is None when args.disparity_loss is falsy, in which
# case the last line raises AttributeError.
for branch_model in models["attention"].values():
    branch_model.train()
models["uniform"].train()

In [None]:
# Pull the next training batch and move both videos of each pair to the device.
vid_pos, vid_neg, vid_list = next(train_iterator)
batch_size = vid_pos.size(0)
vid_pos_gpu, vid_neg_gpu = vid_pos.to(device), vid_neg.to(device)

# Target of +1 for every pair in the batch.
target = torch.ones(batch_size).to(device)

In [None]:
vid_pos_gpu.size()

# 1.3 Valid

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Show `a1` as a heat map.
# NOTE(review): `a1` is only assigned in a cell further down this notebook
# (from the attention CSV), so this cell fails on a fresh top-to-bottom run.
plt.figure(figsize=(20,2))
# The colour scale is clamped to [0, 1/200].
# NOTE(review): presumably 1/200 was picked to match the attention magnitudes - confirm.
plt.imshow(a1,interpolation='nearest',vmin=0,vmax=1/200,cmap='jet')
plt.colorbar()
plt.show()

# ===========
# 2. Evaluate  
# ===========

In [None]:
# attach model weight
# Resolve the checkpoint file: the best-scoring one, or the one saved at `epoch`.
# NOTE(review): `is_best` and `epoch` are not defined in any visible cell and
# must already exist in the kernel.
if is_best:
    ckpt_pattern = os.path.join(ckptdir, 'best_score*')
else:
    ckpt_pattern = os.path.join(ckptdir, 'epoch_' + str(epoch).zfill(4) + '*')

ckpt_matches = glob.glob(ckpt_pattern)
if not ckpt_matches:
    # Clearer than the IndexError that indexing an empty glob result raises.
    raise FileNotFoundError("No checkpoint matches " + ckpt_pattern)
weight_path = ckpt_matches[0]

# Deserialize the checkpoint once and reuse it. map_location lets a checkpoint
# saved on GPU be loaded on a CPU-only machine.
checkpoint = torch.load(weight_path, map_location=device)
for k in att_branches:
    models["attention"][k].load_state_dict(checkpoint["state_dict_" + k])

print(checkpoint["prec_score"])
print(checkpoint["epoch"])

In [None]:
# Score one validation batch with every attention branch and accumulate the
# results into `score_list` / `att_list`.
# NOTE(review): `score_list` and `att_list` are not created in any visible
# cell; they must already exist in the kernel with the keys used below.
# iterator
vid_pos, vid_neg, vid_list = next(valid_iterator)
batch_size = vid_pos.size(0)
vid_pos_gpu = vid_pos.to(device)
vid_neg_gpu = vid_neg.to(device)
# Record the names of the videos in this batch.
score_list["pos"].extend(vid_list["pos"]) 
score_list["neg"].extend(vid_list["neg"])

# calc score, attention
# Per-branch results, keyed by branch name.
all_score_pos, all_score_neg, score_pos, score_neg, att_pos, att_neg = {}, {}, {}, {}, {}, {}
# Final scores are the sum of the per-branch scores.
final_score_pos = torch.zeros(batch_size).to(device)
final_score_neg = torch.zeros(batch_size).to(device)
for k in att_branches:
    all_score_pos[k], att_pos[k] = models["attention"][k](vid_pos_gpu)
    all_score_neg[k], att_neg[k] = models["attention"][k](vid_neg_gpu)
    # Average scores over dim 1 and attention over dim 2.
    score_pos[k] = all_score_pos[k].mean(dim=1)
    score_neg[k] = all_score_neg[k].mean(dim=1)
    att_pos[k] = att_pos[k].mean(dim=2)
    att_neg[k] = att_neg[k].mean(dim=2)
    # First batch initialises the attention arrays; later batches are
    # appended along axis 0.
    # NOTE(review): squeezing axis 2 assumes a size-1 axis remains there after the mean - confirm.
    if att_list["pos_"+k] is None:
        att_list["pos_"+k] = np.squeeze(att_pos[k].cpu().detach().numpy(), 2)
        att_list["neg_"+k] = np.squeeze(att_neg[k].cpu().detach().numpy(), 2)
    else:
        att_list["pos_"+k] = np.concatenate((att_list["pos_"+k], np.squeeze(att_pos[k].cpu().detach().numpy(), 2)), axis=0)
        att_list["neg_"+k] = np.concatenate((att_list["neg_"+k], np.squeeze(att_neg[k].cpu().detach().numpy(), 2)), axis=0)
    # `.data` adds the values without tracking gradients.
    final_score_pos += score_pos[k].data
    final_score_neg += score_neg[k].data
score_list["pos_score"].extend(final_score_pos.cpu().numpy())
score_list["neg_score"].extend(final_score_neg.cpu().numpy())

In [None]:
# A pair counts as correct when the positive video scores strictly higher.
correct_list = [pos > neg_score_list[i] for i, pos in enumerate(pos_score_list)]

In [None]:
# Collect per-pair evaluation results into a table and save it as CSV.
eval_columns = {
    'vid_pos': score_list["pos"],
    'vid_pos_score': score_list["pos_score"],
    'vid_neg': score_list["neg"],
    'vid_neg_score': score_list["neg_score"],
    'correct': correct_list,
}
eval_df = pd.DataFrame(eval_columns)
# Use 1-based row labels.
eval_df.index = np.arange(1, len(pos_score_list) + 1)
eval_df.to_csv("test.csv")

In [None]:
# Dump the attention weights of every entry to its own CSV file.
for k in att_list:
    # Keys are built as "pos_<branch>" / "neg_<branch>", so the first three
    # characters select the matching list of video names.
    att_df = pd.DataFrame(att_list[k], index=score_list[k[0:3]])
    # 1-based column labels sized from the data instead of a fixed 400.
    att_df.columns = np.arange(1, att_df.shape[1] + 1)
    # One file per key; a constant file name would leave only the last
    # entry on disk.
    att_df.to_csv(k + ".csv")

# Test

In [149]:
class Spatial_Attention(nn.Module):
    """Spatial attention over per-frame feature maps.

    Args:
        f_maps: number of output channels of the convolutions.
        in_channels: number of channels of the input feature maps
            (defaults to 512, the previously hard-coded value).
    """

    def __init__(self, f_maps, in_channels=512):
        super(Spatial_Attention, self).__init__()

        # 3x3 convolution, stride 1, padding 1: keeps the spatial size.
        self.conv1 = nn.Conv2d(in_channels, f_maps, 3, 1, 1)
        self.attention_layer = nn.Sequential(
            nn.Conv2d(in_channels, f_maps, 3, 1, 1),
            nn.InstanceNorm2d(f_maps),
            nn.ReLU()
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Apply spatial attention.

        Args:
            x: tensor of shape (batch, samples, channels, height, width).

        Returns:
            Tuple ``(attention, output)`` where ``attention`` has shape
            (batch, samples, 1, height, width) and sums to 1 over the spatial
            positions, and ``output`` has shape
            (batch, samples, f_maps, height, width).
        """
        batch, samples, channels, height, width = x.size()

        # Fold batch and samples together so that 2D convolutions apply.
        frames = x.view(-1, channels, height, width)

        # NOTE(review): the attention map is derived from the channel-mean of
        # the raw input. `attention_layer` was evaluated here but its result
        # was never used, so that dead computation has been dropped; confirm
        # whether the attention was meant to come from `attention_layer`.
        spatial_logits = torch.mean(frames, dim=1).view(frames.size(0), -1)
        attention = self.softmax(spatial_logits).view(batch, samples, 1, -1, width)

        features = self.conv1(frames).view(batch, samples, -1, height, width)
        output = F.relu(features * attention)

        return attention, output

att = Spatial_Attention(256)

In [150]:
# batch * sample * c * h * w
in_ten = torch.randn((5, 40, 512, 2, 2))

In [151]:
attention, out_ten = att(in_ten)

In [152]:
attention.size()

torch.Size([5, 40, 1, 2, 2])

In [153]:
out_ten.size()

torch.Size([5, 40, 256, 2, 2])

In [8]:
# Element-wise "greater than" between two random scalars.
a = torch.randn(1)
b = torch.randn(1)
# Compare a against b; comparing a with itself is always False and left b unused.
c = torch.gt(a, b)

In [3]:
vid_paths = glob.glob(os.path.join("data/BEST/vid_frames", "origami", '*'))

In [5]:
vid_name = [os.path.splitext(os.path.basename(path))[0] for path in vid_paths]

In [9]:
print(sum(os.path.isfile(os.path.join(vid_paths[0], name)) for name in os.listdir(vid_paths[0])))

15981


In [10]:
vid_paths[0]

'data/BEST/vid_frames/origami/LLEyDoh-JRQ'

In [94]:
# Pick `frame_samples` frame numbers spread evenly over `frame_count` frames,
# each shifted back by half a stride.
frame_count, frame_samples = 24, 5
rate = frame_count / frame_samples   # stride between samples
center = rate // 2                   # half-stride offset
sample_list = [int((j * rate) // 1 - center) for j in range(1, frame_samples + 1)]

In [95]:
sample_list

[2, 7, 12, 17, 22]

In [198]:
# Get sample frames list
def uniform_num(count, sample):
    """Return `sample` 1-based frame numbers spread evenly over `count` frames.

    The j-th entry (j = 1..sample) is ``floor(j * count / sample)``, so the
    last entry equals `count` when `sample` divides into it.

    Args:
        count: total number of frames.
        sample: number of frames to pick.

    Returns:
        List of `sample` ints; empty when `sample` is 0 or negative.
    """
    # Zero samples requested: nothing to pick, and it avoids a division by zero.
    if sample == 0:
        return []
    rate = count / sample
    return [int((j * rate) // 1) for j in range(1, sample + 1)]

# NOTE(review): this rebinds the built-in name `list`. Cells executed afterwards
# that call list(...) fail with "TypeError: 'list' object is not callable"
# until the name is deleted or the kernel is restarted.
list = uniform_num(40, 10)

In [199]:
list

[4, 8, 12, 16, 20, 24, 28, 32, 36, 40]

In [120]:
# Location of the saved attention CSV for one task / arg / lap combination.
result_path = 'ckpt/results/BEST'
task, arg, lap = 'apply_eyeliner', 'tcn', '1'
lap_dir = "lap_" + lap
data_path = os.path.join(result_path, task, arg, lap_dir, "best_epoch_pos_p_att.csv")

In [147]:
import pandas as pd
video_info_df = pd.read_csv(data_path)

In [153]:
df = video_info_df[video_info_df["Unnamed: 0"] == "494jOGutTwU"][0:1]
df

Unnamed: 0.1,Unnamed: 0,1,2,3,4,5,6,7,8,9,...,391,392,393,394,395,396,397,398,399,400
0,494jOGutTwU,0.002475,0.002459,0.002471,0.002421,0.002411,0.002413,0.002464,0.002436,0.002408,...,0.002499,0.002536,0.002508,0.002524,0.002522,0.002584,0.002571,0.002576,0.002595,0.002602


In [151]:
# Select the columns labelled "1".."400" as a 2D array. A list comprehension
# is used so the cell still works when the name `list` has been rebound
# elsewhere in the notebook.
a1 = df[[str(i) for i in range(1, 401)]].values

In [152]:
a1.shape

(1, 400)

In [5]:
# Get sample frames list
def shrink_uniform_num(count, sample):
    """Return `sample` 1-based frame numbers spread evenly over `count` frames.

    The j-th entry (j = 1..sample) is ``floor(j * count / sample)``.

    Args:
        count: total number of frames.
        sample: number of frames to pick.

    Returns:
        List of `sample` ints; empty when `sample` is 0 or negative.
    """
    # Zero samples requested: nothing to pick, and it avoids a division by zero.
    if sample == 0:
        return []
    rate = count / sample
    return [int((j * rate) // 1) for j in range(1, sample + 1)]

In [20]:
# Get sample frames list
def expand_uniform_num(count, sample):
    """Stretch `count` frames to `sample` entries by repeating frame numbers.

    Every frame 1..count appears ``sample // count`` times. The remaining
    ``sample % count`` entries are handed to the frames chosen by
    `shrink_uniform_num`, each of which appears once more.

    Args:
        count: number of available frames.
        sample: desired length of the result.

    Returns:
        Non-decreasing list of 1-based frame numbers.
    """
    rate = sample // count
    rest = sample % count
    # When sample is an exact multiple of count there are no extra slots to
    # distribute, and the helper must not be asked for zero samples.
    boosted = set(shrink_uniform_num(count, rest)) if rest else set()

    sample_list = []
    for j in range(1, count + 1):
        repeats = rate + 1 if j in boosted else rate
        sample_list.extend([j] * repeats)

    return sample_list

alist = expand_uniform_num(12, 20)

[1, 3, 4, 6, 7, 9, 10, 12]

In [190]:
# Get sample frames list
# Stretch `frame_count` frames to `frame_samples` entries: every frame appears
# `rate` times and the first `rest` frames appear once more.
frame_count = 300
frame_samples = 400
rate, rest = divmod(frame_samples, frame_count)
sample_list = []
for j in range(1, frame_count + 1):
    repeats = rate + 1 if j <= rest else rate
    sample_list.extend([j] * repeats)
        

In [191]:
print(rate)
print(rest)

1
100


In [189]:
print(sample_list)

[1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87, 88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103, 104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119, 120, 120, 121, 121,

In [179]:
print(len(sample_list))

40


In [3]:
import os


In [4]:
def split_arg_and_lap(result_dir):
    """Split '<root>/<arg>/lap_<n>/<task>' into ``(arg, n)``.

    Args:
        result_dir: path whose last three components are the arg directory,
            the lap directory and the task directory.

    Returns:
        Tuple of the arg directory name and the lap directory name with its
        'lap_' prefix removed.
    """
    lap_dir = os.path.dirname(result_dir)
    arg_name = os.path.basename(os.path.dirname(lap_dir))
    lap_name = os.path.basename(lap_dir)
    prefix = 'lap_'
    if lap_name.startswith(prefix):
        lap_name = lap_name[len(prefix):]
    return arg_name, lap_name


# Result directories laid out as demo/results/<arg>/lap_<n>/apply_eyeliner.
args_paths = glob.glob('{}/*/{}*/apply_eyeliner'.format('demo/results', 'lap_'))
# The arg and lap names sit two and one levels above the task directory.
# Indexing the basename picked a character of the task name instead.
arglist = [split_arg_and_lap(x)[0] for x in args_paths]
laplist = [split_arg_and_lap(x)[1] for x in args_paths]

In [5]:
args_paths

['demo/results/origin/lap_1/apply_eyeliner']

In [14]:
for i in args_paths:
    print(os.path.basename(os.path.dirname(i)))

origin
