In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import wandb
import os, sys
import glob
import numpy as np
import torch
import pandas as pd
import random
import torch.nn as nn
import pickle
import torch.nn.functional as F
import matplotlib.pyplot as plt
import multiprocessing as mp
from time import time

In [3]:
from mstcn_model import *
from utility.adaptive_data_loader import Breakfast, collate_fn_override
from utility.adaptive_data_loader import BreakfastWithWeights, collate_fn_override_wtd
from utils import calculate_mof, dotdict

In [4]:
# SECURITY: never hard-code the Weights & Biases API key in the notebook. Export WANDB_API_KEY
# in the environment before starting Jupyter (wandb.login() picks it up from there, and prompts
# interactively when it is missing). The key that used to be written on this line must be
# treated as leaked and revoked.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdipika_singhania[0m (use `wandb login --relogin` to force relogin)


True

In [5]:
seed = 42


def set_seed():
    """Seed Python, NumPy and PyTorch (CPU and CUDA) with the module-level `seed`.

    Also turns off the cuDNN autotuner and requests deterministic cuDNN kernels.
    """
    # Seed every random number generator used in this notebook.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    # Ensure deterministic behavior of cuDNN.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed()

# Device configuration: only GPU index 7 is made visible to this process, so it is addressed
# as "cuda:0" below; fall back to the CPU when CUDA is unavailable.
os.environ['CUDA_VISIBLE_DEVICES']='7'
# os.environ['CUDA_LAUNCH_BLOCKING']='6'
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [6]:
# Experiment configuration. All paths are absolute paths on the training machine; the
# `{}` placeholders in the split-file templates are filled in right after the literal.
config = dotdict(
    epochs=500,
    # Number of action classes; sizes the per-class tensors built in the E-step helpers.
    num_class=48,
    # Batch size of the train and test loaders (the expectation loader uses its own).
    batch_size=8,
    learning_rate=5e-4,
    weight_decay=0,
    dataset="Breakfast",
    architecture="unet-ensemble",
    features_file_name="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/features/",
    chunk_size=1,
    # Forwarded to the collate functions of every DataLoader.
    max_frames_per_video=1200,
    feature_size=2048,
    ground_truth_files_dir="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/groundTruth/",
    # CSV with `label_id` and `label_name` columns, used to build the label maps.
    label_id_csv="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/mapping.csv",
    # gamma / step_size are not referenced by any cell shown in this notebook.
    gamma=0.1,
    step_size=500,
    # Cross-validation split index substituted into the split-file templates below.
    split=1,
#     output_dir="/mnt/data/ar-datasets/dipika/breakfast/ms_tcn/data/breakfast/results/unsuper-finetune-split2-0.05-data-llr/",
    # Root directory for checkpoints and posterior weights; a `split<N>/` suffix is appended later.
    output_dir="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast//results/em-random-select4/",
    project_name="breakfast-split-1",
    train_split_file="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/splits/train.split{}.bundle",
    test_split_file="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/splits/test.split{}.bundle",
    all_files="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/splits/all_files.txt",
    cutoff=8,
    # Substituted (together with `split`) into the semi_supervised_split template.
    data_per = 0.2,
    budget=40,
    semi_supervised_split="/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/semi_supervised/train.split{}_amt{}.bundle")

# Resolve the `{}` placeholders of the split-file templates for the chosen split.
config.train_split_file = config.train_split_file.format(config.split)
config.semi_supervised_split = config.semi_supervised_split.format(config.split, config.data_per)
config.test_split_file = config.test_split_file.format(config.split)

# Final layout: <output_dir>/split<N>/posterior_weights/.
# os.makedirs(..., exist_ok=True) creates any missing parent directory and tolerates
# directories that already exist, which replaces the chain of exists()/mkdir() checks
# (those fail when a parent is missing and are racy between the check and the mkdir).
config.output_dir = config.output_dir + f"split{config.split}" + "/"
os.makedirs(os.path.join(config.output_dir, "posterior_weights"), exist_ok=True)
print(config)

{'epochs': 500, 'num_class': 48, 'batch_size': 8, 'learning_rate': 0.0005, 'weight_decay': 0, 'dataset': 'Breakfast', 'architecture': 'unet-ensemble', 'features_file_name': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/features/', 'chunk_size': 1, 'max_frames_per_video': 1200, 'feature_size': 2048, 'ground_truth_files_dir': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/groundTruth/', 'label_id_csv': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/mapping.csv', 'gamma': 0.1, 'step_size': 500, 'split': 1, 'output_dir': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast//results/em-random-select4/split1/', 'project_name': 'breakfast-split-1', 'train_split_file': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/splits/train.split1.bundle', 'test_split_file': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/splits/test.split1.bundle', 'all_files': '/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast/splits/all_files.txt', 'cutoff': 8, 'data_per': 0.2, 'budget': 40, 'semi_supervised_split':

In [7]:
# Training fold. Its batches are consumed below with a sixth entry (item[5]) that the
# training loop treats as the posterior weights.
traindataset = BreakfastWithWeights(config, fold='train', fold_file_name=config.train_split_file)
# Test fold, used by validate().
testdataset = Breakfast(config, fold='test', fold_file_name=config.test_split_file)

Number of videos logged in train fold is 1460
Number of videos not found in train fold is 0
Number of videos logged in test fold is 252
Number of videos not found in test fold is 0


In [8]:
def _init_fn(worker_id):
    np.random.seed(int(seed))
# Shuffled mini-batches of the training fold for the optimisation steps.
trainloader = torch.utils.data.DataLoader(
    dataset=traindataset,
    batch_size=config.batch_size,
    shuffle=True,
    pin_memory=True,
    num_workers=4,
    collate_fn=lambda x: collate_fn_override_wtd(x, config.max_frames_per_video),
    worker_init_fn=_init_fn,
)

# Test fold in a fixed order, used for validation.
testloader = torch.utils.data.DataLoader(
    dataset=testdataset,
    batch_size=config.batch_size,
    shuffle=False,
    pin_memory=True,
    num_workers=4,
    collate_fn=lambda x: collate_fn_override(x, config.max_frames_per_video),
    worker_init_fn=_init_fn,
)

# Training fold again, in batches of 20, for the expectation step.
trainloder_expectation = torch.utils.data.DataLoader(
    dataset=traindataset,
    batch_size=20,
    shuffle=True,
    pin_memory=True,
    num_workers=4,
    collate_fn=lambda x: collate_fn_override_wtd(x, config.max_frames_per_video),
    worker_init_fn=_init_fn,
)

In [9]:
# Two-way lookup between numeric label ids and label names, read from the mapping CSV.
df = pd.read_csv(config.label_id_csv)
label_id_to_label_name = {row.label_id: row.label_name for _, row in df.iterrows()}
label_name_to_label_id_dict = {row.label_name: row.label_id for _, row in df.iterrows()}

In [10]:
# selected_frames_dict = pickle.load(open("data/breakfast_len_assum_annotations.pkl", 'rb'))
# loaded_vidid_selected_frames
# NOTE: pickle can execute arbitrary code while loading; only load trusted files.
# `with` guarantees the file handle is closed (the bare open() call leaked it).
with open("data/breakfast_boundary_annotations.pkl", "rb") as boundary_file:
    boundary_frames_dict = pickle.load(boundary_file)
# Total number of annotated boundaries over all videos (normaliser of the boundary error).
num_boundary = sum(len(frames) for frames in boundary_frames_dict.values())
# video_id_boundary_frames

In [11]:
# Sparse annotations: "<video id>.txt" -> sequence of (frame index, label name) entries.
# NOTE: pickle can execute arbitrary code while loading; only load trusted files.
# `with` guarantees the file handle is closed (the bare open() call leaked it).
with open("data/breakfast_random4frame_selection.pkl", "rb") as selection_file:
    selected_frames_dict = pickle.load(selection_file)
# print(selected_frames_dict)

In [12]:
# NOTE: pickle can execute arbitrary code while loading; only load trusted files.
# `with` guarantees the file handles are closed (the bare open() calls leaked them).
with open("data/breakfast_meanvar_actions.pkl", "rb") as meanvar_file:
    loaded_mean_var_actions = pickle.load(meanvar_file)
# Per-class length table, indexed as mat_poisson[label name][length].
with open("data/breakfast_possion_class_dict.pkl", "rb") as poisson_file:
    mat_poisson = pickle.load(poisson_file)

def get_possion_prob(minlen, maxlen, cur_class):
    """Return the length-table entries of one class for lengths in [minlen, maxlen).

    Args:
        minlen: first length (inclusive) to read from the table.
        maxlen: last length (exclusive) to read from the table.
        cur_class: numeric label id; translated to its name to index `mat_poisson`.

    Returns:
        A tensor holding the `mat_poisson` slice. Callers add these values to summed
        log-probabilities, i.e. they are used as log-domain priors.
    """
    class_name = label_id_to_label_name[cur_class]
    length_table = mat_poisson[class_name]
    return torch.tensor(length_table[minlen:maxlen])

def get_poisson_logcdf(minlen, cur_class):
    """Return log(sum(exp(table[minlen:])) + 1e-20) for the length table of one class.

    In other words: the log of the total mass the table assigns to lengths >= minlen,
    with a tiny constant added so the logarithm never receives zero.
    """
    tail_values = mat_poisson[label_id_to_label_name[cur_class]][minlen:]
    tail_mass = np.sum(np.exp(tail_values))
    return np.log(tail_mass + 1e-20)

def get_possion_prob_for_all_class(minlen, maxlen):
    """Return the length-table slice [minlen, maxlen) of every class, stacked class-last.

    Returns:
        A tensor whose last dimension runs over the `config.num_class` classes and whose
        first dimension runs over the requested lengths.
    """
    per_class_slices = [
        torch.tensor(mat_poisson[label_id_to_label_name[class_id]][minlen:maxlen])
        for class_id in range(config.num_class)
    ]
    return torch.stack(per_class_slices, dim=-1)

In [13]:
def validate(model, dataloader, best_val_acc=None):
    """Compute frame-wise accuracy of `model` over `dataloader` and checkpoint the model.

    The last-stage prediction is arg-maxed over dim 1 and compared with the labels on the
    valid (non-padded) frames only. The current weights are always written to
    "ms-tcn-emmax-last-model.wt"; they are additionally written to
    "ms-tcn-emmax-best-model.wt" when `best_val_acc` is given and has been beaten.

    Args:
        model: network called as `model(features, mask)`, returning `(middle_pred, predictions)`.
        dataloader: iterable of batches with features at [0], frame counts at [1] and
            frame-wise labels at [2].
        best_val_acc: best accuracy so far, or None to skip best-model tracking.

    Returns:
        Tuple `(val_acc, best_val_acc)`; the second entry stays None when None was passed.
        The model is switched back to train mode before returning.
    """
    model.eval()
    print("Calculating Validation Data Accuracy")
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        # Iterate the loader that was passed in (previously the global `testloader` was
        # used here, so the `dataloader` argument was silently ignored).
        for item in dataloader:
            item_0 = item[0].to(device)
            item_1 = item[1].to(device)
            item_2 = item[2].to(device)
            # True for real frames, False for padding beyond each video's frame count.
            src_mask = torch.arange(item_2.shape[1], device=item_2.device)[None, :] < item_1[:, None]
            src_mask_mse = src_mask.unsqueeze(1).to(torch.float32).to(device)
            middle_pred, predictions = model(item_0, src_mask_mse)
            pred = torch.argmax(predictions[-1], dim=1)
            correct += float(torch.sum((pred == item_2) * src_mask).item())
            total += float(torch.sum(src_mask).item())
    # An empty loader yields 0% accuracy instead of a ZeroDivisionError.
    val_acc = correct * 100.0 / total if total > 0 else 0.0
    if best_val_acc is not None and val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), config.output_dir + "ms-tcn-emmax-best-model.wt")
    torch.save(model.state_dict(), config.output_dir + "ms-tcn-emmax-last-model.wt")
    print(f"Validation:: Probability Accuracy {val_acc}")
    _ = model.train()
    return val_acc, best_val_acc

In [14]:
def prob_vals_per_segment(selected_frames, cur_vid_feat, labels, first_ele_flag, last_ele_flag, vidid, gt_labels):
    """Compute per-frame posterior class weights for one video from its sparse labelled frames.

    For every pair of consecutive labelled frames (cur_ele, next_ele) a joint table over
    (b1, b2, c) is scored in the log domain, where frames up to b1 take the left label,
    frames after b2 take the right label and the frames in between take class c. The table
    is normalised with one softmax over all entries and marginalised into per-frame weights.

    This function runs in multiprocessing pool workers, so it must not start a debugger
    or otherwise wait for input.

    Args:
        selected_frames: list of labelled frame indices of the video.
        cur_vid_feat: per-frame class probabilities, indexed [frame, class].
        labels: list of label ids, one per entry of `selected_frames`.
        first_ele_flag: unused.
        last_ele_flag: unused.
        vidid: video id, returned unchanged and used in the warning message.
        gt_labels: ground-truth frame labels; only the first `num_frames` entries are read.

    Returns:
        Tuple `(vidid, per_frame_weights, [correct, num_frames, boundary_error])` where
        `per_frame_weights` is a (num_frames, config.num_class) tensor, `correct` counts
        frames whose arg-max weight equals the ground truth, and `boundary_error` is
        always 0 (the boundary evaluation is disabled).
    """
    LOW_VAL = -10000000  # log-domain stand-in for "impossible" before the softmax
    num_frames = len(cur_vid_feat)
    log_probs = torch.log(cur_vid_feat + 1e-8)
    # Prefix sums: the summed log-probability of any frame interval is a difference of two rows.
    cumsum_feat = torch.cumsum(log_probs, dim=0)
    prev_boundary = 0
    per_frame_weights = torch.zeros((num_frames, config.num_class))
    boundary_error = 0
    # Make frame 0 and the last frame anchors. Padded anchors get the last class id
    # (config.num_class-1) — presumably the background class; TODO confirm.
    # `labels` must be padded before `selected_frames`, because the tests read the unpadded list.
    labels = [config.num_class-1] + labels if selected_frames[0] != 0 else labels
    labels = labels + [config.num_class-1] if selected_frames[-1] != num_frames-1 else labels
    selected_frames = [0] + selected_frames if selected_frames[0] != 0 else selected_frames
    selected_frames = selected_frames + [num_frames-1] if selected_frames[-1] != num_frames-1 else selected_frames

    for i, cur_ele in enumerate(selected_frames[:-1]):
        next_ele = selected_frames[i + 1]
        label_cur_ele = labels[i]
        label_next_ele = labels[i + 1]
        if cur_ele == next_ele-1:
            # Adjacent anchors: nothing to infer in between.
            per_frame_weights[cur_ele, label_cur_ele] = 1.0
            if label_cur_ele != label_next_ele:
                prev_boundary = cur_ele
            continue

        seg_len = next_ele - cur_ele
        mat_b1_b2_c_prob = LOW_VAL * torch.ones((seg_len, seg_len, config.num_class), dtype=cumsum_feat.dtype)
        b1_prior = get_possion_prob(cur_ele-prev_boundary, next_ele-prev_boundary, label_cur_ele)

        # find dummy label where we will keep the diagonal (b1=b2) probabilities, later we will distribute among
        # rest of the classes after the softmax by dividing by (num_class - 2)
        dummy_label = 0
        while True:
            if dummy_label != label_cur_ele and dummy_label != label_next_ele:
                break
            else:
                dummy_label += 1

        # Loop-invariant: prefix sum just before the segment start (0 when the segment starts at frame 0).
        strt_index = cumsum_feat[cur_ele - 1, label_cur_ele] if cur_ele > 0 else 0
        for b1 in range(cur_ele, next_ele - 1):
            left_sum = (cumsum_feat[b1, label_cur_ele] - strt_index)
            right_sum = cumsum_feat[next_ele-1, label_next_ele] - cumsum_feat[b1+1:next_ele, label_next_ele] # mid_seg_len
            mid_sum = (cumsum_feat[b1+1:next_ele, :] - cumsum_feat[b1, :])  # mid_seg_len
            b2_prior = get_possion_prob_for_all_class(1, next_ele-b1)  # mid_seg_len x num_class

            mat_b1_b2_c_prob[b1-cur_ele, b1+1-cur_ele:next_ele-cur_ele] = (left_sum + right_sum[:,None] + mid_sum) \
                                                                            + b1_prior[b1-cur_ele] + b2_prior
            # when mid segment is absent but right and left is not the same
            # we assign the probability to a dummy label for now and then later
            # re-distribute among other classes after the softmax
            if label_cur_ele != label_next_ele:
                rightsum_wo_midseg = cumsum_feat[next_ele-1, label_next_ele] - cumsum_feat[b1, label_next_ele]
                mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, dummy_label] = left_sum + rightsum_wo_midseg + b1_prior[b1-cur_ele]

        # when mid segment is absent b1 can also be next_ele-1
        b1 = next_ele - 1
        if label_cur_ele != label_next_ele:
            left_sum = (cumsum_feat[b1, label_cur_ele] - strt_index)
            mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, dummy_label] = left_sum + b1_prior[b1-cur_ele]
        else:
            # returns prob that the left class length >= seg len
            b1_prior_ = get_poisson_logcdf(next_ele - prev_boundary, label_cur_ele)
            # NOTE(review): `left_sum` here still holds the value of the last loop iteration
            # (b1 = next_ele-2), not of b1 = next_ele-1 as in the branch above — confirm
            # whether that is intended. Behaviour is kept as it was.
            mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, dummy_label] = left_sum + b1_prior_

        # The anchor labels may not appear as the middle class; then normalise the whole table jointly.
        mat_b1_b2_c_prob[:, :, label_cur_ele] = LOW_VAL
        mat_b1_b2_c_prob[:, :, label_next_ele] = LOW_VAL
        mat_b1_b2_c_prob = torch.softmax(mat_b1_b2_c_prob.flatten(), dim=0).reshape((seg_len, seg_len, config.num_class))

        # re-distribute the dummy class probability among the left-over classes
        left_over_classes = config.num_class - 2 + (label_cur_ele==label_next_ele)
        for b1 in range(cur_ele, next_ele):
            assigned_prob = mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, dummy_label]
            mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, :] = assigned_prob/left_over_classes
            mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, label_cur_ele] = 0
            mat_b1_b2_c_prob[b1-cur_ele, b1-cur_ele, label_next_ele] = 0

        marginal_b1 = torch.sum(mat_b1_b2_c_prob, axis=(1,2))
        cumm_b1_prob = torch.cumsum(marginal_b1, dim=0)
        cumm_b1_c_prob = torch.cumsum(torch.sum(mat_b1_b2_c_prob, dim=1), dim=0)
        cumm_b2_c_prob = torch.cumsum(torch.sum(mat_b1_b2_c_prob, dim=0), dim=0)

        per_frame_weights[cur_ele, label_cur_ele] = 1.0
        per_frame_weights[cur_ele+1:next_ele, :] = cumm_b1_c_prob[:-1] - cumm_b2_c_prob[:-1]
        per_frame_weights[cur_ele+1:next_ele, label_cur_ele] = 1 - cumm_b1_prob[:-1]
        per_frame_weights[cur_ele+1:next_ele, label_next_ele] = 0
        remaining_probability = 1 - torch.sum(per_frame_weights[cur_ele+1:next_ele, :], dim=-1)
        # we use "+=" in the next line because left and right label might be the same
        # in that case using "=" would just overwrite the previous probability
        per_frame_weights[cur_ele+1:next_ele, label_next_ele] += remaining_probability

        # Expected position of b2, used as the start of the next segment's length prior.
        expected_boundary = round(torch.sum(torch.sum(mat_b1_b2_c_prob, axis=(0,2)).squeeze() * \
                            torch.arange(cur_ele, next_ele, 1)).item())
        if not (label_cur_ele == label_next_ele and expected_boundary >= next_ele-2):
            prev_boundary = expected_boundary
        if expected_boundary == 0 and i > 0:
            # Only warn: dropping into pdb here aborted the pool task and lost this video's result.
            print(f'Estimated boundary has become zero! for {vidid} and cur_ele, next_ele {cur_ele, next_ele}')

    # NOTE(review): the row of the final anchor frame (selected_frames[-1]) is never written
    # above and therefore stays all-zero — confirm whether the last frame should get weight 1.
    posterior_prediction = torch.argmax(per_frame_weights, dim=1)
    correct = torch.sum(posterior_prediction == gt_labels[:num_frames]).item()

    return (vidid, per_frame_weights, [correct, num_frames, boundary_error])

In [15]:
# Running totals of the current expectation step, updated by `collect_result`.
posterior_acc_correct = 0
posterior_acc_total = 0
posterior_boundary_total_mse = 0
results = []


# Step 2: Define callback function to collect the output in `results`
def collect_result(result):
    """Pool callback: store one video's posterior weights and accumulate its statistics.

    Args:
        result: `(video id, per-frame weights, [correct, total, boundary error])` as
            returned by `prob_vals_per_segment`.
    """
    global posterior_acc_correct, posterior_acc_total, posterior_boundary_total_mse
    video_id = result[0]
    frame_weights = result[1]
    correct, total, boundary_err = result[2]

    # One weight file per video: <output_dir>/posterior_weights/<video id>.wt
    fname = os.path.join(config.output_dir, 'posterior_weights', video_id + '.wt')
    torch.save(frame_weights, fname)

    posterior_acc_correct = posterior_acc_correct + correct
    posterior_acc_total = posterior_acc_total + total
    posterior_boundary_total_mse = posterior_boundary_total_mse + boundary_err
    # print(f'Dumped in file {fname} at time {time()}')
    return

def calculate_element_probb(data_feat, data_count, video_ids, gt_labels, num_workers=20):
    """Run `prob_vals_per_segment` for every video of a batch in a process pool.

    Each finished task is handed to `collect_result`, which writes the posterior weights
    to disk and updates the global accuracy counters. Failures inside a worker are
    reported through an error callback and no longer vanish silently.

    Args:
        data_feat: per-video class probabilities, indexed [video, frame, class].
        data_count: number of valid frames of each video.
        video_ids: ids of the videos, used as "<id>.txt" keys of `selected_frames_dict`.
        gt_labels: frame-wise ground-truth labels per video.
        num_workers: size of the process pool (defaults to the previous fixed value of 20).

    Returns:
        The module-level `results` list (tasks report through the callback; nothing is
        appended to it here).
    """
    # The context manager terminates the pool even when submitting fails or the run is
    # interrupted, so no worker processes are left behind.
    with mp.Pool(num_workers) as pool:
        for iter_num in range(len(data_count)):
            cur_vidid = video_ids[iter_num]
            cur_vid_count = data_count[iter_num]
            # Drop the padding and move to the CPU before shipping tensors to the workers.
            cur_vid_feat = data_feat[iter_num][:cur_vid_count].detach().cpu()
            cur_gt_labels = gt_labels[iter_num].detach().cpu()

            cur_video_select_frames = selected_frames_dict[cur_vidid + ".txt"]
            selected_frames_indices = [ele[0] for ele in cur_video_select_frames]
            selected_frames_labels = [label_name_to_label_id_dict[ele[1]] for ele in cur_video_select_frames]
            # The 4th and 5th arguments fill the callee's unused first/last flag parameters.
            pool.apply_async(prob_vals_per_segment,
                             args=(selected_frames_indices, cur_vid_feat, selected_frames_labels,
                                   cur_video_select_frames[1], cur_video_select_frames[2], cur_vidid, cur_gt_labels),
                             callback=collect_result,
                             error_callback=lambda exc, vid=cur_vidid: print(
                                 f'Posterior computation failed for {vid}: {exc!r}'))
        # Step 4: Close Pool and let all the processes complete
        pool.close()
        pool.join()  # postpones the execution of next line of code until all processes in the queue are done.
    return results

def perform_expectation(model, dataloader):
    """Expectation step: recompute the posterior frame weights of every video in `dataloader`.

    The model is run in eval mode without gradients, its last-stage output is turned into
    class probabilities, and `calculate_element_probb` converts them into posterior
    weights that `collect_result` writes to disk. A summary of the posterior accuracy is
    printed at the end.

    Args:
        model: network called as `model(features, mask)`, returning `(middle_pred, predictions)`.
        dataloader: iterable of batches with features at [0], frame counts at [1],
            frame-wise labels at [2] and video ids at [4].
    """
    global posterior_acc_correct, posterior_acc_total, posterior_boundary_total_mse
    posterior_acc_correct, posterior_acc_total, posterior_boundary_total_mse = 0, 0, 0
    model.eval()
    curtime = time()
    print('Calculating expectation')

    try:
        with torch.no_grad():
            for i, item in enumerate(dataloader):
                item_0 = item[0].to(device) # features
                item_1 = item[1].to(device) # count
                item_2 = item[2].to(device) # gt frame-wise labels
                item_4 = item[4] # video-ids
                src_mask = torch.arange(item_2.shape[1], device=item_2.device)[None, :] < item_1[:, None]
                src_mask_mse = src_mask.unsqueeze(1).to(torch.float32).to(device)
                middle_pred, predictions = model(item_0, src_mask_mse)
                # Softmax over dim 1, then move that dimension last: [video, frame, class].
                prob = torch.softmax(predictions[-1], dim=1)
                prob = prob.permute(0, 2, 1)

                calculate_element_probb(prob, item_1, item_4, item_2)
                if (i+1) % 10 == 0:
                    print(f"iter {i+1} of Expectation completed in a total of {(time() - curtime)/60.: .1f} minutes")
    finally:
        # Hand the model back in train mode even if the E-step fails or is interrupted.
        _ = model.train()
    # Guard the denominators: no collected result (or no annotated boundary) must not
    # crash the run with a ZeroDivisionError after a long expectation step.
    posterior_acc = 100*posterior_acc_correct/max(posterior_acc_total, 1)
    boundary_rmse = (posterior_boundary_total_mse/max(num_boundary, 1))**0.5
    print(f'Expectation step finished, '
          f'posterior frame-wise accuracy {posterior_acc: .2f}%, '
          f'boundary mse {boundary_rmse: .2f}')
    return

In [16]:
# Re-seed so that the weight initialisation does not depend on the cells executed before.
set_seed()
# Feature size, class count and optimiser settings are read from `config` instead of
# being repeated as literals, so the model cannot drift away from the printed config.
model = MultiStageModel(num_stages=4, num_layers=10, num_f_maps=64,
                        dim=config.feature_size, num_classes=config.num_class).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate,
                             weight_decay=config.weight_decay)

# Requires loaded_vidid_selected_frames, boundaries_dict
# Targets equal to -100 (frames without a label) are ignored by the cross-entropy.
ce_criterion = nn.CrossEntropyLoss(ignore_index=-100)
# Element-wise squared error; the training loop clamps and masks it itself.
mse_criterion = nn.MSELoss(reduction='none')

In [17]:
# Start from a previously saved checkpoint in the output directory; the file name follows
# the "ms-tcn-initial-<epoch>-epochs.wt" pattern that the training loop writes.
loaded_file=torch.load(os.path.join(config.output_dir, "ms-tcn-initial-30-epochs.wt"))
model.load_state_dict(loaded_file)
# loaded_file=torch.load('/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast//results/mstcnnew-full-supervised-split1/ms-tcn-best-model.wt')
# model.load_state_dict(loaded_file)
# Sanity check of the loaded weights. No best_val_acc is passed, so only the
# "last model" checkpoint is (re)written by validate().
_ = validate(model, testloader)

Calculating Validation Data Accuracy
Validation:: Probability Accuracy 55.835123916252165


In [18]:
# item = next(iter(trainloader))
    
# with torch.no_grad():
#     item_0 = item[0].to(device) # features
#     item_1 = item[1].to(device) # count
#     item_2 = item[2].to(device) # gt frame-wise labels
#     item_4 = item[4] # video-ids
#     src_mask = torch.arange(item_2.shape[1], device=item_2.device)[None, :] < item_1[:, None]
#     src_mask_mse = src_mask.unsqueeze(1).to(torch.float32).to(device)
#     middle_pred, predictions = model(item_0, src_mask_mse)
#     prob = torch.softmax(predictions[-1], dim=1)
#     prob = prob.permute(0, 2, 1)

#     res = calculate_element_probb(prob, item_1, item_4, item_2)

In [19]:
# idx = 2
# vidid = res[idx][0]
# mat = res[idx][1]
# mat.shape

In [20]:
# np.linspace(0, 5281, 4 + 1).astype(int)

In [21]:
# boundary_frames_dict[f'{vidid}.txt'], selected_frames_dict[f'{vidid}.txt'], weakly_labels[f'{vidid}.txt']

In [22]:
# fig = plt.figure(figsize=(20, 5))
# for i in range(48):
#     plt.plot(mat[:,i])
    
# for bd in boundary_frames_dict[f'{vidid}.txt']:
#     plt.plot([bd, bd], [0, 2])
    
# for bd in selected_frames_dict[f'{vidid}.txt']:
#     plt.plot([bd[0], bd[0]], [0, 2], '--')

In [23]:
# bd

In [24]:
# Calculating Expectation Step
# perform_expectation(model, trainloder_expectation)

In [25]:
def get_single_random(video_ids, len_frames, device):
    """Build frame-wise targets that are labelled only at the annotated frames.

    Generate target for only timestamps. Do not generate pseudo labels at first 30 epochs.

    Args:
        video_ids: ids of the videos in the batch ("<id>.txt" keys of `selected_frames_dict`).
        len_frames: number of frames (columns) of the target tensor.
        device: device on which the target tensor is created.

    Returns:
        Long tensor of shape (len(video_ids), len_frames) holding the label id at every
        annotated frame and -100 everywhere else.
    """
    targets = torch.full((len(video_ids), len_frames), -100, dtype=torch.long, device=device)
    for row, video_id in enumerate(video_ids):
        annotations = selected_frames_dict[video_id + ".txt"]
        frame_indices = []
        frame_label_ids = []
        for annotation in annotations:
            frame_indices.append(annotation[0])
            frame_label_ids.append(label_name_to_label_id_dict[annotation[1]])

        index_tensor = torch.from_numpy(np.array(frame_indices))
        label_tensor = torch.as_tensor(np.array(frame_label_ids), device=device)
        targets[row, index_tensor] = label_tensor

    return targets

In [26]:
# NOTE: pickle can execute arbitrary code while loading; only load trusted files.
# `with` guarantees the file handles are closed (the bare open() calls leaked them).
with open("data/breakfast_weaklysupervised_labels.pkl", "rb") as weak_labels_file:
    weakly_labels = pickle.load(weak_labels_file)
with open('data/breakfast_lengthmodel_multinomial_prior.pkl', 'rb') as prior_file:
    prior_probs = pickle.load(prior_file)

In [27]:
# EM-style training loop.
# - Epochs up to and including `initialize_epoch` train on the sparsely labelled frames only.
# - Later epochs train on the posterior weights delivered with each batch.
# - Every `expectation_cal_gap` epochs the posterior weights are recomputed (E-step).
initialize_epoch = 30
expectation_cal_gap = 5
best_val_acc = 0
# Training resumes at epoch 30, so only that first epoch takes the timestamp-supervised branch.
for epoch in range(30, 150):
    print("Starting Training")
    model.train()
    for i, item in enumerate(trainloader):
        item_0 = item[0].to(device)  # features
        item_1 = item[1].to(device)  # count
        item_2 = item[2].to(device)  # target
        weights = item[5].to(device)  # posterior weight
        # True for real frames, False for the padding beyond each video's frame count.
        src_mask = torch.arange(item_2.shape[1], device=item_2.device)[None, :] < item_1[:, None]
        src_mask_mse = src_mask.unsqueeze(1).to(torch.float32).to(device)
        optimizer.zero_grad()
        
        middle_pred, predictions = model(item_0, src_mask_mse)
        # Targets are -100 (ignored by ce_criterion) everywhere except the annotated frames.
        boundary_target_tensor = get_single_random(item[4], item_2.shape[1], item_2.device)
        
        # The loss is summed over every entry of `predictions`.
        loss = 0
        for p in predictions:
            if epoch <= initialize_epoch:
                # Cross-entropy on the annotated frames only ...
                loss += ce_criterion(p, boundary_target_tensor)
                # ... plus a smoothing term: squared difference between the log-softmax of a frame
                # and that of the (detached) previous frame, clamped to [0, 16], masked, weighted 0.15.
                loss += 0.15 * torch.mean(torch.clamp(mse_criterion(F.log_softmax(p[:, :, 1:], dim=1), 
                                                                    F.log_softmax(p.detach()[:, :, :-1], dim=1)), min=0,
                                            max=16) * src_mask_mse[:, :, 1:])
            else:
                # Cross-entropy against the soft posterior weights, averaged over the valid frames.
                prob = torch.softmax(p, dim=1)
                prob = prob.permute(0, 2, 1)
                total_count = torch.sum(src_mask)
                weighted_loss_sum = -torch.sum(torch.sum(torch.log(prob + 1e-8) * weights, dim=-1) * src_mask)
                loss += weighted_loss_sum/total_count

        loss.backward()
        optimizer.step()
        if (i+1)%20 == 0:
            print(f'Epoch {epoch+1}: Iteration {i+1} with loss {loss.item()}')

    # Keep a numbered checkpoint every 3 * expectation_cal_gap epochs.
    if (epoch >= initialize_epoch) and ((epoch % (3 * expectation_cal_gap)) == 0):
        torch.save(model.state_dict(), config.output_dir + f"ms-tcn-initial-{epoch}-epochs.wt")

    # E-step: rewrite the posterior weight files every expectation_cal_gap epochs.
    if epoch >= initialize_epoch and (epoch % expectation_cal_gap == 0):
        perform_expectation(model, trainloder_expectation)
    
    print(f'Epoch {epoch+1} finished, starting validation')
    # validate() also saves the "last" checkpoint, and the "best" one when accuracy improves.
    val_acc, best_val_acc = validate(model, testloader, best_val_acc)


Starting Training
Epoch 31: Iteration 20 with loss 1.2421857118606567
Epoch 31: Iteration 40 with loss 1.193537712097168
Epoch 31: Iteration 60 with loss 0.6960170269012451
Epoch 31: Iteration 80 with loss 1.0945032835006714
Epoch 31: Iteration 100 with loss 0.7095000147819519
Epoch 31: Iteration 120 with loss 0.6708444952964783
Epoch 31: Iteration 140 with loss 2.113300323486328
Epoch 31: Iteration 160 with loss 1.212669849395752
Epoch 31: Iteration 180 with loss 0.5667948126792908
Calculating expectation
iter 10 of Expectation completed in a total of  6.8 minutes
iter 20 of Expectation completed in a total of  13.7 minutes
iter 30 of Expectation completed in a total of  18.8 minutes
iter 40 of Expectation completed in a total of  24.1 minutes
iter 50 of Expectation completed in a total of  31.1 minutes
iter 60 of Expectation completed in a total of  34.3 minutes
iter 70 of Expectation completed in a total of  38.0 minutes
Expectation step finished, posterior frame-wise accuracy  77.1

Epoch 42: Iteration 120 with loss 1.8190981149673462
Epoch 42: Iteration 140 with loss 1.5592681169509888
Epoch 42: Iteration 160 with loss 1.3646129369735718
Epoch 42: Iteration 180 with loss 0.9108996391296387
Epoch 42 finished, starting validation
Calculating Validation Data Accuracy
Validation:: Probability Accuracy 57.21456525438149
Starting Training
Epoch 43: Iteration 20 with loss 1.6122702360153198
Epoch 43: Iteration 40 with loss 1.283385992050171
Epoch 43: Iteration 60 with loss 2.1941707134246826
Epoch 43: Iteration 80 with loss 1.3589376211166382
Epoch 43: Iteration 100 with loss 1.0183168649673462
Epoch 43: Iteration 120 with loss 1.3828750848770142
Epoch 43: Iteration 140 with loss 2.1910760402679443
Epoch 43: Iteration 160 with loss 0.6894497275352478
Epoch 43: Iteration 180 with loss 0.6364771723747253
Epoch 43 finished, starting validation
Calculating Validation Data Accuracy
Validation:: Probability Accuracy 59.613550656678974
Starting Training
Epoch 44: Iteration 20 

Epoch 54: Iteration 20 with loss 0.9988481998443604
Epoch 54: Iteration 40 with loss 0.41554734110832214
Epoch 54: Iteration 60 with loss 0.5077337026596069
Epoch 54: Iteration 80 with loss 1.5505235195159912
Epoch 54: Iteration 100 with loss 1.484798550605774
Epoch 54: Iteration 120 with loss 1.523590326309204
Epoch 54: Iteration 140 with loss 1.6119850873947144
Epoch 54: Iteration 160 with loss 0.7083007097244263
Epoch 54: Iteration 180 with loss 1.0229507684707642
Epoch 54 finished, starting validation
Calculating Validation Data Accuracy
Validation:: Probability Accuracy 59.51482127806071
Starting Training
Epoch 55: Iteration 20 with loss 2.3743484020233154
Epoch 55: Iteration 40 with loss 0.5011219382286072
Epoch 55: Iteration 60 with loss 1.2677690982818604
Epoch 55: Iteration 80 with loss 0.40027517080307007
Epoch 55: Iteration 100 with loss 0.4590405821800232
Epoch 55: Iteration 120 with loss 0.9679972529411316
Epoch 55: Iteration 140 with loss 0.7717399001121521
Epoch 55: Iter

Epoch 65: Iteration 140 with loss 0.4508247673511505
Epoch 65: Iteration 160 with loss 0.8115043640136719
Epoch 65: Iteration 180 with loss 0.42172250151634216
Epoch 65 finished, starting validation
Calculating Validation Data Accuracy
Validation:: Probability Accuracy 62.31940833600437
Starting Training
Epoch 66: Iteration 20 with loss 0.6926485300064087
Epoch 66: Iteration 40 with loss 0.3721248209476471
Epoch 66: Iteration 60 with loss 0.458239883184433
Epoch 66: Iteration 80 with loss 0.721886396408081
Epoch 66: Iteration 100 with loss 0.28227171301841736
Epoch 66: Iteration 120 with loss 0.2464321106672287
Epoch 66: Iteration 140 with loss 0.6153993606567383
Epoch 66: Iteration 160 with loss 0.4452989399433136
Epoch 66: Iteration 180 with loss 0.3143503665924072
Calculating expectation
iter 10 of Expectation completed in a total of  4.0 minutes
iter 20 of Expectation completed in a total of  7.9 minutes
iter 30 of Expectation completed in a total of  11.5 minutes
iter 40 of Expect

iter 30 of Expectation completed in a total of  9.9 minutes


Process ForkPoolWorker-14343:
Process ForkPoolWorker-14335:
Process ForkPoolWorker-14337:
Process ForkPoolWorker-14334:
Process ForkPoolWorker-14346:
Process ForkPoolWorker-14329:
Process ForkPoolWorker-14347:
Process ForkPoolWorker-14333:
Process ForkPoolWorker-14345:
Process ForkPoolWorker-14348:
Process ForkPoolWorker-14340:
Process ForkPoolWorker-14330:
Process ForkPoolWorker-14342:
Traceback (most recent call last):
Process ForkPoolWorker-14336:
Process ForkPoolWorker-14338:
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/process.py", line 29

  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
KeyboardInterrupt
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/home/dipika16/anaconda3/envs/video_r/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__(

KeyboardInterrupt: 

In [22]:
# Accuracy of the most recent validation; `epoch` and `val_acc` are whatever the
# (interrupted) training loop left behind in the kernel.
print(f"Validation:: Epoch {epoch}, Probability Accuracy {val_acc}")

Validation:: Epoch 105, Probability Accuracy 61.02425300046298


In [23]:
# Same message text as the previous cell, but the value printed is the BEST validation
# accuracy seen so far (`best_val_acc`), not that of epoch `epoch`.
print(f"Validation:: Epoch {epoch}, Probability Accuracy {best_val_acc}")

Validation:: Epoch 105, Probability Accuracy 63.70656599831428


In [24]:
# Manual snapshot of the current weights. NOTE(review): the target is a hard-coded
# directory that differs from the `output_dir` set in the configuration cell — confirm
# this is the intended location.
torch.save(model.state_dict(),
"/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast//results/em-maximize-mstcn-speed/final-em-maximized.wt")

In [24]:
# Display the output directory currently held by the kernel. NOTE(review): the stored
# output of this cell does not match the configuration cell above, so it presumably
# comes from an earlier session — re-run to refresh.
config.output_dir

'/mnt/ssd/all_users/dipika/ms_tcn/data/breakfast//results/em-maximize-mstcn-split1/'

In [34]:
# Restore the best-validation checkpoint written by validate().
model.load_state_dict(torch.load(config.output_dir + "ms-tcn-emmax-best-model.wt"))
# model.load_state_dict(torch.load(config.output_dir + "ms-tcn-initial-15-epochs.wt"))

<All keys matched successfully>