#attention

In [2]:
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import numpy as np

class SelfAttention(nn.Module):
    def __init__(self, input_size=1024, output_size=1024, block_size=60):
        """ The basic Attention 'cell' containing the learnable parameters of Q, K and V.

        :param int input_size: Feature input size of Q, K, V.
        :param int output_size: Feature -hidden- size of Q, K, V.
        :param int block_size: The size of the blocks utilized inside the attention matrix.
        """
        super(SelfAttention, self).__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.block_size = block_size
        self.Wk = nn.Linear(in_features=input_size, out_features=output_size, bias=False)
        self.Wq = nn.Linear(in_features=input_size, out_features=output_size, bias=False)
        self.Wv = nn.Linear(in_features=input_size, out_features=output_size, bias=False)
        self.out = nn.Linear(in_features=output_size+2, out_features=input_size, bias=False)

        self.softmax = nn.Softmax(dim=-1)

    @staticmethod
    def get_entropy(logits):
        """ Compute the entropy for each row of the attention matrix.

        :param torch.Tensor logits: The raw (non-normalized) attention values with shape [T, T].
        :return: A torch.Tensor containing the normalized entropy of each row of the attention matrix, with shape [T].
        """
        _entropy = F.softmax(logits, dim=-1) * F.log_softmax(logits, dim=-1)
        _entropy = -1.0 * _entropy.sum(-1)

        # https://stats.stackexchange.com/a/207093 Maximum value of entropy is log(k), where k the # of used categories.
        # Here k is when all the values of a row is different of each other (i.e., k = # of video frames)
        return _entropy / np.log(logits.shape[0])

    def forward(self, x):
        """ Compute the weighted frame features, through the Block diagonal sparse attention matrix and the estimates of
        the frames attentive uniqueness and the diversity.

        :param torch.Tensor x: Frame features with shape [T, input_size].
        :return: A tuple of:
                    y: The computed weighted features, with shape [T, input_size].
                    att_win : The Block diagonal sparse attention matrix, with shape [T, T].
        """
        # Compute the pairwise dissimilarity of each frame, on the initial feature space (GoogleNet features)
        x_unit = F.normalize(x, p=2, dim=1)
        similarity = x_unit @ x_unit.t()
        diversity = 1 - similarity

        K = self.Wk(x)
        Q = self.Wq(x)
        V = self.Wv(x)

        energies = torch.matmul(Q, K.transpose(1, 0))
        att_weights = self.softmax(energies)

        # Entropy is a measure of uncertainty: Higher value means less information.
        entropy = self.get_entropy(logits=energies)
        entropy = F.normalize(entropy, p=1, dim=-1)

        # Compute the mask to form the Block diagonal sparse attention matrix
        D = self.block_size
        num_blocks = math.ceil(energies.shape[0] / D)
        keepingMask = torch.ones(num_blocks, D, D, device=att_weights.device)
        keepingMask = torch.block_diag(*keepingMask)[:att_weights.shape[0], :att_weights.shape[0]]
        zeroingMask = (1 - keepingMask)
        att_win = att_weights * keepingMask

        # Pick those frames that are "invisible" to a frame, aka outside the block (mask)
        attn_remainder = att_weights * zeroingMask
        div_remainder = diversity * zeroingMask

        # Compute non-local dependencies based on the diversity of those frames
        dep_factor = (div_remainder * attn_remainder).sum(-1).div(div_remainder.sum(-1))
        dep_factor = dep_factor.unsqueeze(0).expand(dep_factor.shape[0], -1)
        masked_dep_factor = dep_factor * keepingMask
        att_win += masked_dep_factor

        y = torch.matmul(att_win, V)
        characteristics = (entropy, dep_factor[0, :])
        characteristics = torch.stack(characteristics).detach()
        outputs = torch.cat(tensors=(y, characteristics.t()), dim=-1)

        y = self.out(outputs)
        return y, att_win.clone()


In [4]:
if __name__ == '__main__':
    pass
    # Uncomment for a quick proof of concept
    model = SelfAttention(input_size=256, output_size=128, block_size=30)
    _input = torch.randn(500, 256)  # [seq_len, hidden_size]
    output, weights = model(_input)
    print(f"Output shape: {output.shape}\tattention shape: {weights.shape}")
            

Output shape: torch.Size([500, 256])	attention shape: torch.Size([500, 500])


#summarizer

In [5]:
class CA_SUM(nn.Module):
    def __init__(self, input_size=1024, output_size=1024, block_size=60):
        """ Class wrapping the CA-SUM model; its key modules and parameters.
        
        :param int input_size: The expected input feature size.
        :param int output_size: The produced output feature size.
        :param int block_size: The size of the blocks utilized inside the attention matrix.
        """
        super(CA_SUM, self).__init__()
        self.attention = SelfAttention(input_size=input_size, output_size=output_size, block_size=block_size)
        self.linear_1 = nn.Linear(in_features=input_size, out_features=input_size)
        self.linear_2 = nn.Linear(in_features=self.linear_1.out_features, out_features=1)
        self.drop = nn.Dropout(p=0.5)
        self.norm_y = nn.LayerNorm(normalized_shape=input_size, eps=1e-6)
        self.norm_linear = nn.LayerNorm(normalized_shape=self.linear_1.out_features, eps=1e-6)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    def forward(self, frame_features):
        """ Produce frame-level importance scores from the frame features, using the CA-SUM model.
        :param torch.Tensor frame_features: Tensor of shape [T, input_size] containing the frame features produced by 
        using the pool5 layer of GoogleNet.
        :return: A tuple of:
            y: Tensor with shape [1, T] containing the frames importance scores in [0, 1].
            attn_weights: Tensor with shape [T, T] containing the attention weights.
        """
        residual = frame_features
        weighted_value, attn_weights = self.attention(frame_features)
        y = residual + weighted_value
        y = self.drop(y)
        y = self.norm_y(y)

        # 2-layer NN (Regressor Network)
        y = self.linear_1(y)
        y = self.relu(y)
        y = self.drop(y)
        y = self.norm_linear(y)
        y = self.linear_2(y)
        y = self.sigmoid(y)
        y = y.view(1, -1)
        return y, attn_weights
    

In [6]:
if __name__ == '__main__':
    pass
    # Uncomment for a quick proof of concept
    model = CA_SUM(input_size=256, output_size=128, block_size=30)
    _input = torch.randn(500, 256)  # [seq_len, hidden_size]
    output, weights = model(_input)
    print(f"Output shape: {output.shape}\tattention shape: {weights.shape}")
    

Output shape: torch.Size([1, 500])	attention shape: torch.Size([500, 500])


#evaluation_metrics

In [97]:
# -*- coding: utf-8 -*-
import numpy as np
import csv
import sys
import os
from os import listdir
from scipy.stats import spearmanr, kendalltau, rankdata
from collections import Counter

In [98]:
def evaluate_summary(predicted_summary, user_summary, eval_method):
    """ Compare the predicted summary with the user defined one(s).
    :param np.ndarray predicted_summary: The generated summary from our model.
    :param np.ndarray user_summary: The user defined ground truth summaries (or summary).
    :param str eval_method: The proposed evaluation method; either 'max' (SumMe) or 'avg' (TVSum).
    :return: The reduced fscore based on the eval_method
    """
    max_len = max(len(predicted_summary), user_summary.shape[1])
    S = np.zeros(max_len, dtype=int)
    G = np.zeros(max_len, dtype=int)
    S[:len(predicted_summary)] = predicted_summary
    f_scores = []
    for user in range(user_summary.shape[0]):
        G[:user_summary.shape[1]] = user_summary[user]
        overlapped = S & G
        
        # Compute precision, recall, f-score
        precision = sum(overlapped)/sum(S)
        recall = sum(overlapped)/sum(G)
        if precision+recall == 0:
            f_scores.append(0)
        else:
            f_scores.append(2 * precision * recall * 100 / (precision + recall))
    if eval_method == 'max':
        return max(f_scores)
    else:
        return sum(f_scores)/len(f_scores)

In [9]:
def get_corr_coeff(pred_imp_scores, video, dataset):
    """ Read users annotations (frame-level importance scores) for the `video` of the `dataset`* in use. Compare the
    multiple user annotations for the test video with the predicted frame-level importance scores of our CA-SUM for the
    same video, by computing the Spearman's rho and Kendall's tau correlation coefficients. It must be noted, that the
    calculated values are the average correlation coefficients over the multiple annotators.
    * Applicable only for the TVSum dataset.
    :param list[float] pred_imp_scores: The predicted frame-level importance scores from our CA-SUM model.
    :param str video: The name of the test video being inferenced.
    :param str dataset: The dataset in use.
    :return: A tuple containing the video-level Spearman's rho and Kendall's tau correlation coefficients.
    """

    # Read the user annotations from the file
    annot_path = f"C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/{dataset}/ydata-anno.tsv"
    with open(annot_path) as annot_file:
        user = int(video.split("_")[-1])
        annot = list(csv.reader(annot_file, delimiter="\t"))
        annotation_length = list(Counter(np.array(annot)[:, 0]).values())[user-1]
        init = (user - 1) * annotation_length
        till = user * annotation_length
        user_scores = []
        for row in annot[init:till]:
            curr_user_score = row[2].split(",")
            curr_user_score = np.array([float(num) for num in curr_user_score])
            curr_user_score = curr_user_score / curr_user_score.max(initial=-1)  # Normalize scores between 0 and 1
            curr_user_score = curr_user_score[::15]
            user_scores.append(curr_user_score)
    pred_imp_scores = np.array(pred_imp_scores)
    rho_coeff, tau_coeff = [], []
    for annot in range(len(user_scores)):
        true_user_score = user_scores[annot]
        curr_rho_coeff, _ = spearmanr(pred_imp_scores, true_user_score)
        curr_tau_coeff, _ = kendalltau(rankdata(pred_imp_scores), rankdata(true_user_score))
        rho_coeff.append(curr_rho_coeff)
        tau_coeff.append(curr_tau_coeff)
    rho_coeff = np.array(rho_coeff).mean()  # mean over all user annotations
    tau_coeff = np.array(tau_coeff).mean()  # mean over all user annotations
    return rho_coeff, tau_coeff

#compute_fscores

In [10]:

args={"path":r"C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\exp1\reg0.6\SumMe\results\split0",
       "dataset":"SumMe",
       "eval":"max"
}

path = args["path"]
dataset = args["dataset"]
eval_method = args["eval"]

results = [f for f in path if f.endswith(".json")]
results.sort(key=lambda video: int(video[6:-5]))
dataset_path = 'C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/' + dataset + '/eccv16_dataset_' + dataset.lower() + '_google_pool5.h5'

In [11]:
f_score_epochs = []
for epoch in results:                       # for each epoch ...
    all_scores = []
    with open(path + '/' + epoch) as f:     # read the json file ...
        data = json.loads(f.read())
        keys = list(data.keys())
        for video_name in keys:                    # for each video inside that json file ...
            scores = np.asarray(data[video_name])  # read the importance scores from frames
            all_scores.append(scores)
    all_user_summary, all_shot_bound, all_nframes, all_positions = [], [], [], []
    with h5py.File(dataset_path, 'r') as hdf:
        for video_name in keys:
            user_summary = np.array(hdf.get(video_name + '/user_summary'))
            sb = np.array(hdf.get(video_name + '/change_points'))
            n_frames = np.array(hdf.get(video_name + '/n_frames'))
            positions = np.array(hdf.get(video_name + '/picks'))
            all_user_summary.append(user_summary)
            all_shot_bound.append(sb)
            all_nframes.append(n_frames)
            all_positions.append(positions)
    all_summaries = generate_summary(all_shot_bound, all_scores, all_nframes, all_positions)
    all_f_scores = []
    # compare the resulting summary with the ground truth one, for each video
    for video_index in range(len(all_summaries)):
        summary = all_summaries[video_index]
        user_summary = all_user_summary[video_index]
        f_score = evaluate_summary(summary, user_summary, eval_method)
        all_f_scores.append(f_score)
    f_score_epochs.append(np.mean(all_f_scores))
    num_epoch = epoch.split(".")[0][6:]
    print(f"[epoch {num_epoch}] f_score: {np.mean(all_f_scores)}")

In [12]:
with open(path + '/f_scores.txt', 'w') as outfile:
    for f_score in f_score_epochs:
        outfile.write('%s\n' % f_score)

#choose_best_model

In [13]:
# -*- coding: utf-8 -*-
import numpy as np
import csv
import json
import sys
from scipy.stats import spearmanr, kendalltau, rankdata

In [117]:
exp_num =sys.argv[1]   #changes
# exp_num =1
dataset = args["dataset"]

In [118]:
base_path = "C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/summaries"
eligible_datasets = ["TVSum"]

In [119]:
def get_corr_coeff(epoch, split_id, reg_factor):
    """ Read users annotations (frame-level importance scores) for each video in the dataset*. Compare the multiple
    user annotations for each test video with the predicted frame-level importance scores of our CA-SUM for the same
    video, by computing the Spearman's rho and Kendall's tau correlation coefficients. It must be noted, that for each
    test video the calculated values are the average correlation coefficients over the multiple annotators. The final
    split-level values are the average over the entire test set.
    * Applicable only for the TVSum dataset.
    :param int epoch: The chosen training epoch for the given split and regularization factor.
    :param int split_id: The id of the current evaluated split.
    :param float reg_factor: The value of the current evaluated length regularization factor.
    :return: A tuple containing the split-level Spearman's rho and Kendall's tau correlation coefficients.
    """
    if dataset not in eligible_datasets:
        print(f"Correlation coefficients are not supported by {dataset} dataset.")
        return None, None

    # Read the user annotations from the file
    annot_path = f"C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/{dataset}/ydata-anno.tsv"
    with open(annot_path) as annot_file:
        annot = csv.reader(annot_file, delimiter="\t")
        names, user_scores = [], {}
        for row in annot:
            str_user = row[0]
            curr_user_score = row[2].split(",")
            curr_user_score = np.array([float(num) for num in curr_user_score])
            curr_user_score = curr_user_score / curr_user_score.max(initial=-1)  # Normalize scores between 0 and 1
            curr_user_score = curr_user_score[::15]
            if str_user not in names:
                names.append(str_user)
                user = f"video_{len(names)}"
                user_scores[user] = [curr_user_score]
            else:
                user_scores[user].append(curr_user_score)

    # Read each score and compared it
    scores_path = f"{base_path}/exp{exp_num}/reg{reg_factor}/{dataset}/results/split{split_id}/{dataset}_{epoch-1}.json"
    with open(scores_path) as score_file:       # Read the importance scores affiliated with the selected epoch
        scores = json.loads(score_file.read())
        keys = list(scores.keys())
    rho_coeff_video, tau_coeff_video = [], []
    for video in keys:
        pred_imp_score = np.array(scores[video])
        curr_user_scores = user_scores[video]
        rho_coeff, tau_coeff = [], []
        for annot in range(len(curr_user_scores)):
            true_user_score = curr_user_scores[annot]
            curr_rho_coeff, _ = spearmanr(pred_imp_score, true_user_score)
            curr_tau_coeff, _ = kendalltau(rankdata(pred_imp_score), rankdata(true_user_score))
            rho_coeff.append(curr_rho_coeff)
            tau_coeff.append(curr_tau_coeff)
        rho_coeff = np.array(rho_coeff).mean()  # mean over all user annotations
        rho_coeff_video.append(rho_coeff)
        tau_coeff = np.array(tau_coeff).mean()  # mean over all user annotations
        tau_coeff_video.append(tau_coeff)
    rho_coeff_split = np.array(rho_coeff_video).mean()  # mean over all videos
    tau_coeff_split = np.array(tau_coeff_video).mean()  # mean over all videos
    return rho_coeff_split, tau_coeff_split

In [120]:
def get_improvement_score(epoch, split_id, reg_factor):
    """ Using the estimated frame-level importance scores from an untrained model, calculate the improvement (eq. 2-3)
    of a  trained model for the chosen epoch, on a given split and regularization factor.
    :param int epoch: The chosen training epoch for the given split and regularization factor.
    :param int split_id: The id of the current evaluated split.
    :param float reg_factor: The value of the current evaluated length regularization factor
    :return: The relative improvement of a trained model over an untrained (random) one.
    """
    untr_path = f"{base_path}/exp{exp_num}/reg{reg_factor}/{dataset}/results/split{split_id}/{dataset}_-1.json"
    curr_path = f"{base_path}/exp{exp_num}/reg{reg_factor}/{dataset}/results/split{split_id}/{dataset}_{epoch}.json"
    with open(curr_path) as curr_file, open(untr_path) as untr_file:
        untr_data = json.loads(untr_file.read())
        curr_data = json.loads(curr_file.read())
        keys = list(curr_data.keys())
        mean_untr_scores, mean_curr_scores = [], []
        for video_name in keys:                              # For a video inside that split get the ...
            untr_scores = np.asarray(untr_data[video_name])  # Untrained model computed importance scores
            curr_scores = np.asarray(curr_data[video_name])  # trained model computed importance scores
            mean_untr_scores.append(np.mean(untr_scores))
            mean_curr_scores.append(np.mean(curr_scores))
    mean_untr_scores = np.array(mean_untr_scores)
    mean_curr_scores = np.array(mean_curr_scores)

    # Measure how much did we improve a random model, relatively to moving towards sigma (minimum loss)
    improvement = abs(mean_curr_scores.mean() - mean_untr_scores.mean())
    result = (improvement / abs(reg_factor - mean_untr_scores.mean()))
    return result

In [122]:
def train_logs(log_file, method="argmin"):
    """ Choose and return the epoch based only on the training loss. Through the `method` argument you can get the epoch
    associated with the minimum training loss (argmin) or the last epoch of the training process (last).
    :param str log_file: Path to the saved csv file containing the loss information.
    :param str method: The chosen criterion for the epoch (model) picking process.
    :return: The epoch of the best model, according to the chosen criterion.
    """
    losses = {}
    losses_names = []

    # Read the csv file with the training losses
    with open(log_file) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for (i, row) in enumerate(csv_reader):
            if i == 0:
                for col in range(len(row)):
                    losses[row[col]] = []
                    losses_names.append(row[col])
            else:
                for col in range(len(row)):
                    losses[losses_names[col]].append(float(row[col]))

    # criterion: The length regularization of the generated summary (400 epochs, after which overfitting problems occur)
    loss = losses["loss_epoch"]
    loss = loss[:400]          
    START_EPOCH = 20                      # If unstable training is observed at the start
    if method == "last":
        epoch = len(loss) - 1
    elif method == "argmin":
        epoch = np.array(loss[START_EPOCH:]).argmin() + START_EPOCH
    else:
        raise ValueError(f"Method {method} is not currently supported. Only `last` and `argmin` are available.")
    return epoch

In [123]:
# Choose the model associated with the min training loss for each regularization factor and get its improvement score
all_improvements, all_epochs = [], []
sigmas = [i/10 for i in range(5, 10)]  # The valid values for the length regularization factor
for sigma in sigmas:
    split_improvements, split_epochs = np.zeros(5, dtype=float), np.zeros(5, dtype=int)
    for split in range(0, 5):
        log = f"{base_path}/exp{exp_num}/reg{sigma}/{dataset}/logs/split{split}/scalers.csv"
        selected_epoch = train_logs(log, method="argmin")  # w/o +1. (only needed to pick the f-score value)
        split_improvements[split] = get_improvement_score(epoch=selected_epoch, split_id=split, reg_factor=sigma)
        split_epochs[split] = selected_epoch
    all_improvements.append(split_improvements)
    all_epochs.append(split_epochs)
    

OSError: [Errno 22] Invalid argument: 'C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/summaries/exp1/reg0.5/C:\\Users\\abhis\\OneDrive\\Desktop\\video summarization\\CA-SUM-main\\data\\SumMe\\eccv16_dataset_summe_google_pool5.h5/logs/split0/scalers.csv'

In [None]:
all_improvements = np.stack(all_improvements)
all_epochs = np.stack(all_epochs)

ValueError: need at least one array to stack

In [None]:
all_improvements = np.where(all_improvements > 1.5, 0, all_improvements)
print(all_improvements)
improvement_per_spit = all_improvements.max(axis=0, initial=-1)
chosen_indices = all_improvements.argmax(axis=0)
sigma_per_split = np.array(sigmas)[chosen_indices]

TypeError: '>' not supported between instances of 'list' and 'float'

In [None]:
all_fscores, all_rho_coeff, all_tau_coeff = np.zeros(5, dtype=float), np.zeros(5, dtype=float), np.zeros(5, dtype=float)
for split in range(0, 5):
    curr_sigma = sigma_per_split[split]
    curr_epoch = all_epochs[chosen_indices[split], split] + 1  # because of the evaluation on the untrained model

    # Read the fscore values
    results_file = f"{base_path}/exp{exp_num}/reg{curr_sigma}/{dataset}/results/split{split}/f_scores.txt"
    with open(results_file) as f:
        f_scores = f.read().strip()  # read F-Scores
        if "\n" in f_scores:
            f_scores = f_scores.splitlines()
        else:
            f_scores = json.loads(f_scores)
    f_scores = np.array([float(f_score) for f_score in f_scores])
    curr_fscore = np.round(f_scores[curr_epoch], 2)
    all_fscores[split] = curr_fscore
    print(f"[Split: {split}] Fscore: {curr_fscore:.2f}", end="")

    # Compute correlation coefficients
    if dataset in eligible_datasets:    
        rho, tau = get_corr_coeff(epoch=curr_epoch, split_id=split, reg_factor=curr_sigma)
        all_rho_coeff[split] = rho
        all_tau_coeff[split] = tau
        print(f"  Spearman's \u03C1: {rho:.3f}  Kendall's \u03C4: {tau:.3f}", end="")
    print(f" [\u03C3={curr_sigma}, epoch: {curr_epoch}]")

NameError: name 'sigma_per_split' is not defined

In [None]:
avg_fscore = np.round(np.mean(all_fscores), 2)
if dataset in eligible_datasets:
    avg_rho, avg_tau = np.round(np.mean(all_rho_coeff), 2), np.round(np.mean(all_tau_coeff), 2)
    print("====================================================================================")
    print(f"Avg values :=> F1: {avg_fscore}  Spearman's \u03C1: {avg_rho:.3f}  Kendall's \u03C4: {avg_tau:.3f}")
else:
    print(f"Avg values :=> F1: {avg_fscore:.2f}")

NameError: name 'all_fscores' is not defined

#inference

In [20]:
from os import listdir
from os.path import isfile, join
import h5py
import json
import argparse

In [22]:
eligible_datasets = ["TVSum"]

In [23]:
def str2bool(v):
    """ Transcode string to boolean.
    :param str v: String to be transcoded.
    :return: The boolean transcoding of the string.
    """
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

In [113]:
def inference(model, data_path, keys, eval_method):
    """ Used to inference a pretrained `model` on the `keys` test videos, based on the `eval_method` criterion; using
        the dataset located in `data_path'.
        :param nn.Module model: Pretrained model to be inferenced.
        :param str data_path: File path for the dataset in use.
        :param list keys: Containing the test video keys of the used data split.
        :param str eval_method: The evaluation method in use {SumMe: max, TVSum: avg}.
    """
    model.eval()
    video_fscores, video_rho, video_tau = [], [], []
    for video in keys:
        with h5py.File(data_path, "r") as hdf:
            # Input features for inference
            frame_features = torch.Tensor(np.array(hdf[f"{video}/features"])).view(-1, 1024)
            frame_features = frame_features.to(model.linear_1.weight.device)

            # Input need for evaluation
            user_summary = np.array(hdf[f"{video}/user_summary"])
            sb = np.array(hdf[f"{video}/change_points"])
            n_frames = np.array(hdf[f"{video}/n_frames"])
            positions = np.array(hdf[f"{video}/picks"])
        with torch.no_grad():
            scores, _ = model(frame_features)  # [1, seq_len]
            scores = scores.squeeze(0).cpu().numpy().tolist()
            summary = generate_summary([sb], [scores], [n_frames], [positions])[0]
            f_score = evaluate_summary(summary, user_summary, eval_method)
            video_fscores.append(f_score)
            if dataset in eligible_datasets and corr_coef:
                rho, tau = get_corr_coeff(pred_imp_scores=scores, video=video, dataset=dataset)
                video_rho.append(rho)
                video_tau.append(tau)
    print(f"CA-SUM model trained for split: {split_id} achieved an F-score: {np.mean(video_fscores):.2f}%", end="")
    if dataset not in eligible_datasets or not corr_coef:
        print("\n", end="")
    else:
        print(f", a Spearman's \u03C1: {np.mean(video_rho):.3f}  and a Kendall's \u03C4: {np.mean(video_tau):.3f}")

In [115]:
if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # arguments to run the script
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default='SumMe', help="Dataset to be used. Supported: {SumMe, TVSum}")
    parser.add_argument("--corr_coef", type=str2bool, default=False, help="Calculate or not, the correlation coefficients")
    args = vars(parser.parse_args())
    dataset = args["dataset"]
    corr_coef = args["corr_coef"]
    eval_metric = 'avg' if dataset.lower() == 'tvsum' else 'max'
    for split_id in range(5):
        # # Model data
        model_path = f"C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/inference/pretrained_models/{dataset}/split{split_id}"
        model_file = [f for f in listdir(model_path) if isfile(join(model_path, f))]

        # Read current split
        split_file = f"C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/splits/{dataset.lower()}_splits.json"
        with open(split_file) as f:
            data = json.loads(f.read())
            test_keys = data[split_id]["test_keys"]

        # Dataset path
        dataset_path = f"C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/{dataset}/eccv16_dataset_{dataset.lower()}_google_pool5.h5"

        # Create model with paper reported configuration
        trained_model = CA_SUM(input_size=1024, output_size=1024, block_size=60).to(device)
        trained_model.load_state_dict(torch.load(join(model_path, model_file[-1])))
        inference(trained_model, dataset_path, test_keys, eval_metric)
        

usage: ipykernel_launcher.py [-h] [--dataset DATASET] [--corr_coef CORR_COEF]
ipykernel_launcher.py: error: unrecognized arguments: --f=c:\Users\abhis\AppData\Roaming\jupyter\runtime\kernel-v2-82521E8ygEXgZCeV.json


SystemExit: 2

#exportTensorFlowLog

In [26]:
# -*- coding: utf-8 -*-
# author: Anders Krogh Mortensen (GitHub: @anderskm)
# link: https://github.com/anderskm/exportTensorFlowLog/blob/master/exportTensorFlowLog.py
import time
import csv
import sys
import os
try:
    import collections.abc as collections
except ImportError:
    import collections

In [27]:
# Import the event accumulator from Tensorboard. Location varies between Tensorflow versions.<br>
# Try each known location until one works.
eventAccumulatorImported = False

In [93]:
# TF version < 1.1.0
import tensorflow.python
if not eventAccumulatorImported:
    try:
        from tensorflow.python.summary import event_accumulator
        eventAccumulatorImported = True
    except ImportError:
        eventAccumulatorImported = False

In [29]:
# TF version = 1.1.0
if not eventAccumulatorImported:
    try:
        from tensorboard.backend.event_processing import event_accumulator
        eventAccumulatorImported = True
    except ImportError:
        eventAccumulatorImported = False

In [30]:

# TF version >= 1.3.0
if not eventAccumulatorImported:
    try:
        from tensorboard.backend.event_processing import event_accumulator
        eventAccumulatorImported = True
    except ImportError:
        eventAccumulatorImported = False

In [31]:
# TF version = Unknown
if not eventAccumulatorImported:
    raise ImportError('Could not locate and import Tensorflow event accumulator.')

In [32]:
summariesDefault = ['scalars']  # ['scalars', 'histograms', 'images', 'audio', 'compressedHistograms']

In [33]:
class Timer(object):
    # link: https://stackoverflow.com/a/5849861
    def __init__(self, name=None):
        self.name = name
    def __enter__(self):
        self.tStart = time.time()
    def __exit__(self, exc_type, value, exc_traceback):
        if self.name:
            print('[%s]' % self.name)
            print('Elapsed: %s' % (time.time() - self.tStart))

In [34]:
def exitWithUsage():
    print(' ')
    print('Usage:')
    print('   python readLogs.py <output-folder> <output-path-to-csv> <summaries>')
    print('Inputs:')
    print('   <input-path-to-logfile>  - Path to TensorFlow logfile.')
    print('   <output-folder>          - Path to output folder.')
    print(
        '   <summaries>              - (Optional) Comma separated list of summaries to save in output-folder. Default: '
        + ', '.join(summariesDefault))
    print(' ')
    sys.exit()

In [124]:
if len(sys.argv) < 3:
    exitWithUsage()             

 
Usage:
   python readLogs.py <output-folder> <output-path-to-csv> <summaries>
Inputs:
   <input-path-to-logfile>  - Path to TensorFlow logfile.
   <output-folder>          - Path to output folder.
   <summaries>              - (Optional) Comma separated list of summaries to save in output-folder. Default: scalars
 


SystemExit: 

In [106]:
inputLogFile = sys.argv[1]
outputFolder = sys.argv[2]

IndexError: list index out of range

In [37]:
if len(sys.argv) < 4:
    summaries = summariesDefault
else:
    if sys.argv[3] == 'all':
        summaries = summariesDefault
    else:
        summaries = sys.argv[3].split(',')

In [38]:
if any(x not in summariesDefault for x in summaries):
    print('Unknown summary! See usage for acceptable summaries.')
    exitWithUsage()

In [116]:
# Setting up event accumulator...
with Timer():
    ea = event_accumulator.EventAccumulator(inputLogFile,
                                            size_guidance={
                                                event_accumulator.COMPRESSED_HISTOGRAMS: 0,  # 0 = grab all
                                                event_accumulator.IMAGES: 0,
                                                event_accumulator.AUDIO: 0,
                                                event_accumulator.SCALARS: 0,
                                                event_accumulator.HISTOGRAMS: 0, 
                                            })

AttributeError: module 'tensorflow' has no attribute 'compat'

In [40]:
# Loading events from file...
with Timer():
    ea.Reload()  # loads events from file

NameError: name 'ea' is not defined

In [41]:
tags = ea.Tags()
''' Uncomment for logging 
print(' ')
print('Log summary:')
for t in tags:
    tagSum = []
    if isinstance(tags[t], collections.Sequence):
        tagSum = str(len(tags[t])) + ' summaries'
    else:
        tagSum = str(tags[t])
    print('   ' + t + ': ' + tagSum)
'''

NameError: name 'ea' is not defined

In [42]:
if not os.path.isdir(outputFolder):
    os.makedirs(outputFolder)

NameError: name 'outputFolder' is not defined

In [43]:
if 'audio' in summaries:
    print(' ')
    print('Exporting audio...')
    with Timer():
        print('   Audio is not yet supported!')

In [44]:
if 'compressedHistograms' in summaries:
    print(' ')
    print('Exporting compressedHistograms...')
    with Timer():
        print('   Compressed histograms are not yet supported!')

In [45]:
if 'histograms' in summaries:
    print(' ')
    print('Exporting histograms...')
    with Timer():
        print('   Histograms are not yet supported!')

In [46]:
if 'images' in summaries:
    print(' ')
    print('Exporting images...')
    imageDir = outputFolder + 'images'
    print('Image dir: ' + imageDir)
    with Timer():
        imageTags = tags['images']
        for imageTag in imageTags:
            images = ea.Images(imageTag)
            imageTagDir = imageDir + '/' + imageTag
            if not os.path.isdir(imageTagDir):
                os.makedirs(imageTagDir)
            for image in images:
                imageFilename = imageTagDir + '/' + str(image.step) + '.png'
                with open(imageFilename, 'wb') as f:
                    f.write(image.encoded_image_string)

In [47]:
if 'scalars' in summaries:
    csvFileName = os.path.join(outputFolder, 'scalars.csv')
    # Exporting scalars to csv-file...
    print('CSV-path: ' + csvFileName)
    scalarTags = tags['scalars']
    with Timer():
        with open(csvFileName, 'w') as csvfile:
            logWriter = csv.writer(csvfile, delimiter=',')

            # Write headers to columns
            headers = ['wall_time', 'step']
            for s in scalarTags:
                headers.append(s)
            logWriter.writerow(headers)
            vals = ea.Scalars(scalarTags[0])
            for i in range(len(vals)):
                v = vals[i]
                data = [v.wall_time, v.step]
                for s in scalarTags:
                    scalarTag = ea.Scalars(s)
                    S = scalarTag[i]
                    data.append(S.value)
                logWriter.writerow(data)

NameError: name 'outputFolder' is not defined

#knapsack_implementation

In [48]:
def knapSack(W, wt, val, n):
	""" Maximize the value that a knapsack of capacity W can hold. You can either put the item or discard it, there is
	no concept of putting some part of item in the knapsack.

	:param int W: Maximum capacity -in frames- of the knapsack.
	:param list[int] wt: The weights (lengths -in frames-) of each video shot.
	:param list[float] val: The values (importance scores) of each video shot.
	:param int n: The number of the shots.
	:return: A list containing the indices of the selected shots.
	"""
	K = [[0 for _ in range(W + 1)] for _ in range(n + 1)]

	# Build table K[][] in bottom up manner
	for i in range(n + 1):
		for w in range(W + 1):
			if i == 0 or w == 0:
				K[i][w] = 0
			elif wt[i - 1] <= w:
				K[i][w] = max(val[i - 1] + K[i - 1][w - wt[i - 1]], K[i - 1][w])
			else:
				K[i][w] = K[i - 1][w]

	selected = []
	w = W
	for i in range(n, 0, -1):
		if K[i][w] != K[i - 1][w]:
			selected.insert(0, i - 1)
			w -= wt[i - 1]

	return selected


if __name__ == "__main__":
	pass
	""" Driver program to test above function
	val = [4, 4, 2, 2, 2, 4]
	wt =  [2, 2, 1, 1, 1, 2]
	W = 7
	n = len(val)
	selected = knapSack(W, wt, val, n)
	print(selected) 
	"""

#configs

In [49]:
# -*- coding: utf-8 -*-
import argparse
import torch
from pathlib import Path
import pprint

In [50]:
save_dir = Path('C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/summaries/exp1')

In [51]:
def str2bool(v):
    """ Transcode string to boolean.
    :param str v: String to be transcoded.
    :return: The boolean transcoding of the string.
    """
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

In [53]:
class Config(object):
    def __init__(self, **kwargs):
        """ Configuration Class: set kwargs as class attributes with setattr. """
        self.log_dir, self.score_dir, self.save_dir = None, None, None
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.set_dataset_dir(self.reg_factor, self.video_type)
    def set_dataset_dir(self, reg_factor=0.6, video_type='SumMe'):
        """ Function that sets as class attributes the necessary directories for logging important training information.
        :param float reg_factor: The utilized length regularization factor.
        :param str video_type: The Dataset being used, SumMe or TVSum.
        """
        self.log_dir = save_dir.joinpath('reg' + str(reg_factor), video_type, 'logs/split' + str(self.split_index))
        self.score_dir = save_dir.joinpath('reg' + str(reg_factor), video_type, 'results/split' + str(self.split_index))
        self.save_dir = save_dir.joinpath('reg' + str(reg_factor), video_type, 'models/split' + str(self.split_index))
    def __repr__(self):
        """ Pretty-print configurations in alphabetical order. """
        config_str = 'Configurations\n'
        config_str += pprint.pformat(self.__dict__)
        return config_str

In [54]:
def get_config(parse=True, **optional_kwargs):
    """ Get configurations as attributes of class
        1. Parse configurations with argparse.
        2. Create Config class initialized with parsed kwargs.
        3. Return Config class.
    """
    # If using argparse to parse command line arguments, uncomment the following:
    # parser = argparse.ArgumentParser()

    # # Mode
    # parser.add_argument('--mode', type=str, default='train', help='Mode for the configuration [train | test]')
    # parser.add_argument('--verbose', type=str2bool, default='false', help='Print or not training messages')
    # parser.add_argument('--video_type', type=str, default='SumMe', help='Dataset to be used')

    # # Model
    # parser.add_argument('--input_size', type=int, default=1024, help='Feature size expected in the input')
    # parser.add_argument('--block_size', type=int, default=60, help="Size of blocks used inside the attention matrix")
    # parser.add_argument('--init_type', type=str, default="xavier", help='Weight initialization method')
    # parser.add_argument('--init_gain', type=float, default=1.4142, help='Scaling factor for the initialization methods')

    # # Train
    # parser.add_argument('--n_epochs', type=int, default=400, help='Number of training epochs')
    # parser.add_argument('--batch_size', type=int, default=20, help='Size of each batch in training')
    # parser.add_argument('--seed', type=int, default=12345, help='Chosen seed for generating random numbers')
    # parser.add_argument('--clip', type=float, default=5.0, help='Max norm of the gradients')
    # parser.add_argument('--lr', type=float, default=5e-4, help='Learning rate used for the modules')
    # parser.add_argument('--l2_req', type=float, default=1e-5, help='Weight regularization factor')
    # parser.add_argument('--reg_factor', type=float, default=0.6, help='Length regularization factor')
    # parser.add_argument('--split_index', type=int, default=0, help='Data split to be used [0-4]')

    # if parse:
    #     kwargs = parser.parse_args()
    # else:
    #     kwargs = parser.parse_known_args()[0]
    # args, unknown = parser.parse_known_args()

    # # Namespace => Dictionary
    # kwargs = vars(kwargs)
    
    # Example of hardcoded arguments
    kwargs = {
        "mode": "train",
        "verbose": 'false',
        "video_type": "SumMe",
        # Model
        "input_size": 1024,
        "block_size": 60,
        "init_type": "xavier",
        "init_gain": 1.4142,
        # Train
        "n_epochs": 1,
        "batch_size": 20,
        "seed": 12345,
        "clip": 5.0,
        "lr": 5e-4,
        "l2_req": 1e-5,
        "reg_factor": 0.6,
        "dataset": r"C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\data\SumMe\eccv16_dataset_summe_google_pool5.h5",
        "split_index": 0,
        "beta":0.01,
        "weight_decay":1e-05,
        "num_episode":5

    }
    
    kwargs.update(optional_kwargs)
    
    # Ensure Config class is defined somewhere
    return Config(**kwargs)




In [55]:
if __name__ == '__main__':
    config = get_config()
    # import ipdb   
    # ipdb.set_trace()

#uutils

In [56]:
from __future__ import absolute_import
import os
import sys
import errno
import shutil
import json
import os.path as osp
import torch

In [57]:
def mkdir_if_missing(directory):
    if not osp.exists(directory):
        try:
            os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

In [59]:
class AverageMeter(object):
    """Computes and stores the average and current value.
       
       Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()
    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [60]:
def save_checkpoint(state, fpath='checkpoint.pth.tar'):
    mkdir_if_missing(osp.dirname(fpath))
    torch.save(state, fpath)

In [61]:
class Logger(object):
    """
    Write console output to external text file.
    Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py.
    """
    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = None
        if fpath is not None:
            mkdir_if_missing(os.path.dirname(fpath))
            self.file = open(fpath, 'w')
    def __del__(self):
        self.close()
    def __enter__(self):
        pass
    def __exit__(self, *args):
        self.close()
    def write(self, msg):
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)
    def flush(self):
        self.console.flush()
        if self.file is not None:
            self.file.flush()
            os.fsync(self.file.fileno())
    def close(self):
        self.console.close()
        if self.file is not None:
            self.file.close()

In [62]:
def read_json(fpath):
    with open(fpath, 'r') as f:
        obj = json.load(f)
    return obj

In [63]:
def write_json(obj, fpath):
    mkdir_if_missing(osp.dirname(fpath))
    with open(fpath, 'w') as f:
        json.dump(obj, f, indent=4, separators=(',', ': '))

#create_split

In [64]:
from __future__ import print_function
import os
import os.path as osp
import argparse
import h5py
import math
import numpy as np

In [65]:
args = { "dataset":r"C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\data\SumMe\eccv16_dataset_summe_google_pool5.h5",
"save_dir": r"C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries",
"save_name": 'splits',
"num_splits": 5,
"train_percent": 0.8}

In [66]:
def split_random(keys, num_videos, num_train):
    """Random split"""
    train_keys, test_keys = [], []
    rnd_idxs = np.random.choice(range(num_videos), size=num_train, replace=False)
    for key_idx, key in enumerate(keys):
        if key_idx in rnd_idxs:
            train_keys.append(key)
        else:
            test_keys.append(key)
    assert len(set(train_keys) & set(test_keys)) == 0, "Error: train_keys and test_keys overlap"
    return train_keys, test_keys

In [67]:
def create():
    print("==========\nArgs:{}\n==========".format(args))
    print("Goal: randomly split data for {} times, {:.1%} for training and the rest for testing".format(args["num_splits"], args["train_percent"]))
    print("Loading dataset from {}".format(args["dataset"]))
    dataset = h5py.File(args["dataset"], 'r')
    keys = dataset.keys()
    num_videos = len(keys)
    num_train = int(math.ceil(num_videos * args["train_percent"]))
    num_test = num_videos - num_train
    print("Split breakdown: # total videos {}. # train videos {}. # test videos {}".format(num_videos, num_train, num_test))
    splits = []
    for split_idx in range(args["num_splits"]):
        train_keys, test_keys = split_random(keys, num_videos, num_train)
        splits.append({
            'train_keys': train_keys,
            'test_keys': test_keys,
            })
    saveto = osp.join(args["save_dir"], args["save_name"] + '.json')
    write_json(splits, saveto)
    print("Splits saved to {}".format(saveto))
    dataset.close()

In [69]:
if __name__ == '__main__':
    create()

Args:{'dataset': 'C:\\Users\\abhis\\OneDrive\\Desktop\\video summarization\\CA-SUM-main\\data\\SumMe\\eccv16_dataset_summe_google_pool5.h5', 'save_dir': 'C:\\Users\\abhis\\OneDrive\\Desktop\\video summarization\\CA-SUM-main\\summaries\\summaries', 'save_name': 'splits', 'num_splits': 5, 'train_percent': 0.8}
Goal: randomly split data for 5 times, 80.0% for training and the rest for testing
Loading dataset from C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\data\SumMe\eccv16_dataset_summe_google_pool5.h5
Split breakdown: # total videos 25. # train videos 20. # test videos 5
Splits saved to C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\splits.json


#data_loader

In [70]:
# -*- coding: utf-8 -*-
import torch
from torch.utils.data import Dataset, DataLoader
import h5py
import numpy as np
import json

In [71]:
class VideoData(Dataset):
    def __init__(self, mode, video_type, split_index):
        """ Custom Dataset class wrapper for loading the frame features.
        :param str mode: The mode of the model, train or test.
        :param str video_type: The Dataset being used, SumMe or TVSum.
        :param int split_index: The index of the Dataset split being used.
        """
        self.mode = mode
        self.name = video_type.lower()
        self.datasets = [r'C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/SumMe/eccv16_dataset_summe_google_pool5.h5',
                         r'C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/TVSum/eccv16_dataset_tvsum_google_pool5.h5']
        self.splits_filename = [r'C:/Users/abhis/OneDrive/Desktop/video summarization/CA-SUM-main/data/splits/' + self.name + r'_splits.json']
        self.split_index = split_index
        if 'summe' in self.splits_filename[0]:
            self.filename = self.datasets[0]
        elif 'tvsum' in self.splits_filename[0]:
            self.filename = self.datasets[1]
        hdf = h5py.File(self.filename, 'r')
        self.list_frame_features = []
        with open(self.splits_filename[0]) as f:
            data = json.loads(f.read())
            for i, split in enumerate(data):
                if i == self.split_index:
                    self.split = split
                    break
        for video_name in self.split[self.mode + '_keys']:
            frame_features = torch.Tensor(np.array(hdf[video_name + '/features']))
            self.list_frame_features.append(frame_features)
        hdf.close()
    def __len__(self):
        """ Function to be called for the `len` operator of `VideoData` Dataset. """
        self.len = len(self.split[self.mode+'_keys'])
        return self.len
    
    def __getitem__(self, index):
        """ Function to be called for the index operator of `VideoData` Dataset.
        train mode returns: frame_features
        test  mode returns: frame_features and video name
        :param int index: The above-mentioned id of the data.
        """
        frame_features = self.list_frame_features[index]
        if self.mode == 'test':
            video_name = self.split[self.mode + '_keys'][index]
            return frame_features, video_name
        else:
            return frame_features

In [72]:
def get_loader(mode, video_type, split_index):
    """ Loads the `data.Dataset` of the `split_index` split for the `video_type` Dataset.
    Wrapped by a Dataloader, shuffled and `batch_size` = 1 in train `mode`.
    :param str mode: The mode of the model, train or test.
    :param str video_type: The Dataset being used, SumMe or TVSum.
    :param int split_index: The index of the Dataset split being used.
    :return: The Dataset used in each mode.
    """
    if mode.lower() == 'train':
        vd = VideoData(mode, video_type, split_index)
        return DataLoader(vd, batch_size=1, shuffle=True)
    else:
        return VideoData(mode, video_type, split_index)

In [74]:
if __name__ == '__main__':
    pass

#rewards for RL

In [75]:
import torch
import sys

def compute_reward(seq, actions, ignore_far_sim=True, temp_dist_thre=20, use_gpu=False):
    """
    Compute diversity reward and representativeness reward

    Args:
        seq: sequence of features, shape (1, seq_len, dim)
        actions: binary action sequence, shape (1, seq_len, 1)
        ignore_far_sim (bool): whether to ignore temporally distant similarity (default: True)
        temp_dist_thre (int): threshold for ignoring temporally distant similarity (default: 20)
        use_gpu (bool): whether to use GPU
    """
    _seq = seq.detach()
    _actions = actions.detach()
    pick_idxs = _actions.squeeze().nonzero().squeeze()
    num_picks = len(pick_idxs) if pick_idxs.ndimension() > 0 else 1
    
    if num_picks == 0:
        # give zero reward is no frames are selected
        reward = torch.tensor(0.)
        if use_gpu: reward = reward.cuda()
        return reward   

    _seq = _seq.squeeze()
    n = _seq.size(0)

    # compute diversity reward
    if num_picks == 1:
        reward_div = torch.tensor(0.)
        if use_gpu: reward_div = reward_div.cuda()
    else:
        normed_seq = _seq / _seq.norm(p=2, dim=1, keepdim=True)
        dissim_mat = 1. - torch.matmul(normed_seq, normed_seq.t()) # dissimilarity matrix [Eq.4]
        dissim_submat = dissim_mat[pick_idxs,:][:,pick_idxs]
        if ignore_far_sim:
            # ignore temporally distant similarity
            pick_mat = pick_idxs.expand(num_picks, num_picks)
            temp_dist_mat = torch.abs(pick_mat - pick_mat.t())
            dissim_submat[temp_dist_mat > temp_dist_thre] = 1.
        reward_div = dissim_submat.sum() / (num_picks * (num_picks - 1.)) # diversity reward [Eq.3]

    # compute representativeness reward
    dist_mat = torch.pow(_seq, 2).sum(dim=1, keepdim=True).expand(n, n)
    dist_mat = dist_mat + dist_mat.t()
    dist_mat.addmm_(1, -2, _seq, _seq.t())
    dist_mat = dist_mat[:,pick_idxs]
    dist_mat = dist_mat.min(1, keepdim=True)[0]
    #reward_rep = torch.exp(torch.FloatTensor([-dist_mat.mean()]))[0] # representativeness reward [Eq.5]
    reward_rep = torch.exp(-dist_mat.mean())

    # combine the two rewards
    reward = (reward_div + reward_rep) * 0.5

    return reward


#solver

In [76]:
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import random
import json
import h5py
from tqdm import tqdm, trange


In [77]:
class Solver(object):
    def __init__(self, config=None, train_loader=None, test_loader=None):
        """ Class that Builds, Trains and Evaluates CA-SUM model. """
        # Initialize variables to None, to be safe
        self.model, self.optimizer, self.writer = None, None, None
        self.config = config
        self.train_loader = train_loader
        self.test_loader = test_loader

        # Set the seed for generating reproducible random numbers
        if self.config.seed is not None:
            torch.manual_seed(self.config.seed)
            torch.cuda.manual_seed_all(self.config.seed)
            np.random.seed(self.config.seed)
            random.seed(self.config.seed)
    def build(self):
        """ Function for constructing the CA-SUM model, its key modules and parameters. """
        # Model creation    
        self.model = CA_SUM(input_size=self.config.input_size,
                            output_size=self.config.input_size,
                            block_size=self.config.block_size).to(self.config.device)
        if self.config.init_type is not None:
            self.init_weights(net=self.model, init_type=self.config.init_type, init_gain=self.config.init_gain)
        if self.config.mode == 'train':
            self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr, weight_decay=self.config.l2_req)
            self.writer = TensorboardWriter(str(self.config.log_dir))
    @staticmethod
    def init_weights(net, init_type="xavier", init_gain=1.4142):
        """ Initialize 'net' network weights, based on the chosen 'init_type' and 'init_gain'.
        :param nn.Module net: Network to be initialized.
        :param str init_type: Name of initialization method: normal | xavier | kaiming | orthogonal.
        :param float init_gain: Scaling factor for normal.
        """
        for name, param in net.named_parameters():
            if 'weight' in name and "norm" not in name:
                if init_type == "normal":
                    nn.init.normal_(param, mean=0.0, std=init_gain)
                elif init_type == "xavier":
                    nn.init.xavier_uniform_(param, gain=np.sqrt(2.0))  # ReLU activation function
                elif init_type == "kaiming":
                    nn.init.kaiming_uniform_(param, mode="fan_in", nonlinearity="relu")
                elif init_type == "orthogonal":
                    nn.init.orthogonal_(param, gain=np.sqrt(2.0))      # ReLU activation function
                else:
                    raise NotImplementedError(f"initialization method {init_type} is not implemented.")
            elif 'bias' in name:
                nn.init.constant_(param, 0.1)
    def length_regularization_loss(self, scores):
        """ Compute the summary-length regularization loss based on eq. (1).
        :param torch.Tensor scores: Frame-level importance scores, produced by our CA-SUM model.
        :return: A (torch.Tensor) value indicating the summary-length regularization loss.
        """
        return torch.abs(torch.mean(scores) - self.config.reg_factor)
    def train(self):
        """ Main function to train the CA-SUM model. """
        if self.config.verbose:
            tqdm.write('Time to train the model...')
        for epoch_i in trange(self.config.n_epochs, desc='Epoch', ncols=80):
            self.model.train()
            loss_history = []
            num_batches = int(len(self.train_loader) / self.config.batch_size)  # full-batch or mini batch
            iterator = iter(self.train_loader)
            for _ in trange(num_batches, desc='Batch', ncols=80, leave=False):
                self.optimizer.zero_grad()
                for _ in trange(self.config.batch_size, desc='Video', ncols=80, leave=False):
                    frame_features = next(iterator)
                    frame_features = frame_features.squeeze(0).to(self.config.device)
                    output, _ = self.model(frame_features)
                    loss = self.length_regularization_loss(output)
                    loss_history.append(loss.data)
                    loss.backward()
                # Update model parameters every 'batch_size' iterations
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.clip)
                self.optimizer.step()

            # Mean loss of each training step
            loss = torch.stack(loss_history).mean()
            if self.config.verbose:
                tqdm.write(f'[{epoch_i}] loss: {loss.item()}')

            # Plot
            if self.config.verbose:
                tqdm.write('Plotting...')
            self.writer.update_loss(loss, epoch_i, 'loss_epoch')

            # Uncomment to save parameters at checkpoint
            if not os.path.exists(self.config.save_dir):
                os.makedirs(self.config.save_dir)
            ckpt_path = str(self.config.save_dir) + f'/epoch-{epoch_i}.pkl'
            tqdm.write(f'Save parameters at {ckpt_path}')
            torch.save(self.model.state_dict(), ckpt_path)
            self.evaluate(epoch_i)
    def evaluate(self, epoch_i, save_weights=False):
        """ Saves the frame's importance scores for the test videos in json format.
        :param int epoch_i: The current training epoch.
        :param bool save_weights: Optionally, the user can choose to save the attention weights in a (large) h5 file.
        """
        self.model.eval()
        weights_save_path = self.config.score_dir.joinpath("weights.h5")
        out_scores_dict = {}
        for frame_features, video_name in tqdm(self.test_loader, desc='Evaluate', ncols=80, leave=False):
            # [seq_len, input_size]
            frame_features = frame_features.view(-1, self.config.input_size).to(self.config.device)
            with torch.no_grad():
                scores, attn_weights = self.model(frame_features)  # [1, seq_len]
                scores = scores.squeeze(0).cpu().numpy().tolist()
                attn_weights = attn_weights.cpu().numpy()
                out_scores_dict[video_name] = scores
            if not os.path.exists(self.config.score_dir):
                os.makedirs(self.config.score_dir)
            scores_save_path = self.config.score_dir.joinpath(f"{self.config.video_type}_{epoch_i}.json")
            with open(scores_save_path, 'w') as f:
                if self.config.verbose:
                    tqdm.write(f'Saving score at {str(scores_save_path)}.')
                json.dump(out_scores_dict, f)
            scores_save_path.chmod(0o777)
            if save_weights and (epoch_i+1 == self.config.n_epochs or epoch_i+1 == 0):
                with h5py.File(weights_save_path, 'a') as weights:
                    weights.create_dataset(f"{video_name}/epoch_{epoch_i}", data=attn_weights)

In [78]:
if __name__ == '__main__':
    pass

#utils

In [79]:
# -*- coding: utf-8 -*-
from tensorboardX import SummaryWriter

In [80]:
class TensorboardWriter(SummaryWriter):
    def __init__(self, logdir):
        """ Extended SummaryWriter Class from tensorboard-pytorch (tensorboardX)
        https://github.com/lanpa/tensorboard-pytorch/blob/master/tensorboardX/writer.py
        Internally calls self.file_writer
        :param str logdir: Save directory location.
        """
        super(TensorboardWriter, self).__init__(logdir)
        self.logdir = self.file_writer.get_logdir()
    def update_parameters(self, module, step_i):
        """ Add module's parameters' histogram to summary.
        :param torch.nn.Module module: Module from which the parameters will be taken.
        :param int step_i: Step value to record.
        """
        for name, param in module.named_parameters():
            self.add_histogram(name, param.clone().cpu().data.numpy(), step_i)
    def update_loss(self, loss, step_i, name='loss'):
        """ Add scalar data to summary.
        :param float loss: Value to save.
        :param int step_i: Step value to record.
        :param str name: Data identifier.
        """
        self.add_scalar(name, loss, step_i)
    def update_histogram(self, values, step_i, name='hist'):
        """ Add histogram to summary.
        :param torch.Tensor | numpy.ndarray values: Values to build histogram.
        :param int step_i: Step value to record.
        :param str name: Data identifier.
        """
        self.add_histogram(name, values, step_i)

#main

In [81]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler
from torch.distributions import Bernoulli
import numpy as np

In [82]:
if __name__ == '__main__':
    """ Main function that sets the data loaders; trains and evaluates the solver."""
    config = get_config(mode='train')

In [83]:
if __name__ == '__main__':
    """ Main function that sets the data loaders; trains and evaluates the solver."""
    config = get_config(mode='train')
    test_config = get_config(mode='test')
    print(f"[Current split: {config.split_index}]: block_size={config.block_size} and "
          f"\u03C3={config.reg_factor} for {config.video_type} dataset.")
    train_loader = get_loader(config.mode, config.video_type, config.split_index)
    test_loader = get_loader(test_config.mode, test_config.video_type, test_config.split_index)
    solver = Solver(config, train_loader, test_loader)
    solver.build()
    solver.evaluate(-1)	 # evaluates the summaries using the initial random weights of the network
    solver.train()
  
    dataset = h5py.File(config.dataset, 'r')
    num_videos = len(dataset.keys())
    splits = read_json(r"C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\data\splits\summe_splits.json")
    
    assert config.split_index < len(splits), "split_id (got {}) exceeds {}".format(config.split_index, len(splits))
    split = splits[config.split_index]
    train_keys = split['train_keys']
    test_keys = split['test_keys']
    start_epoch=0

# tensorboard --logdir '.../CA-SUM/Summaries/' --host localhost
    

[Current split: 0]: block_size=60 and σ=0.6 for SumMe dataset.


Evaluate:   0%|                                           | 0/5 [00:00<?, ?it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.


Evaluate:  80%|████████████████████████████       | 4/5 [00:00<00:00, 11.65it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.


                                                                                

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.
Time to train the model...


Epoch:   0%|                                              | 0/1 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]

[0] loss: 0.11152105033397675
Plotting...
Save parameters at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\models\split0/epoch-0.pkl



Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.



Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]
Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.



Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]
Epoch:   0%|                                              | 0/1 [00:05<?, ?it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.


Epoch: 100%|██████████████████████████████████████| 1/1 [00:05<00:00,  5.12s/it]


# RL for Summarization 

In [84]:

baselines = {key: 0. for key in train_keys} # baseline rewards for videos
reward_writers = {key: [] for key in train_keys} # record reward changes for each video
for epoch in range(start_epoch, config.n_epochs):
        idxs = np.arange(len(train_keys))
        np.random.shuffle(idxs) # shuffle indices
        for idx in idxs:
            key = train_keys[idx]
            seq = dataset[key]['features'][...] # sequence of features, (seq_len, dim)
            seq = torch.from_numpy(seq) # input shape (seq_len, dim)  
            #
            trained_model = solver.model
            # input=torch.randn(100,1024)
            y,attn_weights = trained_model(seq) # output shape (1, seq_len, 1)
            probs = y.squeeze()
            #*
            cost = config.beta * (probs.mean() - 0.5)**2 # minimize summary length penalty term [Eq.11]
            m = Bernoulli(probs)
            epis_rewards = []
            for _ in range(config.num_episode):
                actions = m.sample()
                log_probs = m.log_prob(actions)
                #* 
                use_gpu=False
                reward = compute_reward(seq, actions, use_gpu=use_gpu)
                expected_reward = log_probs.mean() * (reward - baselines[key])
                cost -= expected_reward # minimize negative expected reward
                epis_rewards.append(reward.item())
            optimizer = torch.optim.Adam(trained_model.parameters(), lr=config.lr, weight_decay=config.weight_decay)    
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm_(trained_model.parameters(), 5.0)
            optimizer.step()
            baselines[key] = 0.9 * baselines[key] + 0.1 * np.mean(epis_rewards) # update baseline reward via moving average
            reward_writers[key].append(np.mean(epis_rewards))
        epoch_reward = np.mean([reward_writers[key][epoch] for key in train_keys])
        print("epoch {}/{}\t reward {}\t".format(epoch+1, config.n_epochs, epoch_reward))
write_json(reward_writers, osp.join(config.save_dir, 'rewards.json'))
solver.evaluate(-1)	 # evaluates the summaries using the initial random weights of the network
solver.train()
  
dataset = h5py.File(config.dataset, 'r')
num_videos = len(dataset.keys())
splits = read_json(r"C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\data\splits\summe_splits.json")
    
assert config.split_index < len(splits), "split_id (got {}) exceeds {}".format(config.split_index, len(splits))
split = splits[config.split_index]
train_keys = split['train_keys']
test_keys = split['test_keys']
start_epoch=0

             


#changes 
# seq = torch.from_numpy(seq).unsqueeze(0) # input shape (1,seq_len, dim) TO  seq = torch.from_numpy(seq) # input shape (seq_len, dim)  
# to fit in the input shape of CASUM model 

	addmm_(Number beta, Number alpha, Tensor mat1, Tensor mat2)
Consider using one of the following signatures instead:
	addmm_(Tensor mat1, Tensor mat2, *, Number beta, Number alpha) (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\utils\python_arg_parser.cpp:1630.)
  dist_mat.addmm_(1, -2, _seq, _seq.t())


epoch 1/1	 reward 0.9309951132535934	


Evaluate:  20%|███████                            | 1/5 [00:00<00:00,  8.26it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.


Evaluate:  80%|████████████████████████████       | 4/5 [00:00<00:00,  8.54it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.


                                                                                

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_-1.json.
Time to train the model...


Epoch:   0%|                                              | 0/1 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]

[0] loss: 0.3994632959365845
Plotting...
Save parameters at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\models\split0/epoch-0.pkl


                                                                                
Epoch:   0%|                                              | 0/1 [00:04<?, ?it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.


                                                                                
                                                                                
                                                                                
Epoch:   0%|                                              | 0/1 [00:05<?, ?it/s]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.
Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.


                                                                                
Epoch: 100%|██████████████████████████████████████| 1/1 [00:05<00:00,  5.44s/it]

Saving score at C:\Users\abhis\OneDrive\Desktop\video summarization\CA-SUM-main\summaries\summaries\reg0.6\SumMe\results\split0\SumMe_0.json.





In [85]:
from __future__ import print_function
import os
import os.path as osp
import argparse
import sys
import h5py
import time
import datetime
import numpy as np
from tabulate import tabulate