<a href="https://colab.research.google.com/github/noahdrakes/mldl-final/blob/noah/mm_violence_det_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multi-Modal Violence Detection Network

original src code: https://github.com/Roc-Ng/XDVioDet.git

### Copying Training and Testing Data

The folders are pretty large (~40/50GB) so it takes a while to copy all of the data over.


In [1]:
from google.colab import drive
drive.mount('/mydrive', force_remount=True)

Mounted at /mydrive


In [3]:
%cd /mydrive/MyDrive

/mydrive/MyDrive


In [4]:
!unzip final_dl.zip -d /content/


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-12-00_00-15-00_label_A__4.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-12-00_00-15-00_label_A__0.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-12-00_00-15-00_label_A__3.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-18-00_00-21-00_label_A__4.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-18-00_00-21-00_label_A__1.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-18-00_00-21-00_label_A__0.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-18-00_00-21-00_label_A__3.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI_pKNcgeQ__#00-18-00_00-21-00_label_A__2.npy  
  inflating: /content/final_dl/dl_files/i3d-features/Flow/v=vsI

may need to change directory depending on where you upload the data to google drive.

In [None]:
# !cp -r /mydrive/MyDrive/final_dl ./

## 1. Methods


### A) Test


In [6]:
from sklearn.metrics import auc, precision_recall_curve
import numpy as np
import torch

def test(dataloader, model, device, gt):
    with torch.no_grad():
        model.eval()
        pred = torch.zeros(0).to(device)
        pred2 = torch.zeros(0).to(device)
        for i, input in enumerate(dataloader):
            input = input.to(device)
            logits, logits2 = model(inputs=input, seq_len=None)
            logits = torch.squeeze(logits)
            sig = torch.sigmoid(logits)
            sig = torch.mean(sig, 0)
            pred = torch.cat((pred, sig))
            '''
            online detection
            '''
            logits2 = torch.squeeze(logits2)
            sig2 = torch.sigmoid(logits2)
            sig2 = torch.mean(sig2, 0)

            sig2 = torch.unsqueeze(sig2, 1) ##for audio
            pred2 = torch.cat((pred2, sig2))

            # print("pred:, ", pred)
            # print("pred2:, ", pred2)

        pred = list(pred.cpu().detach().numpy())
        pred2 = list(pred2.cpu().detach().numpy())




        precision, recall, th = precision_recall_curve(list(gt), np.repeat(pred, 16))
        pr_auc = auc(recall, precision)
        precision, recall, th = precision_recall_curve(list(gt), np.repeat(pred2, 16))
        pr_auc2 = auc(recall, precision)
        return pr_auc, pr_auc2




### B) Utils

In [7]:
# -*- coding: utf-8 -*-

import numpy as np


def random_extract(feat, t_max):
   r = np.random.randint(len(feat)-t_max)
   return feat[r:r+t_max]

def uniform_extract(feat, t_max):
   r = np.linspace(0, len(feat)-1, t_max, dtype=np.uint16)
   return feat[r, :]

def pad(feat, min_len):
    if np.shape(feat)[0] <= min_len:
       return np.pad(feat, ((0, min_len-np.shape(feat)[0]), (0, 0)), mode='constant', constant_values=0)
    else:
       return feat

def process_feat(feat, length, is_random=True):
    if len(feat) > length:
        if is_random:
            return random_extract(feat, length)
        else:
            return uniform_extract(feat, length)
    else:
        return pad(feat, length)



### C) Dataset

In [8]:
import torch.utils.data as data
import numpy as np

# from utils import process_feat

class Dataset(data.Dataset):
    def __init__(self, args, transform=None, mode='train'):
        self.modality = args.modality
        """
        Args:
            args: Arguments containing dataset paths and configuration
            transform: Optional transforms to apply
            mode: One of ['train', 'val', 'test'] to specify the dataset split
        """

        if mode == 'test':
            self.rgb_list_file = args.test_rgb_list
            self.flow_list_file = args.test_flow_list
            self.audio_list_file = args.test_audio_list
        elif mode == 'val':
            self.rgb_list_file = args.val_rgb_list
            self.flow_list_file = args.val_flow_list
            self.audio_list_file = args.val_audio_list
        else: # train
            self.rgb_list_file = args.train_rgb_list
            self.flow_list_file = args.train_flow_list
            self.audio_list_file = args.train_audio_list


        self.max_seqlen = args.max_seqlen
        self.tranform = transform
        self.test_mode = (mode == 'test')
        self.normal_flag = '_label_A'
        self._parse_list()

    def _parse_list(self):
        if self.modality == 'AUDIO':
            self.list = list(open(self.audio_list_file))
        elif self.modality == 'RGB':
            self.list = list(open(self.rgb_list_file))
            print("here")
            # print(self.list)
        elif self.modality == 'FLOW':
            self.list = list(open(self.flow_list_file))
        elif self.modality == 'MIX':
            self.list = list(open(self.rgb_list_file))
            self.flow_list = list(open(self.flow_list_file))
        elif self.modality == 'MIX2':
            self.list = list(open(self.rgb_list_file))
            self.audio_list = list(open(self.audio_list_file))
        elif self.modality == 'MIX3':
            self.list = list(open(self.flow_list_file))
            self.audio_list = list(open(self.audio_list_file))
        elif self.modality == 'MIX_ALL':
            self.list = list(open(self.rgb_list_file))
            self.flow_list = list(open(self.flow_list_file))
            self.audio_list = list(open(self.audio_list_file))
        else:
            assert 1 > 2, 'Modality is wrong!'

    def __getitem__(self, index):
        if self.normal_flag in self.list[index]:
            label = 0.0
        else:
            label = 1.0

        if self.modality == 'AUDIO':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'RGB':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'FLOW':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'MIX':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.flow_list[index].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2), axis=1)
            else:
                features = np.concatenate((features1[:-1], features2), axis=1)
        elif self.modality == 'MIX2':


            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.audio_list[index//5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2),axis=1)
            else:# because the frames of flow is one less than that of rgb
                min_len = min(features1.shape[0], features2.shape[0])
                features1 = features1[:min_len]
                features2 = features2[:min_len]
                features = np.concatenate((features1[:-1], features2), axis=1)
            features = np.concatenate((features1, features2), axis=1)

        elif self.modality == 'MIX3':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.audio_list[index // 5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2), axis=1)
            else:
                features = np.concatenate((features1[:-1], features2), axis=1)
        elif self.modality == 'MIX_ALL':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.flow_list[index].strip('\n')), dtype=np.float32)
            features3 = np.array(np.load(self.audio_list[index // 5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2, features3), axis=1)
            else:
                features = np.concatenate((features1[:-1], features2, features3[:-1]), axis=1)
        else:
            print("WHAT IS WRONG")
            raise ValueError("Modality is wrong!")

        # Apply transformations if any
        if self.tranform is not None:
            features = self.tranform(features)

        # Handle test mode
        if self.test_mode:
            return features
        else:
            # Process features for training/validation
            features = process_feat(features, self.max_seqlen, is_random=False)
            return features, label

    def __len__(self):
        return len(self.list)

### D) Layers

In [9]:
from math import sqrt
from torch import FloatTensor
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.spatial.distance import pdist, squareform

class GraphAttentionLayer(nn.Module):
    """
    Simple GAT layer, similar to https://arxiv.org/abs/1710.10903
    """

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        self.W = nn.Parameter(nn.init.xavier_uniform(torch.Tensor(in_features, out_features).type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor), gain=np.sqrt(2.0)), requires_grad=True)
        self.a = nn.Parameter(nn.init.xavier_uniform(torch.Tensor(2*out_features, 1).type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor), gain=np.sqrt(2.0)), requires_grad=True)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj):
        h = torch.mm(input, self.W)
        N = h.size()[0]

        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2 * self.out_features)
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

class linear(nn.Module):
    def __init__(self, in_features, out_features):
        super(linear, self).__init__()
        self.weight = Parameter(FloatTensor(in_features, out_features))
        self.register_parameter('bias', None)
        stdv = 1. / sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
    def forward(self, x):
        x = x.matmul(self.weight)
        return x

class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, bias=False, residual=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(FloatTensor(in_features, out_features))

        if bias:
            self.bias = Parameter(FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        if not residual:
            self.residual = lambda x: 0
        elif (in_features == out_features):
            self.residual = lambda x: x
        else:
            # self.residual = linear(in_features, out_features)
            self.residual = nn.Conv1d(in_channels=in_features, out_channels=out_features, kernel_size=5, padding=2)
    def reset_parameters(self):
        # stdv = 1. / sqrt(self.weight.size(1))
        nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            self.bias.data.fill_(0.1)

    def forward(self, input, adj):
        # To support batch operations
        support = input.matmul(self.weight)
        output = adj.matmul(support)

        if self.bias is not None:
            output = output + self.bias
        if self.in_features != self.out_features and self.residual:
            input = input.permute(0,2,1)
            res = self.residual(input)
            res = res.permute(0,2,1)
            output = output + res
        else:
            output = output + self.residual(input)

        return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'

######################################################

class SimilarityAdj(Module):

    def __init__(self, in_features, out_features):
        super(SimilarityAdj, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        self.weight0 = Parameter(FloatTensor(in_features, out_features))
        self.weight1 = Parameter(FloatTensor(in_features, out_features))
        self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # stdv = 1. / sqrt(self.weight0.size(1))
        nn.init.xavier_uniform_(self.weight0)
        nn.init.xavier_uniform_(self.weight1)

    def forward(self, input, seq_len):
        # To support batch operations
        soft = nn.Softmax(1)
        theta = torch.matmul(input, self.weight0)
        phi = torch.matmul(input, self.weight0)
        phi2 = phi.permute(0, 2, 1)
        sim_graph = torch.matmul(theta, phi2)

        theta_norm = torch.norm(theta, p=2, dim=2, keepdim=True)  # B*T*1
        phi_norm = torch.norm(phi, p=2, dim=2, keepdim=True)  # B*T*1
        x_norm_x = theta_norm.matmul(phi_norm.permute(0, 2, 1))
        sim_graph = sim_graph / (x_norm_x + 1e-20)

        output = torch.zeros_like(sim_graph)
        if seq_len is None:
            for i in range(sim_graph.shape[0]):
                tmp = sim_graph[i]
                adj2 = tmp
                adj2 = F.threshold(adj2, 0.7, 0)
                adj2 = soft(adj2)
                output[i] = adj2
        else:
            for i in range(len(seq_len)):
                tmp = sim_graph[i, :seq_len[i], :seq_len[i]]
                adj2 = tmp
                adj2 = F.threshold(adj2, 0.7, 0)
                adj2 = soft(adj2)
                output[i, :seq_len[i], :seq_len[i]] = adj2

        return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'

class DistanceAdj(Module):

    def __init__(self):
        super(DistanceAdj, self).__init__()
        self.sigma = Parameter(FloatTensor(1))
        self.sigma.data.fill_(0.1)

    def forward(self, batch_size, max_seqlen):
        # To support batch operations
        self.arith = np.arange(max_seqlen).reshape(-1, 1)
        dist = pdist(self.arith, metric='cityblock').astype(np.float32)
        self.dist = torch.from_numpy(squareform(dist)).to('cuda')
        self.dist = torch.exp(-self.dist / torch.exp(torch.tensor(1.)))
        self.dist = torch.unsqueeze(self.dist, 0).repeat(batch_size, 1, 1).to('cuda')
        return self.dist

### E) Model

In [10]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as torch_init
import os
# from layers import GraphConvolution, SimilarityAdj, DistanceAdj


def weight_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1 or classname.find('Linear') != -1:
        torch_init.xavier_uniform_(m.weight)
        # m.bias.data.fill_(0.1)

class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()

        n_features = args.feature_size
        n_class = args.num_classes

        self.conv1d1 = nn.Conv1d(in_channels=n_features, out_channels=512, kernel_size=1, padding=0)
        self.conv1d2 = nn.Conv1d(in_channels=512, out_channels=128, kernel_size=1, padding=0)
        self.conv1d3 = nn.Conv1d(in_channels=128, out_channels=32, kernel_size=5, padding=2)
        self.conv1d4 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=5, padding=2)
        # Graph Convolution
        self.gc1 = GraphConvolution(128, 32, residual=True)  # nn.Linear(128, 32)
        self.gc2 = GraphConvolution(32, 32, residual=True)
        self.gc3 = GraphConvolution(128, 32, residual=True)  # nn.Linear(128, 32)
        self.gc4 = GraphConvolution(32, 32, residual=True)
        self.gc5 = GraphConvolution(128, 32, residual=True)  # nn.Linear(128, 32)
        self.gc6 = GraphConvolution(32, 32, residual=True)
        self.simAdj = SimilarityAdj(n_features, 32)
        self.disAdj = DistanceAdj()

        self.classifier = nn.Linear(32*3, n_class)
        self.approximator = nn.Sequential(nn.Conv1d(128, 64, 1, padding=0), nn.ReLU(),
                                          nn.Conv1d(64, 32, 1, padding=0), nn.ReLU())
        self.conv1d_approximator = nn.Conv1d(32, 1, 5, padding=0)
        self.dropout = nn.Dropout(0.6)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.apply(weight_init)



    def forward(self, inputs, seq_len):
        x = inputs.permute(0, 2, 1)  # for conv1d
        x = self.relu(self.conv1d1(x))
        x = self.dropout(x)
        x = self.relu(self.conv1d2(x))
        x = self.dropout(x)

        logits = self.approximator(x)
        logits = F.pad(logits, (4, 0))
        logits = self.conv1d_approximator(logits)
        logits = logits.permute(0, 2, 1)
        x = x.permute(0, 2, 1)  # b*t*c

        ## gcn
        scoadj = self.sadj(logits.detach(), seq_len)
        adj = self.adj(inputs, seq_len)
        disadj = self.disAdj(x.shape[0], x.shape[1])
        x1_h = self.relu(self.gc1(x, adj))
        x1_h = self.dropout(x1_h)
        x2_h = self.relu(self.gc3(x, disadj))
        x2_h = self.dropout(x2_h)
        x3_h = self.relu(self.gc5(x, scoadj))
        x3_h = self.dropout(x3_h)
        x1 = self.relu(self.gc2(x1_h, adj))
        x1 = self.dropout(x1)
        x2 = self.relu(self.gc4(x2_h, disadj))
        x2 = self.dropout(x2)
        x3 = self.relu(self.gc6(x3_h, scoadj))
        x3 = self.dropout(x3)
        x = torch.cat((x1, x2, x3), 2)
        x = self.classifier(x)
        return x, logits

    def sadj(self, logits, seq_len):
        lens = logits.shape[1]
        soft = nn.Softmax(1)
        logits2 = self.sigmoid(logits).repeat(1, 1, lens)
        tmp = logits2.permute(0, 2, 1)
        adj = 1. - torch.abs(logits2 - tmp)
        self.sig = lambda x:1/(1+torch.exp(-((x-0.5))/0.1))
        adj = self.sig(adj)
        output = torch.zeros_like(adj)
        if seq_len is None:
            for i in range(logits.shape[0]):
                tmp = adj[i]
                adj2 = soft(tmp)
                output[i] = adj2
        else:
            for i in range(len(seq_len)):
                tmp = adj[i, :seq_len[i], :seq_len[i]]
                adj2 = soft(tmp)
                output[i, :seq_len[i], :seq_len[i]] = adj2
        return output


    def adj(self, x, seq_len):
        soft = nn.Softmax(1)
        x2 = x.matmul(x.permute(0,2,1)) # B*T*T
        x_norm = torch.norm(x, p=2, dim=2, keepdim=True)  # B*T*1
        x_norm_x = x_norm.matmul(x_norm.permute(0,2,1))
        x2 = x2/(x_norm_x+1e-20)
        output = torch.zeros_like(x2)
        if seq_len is None:
            for i in range(x.shape[0]):
                tmp = x2[i]
                adj2 = tmp
                adj2 = F.threshold(adj2, 0.7, 0)
                adj2 = soft(adj2)
                output[i] = adj2
        else:
            for i in range(len(seq_len)):
                tmp = x2[i, :seq_len[i], :seq_len[i]]
                adj2 = tmp
                adj2 = F.threshold(adj2, 0.7, 0)
                adj2 = soft(adj2)
                output[i, :seq_len[i], :seq_len[i]] = adj2

        return output



In [None]:
import os
print("Current directory:", os.getcwd())

Current directory: /mydrive/MyDrive


## Args

Here are the default args that were obtained via cmd line arg parser. I just created a class 'Args' that holds the default config for the model.

I think the most important args:

*`Modality`*: Determines whether we want to use either audio alone, video alone, both audio and video, audio, video, and flow, etc. for training

*`List`*: point to the list containing filenames for all training and testing data.

*`workers`*: I believe this is the number of individual threads/processes running during training or testing. In ther model it was set to 4 by defualt but that spit out an error so it lowered it to 1. Prob a sign that we need to do heavy downsampling to compensate for lack of parallel processing.

In [11]:
class Args:
    def __init__(self):
        self.modality = 'MIX2'
        # Original paths
        self.rgb_list = '/content/final_dl/list/rgb.list'
        self.flow_list = '/content/final_dl/list/flow.list'
        self.audio_list = '/content/final_dl/list/audio.list'

        # Train paths
        self.train_rgb_list = '/content/final_dl/list/rgb_train.list'
        self.train_flow_list = '/content/final_dl/list/flow_train.list'
        self.train_audio_list = '/content/final_dl/list/audio_train.list'

        # Val paths
        self.val_rgb_list = '/content/final_dl/list/rgb_val.list'
        self.val_flow_list = '/content/final_dl/list/flow_val.list'
        self.val_audio_list = '/content/final_dl/list/audio_val.list'

        # Test paths
        self.test_rgb_list = '/content/final_dl/list/rgb_test.list'
        self.test_flow_list = '/content/final_dl/list/flow_test.list'
        self.test_audio_list = '/content/final_dl/list/audio_test.list'

        self.gt = '/content/final_dl/list/gt.npy'
        self.gpus = 1
        self.lr = 0.0001
        self.batch_size = 128
        self.workers = 1  # Reduced from 4 to avoid memory issues
        self.model_name = 'wsanodet'
        self.pretrained_ckpt = None
        self.feature_size = 1152  # 1024 + 128
        self.num_classes = 1
        self.dataset_name = 'XD-Violence'
        self.max_seqlen = 200
        self.max_epoch = 50

args = Args()

## Val Split For MultiModal Data

In [12]:
import os
import glob
import random
from pathlib import Path
import numpy as np

def get_video_id(filepath):
    """Extract video ID from filepath based on common prefix before _label
    e.g., "/path/to/video123_label_A.npy" -> "video123"
    """
    filename = os.path.basename(filepath)
    if '_label' in filename:
        return filename.split('_label')[0]
    return filename.split('.')[0]

def find_matching_files():
    """
    Find and align RGB and audio feature files.
    Returns dict mapping video IDs to their RGB and audio paths
    """
    rgb_path = "/content/final_dl/dl_files/i3d-features/RGB"
    audio_path = "/content/final_dl/list/xx/train"

    # Get all files
    rgb_files = glob.glob(os.path.join(rgb_path, "*.npy"))
    audio_files = glob.glob(os.path.join(audio_path, "*.npy"))

    # Create mappings that preserve the 5:1 ratio
    rgb_map = {}
    for f in rgb_files:
        vid_id = get_video_id(f)
        if vid_id not in rgb_map:
            rgb_map[vid_id] = []
        rgb_map[vid_id].append(f)

    audio_map = {get_video_id(f): f for f in audio_files}

    # Find common video IDs
    common_ids = set(rgb_map.keys()) & set(audio_map.keys())

    # Create aligned mapping
    aligned_files = {
        vid_id: {
            'rgb': sorted(rgb_map[vid_id]),  # Sort to maintain consistent ordering
            'audio': audio_map[vid_id],
            'is_normal': '_label_A' in rgb_map[vid_id][0]  # Check first RGB file for label
        }
        for vid_id in common_ids
    }

    print(f"Found {len(aligned_files)} aligned RGB-Audio pairs")
    return aligned_files

def create_splits(aligned_files, train_ratio=0.8, seed=42):
    """Split the video IDs first, then we'll expand to files in write_list_files"""
    random.seed(seed)
    video_ids = list(aligned_files.keys())
    train_size = int(len(video_ids) * train_ratio)
    train_ids = random.sample(video_ids, train_size)
    val_ids = [vid for vid in video_ids if vid not in train_ids]

    return {
        'train': train_ids,
        'val': val_ids
    }

def write_list_files(split_data, aligned_files, output_dir="/content/final_dl/list"):
    """Write list files with audio files repeated to match RGB structure"""
    os.makedirs(output_dir, exist_ok=True)

    for split_name, video_ids in split_data.items():
        # RGB list - one entry per frame
        rgb_path = os.path.join(output_dir, f'rgb_{split_name}.list')
        with open(rgb_path, 'w') as f:
            for vid_id in video_ids:
                for rgb_file in aligned_files[vid_id]['rgb']:
                    f.write(f"{rgb_file}\n")

        # Audio list - one entry per video (not per frame)
        audio_path = os.path.join(output_dir, f'audio_{split_name}.list')
        with open(audio_path, 'w') as f:
            for vid_id in video_ids:
                audio_file = aligned_files[vid_id]['audio']
                f.write(f"{audio_file}\n")  # Write once only


## Create Dataloaders for multimodal

In [13]:
aligned_files = find_matching_files()

    # Create train/val splits
split_data = create_splits(aligned_files)

    # Write list files
write_list_files(split_data, aligned_files)


from torch.utils.data import DataLoader
def create_data_loaders(args):
    """
    Create train, validation and test data loaders
    """

    print(args)
    print("Creating data loaders...")

    # Create train loader
    train_dataset = Dataset(args, mode='train')

    print("audio size")
    print(len(train_dataset.audio_list))

    print("rgb size")
    print(len(train_dataset.list))
    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True
    )
    print(f"Train loader created with {len(train_dataset)} samples")

    # Create validation loader
    val_dataset = Dataset(args, mode='val')
    val_loader = DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,  # No need to shuffle validation data
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True
    )
    print(f"Validation loader created with {len(val_dataset)} samples")

    # Create test loader with smaller batch size as per original code
    test_dataset = Dataset(args, mode='test')
    test_loader = DataLoader(
        test_dataset,
        batch_size=5,  # Using smaller batch size for testing
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True
    )
    print(f"Test loader created with {len(test_dataset)} samples")

    return train_loader, val_loader, test_loader

train_loader, val_loader, test_loader = create_data_loaders(args)

Found 3953 aligned RGB-Audio pairs
<__main__.Args object at 0x7bc788f1b880>
Creating data loaders...
audio size
3162
rgb size
15815
Train loader created with 15815 samples
Validation loader created with 3955 samples
Test loader created with 4000 samples


## VAL SPLIT FOR SINGLE MODALITY

In [14]:
def create_single_modality_data_loaders(args, modality='AUDIO'):
    """
    Create train, validation and test data loaders for a single modality
    """
    print(f"Creating {modality} data loaders...")

    # Create new args with only needed attributes
    args_new = Args()
    args_new.modality = modality

    # List files needed for train/val/test splits
    if modality == 'AUDIO':
        args_new.train_audio_list = args.train_audio_list
        args_new.val_audio_list = args.val_audio_list
        args_new.test_audio_list = args.test_audio_list
    elif modality == 'RGB':
        args_new.train_rgb_list = args.train_rgb_list
        args_new.val_rgb_list = args.val_rgb_list
        args_new.test_rgb_list = args.test_rgb_list
    elif modality == 'FLOW':
        args_new.train_flow_list = args.train_flow_list
        args_new.val_flow_list = args.val_flow_list
        args_new.test_flow_list = args.test_flow_list

    # Create data loaders
    train_dataset = Dataset(args_new, mode='train')
    train_loader = DataLoader(
        train_dataset,
        batch_size=args_new.batch_size,
        shuffle=True,
        num_workers=args_new.workers,
        pin_memory=True,
        drop_last=True
    )
    print(f"Train loader created with {len(train_dataset)} samples")

    val_dataset = Dataset(args_new, mode='val')
    val_loader = DataLoader(
        val_dataset,
        batch_size=args_new.batch_size,
        shuffle=False,
        num_workers=args_new.workers,
        pin_memory=True,
        drop_last=True
    )
    print(f"Validation loader created with {len(val_dataset)} samples")

    test_dataset = Dataset(args_new, mode='test')
    test_loader = DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=args_new.workers,
        pin_memory=True,
        drop_last=True
    )
    print(f"Test loader created with {len(test_dataset)} samples")

    return train_loader, val_loader, test_loader

## Create data loader for specified modality

In [53]:
# For audio only
#train_loader, val_loader, test_loader = create_single_modality_data_loaders(args, modality='AUDIO')

# For RGB only
#train_loader, val_loader, test_loader = create_single_modality_data_loaders(args, modality='RGB')

# For flow only
## CURRENTLY, FLOW IS NOT SUPPORTED, BUT IMPLEMENTING IT WOULD NOT BE THAT CHALLENGING. YOU WOULD SIMPLY HAVE TO
## ADJUST THE CODE A FEW CELLS ABOVE SO TO WRITE AN EQUIVALENT OF "find_matching_files" FOR FLOW DATA
#train_loader, val_loader, test_loader = create_single_modality_data_loaders(args, modality='FLOW')

In [15]:
# Testing the val splitter, no need to run this

def inspect_batch_files(args, batch_size=1, num_batches=5):
    """
    Inspect the first few batches to see which files are being loaded and their dimensions
    """
    #from torch.utils.data import DataLoader
    #from dataset import Dataset  # Your dataset class

    dataset = Dataset(args, mode='train')

    print("Inspecting individual samples:")
    for i in range(min(25, len(dataset))):
        try:
            # Get the filepaths that would be loaded
            rgb_path = dataset.list[i].strip('\n')
            audio_path = dataset.audio_list[i//5].strip('\n')

            # Load the features
            features1 = np.array(np.load(rgb_path), dtype=np.float32)
            features2 = np.array(np.load(audio_path), dtype=np.float32)

            print(f"\nSample {i}:")
            print(f"RGB file: {os.path.basename(rgb_path)}")
            print(f"RGB shape: {features1.shape}")
            print(f"Audio file: {os.path.basename(audio_path)}")
            print(f"Audio shape: {features2.shape}")

            # Try the concatenation
            try:
                if features1.shape[0] != features2.shape[0]:
                    print("⚠️ Dimension mismatch!")
                    if features1.shape[0] - 1 == features2.shape[0]:
                        print("Would work with [:-1] slice")
                    features = np.concatenate((features1[:-1], features2), axis=1)
                    print("Concatenation successful after adjustment")
            except ValueError as e:
                print(f"❌ Concatenation failed: {str(e)}")

        except Exception as e:
            print(f"Error loading sample {i}: {str(e)}")

        print("-" * 50)

# Run the inspection
args.modality = "MIX2"
inspect_batch_files(args)

Inspecting individual samples:

Sample 0:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__0.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
--------------------------------------------------

Sample 1:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__1.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
--------------------------------------------------

Sample 2:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__2.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
--------------------------------------------------

Sample 3:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__3.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
--------------------------------------------

## Testing PreTrained Model

In [16]:
from torch.utils.data import DataLoader
import torch
import numpy as np
# from model import Model
# from dataset import Dataset
# from test import test
# import option
import time

if __name__ == '__main__':

  device = torch.device("cuda")

  test_loader = DataLoader(Dataset(args, mode='test'),
                            batch_size=5, shuffle=False,
                            num_workers=args.workers, pin_memory=True)
  model = Model(args)
  model = model.to(device)
  # had to change path to "/content/final_dl/wsanodet_mix2.pkl"
  model_dict = model.load_state_dict(
      {k.replace('module.', ''): v for k, v in torch.load('/content/final_dl/wsanodet_mix2.pkl').items()})

  gt = np.load(args.gt)
  st = time.time()
  pr_auc, pr_auc_online = test(test_loader, model, device, gt)
  print('Time:{}'.format(time.time()-st))
  print('offline pr_auc:{0:.4}; online pr_auc:{1:.4}\n'.format(pr_auc, pr_auc_online))


  {k.replace('module.', ''): v for k, v in torch.load('/content/final_dl/wsanodet_mix2.pkl').items()})


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-8-9ae98fe16be2>", line 83, in __getitem__
    features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
  File "/usr/local/lib/python3.10/dist-packages/numpy/lib/npyio.py", line 427, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))
FileNotFoundError: [Errno 2] No such file or directory: '/media/peng/Samsung_T5/i3d-features/RGBTest/Bad.Boys.1995__#01-11-55_01-12-40_label_G-B2-B6__0.npy'


how to save a model for the future.

In [None]:
# torch.save(model.state_dict(), "/content/test.pkl")

# Training HLNET


In [17]:
import torch


def CLAS(logits, label, seq_len, criterion, device, is_topk=True):
    logits = logits.squeeze()
    instance_logits = torch.zeros(0).to(device)  # tensor([])
    for i in range(logits.shape[0]):
        if is_topk:
            tmp, _ = torch.topk(logits[i][:seq_len[i]], k=int(seq_len[i]//16+1), largest=True)
            tmp = torch.mean(tmp).view(1)
        else:
            tmp = torch.mean(logits[i, :seq_len[i]]).view(1)
        instance_logits = torch.cat((instance_logits, tmp))

    instance_logits = torch.sigmoid(instance_logits)

    clsloss = criterion(instance_logits, label)
    return clsloss


def CENTROPY(logits, logits2, seq_len, device):
    instance_logits = torch.tensor(0).to(device)  # tensor([])
    for i in range(logits.shape[0]):
        tmp1 = torch.sigmoid(logits[i, :seq_len[i]]).squeeze()
        tmp2 = torch.sigmoid(logits2[i, :seq_len[i]]).squeeze()
        loss = torch.mean(-tmp1.detach() * torch.log(tmp2))
        instance_logits = instance_logits + loss
    instance_logits = instance_logits/logits.shape[0]
    return instance_logits


def train(dataloader, model, optimizer, criterion, device, is_topk):
    with torch.set_grad_enabled(True):
        model.train()
        for i, (input, label) in enumerate(dataloader):
            seq_len = torch.sum(torch.max(torch.abs(input), dim=2)[0]>0, 1)
            input = input[:, :torch.max(seq_len), :]
            input, label = input.float().to(device), label.float().to(device)
            logits, logits2 = model(input, seq_len)
            clsloss = CLAS(logits, label, seq_len, criterion, device, is_topk)
            clsloss2 = CLAS(logits2, label, seq_len, criterion, device, is_topk)
            croloss = CENTROPY(logits, logits2, seq_len, device)

            total_loss = clsloss + clsloss2 + 5*croloss
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

In [18]:
# Define the input .list file containing the original file paths
input_list_file = "/content/final_dl/list/audio.list"

# Define the directory to update the paths to
new_directory = "/content/final_dl/list/xx/train"

# Define the output .list file for the updated file paths
output_list_file = "/content/final_dl/list/audio.list"

# Read the original file paths from the .list file
with open(input_list_file, "r") as file:
    original_paths = file.readlines()

# Process and update each file path
updated_paths = []
for path in original_paths:
    path = path.strip()  # Remove any leading/trailing whitespace or newlines
    if path:  # Ensure the path is not empty
        # Extract the filename from the original path and create a new path
        filename = path.split("/")[-1]
        updated_path = f"{new_directory}/{filename}"
        updated_paths.append(updated_path)

# Write the updated paths to the output .list file
with open(output_list_file, "w") as file:
    file.write("\n".join(updated_paths))

print(f"Updated paths have been written to {output_list_file}")

Updated paths have been written to /content/final_dl/list/audio.list


In [19]:
# Define the input .list file containing the original file paths
input_list_file = "/content/final_dl/list/rgb.list"

# Define the directory to update the paths to
new_directory = "/content/final_dl/dl_files/i3d-features/RGB"

# Define the output .list file for the updated file paths
output_list_file = "/content/final_dl/list/rgb.list"

# Read the original file paths from the .list file
with open(input_list_file, "r") as file:
    original_paths = file.readlines()

# Process and update each file path
updated_paths = []
for path in original_paths:
    path = path.strip()  # Remove any leading/trailing whitespace or newlines
    if path:  # Ensure the path is not empty
        # Extract the filename from the original path and create a new path
        filename = path.split("/")[-1]
        updated_path = f"{new_directory}/{filename}"
        updated_paths.append(updated_path)

# Write the updated paths to the output .list file
with open(output_list_file, "w") as file:
    file.write("\n".join(updated_paths))

print(f"Updated paths have been written to {output_list_file}")

Updated paths have been written to /content/final_dl/list/rgb.list


## test (ignore)

In [20]:
class Args:
  def __init__(self):
      self.modality = 'MIX2'
      self.rgb_list = '/content/final_dl/list/rgb.list'
      self.flow_list = '/content/final_dl/list/flow.list'
      self.audio_list = '/content/final_dl/list/audio.list'
      self.test_rgb_list = '/content/final_dl/list/rgb_test.list'
      self.test_flow_list = '/content/final_dl/list/flow_test.list'
      self.test_audio_list = '/content/final_dl/list/audio_test.list'
      self.gt = '/content/final_dl/list/gt.npy'
      self.gpus = 1
      self.lr = 0.0001
      self.batch_size = 128
      self.workers = 1
      self.model_name = 'wsanodet'
      self.pretrained_ckpt = None
      self.feature_size = 1152  # 1024 + 128
      self.num_classes = 1
      self.dataset_name = 'XD-Violence'
      self.max_seqlen = 200
      self.max_epoch = 50

  # Create an instance of the Args class
args = Args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model(args)
model = model.cuda()

test_loader = DataLoader(Dataset(args, mode='test'),
                          batch_size=5, shuffle=True,
                          num_workers=args.workers, pin_memory=True)

with torch.no_grad():
  for i, (input,label) in enumerate(test_loader):
    input = input.to(device)

    print(input.shape)
    ############
    ### NOTE: ## setting seq_len to None pads training data in the sequence dim to 200
    ############
    logits, logits2 = model(inputs=input, seq_len=None)
    # print(logits, logits2)
    if i == 2:
      break

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-8-9ae98fe16be2>", line 83, in __getitem__
    features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
  File "/usr/local/lib/python3.10/dist-packages/numpy/lib/npyio.py", line 427, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))
FileNotFoundError: [Errno 2] No such file or directory: '/media/peng/Samsung_T5/i3d-features/RGBTest/Before.Sunrise.1995__#00-04-20_00-05-35_label_A__3.npy'


## Training HL NET

In [21]:
from torch.utils.data import DataLoader
import torch.optim as optim
import torch
import time
import numpy as np
import random
import os
# import option


def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True


# torch.multiprocessing.set_start_method('spawn')
# setup_seed(2333)
# args = option.parser.parse_args()

!export TORCH_USE_CUDA_DSA=ON
device = torch.device("cuda")
train_loader = DataLoader(Dataset(args, mode='train'),
                          batch_size=args.batch_size, shuffle=True,
                          num_workers=args.workers, pin_memory=True)
test_loader = DataLoader(Dataset(args, mode='test'),
                          batch_size=5, shuffle=False,
                          num_workers=args.workers, pin_memory=True)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model(args)
model = model.cuda()

for name, value in model.named_parameters():
    print(name)
approximator_param = list(map(id, model.approximator.parameters()))
approximator_param += list(map(id, model.conv1d_approximator.parameters()))
base_param = filter(lambda p: id(p) not in approximator_param, model.parameters())

if not os.path.exists('./ckpt'):
    os.makedirs('./ckpt')
optimizer = optim.Adam([{'params': base_param},
                        {'params': model.approximator.parameters(), 'lr': args.lr / 2},
                        {'params': model.conv1d_approximator.parameters(), 'lr': args.lr / 2},
                        ],
                        lr=args.lr, weight_decay=0.000)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.1)
criterion = torch.nn.BCELoss()

is_topk = True
gt = np.load(args.gt)
pr_auc, pr_auc_online = test(test_loader, model, device, gt)
print('Random initalization: offline pr_auc:{0:.4}; online pr_auc:{1:.4}\n'.format(pr_auc, pr_auc_online))
for epoch in range(args.max_epoch):
    scheduler.step()
    st = time.time()
    train(train_loader, model, optimizer, criterion, device, is_topk)
    if epoch % 2 == 0 and not epoch == 0:
        torch.save(model.state_dict(), './ckpt/'+args.model_name+'{}.pkl'.format(epoch))

    pr_auc, pr_auc_online = test(test_loader, model, device, gt)
    print('Epoch {0}/{1}: offline pr_auc:{2:.4}; online pr_auc:{3:.4}\n'.format(epoch, args.max_epoch, pr_auc, pr_auc_online))
torch.save(model.state_dict(), './ckpt/' + args.model_name + '.pkl')

AttributeError: 'Args' object has no attribute 'train_rgb_list'

# Training VAE



## VAE MODEL

added batch normalization (gus)

In [22]:
import torch
from torch import nn

class Sampling(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, z_means, z_log_vars):
        epsilon = torch.randn_like(z_means, dtype=torch.float32)
        return z_means + torch.exp(0.5 * z_log_vars) * epsilon

class Encoder(nn.Module):
    def __init__(self, latent_dim, input_dim=1152, seq_len=200):
        super().__init__()
        self.latent_dim = latent_dim

        # Reduced number of feature maps in encoder
        self.encoder = nn.Sequential(
            nn.Conv1d(input_dim, 256, kernel_size=3, stride=2, padding=1),  # Reduced from 576
            nn.BatchNorm1d(256),
            nn.ReLU(True),
            nn.Conv1d(256, 128, kernel_size=3, stride=2, padding=1),  # Reduced from 288
            nn.BatchNorm1d(128),
            nn.ReLU(True),
            nn.Conv1d(128, 64, kernel_size=3, stride=2, padding=1),  # Reduced from 144
            nn.BatchNorm1d(64),
            nn.ReLU(True),
            nn.Flatten()
        )

        flattened_dim = 64 * 25  # Updated based on reduced features

        self.lin_mean = nn.Sequential(
            nn.Linear(flattened_dim, latent_dim),
            nn.BatchNorm1d(latent_dim)
        )

        self.lin_log_var = nn.Sequential(
            nn.Linear(flattened_dim, latent_dim),
            nn.BatchNorm1d(latent_dim)
        )

        self.sampling = Sampling()

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.encoder(x)
        z_means = self.lin_mean(x)
        z_log_vars = self.lin_log_var(x)
        z = self.sampling(z_means, z_log_vars)
        return z, z_means, z_log_vars

class Decoder(nn.Module):
    def __init__(self, latent_dim, input_dim=1152, seq_len=200):
        super().__init__()
        self.seq_len = seq_len
        flattened_dim = 64 * 25  # Updated based on reduced features

        self.decoder_fc = nn.Sequential(
            nn.Linear(latent_dim, flattened_dim),
            nn.BatchNorm1d(flattened_dim),
            nn.ReLU(True)
        )

        # Reduced number of feature maps in decoder
        self.decoder_conv = nn.Sequential(
            nn.ConvTranspose1d(64, 128, kernel_size=3, stride=2, padding=1, output_padding=1),  # Reduced from 144->288
            nn.BatchNorm1d(128),
            nn.ReLU(True),
            nn.ConvTranspose1d(128, 256, kernel_size=3, stride=2, padding=1, output_padding=1),  # Reduced from 288->576
            nn.BatchNorm1d(256),
            nn.ReLU(True),
            nn.ConvTranspose1d(256, input_dim, kernel_size=3, stride=2, padding=1, output_padding=1),  # Reduced from 576->input_dim
            nn.BatchNorm1d(input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.decoder_fc(x)
        x = x.view(-1, 64, 25)  # Updated based on reduced features
        x = self.decoder_conv(x)
        x = x.permute(0, 2, 1)
        return x

class VAE(nn.Module):
    def __init__(self, latent_dim, input_dim=1152, seq_len=200):
        super().__init__()
        self.encoder = Encoder(latent_dim, input_dim, seq_len)
        self.decoder = Decoder(latent_dim, input_dim, seq_len)

    def forward(self, x):
        z, z_means, z_log_vars = self.encoder(x)
        x_reconstructed = self.decoder(z)
        return x_reconstructed, z_means, z_log_vars

In [23]:
import torch.utils.data as data
import numpy as np
import os
import glob
import random
from pathlib import Path

class NormalDataset(data.Dataset):
    def __init__(self, args, transform=None, mode='train'):
        self.modality = args.modality
        self.normal_flag = '_label_A'
        self.max_seqlen = args.max_seqlen
        self.transform = transform
        self.test_mode = (mode == 'test')

        # Set appropriate file lists based on mode
        if mode == 'test':
            self.rgb_list_file = args.test_rgb_list
            self.flow_list_file = args.test_flow_list
            self.audio_list_file = args.test_audio_list
        elif mode == 'val':
            self.rgb_list_file = args.val_rgb_list
            self.flow_list_file = args.val_flow_list
            self.audio_list_file = args.val_audio_list
        else:  # train
            self.rgb_list_file = args.train_rgb_list
            self.flow_list_file = args.train_flow_list
            self.audio_list_file = args.train_audio_list

        self._parse_list()

    def _parse_list(self):
        """Parse file lists and filter for normal samples only"""
        def filter_normal_samples(file_list):
            return [f for f in file_list if self.normal_flag in f]

        if self.modality == 'AUDIO':
            self.list = filter_normal_samples(list(open(self.audio_list_file)))
        elif self.modality == 'RGB':
            self.list = filter_normal_samples(list(open(self.rgb_list_file)))
        elif self.modality == 'FLOW':
            self.list = filter_normal_samples(list(open(self.flow_list_file)))
        elif self.modality == 'MIX2':
            # For MIX2, we need to handle the 5:1 ratio between RGB and audio
            self.list = filter_normal_samples(list(open(self.rgb_list_file)))
            # Filter audio list and ensure alignment
            all_audio = list(open(self.audio_list_file))
            self.audio_list = [f for f in all_audio if self.normal_flag in f]

            # Ensure RGB and audio lists are aligned (5:1 ratio)
            rgb_video_ids = set([self._get_video_id(f) for f in self.list])
            audio_video_ids = set([self._get_video_id(f) for f in self.audio_list])
            common_ids = rgb_video_ids & audio_video_ids

            # Filter lists to only include common videos
            self.list = [f for f in self.list if self._get_video_id(f) in common_ids]
            self.audio_list = [f for f in self.audio_list if self._get_video_id(f) in common_ids]

    def _get_video_id(self, filepath):
        """Extract video ID from filepath"""
        filename = os.path.basename(filepath.strip('\n'))
        return filename.split('_label')[0]

    def __getitem__(self, index):
        if self.modality in ['RGB', 'FLOW', 'AUDIO']:
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'MIX2':
            # Load RGB features
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            # Load corresponding audio features (accounting for 5:1 ratio)
            audio_index = index // 5
            features2 = np.array(np.load(self.audio_list[audio_index].strip('\n')), dtype=np.float32)

            # Handle potential dimension mismatch
            if features1.shape[0] > features2.shape[0]:
                features1 = features1[:features2.shape[0]]
            features = np.concatenate((features1, features2), axis=1)

        if self.transform is not None:
            features = self.transform(features)

        features = process_feat(features, self.max_seqlen, is_random=not self.test_mode)

        # Always return label 0 since these are normal samples
        return features, 0.0

    def __len__(self):
        return len(self.list)

def create_normal_data_loaders(args):
    """Create data loaders for normal samples only"""
    print("Creating normal-only data loaders...")

    # Create train loader
    train_dataset = NormalDataset(args, mode='train')
    train_loader = data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True
    )
    print(f"Normal train loader created with {len(train_dataset)} samples")

    # Create validation loader
    val_dataset = NormalDataset(args, mode='val')
    val_loader = data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True
    )
    print(f"Normal validation loader created with {len(val_dataset)} samples")

    # Create test loader
    test_dataset = NormalDataset(args, mode='test')
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True
    )
    print(f"Normal test loader created with {len(test_dataset)} samples")

    return train_loader, val_loader, test_loader

def process_feat(feat, length, is_random=True):
    """Process features to have consistent length"""
    if len(feat) > length:
        if is_random:
            r = np.random.randint(len(feat) - length)
            return feat[r:r + length]
        else:
            r = np.linspace(0, len(feat) - 1, length, dtype=np.uint16)
            return feat[r, :]
    else:
        return np.pad(feat, ((0, length - len(feat)), (0, 0)), mode='constant', constant_values=0)


## VAE Training func (Gus)


In [24]:
import torch
from datetime import datetime
import os

class EarlyStopping:
    """Early stopping to prevent overfitting"""
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0
        return self.early_stop

def validate_vae(vae, val_loader, device):
    """Run validation loop and return average loss"""
    vae.eval()
    total_loss = 0
    total_recon_loss = 0
    total_kl_loss = 0
    n_samples = 0

    with torch.no_grad():
        for data, labels in val_loader:
            # Only process normal samples (label == 0)
            normal_mask = (labels == 0.0)
            if not normal_mask.any():
                continue

            data = data[normal_mask].to(device)
            recon_data, mu, logvar = vae(data)

            # Reconstruction loss
            recon_criterion = torch.nn.MSELoss(reduction='sum')
            recon_loss = recon_criterion(recon_data, data)

            # KL divergence loss
            kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

            # Total loss
            loss = recon_loss + kl_loss

            total_loss += loss.item()
            total_recon_loss += recon_loss.item()
            total_kl_loss += kl_loss.item()
            n_samples += data.size(0)

    # Calculate averages
    if n_samples > 0:
        avg_loss = total_loss / n_samples
        avg_recon = total_recon_loss / n_samples
        avg_kl = total_kl_loss / n_samples
    else:
        avg_loss = float('inf')
        avg_recon = float('inf')
        avg_kl = float('inf')

    vae.train()
    return avg_loss, avg_recon, avg_kl

def train_vae(vae, train_loader, val_loader, args, save_dir='vae_checkpoints'):
    """Main training loop for VAE"""

    # Create directory for saving checkpoints
    os.makedirs(save_dir, exist_ok=True)

    # Setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    vae = vae.to(device)
    optimizer = torch.optim.Adam(vae.parameters(), lr=args.lr)
    early_stopping = EarlyStopping(patience=5)

    # Training loop
    best_val_loss = float('inf')
    for epoch in range(args.max_epoch):
        # Training
        vae.train()
        train_loss = 0
        train_recon = 0
        train_kl = 0
        n_samples = 0

        for batch_idx, (data, labels) in enumerate(train_loader):
            # Only process normal samples (label == 0)
            normal_mask = (labels == 0.0)
            if not normal_mask.any():
                continue

            data = data[normal_mask].to(device)
            optimizer.zero_grad()

            # Forward pass
            recon_data, mu, logvar = vae(data)

            # Losses
            recon_criterion = torch.nn.MSELoss(reduction='sum')
            recon_loss = recon_criterion(recon_data, data)
            kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            loss = recon_loss + kl_loss

            # Backward pass
            loss.backward()
            optimizer.step()

            # Record losses
            train_loss += loss.item()
            train_recon += recon_loss.item()
            train_kl += kl_loss.item()
            n_samples += data.size(0)

        # Calculate average training losses
        if n_samples > 0:
            avg_train_loss = train_loss / n_samples
            avg_train_recon = train_recon / n_samples
            avg_train_kl = train_kl / n_samples
        else:
            print("Warning: No normal samples in training batch")
            continue

        # Validation
        val_loss, val_recon, val_kl = validate_vae(vae, val_loader, device)

        # Print progress
        print(f'Epoch {epoch+1}/{args.max_epoch}:')
        print(f'Training - Loss: {avg_train_loss:.4f}, Recon: {avg_train_recon:.4f}, KL: {avg_train_kl:.4f}')
        print(f'Validation - Loss: {val_loss:.4f}, Recon: {val_recon:.4f}, KL: {val_kl:.4f}\n')

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            save_path = os.path.join(save_dir, f'vae_best_{timestamp}.pt')
            torch.save({
                'epoch': epoch,
                'model_state_dict': vae.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': avg_train_loss,
                'val_loss': val_loss,
            }, save_path)
            print(f'Saved best model to {save_path}')

        # Early stopping
        if early_stopping(val_loss):
            print("Early stopping triggered")
            break

    return vae

In [27]:
# adjust args needed for VAE
args_vae = Args()
args_vae.feature_size = 1152  # 1024 (RGB) + 128 (audio)
args_vae.batch_size = 2
args_vae.modality = 'MIX2'

# initialize VAE with correct input dimension
vae = VAE(latent_dim=64, input_dim=args_vae.feature_size, seq_len=200)

# Create normal-only dataloaders
normal_train_loader, normal_val_loader, normal_test_loader = create_normal_data_loaders(args_vae)

# Train the VAE
trained_vae = train_vae(vae, normal_train_loader, normal_val_loader, args_vae)

Creating normal-only data loaders...
Normal train loader created with 8200 samples
Normal validation loader created with 2045 samples
Normal test loader created with 1500 samples


KeyboardInterrupt: 

In [28]:
torch.save(trained_vae.state_dict(), "/content/trained_vae.pkl")

## Debugging data loaders

In [28]:
def inspect_normal_dataset(args, num_samples=25):
    """
    Inspect the normal samples being loaded from the dataset
    """
    print("Creating NormalDataset...")
    dataset = NormalDataset(args, mode='train')
    print(f"Total number of samples in dataset: {len(dataset)}")
    print(f"\nNumber of files in main list: {len(dataset.list)}")
    if hasattr(dataset, 'audio_list'):
        print(f"Number of files in audio list: {len(dataset.audio_list)}")

    print("\nInspecting individual samples:")
    for i in range(min(num_samples, len(dataset))):
        try:
            # Get the filepaths that would be loaded
            rgb_path = dataset.list[i].strip('\n')
            print(f"\nSample {i}:")
            print(f"RGB file: {os.path.basename(rgb_path)}")

            # Load RGB features
            features1 = np.array(np.load(rgb_path), dtype=np.float32)
            print(f"RGB shape: {features1.shape}")

            # If MIX2 modality, also show audio information
            if dataset.modality == 'MIX2':
                audio_index = i // 5
                if audio_index < len(dataset.audio_list):
                    audio_path = dataset.audio_list[audio_index].strip('\n')
                    print(f"Audio file: {os.path.basename(audio_path)}")
                    features2 = np.array(np.load(audio_path), dtype=np.float32)
                    print(f"Audio shape: {features2.shape}")

                    # Try the concatenation
                    try:
                        if features1.shape[0] != features2.shape[0]:
                            print("⚠️ Dimension mismatch!")
                            print(f"RGB frames: {features1.shape[0]}, Audio frames: {features2.shape[0]}")
                            if features1.shape[0] - 1 == features2.shape[0]:
                                print("Would work with [:-1] slice")
                                features = np.concatenate((features1[:-1], features2), axis=1)
                                print(f"Concatenated shape after adjustment: {features.shape}")
                            else:
                                print("Cannot be fixed with simple slice")
                        else:
                            features = np.concatenate((features1, features2), axis=1)
                            print(f"Concatenated shape: {features.shape}")
                    except ValueError as e:
                        print(f"❌ Concatenation failed: {str(e)}")
                else:
                    print("⚠️ No corresponding audio file (index out of range)")

            # Check if this is actually a normal sample
            if '_label_A' not in rgb_path:
                print("⚠️ WARNING: This doesn't appear to be a normal sample!")

        except Exception as e:
            print(f"Error loading sample {i}: {str(e)}")

        print("-" * 50)

# Run the inspection
print("Inspecting dataset with MIX2 modality...")
args_check = args_vae
args_check.modality = 'MIX2'
args_check.batch_size = 64  # Smaller batch size for inspection
inspect_normal_dataset(args_check)

Inspecting dataset with MIX2 modality...
Creating NormalDataset...
Total number of samples in dataset: 8200

Number of files in main list: 8200
Number of files in audio list: 1640

Inspecting individual samples:

Sample 0:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__0.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
Concatenated shape: (316, 1152)
--------------------------------------------------

Sample 1:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__1.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
Concatenated shape: (316, 1152)
--------------------------------------------------

Sample 2:
RGB file: Still.Walking.2008__#01-30-25_01-33-56_label_A__2.npy
RGB shape: (316, 1024)
Audio file: Still.Walking.2008__#01-30-25_01-33-56_label_A__vggish.npy
Audio shape: (316, 128)
Concatenated shape: (316, 1152)
-----------

# HLNET (AUDIO)

In [None]:
impo

## Utils

In [29]:
def collate_fn_audio(batch):
    """
    Collate function for a batch of spectrograms.
    Each spectrogram is of shape [time_steps, num_mels].
    This function pads each spectrogram along the time dimension
    so that all have the same time_steps dimension.
    """
    # `batch` is a list of spectrograms: each is numpy array [T, num_mels]
    # Find the longest time dimension in the batch
    max_length = max(feat.shape[0] for feat in batch)

    # Pad each spectrogram to max_length
    padded_feats = []
    for feat in batch:
        t, m = feat.shape
        # Create a zero array [max_length, num_mels]
        padded = np.zeros((max_length, m), dtype=np.float32)
        padded[:t] = feat
        padded_feats.append(padded)

    # Convert the list of numpy arrays into a single tensor
    feats_batch = torch.tensor(padded_feats, dtype=torch.float32)

    return feats_batch


## Arg Definition

In [30]:
class Args:
    def __init__(self):
        self.modality = 'AUDIO'
        # Original paths
        self.rgb_list = '/content/final_dl/list/rgb.list'
        self.flow_list = '/content/final_dl/list/flow.list'
        self.audio_list = '/content/final_dl/list/audio.list'

        # Train paths
        self.train_rgb_list = '/content/final_dl/list/rgb_train.list'
        self.train_flow_list = '/content/final_dl/list/flow_train.list'
        self.train_audio_list = '/content/final_dl/list/audio_train.list'

        # Val paths
        self.val_rgb_list = '/content/final_dl/list/rgb_val.list'
        self.val_flow_list = '/content/final_dl/list/flow_val.list'
        self.val_audio_list = '/content/final_dl/list/audio_val.list'

        # Test paths
        self.test_rgb_list = '/content/final_dl/list/rgb_test.list'
        self.test_flow_list = '/content/final_dl/list/flow_test.list'
        self.test_audio_list = '/content/final_dl/list/audio_test.list'

        self.gt = '/content/final_dl/list/gt.npy'
        self.gpus = 1
        self.lr = 0.0001
        self.batch_size = 128
        self.workers = 1  # Reduced from 4 to avoid memory issues
        self.model_name = 'wsanodet'
        self.pretrained_ckpt = None
        #self.feature_size = 1152  # 1024 + 128
        self.feature_size = 128  # 1024 + 128
        self.num_classes = 1
        self.dataset_name = 'XD-Violence'
        self.max_seqlen = 200
        self.max_epoch = 50

args = Args()

class Dataset(data.Dataset):
    def __init__(self, args, transform=None, mode='train'):
        self.modality = args.modality
        """
        Args:
            args: Arguments containing dataset paths and configuration
            transform: Optional transforms to apply
            mode: One of ['train', 'val', 'test'] to specify the dataset split
        """

        if mode == 'test':
            self.rgb_list_file = args.test_rgb_list
            self.flow_list_file = args.test_flow_list
            self.audio_list_file = args.test_audio_list
        elif mode == 'val':
            self.rgb_list_file = args.val_rgb_list
            self.flow_list_file = args.val_flow_list
            self.audio_list_file = args.val_audio_list
        else: # train
            self.rgb_list_file = args.train_rgb_list
            self.flow_list_file = args.train_flow_list
            self.audio_list_file = args.train_audio_list

        self.max_seqlen = args.max_seqlen
        self.tranform = transform
        self.test_mode = (mode == 'test')
        self.normal_flag = '_label_A'
        self._parse_list()

    def _parse_list(self):
        if self.modality == 'AUDIO':
            self.list = list(open(self.audio_list_file))
        elif self.modality == 'RGB':
            self.list = list(open(self.rgb_list_file))
            print("here")
            # print(self.list)
        elif self.modality == 'FLOW':
            self.list = list(open(self.flow_list_file))
        elif self.modality == 'MIX':
            self.list = list(open(self.rgb_list_file))
            self.flow_list = list(open(self.flow_list_file))
        elif self.modality == 'MIX2':
            self.list = list(open(self.rgb_list_file))
            self.audio_list = list(open(self.audio_list_file))
        elif self.modality == 'MIX3':
            self.list = list(open(self.flow_list_file))
            self.audio_list = list(open(self.audio_list_file))
        elif self.modality == 'MIX_ALL':
            self.list = list(open(self.rgb_list_file))
            self.flow_list = list(open(self.flow_list_file))
            self.audio_list = list(open(self.audio_list_file))
        else:
            assert 1 > 2, 'Modality is wrong!'

    def __getitem__(self, index):
        if self.normal_flag in self.list[index]:
            label = 0.0
        else:
            label = 1.0

        if self.modality == 'AUDIO':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'RGB':
            features = np.array(np.load(self.list[index].strip('\n')),dtype=np.float32)
        elif self.modality == 'FLOW':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'MIX':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.flow_list[index].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2),axis=1)
            else:# because the frames of flow is one less than that of rgb
                features = np.concatenate((features1[:-1], features2), axis=1)
        elif self.modality == 'MIX2':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.audio_list[index//5].strip('\n')), dtype=np.float32)
        elif self.modality == 'MIX3':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.audio_list[index//5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2),axis=1)
            else:# because the frames of flow is one less than that of rgb
                features = np.concatenate((features1[:-1], features2), axis=1)
        elif self.modality == 'MIX_ALL':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.flow_list[index].strip('\n')), dtype=np.float32)
            features3 = np.array(np.load(self.audio_list[index//5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2, features3),axis=1)
            else:# because the frames of flow is one less than that of rgb
                features = np.concatenate((features1[:-1], features2, features3[:-1]), axis=1)
        else:
            print("WHAT IS WRONG")
            assert 1>2, 'Modality is wrong!'
        if self.tranform is not None:
            features = self.tranform(features)
        if self.test_mode:
            return features, label

        else:
            features = process_feat(features, self.max_seqlen, is_random=False)
            return features, label

    def __len__(self):
        return len(self.list)

## Data splits

In [31]:
import random


def train():
    # Define the input .list file containing the original file paths
    input_list_file = "/content/final_dl/list/audio.list"

    # Define the directory to update the paths to
    new_directory = "/content/final_dl/list/xx/train"

    # Define the output .list file for the updated file paths
    output_list_file = "/content/final_dl/list/audio.list"

    # Read the original file paths from the .list file
    with open(input_list_file, "r") as file:
        original_paths = file.readlines()

    # Process and update each file path
    updated_paths = []
    for path in original_paths:
        path = path.strip()  # Remove any leading/trailing whitespace or newlines
        if path:  # Ensure the path is not empty
            # Extract the filename from the original path and create a new path
            filename = path.split("/")[-1]
            updated_path = f"{new_directory}/{filename}"
            updated_paths.append(updated_path)

    # Write the updated paths to the output .list file
    with open(output_list_file, "w") as file:
        file.write("\n".join(updated_paths))

    print(f"Updated paths have been written to {output_list_file}")

    # Define the input .list file containing the original file paths
    input_list_file = "/content/final_dl/list/rgb.list"

    # Define the directory to update the paths to
    new_directory = "/content/final_dl/dl_files/i3d-features/RGB"

    # Define the output .list file for the updated file paths
    output_list_file = "/content/final_dl/list/rgb.list"

    # Read the original file paths from the .list file
    with open(input_list_file, "r") as file:
        original_paths = file.readlines()

    # Process and update each file path
    updated_paths = []
    for path in original_paths:
        path = path.strip()  # Remove any leading/trailing whitespace or newlines
        if path:  # Ensure the path is not empty
            # Extract the filename from the original path and create a new path
            filename = path.split("/")[-1]
            updated_path = f"{new_directory}/{filename}"
            updated_paths.append(updated_path)

    # Write the updated paths to the output .list file
    with open(output_list_file, "w") as file:
        file.write("\n".join(updated_paths))

    print(f"Updated paths have been written to {output_list_file}")


def test():
    # Define the input .list file containing the original file paths
    input_list_file = "/content/final_dl/list/audio_test.list"

    # Define the directory to update the paths to
    new_directory = "/content/final_dl/list/xx/test"

    # Define the output .list file for the updated file paths
    output_list_file = "/content/final_dl/list/audio_test.list"

    # Read the original file paths from the .list file
    with open(input_list_file, "r") as file:
        original_paths = file.readlines()

    # Process and update each file path
    updated_paths = []
    for path in original_paths:
        path = path.strip()  # Remove any leading/trailing whitespace or newlines
        if path:  # Ensure the path is not empty
            # Extract the filename from the original path and create a new path
            filename = path.split("/")[-1]
            updated_path = f"{new_directory}/{filename}"
            updated_paths.append(updated_path)

    # Write the updated paths to the output .list file
    with open(output_list_file, "w") as file:
        file.write("\n".join(updated_paths))

    print(f"Updated paths have been written to {output_list_file}")

    # Define the input .list file containing the original file paths
    input_list_file = "/content/final_dl/list/rgb_test.list"

    # Define the directory to update the paths to
    new_directory = "/content/final_dl/dl_files/i3d-features/RGBTest"

    # Define the output .list file for the updated file paths
    output_list_file = "/content/final_dl/list/rgb_test.list"

    # Read the original file paths from the .list file
    with open(input_list_file, "r") as file:
        original_paths = file.readlines()

    # Process and update each file path
    updated_paths = []
    for path in original_paths:
        path = path.strip()  # Remove any leading/trailing whitespace or newlines
        if path:  # Ensure the path is not empty
            # Extract the filename from the original path and create a new path
            filename = path.split("/")[-1]
            updated_path = f"{new_directory}/{filename}"
            updated_paths.append(updated_path)

    # Write the updated paths to the output .list file
    with open(output_list_file, "w") as file:
        file.write("\n".join(updated_paths))

    print(f"Updated paths have been written to {output_list_file}")




import random

def split_file(input_file, output_file_80, output_file_20):
    # Read all lines from the input file
    with open(input_file, 'r') as file:
        lines = file.readlines()

    # Shuffle the lines to randomize them
    random.shuffle(lines)

    # Calculate split index
    split_index = int(len(lines) * 0.8)

    # Split into 80% and 20%
    lines_80 = lines[:split_index]
    lines_20 = lines[split_index:]

    # Write the 80% lines to the output file for 80%
    with open(output_file_80, 'w') as file:
        file.writelines(lines_80)

    # Write the 20% lines to the output file for 20%
    with open(output_file_20, 'w') as file:
        file.writelines(lines_20)

    print(f"Split complete. {len(lines_80)} lines written to {output_file_80}.")
    print(f"{len(lines_20)} lines written to {output_file_20}.")

train()
test()
split_file("/content/final_dl/list/audio.list", "/content/final_dl/list/audio_train.list", "/content/final_dl/list/audio_val.list")
split_file("/content/final_dl/list/rgb.list", "/content/final_dl/list/rgb_train.list", "/content/final_dl/list/rgb_val.list")

Updated paths have been written to /content/final_dl/list/audio.list
Updated paths have been written to /content/final_dl/list/rgb.list
Updated paths have been written to /content/final_dl/list/audio_test.list
Updated paths have been written to /content/final_dl/list/rgb_test.list
Split complete. 3163 lines written to /content/final_dl/list/audio_train.list.
791 lines written to /content/final_dl/list/audio_val.list.
Split complete. 15816 lines written to /content/final_dl/list/rgb_train.list.
3954 lines written to /content/final_dl/list/rgb_val.list.


## Train

In [32]:
from torch.utils.data import DataLoader
import torch.optim as optim
import torch
import time
import numpy as np
import random
import os


def validate(dataloader, model, criterion, device, is_topk):
    with torch.no_grad():
        model.eval()
        total_loss = 0.0
        count = 0
        for i, (input, label) in enumerate(dataloader):
            seq_len = torch.sum(torch.max(torch.abs(input), dim=2)[0]>0, 1)
            input = input[:, :torch.max(seq_len), :]
            input, label = input.float().to(device), label.float().to(device)
            logits, logits2 = model(input, seq_len)
            clsloss = CLAS(logits, label, seq_len, criterion, device, is_topk)
            clsloss2 = CLAS(logits2, label, seq_len, criterion, device, is_topk)
            croloss = CENTROPY(logits, logits2, seq_len, device)

            batch_loss = clsloss + clsloss2 + 5*croloss
            total_loss += batch_loss.item()
            count += 1
        return total_loss / count if count > 0 else 0.0


def train(dataloader, model, optimizer, scheduler, criterion, device, is_topk, val_loader=None):
    model.train()
    running_loss = 0.0
    count = 0
    for i, (input, label) in enumerate(dataloader):
        seq_len = torch.sum(torch.max(torch.abs(input), dim=2)[0]>0, 1)
        input = input[:, :torch.max(seq_len), :]
        input, label = input.float().to(device), label.float().to(device)
        logits, logits2 = model(input, seq_len)
        clsloss = CLAS(logits, label, seq_len, criterion, device, is_topk)
        clsloss2 = CLAS(logits2, label, seq_len, criterion, device, is_topk)
        croloss = CENTROPY(logits, logits2, seq_len, device)

        total_loss = clsloss + clsloss2 + 5*croloss
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        scheduler.step()

        running_loss += total_loss.item()
        count += 1

        # Print training loss every 100 steps
        if (i + 1) % 100 == 0:
            avg_train_loss = running_loss / count
            print(f"Step {i+1}: Average Training Loss: {avg_train_loss:.4f}")
            running_loss = 0.0
            count = 0

            # If val_loader is provided, evaluate on validation set
            if val_loader is not None:
                val_loss = validate(val_loader, model, criterion, device, is_topk)
                print(f"Step {i+1}: Validation Loss: {val_loss:.4f}")

    return model


def test(dataloader, model, device):
    gt =[]
    with torch.no_grad():
        model.eval()
        pred = torch.zeros(0).to(device)
        pred2 = torch.zeros(0).to(device)
        for i, (input, label) in enumerate(dataloader):
            gt.append(label)
            input = input.to(device)
            logits, logits2 = model(inputs=input, seq_len=None)
            logits = torch.squeeze(logits)
            sig = torch.sigmoid(logits)
            sig = torch.mean(sig, 0)
            sig = sig.unsqueeze(0)
            pred = torch.cat((pred, sig))
            '''
            online detection
            '''
            logits2 = torch.squeeze(logits2)
            sig2 = torch.sigmoid(logits2)
            sig2 = torch.mean(sig2, 0)
            sig2 = sig2.unsqueeze(0)
            pred2 = torch.cat((pred2, sig2))

        pred = list(pred.cpu().detach().numpy())
        pred2 = list(pred2.cpu().detach().numpy())

        precision, recall, th = precision_recall_curve(list(gt), pred)
        pr_auc = auc(recall, precision)
        precision, recall, th = precision_recall_curve(list(gt), pred2)
        pr_auc2 = auc(recall, precision)
        return pr_auc, pr_auc2





if __name__ == '__main__':
    aligned_files = find_matching_files()

    # Create train/val splits
    split_data = create_splits(aligned_files)

        # Write list files
    write_list_files(split_data, aligned_files, "/content/final_dl/list")

    train_loader, val_loader, test_loader = create_single_modality_data_loaders(args, modality="AUDIO")

    device = torch.device("cuda")
    # train_loader = DataLoader(Dataset(args, mode='train'),
    #                         batch_size=args.batch_size, shuffle=True,
    #                         num_workers=args.workers, pin_memory=True)
    # test_loader = DataLoader(Dataset(args, mode='test'),
    #                         batch_size=5, shuffle=False,
    #                         num_workers=args.workers, pin_memory=True)


    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Model(args)
    model = model.cuda()

    for name, value in model.named_parameters():
        print(name)
    approximator_param = list(map(id, model.approximator.parameters()))
    approximator_param += list(map(id, model.conv1d_approximator.parameters()))
    base_param = filter(lambda p: id(p) not in approximator_param, model.parameters())

    if not os.path.exists('./ckpt'):
        os.makedirs('./ckpt')
    optimizer = optim.Adam([{'params': base_param},
                            {'params': model.approximator.parameters(), 'lr': args.lr / 2},
                            {'params': model.conv1d_approximator.parameters(), 'lr': args.lr / 2},
                            ],
                            lr=args.lr, weight_decay=0.000)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.1)
    criterion = torch.nn.BCELoss()

    is_topk = True
    gt = np.load(args.gt)
    # pr_auc, pr_auc_online = test(test_loader, model, device)
    # print('Random initalization: offline pr_auc:{0:.4}; online pr_auc:{1:.4}\n'.format(pr_auc, pr_auc_online))
    for epoch in range(args.max_epoch):
        st = time.time()
        model = train(train_loader, model, optimizer, scheduler, criterion, device, is_topk)
        if epoch % 2 == 0 and not epoch == 0:
            torch.save(model.state_dict(), '/content/final_dl/'+args.model_name+'{}.pth'.format(epoch))

        pr_auc, pr_auc_online = test(test_loader, model, device)
        print('Epoch {0}/{1}: offline pr_auc:{2:.4}; online pr_auc:{3:.4}\n'.format(epoch, args.max_epoch, pr_auc, pr_auc_online))
    torch.save(model.state_dict(), '/content/ckpt/' + args.model_name + '.pth')


Found 3953 aligned RGB-Audio pairs
Creating AUDIO data loaders...
Train loader created with 3162 samples
Validation loader created with 791 samples
Test loader created with 800 samples
conv1d1.weight
conv1d1.bias
conv1d2.weight
conv1d2.bias
conv1d3.weight
conv1d3.bias
conv1d4.weight
conv1d4.bias
gc1.weight
gc1.residual.weight
gc1.residual.bias
gc2.weight
gc3.weight
gc3.residual.weight
gc3.residual.bias
gc4.weight
gc5.weight
gc5.residual.weight
gc5.residual.bias
gc6.weight
simAdj.weight0
simAdj.weight1
disAdj.sigma
classifier.weight
classifier.bias
approximator.0.weight
approximator.0.bias
approximator.2.weight
approximator.2.bias
conv1d_approximator.weight
conv1d_approximator.bias


KeyboardInterrupt: 

# HLNET (VIDEO + AUDIO) + VAE





### Dataset


In [33]:


import torch.utils.data as data
import numpy as np

# from utils import process_feat
def pad(feat, min_len):
    if np.shape(feat)[0] <= min_len:
       return np.pad(feat, ((0, min_len-np.shape(feat)[0]), (0, 0)), mode='constant', constant_values=0)
    else:
       return feat

def process_feat(feat, length, is_random=True):
    if len(feat) > length:
        if is_random:
            return random_extract(feat, length)
        else:
            return uniform_extract(feat, length)
    else:
        return pad(feat, length)

class Dataset(data.Dataset):
    def __init__(self, args, transform=None, mode='train'):
        self.modality = args.modality
        """
        Args:
            args: Arguments containing dataset paths and configuration
            transform: Optional transforms to apply
            mode: One of ['train', 'val', 'test'] to specify the dataset split
        """

        if mode == 'test':
            self.rgb_list_file = args.test_rgb_list
            self.flow_list_file = args.test_flow_list
            self.audio_list_file = args.test_audio_list
        elif mode == 'val':
            self.rgb_list_file = args.val_rgb_list
            self.flow_list_file = args.val_flow_list
            self.audio_list_file = args.val_audio_list
        else: # train
            self.rgb_list_file = args.train_rgb_list
            self.flow_list_file = args.train_flow_list
            self.audio_list_file = args.train_audio_list


        self.max_seqlen = args.max_seqlen
        self.tranform = transform
        self.test_mode = (mode == 'test')
        self.normal_flag = '_label_A'
        self._parse_list()

    def _parse_list(self):
        if self.modality == 'AUDIO':
            self.list = list(open(self.audio_list_file))
        elif self.modality == 'RGB':
            self.list = list(open(self.rgb_list_file))
            print("here")
            # print(self.list)
        elif self.modality == 'FLOW':
            self.list = list(open(self.flow_list_file))
        elif self.modality == 'MIX':
            self.list = list(open(self.rgb_list_file))
            self.flow_list = list(open(self.flow_list_file))
        elif self.modality == 'MIX2':
            self.list = list(open(self.rgb_list_file))
            self.audio_list = list(open(self.audio_list_file))
        elif self.modality == 'MIX3':
            self.list = list(open(self.flow_list_file))
            self.audio_list = list(open(self.audio_list_file))
        elif self.modality == 'MIX_ALL':
            self.list = list(open(self.rgb_list_file))
            self.flow_list = list(open(self.flow_list_file))
            self.audio_list = list(open(self.audio_list_file))
        else:
            assert 1 > 2, 'Modality is wrong!'

    def __getitem__(self, index):
        if self.normal_flag in self.list[index]:
            label = 0.0
        else:
            label = 1.0

        if self.modality == 'AUDIO':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'RGB':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'FLOW':
            features = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
        elif self.modality == 'MIX':
             # Load RGB features
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            # Load corresponding audio features (accounting for 5:1 ratio)
            audio_index = index // 5
            features2 = np.array(np.load(self.audio_list[audio_index].strip('\n')), dtype=np.float32)



            # print("features1 shape: ", features1.shape)
            # print("features2 shape: ", features1.shape)

            # Handle potential dimension mismatch
            if features1.shape[0] > features2.shape[0]:
                features1 = features1[:features2.shape[0]]
            else:
                features2 = features2[:features1.shape[0]]
            features = np.concatenate((features1, features2), axis=1)

        elif self.modality == 'MIX2':


            # Load RGB features
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            # Load corresponding audio features (accounting for 5:1 ratio)
            audio_index = index // 5
            if audio_index >= len(self.audio_list):
              audio_index-=1
            # print(f"Index: {index}, Audio Index: {audio_index}, Length of audio_list: {len(self.audio_list)}")
            features2 = np.array(np.load(self.audio_list[audio_index].strip('\n')), dtype=np.float32)




            # print("features1 shape: ", features1.shape)
            # print("features2 shape: ", features1.shape)

            # Handle potential dimension mismatch
            if features1.shape[0] > features2.shape[0]:
                features1 = features1[:features2.shape[0]]
            else:
                features2 = features2[:features1.shape[0]]
            features = np.concatenate((features1, features2), axis=1)

        elif self.modality == 'MIX3':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.audio_list[index // 5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2), axis=1)
            else:
                features = np.concatenate((features1[:-1], features2), axis=1)
        elif self.modality == 'MIX_ALL':
            features1 = np.array(np.load(self.list[index].strip('\n')), dtype=np.float32)
            features2 = np.array(np.load(self.flow_list[index].strip('\n')), dtype=np.float32)
            features3 = np.array(np.load(self.audio_list[index // 5].strip('\n')), dtype=np.float32)
            if features1.shape[0] == features2.shape[0]:
                features = np.concatenate((features1, features2, features3), axis=1)
            else:
                features = np.concatenate((features1[:-1], features2, features3[:-1]), axis=1)
        else:
            print("WHAT IS WRONG")
            raise ValueError("Modality is wrong!")

        # Apply transformations if any
        if self.tranform is not None:
            features = self.tranform(features)

        # Handle test mode
        if self.test_mode:
            return features
        else:
            # Process features for training/validation
            features = process_feat(features, self.max_seqlen, is_random=False)
            # print("features.shape: ", features.shape)
            return features, label

    def __len__(self):
        return len(self.list)

### Loading Vae model

In [34]:
class Args:
    def __init__(self):
        self.modality = 'MIX2'
        # Original paths
        self.rgb_list = '/content/final_dl/list/rgb.list'
        self.flow_list = '/content/final_dl/list/flow.list'
        self.audio_list = '/content/final_dl/list/audio.list'

        # Train paths
        self.train_rgb_list = '/content/final_dl/list/rgb_train.list'
        self.train_flow_list = '/content/final_dl/list/flow_train.list'
        self.train_audio_list = '/content/final_dl/list/audio_train.list'

        # Val paths
        self.val_rgb_list = '/content/final_dl/list/rgb_val.list'
        self.val_flow_list = '/content/final_dl/list/flow_val.list'
        self.val_audio_list = '/content/final_dl/list/audio_val.list'

        # Test paths
        self.test_rgb_list = '/content/final_dl/list/rgb_test.list'
        self.test_flow_list = '/content/final_dl/list/flow_test.list'
        self.test_audio_list = '/content/final_dl/list/audio_test.list'

        self.gt = '/content/final_dl/list/gt.npy'
        self.gpus = 1
        self.lr = 0.0001
        self.batch_size = 128
        self.workers = 1  # Reduced from 4 to avoid memory issues
        self.model_name = 'wsanodet'
        self.pretrained_ckpt = None
        self.feature_size = 1152  # 1024 + 128
        self.num_classes = 1
        self.dataset_name = 'XD-Violence'
        self.max_seqlen = 200
        self.max_epoch = 50

args = Args()

In [35]:

args.modality="MIX2"

# Instantiate the VAE model
vae_model = VAE(latent_dim=64, input_dim=args.feature_size, seq_len=200)

# Load the model weights
dir = "/content/best_trained_vae.pkl"
vae_model.load_state_dict(torch.load(dir))

# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae_model = vae_model.to(device)

# Set the model to evaluation mode
vae_model.eval()



# Now vae_model is ready to be used


  vae_model.load_state_dict(torch.load(dir))


VAE(
  (encoder): Encoder(
    (encoder): Sequential(
      (0): Conv1d(1152, 256, kernel_size=(3,), stride=(2,), padding=(1,))
      (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv1d(256, 128, kernel_size=(3,), stride=(2,), padding=(1,))
      (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv1d(128, 64, kernel_size=(3,), stride=(2,), padding=(1,))
      (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU(inplace=True)
      (9): Flatten(start_dim=1, end_dim=-1)
    )
    (lin_mean): Sequential(
      (0): Linear(in_features=1600, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (lin_log_var): Sequential(
      (0): Linear(in_features=1600, out_features=64, bias=True)
      (1): BatchNorm1d(64, e

## Modified HLNET Training


In [36]:

def validate(dataloader, model, criterion, device, is_topk):
    with torch.no_grad():
        model.eval()
        total_loss = 0.0
        count = 0
        for i, (input, label) in enumerate(dataloader):
            seq_len = torch.sum(torch.max(torch.abs(input), dim=2)[0]>0, 1)
            input = input[:, :torch.max(seq_len), :]
            input, label = input.float().to(device), label.float().to(device)
            logits, logits2 = model(input, seq_len)
            clsloss = CLAS(logits, label, seq_len, criterion, device, is_topk)
            clsloss2 = CLAS(logits2, label, seq_len, criterion, device, is_topk)
            croloss = CENTROPY(logits, logits2, seq_len, device)

            batch_loss = clsloss + clsloss2 + 5*croloss
            total_loss += batch_loss.item()
            count += 1
        return total_loss / count if count > 0 else 0.0

def test_hl_vae(dataloader, model, device, gt):
    with torch.no_grad():
        model.eval()
        pred = torch.zeros(0).to(device)
        pred2 = torch.zeros(0).to(device)
        for i, input in enumerate(dataloader):


            input = input.to(device)

            # print("input shape: ", input.shape)
            logits, logits2 = model(inputs=input, seq_len=None)
            logits = torch.squeeze(logits)
            sig = torch.sigmoid(logits)
            sig = torch.mean(sig, 0)
            pred = torch.cat((pred, sig))
            '''
            online detection
            '''
            logits2 = torch.squeeze(logits2)
            sig2 = torch.sigmoid(logits2)
            sig2 = torch.mean(sig2, 0)

            sig2 = torch.unsqueeze(sig2, 1) ##for audio
            pred2 = torch.cat((pred2, sig2))

            # print("pred:, ", pred)
            # print("pred2:, ", pred2)

        pred = list(pred.cpu().detach().numpy())
        pred2 = list(pred2.cpu().detach().numpy())

        precision, recall, th = precision_recall_curve(list(gt), np.repeat(pred, 16))
        pr_auc = auc(recall, precision)
        precision, recall, th = precision_recall_curve(list(gt), np.repeat(pred2, 16))
        pr_auc2 = auc(recall, precision)
        return pr_auc, pr_auc2


def train_hlnet_vae(dataloader, hlnet, vae, optimizer, scheduler, criterion, device, is_topk, HLNET_LOSS_WEIGHT, RECON_LOSS_WEIGHT):
    hlnet.train()
    vae.eval()
    running_loss = 0.0
    count = 0
    for i, (input, label) in enumerate(dataloader):
        inputcpy = input.float().to(device)
        seq_len = torch.sum(torch.max(torch.abs(input), dim=2)[0]>0, 1)
        input = input[:, :torch.max(seq_len), :]
        input, label = input.float().to(device), label.float().to(device)
        logits, logits2 = hlnet(input, seq_len)
        clsloss = CLAS(logits, label, seq_len, criterion, device, is_topk)
        clsloss2 = CLAS(logits2, label, seq_len, criterion, device, is_topk)
        croloss = CENTROPY(logits, logits2, seq_len, device)

        recon_loss = 0;

        with torch.inference_mode():
          recon_data, mu, logvar = vae(inputcpy)


          # Reconstruction loss
          recon_criterion = torch.nn.MSELoss(reduction='sum')
          recon_loss = recon_criterion(recon_data, inputcpy)

          # KL divergence loss
          # kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

          # # Total loss
          # vae_loss = recon_loss + kl_loss

        # print("step")

        total_loss = HLNET_LOSS_WEIGHT * (clsloss + clsloss2 + 5*croloss) + RECON_LOSS_WEIGHT * (recon_loss)
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        scheduler.step()

        running_loss += total_loss.item()
        count += 1

        # Print training loss every 100 steps
        if (i + 1) % 100 == 0:
            avg_train_loss = running_loss / count
            print(f"Step {i+1}: Average Training Loss: {avg_train_loss:.4f}")
            running_loss = 0.0
            count = 0

            # If val_loader is provided, evaluate on validation set
            if val_loader is not None:
                val_loss = validate(val_loader, hlnet, criterion, device, is_topk)
                print(f"Step {i+1}: Validation Loss: {val_loss:.4f}")

    return model





## Training Combined Model

In [38]:
# weight hyperparameters
HL_NET_LOSS_weight = .8
RECON_LOSS_weight = .2

args = Args()
args.feature_size = 1152  # 1024 (RGB) + 128 (audio)
args.batch_size = 100
args.modality = 'MIX2'
args.max_seqlen = 200
args.workers = 1
# args.batch_size = 5

train_loader, val_loader, test_loader = create_data_loaders(args)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model(args)
model = model.cuda()

for name, value in model.named_parameters():
    print(name)
approximator_param = list(map(id, model.approximator.parameters()))
approximator_param += list(map(id, model.conv1d_approximator.parameters()))
base_param = filter(lambda p: id(p) not in approximator_param, model.parameters())

if not os.path.exists('./ckpt'):
    os.makedirs('./ckpt')
optimizer = optim.Adam([{'params': base_param},
                        {'params': model.approximator.parameters(), 'lr': args.lr / 2},
                        {'params': model.conv1d_approximator.parameters(), 'lr': args.lr / 2},
                        ],
                        lr=args.lr, weight_decay=0.000)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.1)
criterion = torch.nn.BCELoss()

is_topk = True
gt = np.load(args.gt)

print("shape of test set: ", len(test_loader))

for epoch in range(args.max_epoch):
    st = time.time()
    model = train_hlnet_vae(train_loader, model, vae_model, optimizer, scheduler, criterion, device, is_topk, HL_NET_LOSS_weight, RECON_LOSS_weight)
    print("here")
    if epoch % 2 == 0 and not epoch == 0:
        torch.save(model.state_dict(), '/content/final_dl/'+args.model_name+'{}.pth'.format(epoch))

    pr_auc, pr_auc_online = test_hl_vae(test_loader, model, device,gt)
    print('Epoch {0}/{1}: offline pr_auc:{2:.4}; online pr_auc:{3:.4}\n'.format(epoch, args.max_epoch, pr_auc, pr_auc_online))
torch.save(model.state_dict(), '/content/' + args.model_name + '.pth')




<__main__.Args object at 0x7bc788fe6200>
Creating data loaders...
audio size
3162
rgb size
15815
Train loader created with 15815 samples
Validation loader created with 3955 samples
Test loader created with 4000 samples
conv1d1.weight
conv1d1.bias
conv1d2.weight
conv1d2.bias
conv1d3.weight
conv1d3.bias
conv1d4.weight
conv1d4.bias
gc1.weight
gc1.residual.weight
gc1.residual.bias
gc2.weight
gc3.weight
gc3.residual.weight
gc3.residual.bias
gc4.weight
gc5.weight
gc5.residual.weight
gc5.residual.bias
gc6.weight
simAdj.weight0
simAdj.weight1
disAdj.sigma
classifier.weight
classifier.bias
approximator.0.weight
approximator.0.bias
approximator.2.weight
approximator.2.bias
conv1d_approximator.weight
conv1d_approximator.bias
shape of test set:  800
Step 100: Average Training Loss: 103488.5051
Step 100: Validation Loss: 2.0938
here
Epoch 0/50: offline pr_auc:0.2235; online pr_auc:0.2677

Step 100: Average Training Loss: 104712.5016
Step 100: Validation Loss: 1.9922
here
Epoch 1/50: offline pr_auc: