#### Defining functions to extract the frames of each video and, simultaneously, extract features from the respective frames


 pip install opencv-python 

In [1]:
from datetime import timedelta
import cv2
import numpy as np
import os
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import random

In [2]:
# Number of frames to sample per second of video.
# NOTE(review): not referenced anywhere in the visible code; presumably meant to be
# passed as `saving_fps` to get_saving_frames_durations() — confirm.
SAVING_FRAMES_PER_SECOND = 10

In [3]:
def format_timedelta(td):
    """Format a timedelta as a filename-safe string with centisecond precision.

    The text produced by ``str(td)`` is used as the base, the microseconds are
    rounded to two decimal places, and every ":" is replaced by "-" so the
    result can be used in file names, e.g. 20.05 seconds -> ``"0-00-20.05"``.

    Args:
        td: The ``datetime.timedelta`` to format.

    Returns:
        The formatted string, always ending in a two-digit fractional part.
    """
    text = str(td)
    try:
        whole, micros = text.split(".")
    except ValueError:
        # str(td) has no fractional part when the microseconds are zero.
        return (text + ".00").replace(":", "-")

    centis = round(int(micros) / 1e4)
    if centis >= 100:
        # Rounding overflowed the two-digit field (e.g. .999999 -> 100):
        # carry into the seconds instead of emitting a three-digit fraction.
        carried = td - timedelta(microseconds=td.microseconds) + timedelta(seconds=1)
        return (str(carried) + ".00").replace(":", "-")
    return f"{whole}.{centis:02}".replace(":", "-")


def get_saving_frames_durations(cap, saving_fps):
    """Return the timestamps (in seconds) at which frames should be saved.

    Args:
        cap: An object exposing ``get(prop_id)`` like ``cv2.VideoCapture``.
        saving_fps: How many timestamps to generate per second of video.

    Returns:
        A list of timestamps starting at 0, spaced ``1 / saving_fps`` apart and
        strictly below the clip duration. The list is empty when the capture
        reports a non-positive frame rate, since no duration can be derived.
    """
    source_fps = cap.get(cv2.CAP_PROP_FPS)
    if source_fps <= 0:
        # A capture that failed to open (or a backend without FPS support)
        # reports 0; avoid dividing by it.
        return []
    # Clip duration = number of frames / frames per second.
    clip_duration = cap.get(cv2.CAP_PROP_FRAME_COUNT) / source_fps
    # np.arange() is used because the step is a floating-point value.
    return list(np.arange(0, clip_duration, 1 / saving_fps))

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import torch
import torch.nn as nn
from torchvision.models import resnet50

class RelationModuleMultiScale(torch.nn.Module):
    """Multi-scale temporal relation module.

    Sums the outputs of per-scale fusion networks over
    [n-frame relation, ..., 3-frame relation, 2-frame relation].

    Args:
        img_feature_dim: Number of feature values per frame.
        num_frames: Number of frames per sample; scales run from
            ``num_frames`` down to 2.
        num_class: Size of the output vector produced for each sample.
    """

    def __init__(self, img_feature_dim, num_frames, num_class):
        super().__init__()
        self.subsample_num = 4  # upper bound on relations summed per scale
        self.img_feature_dim = img_feature_dim
        self.num_frames = num_frames
        self.num_class = num_class

        # Relation sizes, largest first: num_frames, num_frames - 1, ..., 2.
        self.scales = list(range(num_frames, 1, -1))

        # For every scale: all frame-index tuples of that size, and how many
        # of them are drawn in each forward pass.
        self.relations_scales = [
            self.return_relationset(num_frames, scale) for scale in self.scales
        ]
        self.subsample_scales = [
            min(self.subsample_num, len(relations))
            for relations in self.relations_scales
        ]

        # One small fusion MLP per scale; its input is the concatenation of
        # `scale` frame feature vectors.
        num_bottleneck = 256
        self.fc_fusion_scales = nn.ModuleList()
        for scale in self.scales:
            self.fc_fusion_scales.append(
                nn.Sequential(
                    nn.ReLU(),
                    nn.Linear(scale * self.img_feature_dim, num_bottleneck),
                    nn.ReLU(),
                    nn.Linear(num_bottleneck, self.num_class),
                )
            )

    def forward(self, input):
        """Fuse the frame features over all scales.

        `input` is indexed as ``input[:, frame_indices, :]`` and each selection
        is flattened to ``scale * img_feature_dim`` values per sample, e.g. a
        tensor of shape (batch, num_frames, img_feature_dim).

        Returns the sum of the fusion outputs, one row of ``num_class`` values
        per sample.
        """
        # Largest scale first: it has a single relation using every frame.
        all_frames = self.relations_scales[0][0]
        selected = input[:, all_frames, :]
        flat = selected.view(selected.size(0), self.scales[0] * self.img_feature_dim)
        act_all = self.fc_fusion_scales[0](flat)

        # Remaining scales: add a random subset of their relations.
        for scale_id in range(1, len(self.scales)):
            candidates = self.relations_scales[scale_id]
            flat_dim = self.scales[scale_id] * self.img_feature_dim
            fusion = self.fc_fusion_scales[scale_id]
            chosen = np.random.choice(
                len(candidates), self.subsample_scales[scale_id], replace=False
            )
            for relation_idx in chosen:
                selected = input[:, candidates[relation_idx], :]
                flat = selected.view(selected.size(0), flat_dim)
                act_all = act_all + fusion(flat)
        return act_all

    def return_relationset(self, num_frames, num_frames_relation):
        """Return every combination of `num_frames_relation` frame indices.

        Indices are drawn from ``range(num_frames)``; the result is a list of
        tuples in the order produced by ``itertools.combinations``.
        """
        import itertools

        frame_ids = list(range(num_frames))
        return list(itertools.combinations(frame_ids, num_frames_relation))



# Example configuration values.
# NOTE(review): none of these three names is referenced in the visible code —
# the model below is constructed with literal arguments. Confirm whether they
# are still needed.
num_frames = 3  # number of frames per sample
num_classes = 4  # Replace with the actual number of classes
input_size = (3, 224, 224)  # Replace with the actual input size




In [7]:
dataset_path = "E:/extracted datasets/Anomaly-Videos-Part-1" #Root directory of the dataset
# output_path = "features/frames" #Directory to save the extracted frames
features_path = "features/" #Directory to save the extracted features as tensors
# labels_path = "D:/dataset/labels.txt" #Path to save the labels as a text file

# torch.save() does not create missing directories, so make sure it exists.
os.makedirs(features_path, exist_ok=True)

# The relation module is fed one frame at a time: the 3 colour channels of a
# 240x320 frame take the place of the 3 "frames", each flattened to 240*320
# values, and the module outputs a 2048-value vector per frame.
# NOTE(review): no checkpoint is loaded in the visible code, so the weights are
# the random initial ones — confirm this is intended.
model = RelationModuleMultiScale(240*320, 3, 2048)
model.eval()
model.to(device)

transform = transforms.Compose([
    transforms.ToPILImage(),
    # Fix both sides: the model requires exactly 240*320 values per channel,
    # and Resize(240) only guarantees that for 4:3 videos.
    transforms.Resize((240, 320)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# One entry (the folder name) per processed frame, across all videos.
labels = []
# Iterate over the videos in each dataset folder
for folder_name in ["test"]:
    folder_path = os.path.join(dataset_path, folder_name)
    if not os.path.isdir(folder_path):
        continue

    for video_file in os.listdir(folder_path):
        video_path = os.path.join(folder_path, video_file)
        features = []

        # Open the video file; skip anything OpenCV cannot decode.
        video_capture = cv2.VideoCapture(video_path)
        if not video_capture.isOpened():
            video_capture.release()
            print(f"Skipping unreadable video: {video_path}")
            continue

        try:
            # Read until the stream ends rather than trusting the container's
            # reported frame count, which can be inaccurate.
            while True:
                success, frame = video_capture.read()
                if not success:
                    break

                # OpenCV decodes to BGR; the normalisation constants are in RGB order.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_tensor = transform(frame)
                # Add the batch dimension: (3, 240, 320) -> (1, 3, 240, 320).
                frame_tensor = torch.unsqueeze(frame_tensor, 0).to(device)

                with torch.no_grad():
                    feature_tensor = model(frame_tensor)

                # Append features and labels
                features.append(feature_tensor.squeeze().cpu().numpy())
                labels.append(folder_name)
        finally:
            # Release the capture even if a frame fails to process.
            video_capture.release()

        # Stack the per-frame vectors into one (num_frames, 2048) tensor;
        # a video with no readable frames yields an empty tensor.
        if features:
            features_tensor = torch.from_numpy(np.stack(features))
        else:
            features_tensor = torch.empty(0)

        featurept = os.path.join(features_path, video_file + ".pt")
        torch.save(features_tensor, featurept)
