In [4]:
from sfmodel import VideoTransform, SlowFastLitModel  

ModuleNotFoundError: No module named 'torch'

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import cv2


class InferenceDataset(Dataset):
    """Dataset that decodes video files into fixed-length RGB clips.

    Each item is a ``(clip, video_path)`` pair. Before ``transform`` is
    applied, ``clip`` is a float tensor laid out as ``(C, T, H, W)`` whose
    pixel values have been divided by 255 and whose ``T`` equals ``frames``.
    """

    def __init__(self, video_paths, transform, frames=32):
        """Store the dataset configuration.

        Args:
            video_paths: Indexable collection of video file paths.
            transform: Callable applied to every decoded clip, or ``None``
                to return the decoded clip unchanged.
            frames: Number of frames sampled from each video.
        """
        self.video_paths = video_paths
        self.transform = transform
        self.frames = frames

    def __len__(self):
        """Return the number of videos in the dataset."""
        return len(self.video_paths)

    def __getitem__(self, idx):
        """Decode the video at position ``idx`` and return ``(clip, path)``."""
        video_path = self.video_paths[idx]
        clip = self._load_video_cv2(video_path, self.frames)
        # Test against None explicitly: a transform object that defines
        # __len__/__bool__ could be falsy and would be silently skipped.
        if self.transform is not None:
            clip = self.transform(clip)
        return clip, video_path

    @staticmethod
    def _sample_indices(total_frames: int, frames: int) -> list:
        """Choose which frame positions to keep from a video.

        Args:
            total_frames: Number of frames the video reports.
            frames: Number of frames wanted in the clip.

        Returns:
            A non-decreasing list of ``frames`` indices in
            ``[0, total_frames - 1]``. Short videos use every frame once and
            then repeat the last index; longer videos are sampled evenly.

        Raises:
            ValueError: If ``frames`` is not positive.
        """
        if frames <= 0:
            raise ValueError(f"frames must be a positive integer, got {frames}")
        if total_frames < frames:
            padding = [total_frames - 1] * (frames - total_frames)
            return list(range(total_frames)) + padding
        return np.linspace(0, total_frames - 1, frames).astype(int).tolist()

    @staticmethod
    def _load_video_cv2(path: str, frames: int) -> torch.Tensor:
        """Decode ``frames`` RGB frames from the video at ``path``.

        Args:
            path: Location of the video file.
            frames: Number of frames in the returned clip.

        Returns:
            Float tensor laid out as ``(C, T, H, W)`` with values divided by
            255. If decoding stops early, the last decoded frame is repeated
            to reach ``frames``.

        Raises:
            RuntimeError: If the video cannot be opened, reports no frames,
                or yields no decodable frame.
            ValueError: If ``frames`` is not positive.
        """
        cap = cv2.VideoCapture(path)
        try:
            if not cap.isOpened():
                raise RuntimeError(f"Cannot open video {path}")

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total_frames <= 0:
                raise RuntimeError(f"Video {path} has no frames")

            indices = InferenceDataset._sample_indices(total_frames, frames)

            frames_list = []
            sampled_idx = 0
            # Read sequentially and keep only the frames whose position
            # matches the next wanted index.
            for i in range(total_frames):
                ret, frame = cap.read()
                if not ret:
                    # Decoding ended before the reported frame count.
                    break
                if i == indices[sampled_idx]:
                    frames_list.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    sampled_idx += 1
                    if sampled_idx >= len(indices):
                        break
        finally:
            # Always free the capture handle, including on the error paths.
            cap.release()

        if not frames_list:
            raise RuntimeError(f"Could not decode any frame from video {path}")

        # Pad with the last decoded frame so the clip always has `frames` steps.
        frames_list.extend([frames_list[-1]] * (frames - len(frames_list)))

        clip_np = np.stack(frames_list, axis=0)  # (T, H, W, C)
        clip_tensor = torch.from_numpy(clip_np).permute(3, 0, 1, 2).float() / 255.0  # (C, T, H, W)
        return clip_tensor


class SlowFastVideoInference:
    """Classify videos with a SlowFast model restored from a checkpoint.

    The model receives a two-element list ``[slow, fast]`` built from each
    batch of clips, and its output is turned into a class name plus
    probability per video.
    """

    def __init__(self, checkpoint_path: str, class_names: list, frames: int = 32, device: str = None):
        """Load the model and prepare the preprocessing transform.

        Args:
            checkpoint_path: Path of the checkpoint to restore.
            class_names: Class labels; position ``i`` names model output ``i``.
            frames: Number of frames sampled from each video.
            device: Device to run on. When ``None`` (or empty), ``"cuda"`` is
                used if available, otherwise ``"cpu"``.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.frames = frames
        self.class_names = class_names

        # map_location remaps the stored tensors onto the target device, so a
        # checkpoint saved on a GPU can still be restored on a CPU-only host.
        self.model = SlowFastLitModel.load_from_checkpoint(
            checkpoint_path,
            map_location=self.device,
            num_classes=len(class_names),
        )
        self.model.eval()
        self.model.to(self.device)

        self.transform = VideoTransform()

    def _prepare_pathways(self, x: torch.Tensor, alpha: int = 4):
        """Split a batch of clips into the two model inputs.

        Args:
            x: Batch of clips laid out as ``(B, C, T, H, W)``.
            alpha: Temporal stride of the slow pathway; every ``alpha``-th
                frame is kept. Defaults to 4.

        Returns:
            ``[slow, fast]`` where ``slow`` is ``x`` subsampled along the time
            axis and ``fast`` is ``x`` itself.
        """
        slow = x[:, :, ::alpha, :, :]  # slow pathway: every alpha-th frame
        fast = x                       # fast pathway: all frames
        return [slow, fast]

    def _format_predictions(self, paths, probs: torch.Tensor) -> list:
        """Turn a batch of class probabilities into result dictionaries.

        Args:
            paths: Video paths, one per row of ``probs``.
            probs: Tensor of shape ``(B, num_classes)``.

        Returns:
            One dict per video with keys ``video_path``, ``predicted_class``
            and ``predicted_prob`` (a plain Python ``float``).
        """
        probs_cpu = probs.cpu()
        # tolist() yields native ints/floats, so results stay JSON-serializable
        # instead of carrying numpy scalar types.
        pred_idxs = torch.argmax(probs_cpu, dim=1).tolist()
        prob_rows = probs_cpu.tolist()
        return [
            {
                "video_path": path,
                "predicted_class": self.class_names[pred_idx],
                "predicted_prob": row[pred_idx],
            }
            for path, pred_idx, row in zip(paths, pred_idxs, prob_rows)
        ]

    def predict_batch(self, video_paths: list, batch_size=4, num_workers=4):
        """Classify several videos.

        Args:
            video_paths: Paths of the videos to classify.
            batch_size: Number of clips per forward pass.
            num_workers: Number of DataLoader worker processes.

        Returns:
            A list with one result dict per video, in input order. Each dict
            has keys ``video_path``, ``predicted_class`` and
            ``predicted_prob``.
        """
        if len(video_paths) == 0:
            # Nothing to do; skip building a loader (and its workers).
            return []

        dataset = InferenceDataset(video_paths, transform=self.transform, frames=self.frames)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        results = []

        with torch.no_grad():
            for clips, paths in loader:
                # clips shape: (B, C, T, H, W)
                clips = clips.to(self.device)
                inputs = self._prepare_pathways(clips)  # list of two tensors [slow, fast]
                logits = self.model(inputs)
                probs = torch.softmax(logits, dim=1)
                results.extend(self._format_predictions(paths, probs))

        return results

    def predict(self, video_path: str):
        """Classify a single video and return its result dict."""
        # One clip gains nothing from worker processes, so load it in the
        # main process and avoid the worker start-up cost.
        return self.predict_batch([video_path], batch_size=1, num_workers=0)[0]
        
if __name__ == "__main__":
    # Demo run: classify one sample clip and print its top prediction.

    # Label i names model output i, so the order here matters.
    class_names = [
        'Abuse', 'Arrest', 'Arson', 'Assault', 'Burglary',
        'Explosion', 'Fighting', 'Normal_Videos_for_Event_Recognition',
        'RoadAccidents', 'Robbery', 'Shooting', 'Shoplifting',
        'Stealing', 'Vandalism'
    ]
    checkpoint_path = r"C:\Users\Bimlendra\OneDrive\Desktop\Major Project\Mid term\backend\finalbackend\best-checkpoint-epoch=06-val_acc=0.4500.ckpt"
    sample_videos = [r"backend\finalbackend\carcrash1.mp4"]

    inference = SlowFastVideoInference(checkpoint_path, class_names)
    results = inference.predict_batch(sample_videos, batch_size=2)

    for res in results:
        # Three report lines followed by one blank separator line.
        print(
            f"Video: {res['video_path']}",
            f"Predicted class: {res['predicted_class']}",
            f"Probability: {res['predicted_prob']:.4f}",
            sep="\n",
            end="\n\n",
        )

ModuleNotFoundError: No module named 'torch'