In [1]:
#i3d extraction

!git clone https://github.com/piergiaj/pytorch-i3d.git
%cd pytorch-i3d
!pip install torch torchvision numpy pillow tqdm

# Create models directory
!mkdir -p models

# Download flow_imagenet.pt into models/
!wget -O /kaggle/working/flow_imagenet.pt https://www.dropbox.com/s/7w4z5q9fowcp9x5/flow_imagenet.pt?dl=1


fatal: destination path 'pytorch-i3d' already exists and is not an empty directory.
/kaggle/working/pytorch-i3d
--2025-04-06 12:46:05--  https://www.dropbox.com/s/7w4z5q9fowcp9x5/flow_imagenet.pt?dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.18, 2620:100:6016:18::a27d:112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.18|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘/kaggle/working/flow_imagenet.pt’

/kaggle/working/flo     [ <=>                ]  70.18K  --.-KB/s    in 0.02s   

2025-04-06 12:46:05 (3.47 MB/s) - ‘/kaggle/working/flow_imagenet.pt’ saved [71867]



In [3]:
# List the contents of the cloned repository directory.
!ls /kaggle/working/pytorch-i3d

charades_dataset_full.py  LICENSE.txt  pytorch_i3d.py  train_i3d.py
charades_dataset.py	  models       README.md       videotransforms.py
extract_features.py	  __pycache__  state.db


In [4]:
import sys
from pytorch_i3d import InceptionI3d

sys.path.append('/kaggle/working/pytorch-i3d')

!wget -O /kaggle/working/flow_imagenet.pt https://github.com/piergiaj/pytorch-i3d/raw/master/models/flow_imagenet.pt
!file /kaggle/working/flow_imagenet.pt
!wget -O /kaggle/working/rgb_imagenet.pt https://github.com/piergiaj/pytorch-i3d/raw/master/models/rgb_imagenet.pt


--2025-04-06 12:46:10--  https://github.com/piergiaj/pytorch-i3d/raw/master/models/flow_imagenet.pt
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/piergiaj/pytorch-i3d/master/models/flow_imagenet.pt [following]
--2025-04-06 12:46:10--  https://raw.githubusercontent.com/piergiaj/pytorch-i3d/master/models/flow_imagenet.pt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 50795330 (48M) [application/octet-stream]
Saving to: ‘/kaggle/working/flow_imagenet.pt’


2025-04-06 12:46:12 (293 MB/s) - ‘/kaggle/working/flow_imagenet.pt’ saved [50795330/50795330]

/kaggle/working/flow_imagenet.pt: data
--2025-

In [None]:
import os
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm
from pytorch_i3d import InceptionI3d
import torchvision.transforms as transforms

# --- Config ---
# Root folder holding one sub-directory per video.
dataset_path = '/kaggle/input/shanghaitech-anomaly-detection/dataset/mp'
# NOTE(review): this directory says "train" while the flow cell writes to a
# "test" directory for the same dataset_path — confirm the intended naming.
output_dir = '/kaggle/working/i3d_train_rgb_features_output'
model_weights = '/kaggle/working/rgb_imagenet.pt'  # Update with your RGB model weights
os.makedirs(output_dir, exist_ok=True)

# --- Device ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
print(f"GPU Available: {torch.cuda.is_available()}")
print(f"GPU Count: {torch.cuda.device_count()}")

# --- Load RGB I3D Model ---
# 400 presumably matches the class count of the pretrained checkpoint — confirm.
i3d = InceptionI3d(400, in_channels=3)  # 3 input channels for RGB
# map_location keeps loading functional on CPU-only sessions even if the
# checkpoint tensors were saved from a CUDA device.
i3d.load_state_dict(torch.load(model_weights, map_location=device, weights_only=True))
# NOTE(review): the extractor never calls the logits layer explicitly, so this
# replacement may be unnecessary for feature extraction — confirm.
i3d.replace_logits(1024)
i3d.to(device)
i3d.eval()  # inference mode: disables dropout / batch-norm updates

# --- Modified Feature Extractor for Single Frames ---
class I3DFrameFeatureExtractor(torch.nn.Module):
    """Turn a batch of single frames into one flat feature vector per frame.

    The wrapped ``model`` must expose ``end_points``, a mapping from endpoint
    name to a callable layer. Layers are applied in the mapping's iteration
    order.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        """Return features of shape (N, C) for an input of shape (N, C, H, W)."""
        # Insert a length-1 temporal axis: (N, C, H, W) -> (N, C, 1, H, W).
        activations = x.unsqueeze(2)

        # Apply each endpoint in turn; stop right after 'avg_pool' if present.
        # NOTE(review): whether the model has an endpoint with that name is not
        # visible here; if it does not, every endpoint is applied — confirm.
        for name, layer in self.model.end_points.items():
            activations = layer(activations)
            if name == 'avg_pool':
                break

        # Collapse the temporal and spatial axes to size 1, then flatten.
        pooled = torch.nn.functional.adaptive_avg_pool3d(activations, 1)
        batch_size = pooled.size(0)
        return pooled.view(batch_size, -1)

# Wrap the loaded I3D model in the feature extractor and move it to the selected device.
extractor = I3DFrameFeatureExtractor(i3d).to(device)

# --- Preprocessing for RGB frames ---
def preprocess_rgb_frame(frame_path):
    """Load one image file and return it as a normalized (3, 224, 224) tensor.

    The image is converted to 3-channel RGB first, so grayscale, palette and
    RGBA files all yield exactly three channels (previously only 2-D grayscale
    arrays were handled; an alpha channel or palette index map passed through
    unchanged).

    Args:
        frame_path: Path of the image file to read.

    Returns:
        float32 ``torch.Tensor`` of shape (C, H, W) = (3, 224, 224) with
        values scaled from [0, 255] to [-1, 1].
    """
    # The context manager closes the underlying file even if decoding fails.
    with Image.open(frame_path) as img:
        arr = np.array(img.convert('RGB').resize((224, 224)))
    arr = (arr / 127.5 - 1.0).astype(np.float32)  # Normalize to [-1, 1]
    return torch.tensor(arr).permute(2, 0, 1)  # (H, W, C) -> (C, H, W)

# --- Extract Features from Single Frame ---
def extract_frame_feature(frame_path):
    """Compute the feature array for a single RGB frame file.

    The frame is preprocessed, given a leading batch axis of size 1, moved to
    ``device`` and passed through ``extractor`` with gradients disabled. The
    result is returned as a NumPy array with size-1 axes squeezed out.
    """
    single = preprocess_rgb_frame(frame_path)
    batch = single.unsqueeze(0).to(device)  # (1, 3, H, W)
    with torch.no_grad():
        output = extractor(batch)
    as_numpy = output.cpu().numpy()
    return as_numpy.squeeze()

# --- Process Dataset ---
# Every sub-directory of dataset_path is treated as one video.
video_folders = sorted(
    name for name in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, name))
)

for video_folder in tqdm(video_folders, desc="Extracting features"):
    frames_dir = os.path.join(dataset_path, video_folder, 'frames')
    if not os.path.isdir(frames_dir):
        continue

    # Lexicographic sort; assumes frame file names are zero-padded so this
    # equals temporal order — TODO confirm against the dataset.
    rgb_frames = sorted(
        os.path.join(frames_dir, f) for f in os.listdir(frames_dir)
        if f.endswith(('.jpg', '.png', '.jpeg'))
    )

    if not rgb_frames:
        # An empty list would be saved as a shape-(0,) array, which cannot be
        # concatenated along axis=1 later; skip it and report it instead.
        tqdm.write(f"⚠️ Skipping {video_folder} — no RGB frames found.")
        continue

    # One row per frame: (num_frames, feature_dim).
    features = np.stack([extract_frame_feature(frame_path) for frame_path in rgb_frames])
    np.save(os.path.join(output_dir, f"{video_folder}_rgb.npy"), features)

print("✅ Single-frame RGB feature extraction complete.")

## Flow feature extraction

In [7]:
import os
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm
from pytorch_i3d import InceptionI3d
import torchvision.transforms as transforms

# --- Config ---
# Root folder holding one sub-directory per video.
dataset_path = '/kaggle/input/shanghaitech-anomaly-detection/dataset/mp'
output_dir = '/kaggle/working/i3d_test_flow_features_output'
model_weights = '/kaggle/working/flow_imagenet.pt'
os.makedirs(output_dir, exist_ok=True)

# --- Device ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Load Flow I3D Model ---
# 2 input channels: the x and y components of optical flow.
i3d = InceptionI3d(400, in_channels=2)
# map_location keeps loading functional on CPU-only sessions even if the
# checkpoint tensors were saved from a CUDA device.
i3d.load_state_dict(torch.load(model_weights, map_location=device, weights_only=True))
i3d.replace_logits(1024)
# NOTE(review): the extractor defined in this cell reaches through the
# wrapper via `.module`, so DataParallel is not used for the forward pass.
i3d = torch.nn.DataParallel(i3d)
i3d.to(device)

i3d.eval()  # inference mode: disables dropout / batch-norm updates

class I3DFrameFeatureExtractor(torch.nn.Module):
    """Turn a batch of single frames into one flat feature vector per frame.

    Args:
        model: Either an object exposing ``end_points`` (a mapping from
            endpoint name to callable layer), or a wrapper such as
            ``torch.nn.DataParallel`` that holds such an object in ``.module``.
    """

    def __init__(self, model):
        super().__init__()
        # Unwrap DataParallel-style wrappers, but also accept a bare model
        # (previously a bare model raised AttributeError on `.module`).
        self.model = getattr(model, 'module', model)

    def forward(self, x):
        """Return features of shape (N, C) for an input of shape (N, C, H, W)."""
        x = x.unsqueeze(2)  # add a length-1 temporal axis: (N, C, 1, H, W)
        # Apply each endpoint in iteration order; stop right after 'avg_pool'
        # if an endpoint with that name exists.
        for endpoint in self.model.end_points:
            x = self.model.end_points[endpoint](x)
            if endpoint == 'avg_pool':
                break
        # Collapse the temporal and spatial axes to size 1, then flatten.
        x = torch.nn.functional.adaptive_avg_pool3d(x, 1)
        return x.view(x.size(0), -1)


# Build the feature extractor from the DataParallel-wrapped model and move it to the selected device.
extractor = I3DFrameFeatureExtractor(i3d).to(device)

# --- Preprocessing (Flow uses 2 channels) ---
def preprocess_flow_frame(flow_path):
    """Load one optical-flow image and return it as a (2, 224, 224) tensor.

    Only the first two image channels are kept.
    NOTE(review): this presumes the flow x/y components are stored in the
    first two channels of the image — confirm against how the flow images
    were generated.

    Args:
        flow_path: Path of the flow image file to read.

    Returns:
        float32 ``torch.Tensor`` of shape (C, H, W) = (2, 224, 224) with
        values scaled from [0, 255] to [-1, 1].

    Raises:
        ValueError: If the decoded image has fewer than two channels.
    """
    # The context manager closes the underlying file even if decoding fails.
    with Image.open(flow_path) as img:
        arr = np.array(img.resize((224, 224)))
    # A single-channel image decodes to a 2-D array; fail with a clear message
    # instead of an opaque IndexError from the channel slice below.
    if arr.ndim != 3 or arr.shape[2] < 2:
        raise ValueError(
            f"Flow image must have at least 2 channels, got array of shape {arr.shape}: {flow_path}"
        )
    arr = arr[:, :, :2]  # take only x/y flow channels
    arr = (arr / 127.5 - 1.0).astype(np.float32)
    return torch.tensor(arr).permute(2, 0, 1)  # (H, W, C) -> (C, H, W)

# --- Extract Features from Single Frame ---
def extract_frame_feature(frame_path):
    """Compute the feature array for a single optical-flow frame file.

    The frame is preprocessed, given a leading batch axis of size 1, moved to
    ``device`` and passed through ``extractor`` with gradients disabled. The
    result is returned as a NumPy array with size-1 axes squeezed out.
    """
    single = preprocess_flow_frame(frame_path)
    batch = single.unsqueeze(0).to(device)  # (1, 2, H, W)
    with torch.no_grad():
        output = extractor(batch)
    as_numpy = output.cpu().numpy()
    return as_numpy.squeeze()

# --- Process Dataset ---
# Every sub-directory of dataset_path is treated as one video.
video_folders = sorted(
    name for name in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, name))
)

for video_folder in tqdm(video_folders, desc="Extracting features"):
    flow_dir = os.path.join(dataset_path, video_folder, 'optical_flow')
    if not os.path.isdir(flow_dir):
        continue

    # Same extension set as the RGB extraction loop, so both modalities select
    # files by the same rule. Lexicographic sort; assumes zero-padded file
    # names so this equals temporal order — TODO confirm against the dataset.
    flow_frames = sorted(
        os.path.join(flow_dir, f) for f in os.listdir(flow_dir)
        if f.endswith(('.jpg', '.png', '.jpeg'))
    )

    if not flow_frames:
        # An empty list would be saved as a shape-(0,) array, which cannot be
        # concatenated along axis=1 later; skip it and report it instead.
        tqdm.write(f"⚠️ Skipping {video_folder} — no optical flow frames found.")
        continue

    # One row per frame: (num_frames, feature_dim).
    features = np.stack([extract_frame_feature(frame_path) for frame_path in flow_frames])
    np.save(os.path.join(output_dir, f"{video_folder}_flow.npy"), features)

print("✅ Single-frame optical flow feature extraction complete.")

Extracting features: 100%|██████████| 330/330 [2:15:01<00:00, 24.55s/it]  

✅ Single-frame optical flow feature extraction complete.





In [None]:
import os
import numpy as np
from tqdm import tqdm

# --- Paths ---
rgb_features_dir = '/kaggle/working/i3d_train_rgb_features_output'
flow_features_dir = '/kaggle/working/i3d_test_flow_features_output'
output_dir = '/kaggle/working/combined_rgb_flow_features'
os.makedirs(output_dir, exist_ok=True)

# --- List available RGB feature files ---
# Match on the full suffix so unrelated .npy files are ignored and the video
# name is obtained by stripping the suffix from the END of the file name only
# (str.replace would also remove the substring from the middle of a name).
RGB_SUFFIX = '_rgb.npy'
rgb_files = sorted(f for f in os.listdir(rgb_features_dir) if f.endswith(RGB_SUFFIX))

for rgb_file in tqdm(rgb_files, desc="Concatenating RGB + Flow features"):
    video_name = rgb_file[:-len(RGB_SUFFIX)]

    rgb_path = os.path.join(rgb_features_dir, rgb_file)
    flow_path = os.path.join(flow_features_dir, f"{video_name}_flow.npy")

    if not os.path.exists(flow_path):
        print(f"⚠️ Skipping {video_name} — Flow feature missing.")
        continue

    rgb_features = np.load(rgb_path)  # expected shape: (num_frames, feature_dim)
    flow_features = np.load(flow_path)  # expected shape: (num_frames, feature_dim)

    # Concatenation along axis=1 needs 2-D inputs; a video saved with zero
    # frames is stored as a 1-D empty array and would raise here.
    if rgb_features.ndim != 2 or flow_features.ndim != 2:
        print(
            f"⚠️ Skipping {video_name} — expected 2-D features, "
            f"got shapes {rgb_features.shape} and {flow_features.shape}."
        )
        continue

    # Ensure same number of frames by truncating both to the shorter one
    min_len = min(len(rgb_features), len(flow_features))
    if min_len == 0:
        print(f"⚠️ Skipping {video_name} — no frames to combine.")
        continue
    rgb_features = rgb_features[:min_len]
    flow_features = flow_features[:min_len]

    # --- Concatenate along feature axis ---
    combined_features = np.concatenate([rgb_features, flow_features], axis=1)  # shape: (num_frames, combined_feature_dim)

    # --- Save ---
    output_path = os.path.join(output_dir, f"{video_name}_rgb_flow.npy")
    np.save(output_path, combined_features)

print("✅ All features successfully concatenated and saved.")
