In [None]:
#cell 0 imports and paths 
import csv
import glob
import os
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

# Target frame size as (width, height).
FRAME_WIDTH, FRAME_HEIGHT = 256, 256
FRAME_SIZE = (FRAME_WIDTH, FRAME_HEIGHT)

# Input directories of the training / testing videos.
TRAIN_VIDEOS_DIR = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos"
TEST_VIDEOS_DIR = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/testing_videos"

# Output file for the submission CSV.
SUBMISSION_PATH = "submission.csv"



In [None]:
#cell 1 data preprocessing

import re
import matplotlib.pyplot as plt
import torchvision.transforms as T
import sys


def extract_frame_number(filename):
    """Return the last run of digits found in ``filename`` as an int.

    Raises:
        ValueError: if ``filename`` contains no digits at all.
    """
    digit_runs = re.findall(r"\d+", filename)
    if digit_runs:
        # The trailing group is taken as the frame index.
        return int(digit_runs[-1])
    raise ValueError(f"No numeric frame index found in {filename}")


#flipping logic
def is_upside_down_sobel(gray):
    """Heuristic flip detector based on vertical Sobel gradient energy.

    Compares the mean absolute y-gradient of the top third of the image
    with that of the bottom third and returns True when the top third has
    the larger value.
    """
    rows = gray.shape[0]

    # Absolute first-order derivative along y (dx=0, dy=1), 3x3 kernel.
    abs_grad_y = np.abs(cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3))

    top_band = abs_grad_y[: rows // 3, :]
    bottom_band = abs_grad_y[2 * rows // 3 :, :]

    return np.mean(top_band) > np.mean(bottom_band)


# Per-frame transform for the ResNet branch: ToTensor followed by
# normalisation with the ImageNet channel mean / std.
resnet_transform = T.Compose(
    [
        T.ToTensor(),
        T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def load_and_preprocess_videos(videos_dir, dataset_name="train", max_visualize=3):
    """Load, orientation-correct and resize every frame of every video folder.

    Args:
        videos_dir: Directory (``str`` or path-like) whose all-digit
            sub-folders each hold the ``*.jpg`` frames of one video.
        dataset_name: Label shown on the progress bar.
        max_visualize: Unused; kept so existing callers keep working.

    Returns:
        dict mapping each video folder name to a dict with
        ``"gray"`` (list of resized grayscale frames),
        ``"rgb"`` (list of ``resnet_transform`` outputs) and
        ``"ids"`` (list of ``"<video_id>_<frame_number>"`` strings),
        all three in frame-number order.
    """
    # Callers pass plain strings; the "/" joins below require a Path.
    videos_dir = Path(videos_dir)

    processed_videos = {}
    rotated_records = []

    # Only all-digit folder names are treated as videos, in numeric order.
    video_folders = sorted(
        (d for d in os.listdir(videos_dir) if d.isdigit()),
        key=int,
    )

    for video_id in tqdm(video_folders, desc=str(dataset_name)):

        video_path = videos_dir / video_id

        frame_files = sorted(
            glob.glob(str(video_path / "*.jpg")),
            key=lambda p: extract_frame_number(os.path.basename(p)),
        )

        frames_gray = []
        frames_rgb = []
        frame_ids = []

        for frame_path in frame_files:

            frame_number = extract_frame_number(os.path.basename(frame_path))
            frame_id = f"{video_id}_{frame_number}"

            img_bgr = cv2.imread(frame_path)
            if img_bgr is None:
                # Unreadable frames are skipped (they get no id either).
                print("Warning: could not read", frame_path)
                continue

            img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

            # Upside-down fix (Sobel heuristic): flip around both axes.
            if is_upside_down_sobel(gray):
                img_rgb = cv2.flip(img_rgb, -1)
                gray = cv2.flip(gray, -1)
                rotated_records.append((video_id, frame_number))

            # Resize only after the orientation has been corrected.
            img_rgb = cv2.resize(img_rgb, FRAME_SIZE, interpolation=cv2.INTER_AREA)
            gray = cv2.resize(gray, FRAME_SIZE, interpolation=cv2.INTER_AREA)

            frames_gray.append(gray)                       # classical branch
            frames_rgb.append(resnet_transform(img_rgb))   # ResNet branch
            frame_ids.append(frame_id)

        processed_videos[video_id] = {
            "gray": frames_gray,
            "rgb": frames_rgb,
            "ids": frame_ids,
        }

    print("Total corrected frames:", len(rotated_records))
    return processed_videos



# Preprocess both splits; each result maps video id -> {"gray", "rgb", "ids"}.
train_data = load_and_preprocess_videos(
    TRAIN_VIDEOS_DIR,
    dataset_name="train",
)
test_data = load_and_preprocess_videos(
    TEST_VIDEOS_DIR,
    dataset_name="test",
)

   
    




In [None]:
#cell 2 optical flow features(21d)

# optical flow parameters 
# Keyword arguments forwarded to cv2.calcOpticalFlowFarneback.
FLOW_PARAMS = {
    "pyr_scale": 0.5,
    "levels": 3,
    "winsize": 15,
    "iterations": 3,
    "poly_n": 5,
    "poly_sigma": 1.2,
    "flags": 0,
}

def extracting_optical_flow_features(video_data):
    """Compute a 21-d dense optical-flow descriptor for every frame.

    Layout of each descriptor:
        8 magnitude statistics, 8 direction-histogram bins,
        1 direction entropy, 1 coherence, 3 acceleration statistics.
    The first frame of each video has no predecessor and gets an
    all-zero descriptor.

    Args:
        video_data: dict mapping video id to a dict with ``"gray"``
            (list of grayscale frames) and ``"ids"`` (list of frame ids).

    Returns:
        (flow_features, all_frame_ids): a float32 array with one row per
        frame and the matching list of frame ids, in iteration order.
    """
    all_flow_features = []
    all_frame_ids = []

    for video_id in tqdm(video_data.keys()):

        gray_frames = video_data[video_id]["gray"]
        ids         = video_data[video_id]["ids"]

        prev_gray = None
        prev_mag  = None

        for gray, fid in zip(gray_frames, ids):

            # first frame has no previous reference
            if prev_gray is None:
                all_flow_features.append(np.zeros(21, dtype=np.float32))
                all_frame_ids.append(fid)
                prev_gray = gray
                prev_mag  = np.zeros_like(gray, dtype=np.float32)
                continue

            # dense optical flow between the previous and the current frame
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, gray, None, **FLOW_PARAMS
            )

            vx = flow[..., 0]
            vy = flow[..., 1]
            mag, ang = cv2.cartToPolar(vx, vy)

            # magnitude features (8)
            mag_mean = mag.mean()
            mag_std  = mag.std()
            mag_p50  = np.percentile(mag, 50)
            mag_p75  = np.percentile(mag, 75)
            mag_p90  = np.percentile(mag, 90)
            mag_p95  = np.percentile(mag, 95)
            mag_max  = mag.max()
            # fraction of pixels moving faster than the 90th percentile
            mag_sparse = np.mean(mag > (mag_p90 + 1e-6))

            mag_features = [mag_mean, mag_std, mag_p50, mag_p75,
                            mag_p90, mag_p95, mag_max, mag_sparse]

            # direction features (8 + 1)
            dir_hist, _ = np.histogram(
                ang, bins=8, range=(0, 2*np.pi), density=True
            )

            # NOTE(review): dir_hist holds density values (density=True), not
            # bin probabilities, so this is not a normalised Shannon entropy.
            # Left unchanged to keep the feature definition as written.
            dir_entropy = -np.sum(dir_hist * np.log(dir_hist + 1e-6))

            # coherence (1): length of the mean flow vector relative to the
            # mean flow magnitude
            mean_vx = vx.mean()
            mean_vy = vy.mean()
            coherence = np.sqrt(mean_vx**2 + mean_vy**2) / (mag_mean + 1e-6)

            # acceleration features (3): per-pixel absolute change of the
            # flow magnitude with respect to the previous frame. `acc` was
            # previously never defined, which raised NameError here.
            acc = np.abs(mag - prev_mag)
            acc_mean = acc.mean()
            acc_std  = acc.std()
            acc_p90  = np.percentile(acc, 90)

            acc_features = [acc_mean, acc_std, acc_p90]

            # all 21 flow features
            flow_feat = np.concatenate([
                mag_features,
                dir_hist.tolist(),
                [dir_entropy],
                [coherence],
                acc_features
            ])

            all_flow_features.append(flow_feat.astype(np.float32))
            all_frame_ids.append(fid)

            # carry the current frame over as the next reference
            prev_gray = gray
            prev_mag  = mag

    flow_features = np.array(all_flow_features, dtype=np.float32)

    return flow_features, all_frame_ids


# apply on train and test

# Flow descriptors and matching frame ids for both splits.
(flow_train, flow_train_ids) = extracting_optical_flow_features(train_data)
(flow_test, flow_test_ids) = extracting_optical_flow_features(test_data)




In [None]:
#cell3 Isolation forest training
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest

# Isolation Forest hyper-parameters. IF_PARAMS was referenced but never
# defined; these are starting values and may need tuning.
IF_PARAMS = dict(
    n_estimators=200,
    contamination="auto",
    random_state=42,
    n_jobs=-1,
)

# Standardise the 21 flow features, which live on different scales.
# The scaler is fitted on the training split only.
flow_scaler = StandardScaler()

# The inputs are the arrays produced by the optical-flow cell
# (flow_train / flow_test); the previous code read X_train_flow before
# it had ever been assigned.
X_train_flow = flow_scaler.fit_transform(flow_train)
X_test_flow  = flow_scaler.transform(flow_test)

# training the isolation forest
iso_forest = IsolationForest(**IF_PARAMS)
iso_forest.fit(X_train_flow)

# raw anomaly scores
# decision_function: higher = more normal
# multiply by -1 so higher = more anomalous
if_train_scores = -iso_forest.decision_function(X_train_flow)
if_test_scores  = -iso_forest.decision_function(X_test_flow)

# storing for the final fusion
final_if_train_scores = if_train_scores
final_if_test_scores  = if_test_scores

final_train_ids = flow_train_ids
final_test_ids  = flow_test_ids




In [None]:
#cell4 2nd RESNET BRANCH 

import torchvision.models as models
import torch.nn as nn

# Device used for the ResNet forward passes. DEVICE was referenced but
# never defined; fall back to CPU when no GPU is available.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# loading pretrained resnet 18
resnet = models.resnet18(pretrained=True)

# replacing the default classifier layer with an identity so the model
# outputs the features that used to feed it
resnet.fc = nn.Identity()

resnet = resnet.to(DEVICE)
resnet.eval()

RESNET_FEATURE_DIM = 512
RESNET_WINDOW = 8

def temporal_pool_features(features, window):
    """Element-wise max over a sliding temporal window.

    Row ``t`` of the result is the column-wise maximum of rows
    ``t - window`` to ``t + window`` (inclusive) of ``features``, with the
    range clipped at the array boundaries. ``features`` must be 2-D; the
    result has the same shape and dtype.
    """
    num_steps, _ = features.shape
    result = np.zeros_like(features)

    for step in range(num_steps):
        lo = step - window if step > window else 0
        hi = step + window + 1
        if hi > num_steps:
            hi = num_steps

        result[step] = features[lo:hi].max(axis=0)

    return result
#extraction of 512 resnet features 
def extract_resnet_features(video_data, batch_size=64):
    """Run every RGB frame through ``resnet`` and temporally max-pool per video.

    Args:
        video_data: dict mapping video id to a dict with ``"rgb"``
            (list of per-frame tensors) and ``"ids"`` (list of frame ids).
        batch_size: Number of frames per forward pass (default 64, the
            value that used to be hard-coded).

    Returns:
        (features, all_ids): the stacked per-frame features of all videos
        after pooling with ``temporal_pool_features`` and the matching
        frame ids. Videos without frames are skipped; if nothing was
        extracted, ``features`` is an empty ``(0, RESNET_FEATURE_DIM)`` array.
    """
    all_features = []
    all_ids = []

    with torch.no_grad():
        for video_id in tqdm(video_data.keys()):

            rgb_frames = video_data[video_id]["rgb"]
            ids        = video_data[video_id]["ids"]

            num_frames = len(rgb_frames)
            if num_frames == 0:
                # np.vstack([]) raises on an empty list; nothing to extract.
                continue

            video_feats = []
            for i in range(0, num_frames, batch_size):
                batch = torch.stack(rgb_frames[i:i + batch_size]).to(DEVICE)
                feats = resnet(batch)              # [B, 512]
                video_feats.append(feats.cpu().numpy())

            feats_raw = np.vstack(video_feats)

            # Pool inside the video only, so windows never span two videos.
            feats_pooled = temporal_pool_features(feats_raw, RESNET_WINDOW)

            all_features.append(feats_pooled)
            all_ids.extend(ids)

    if not all_features:
        return np.zeros((0, RESNET_FEATURE_DIM), dtype=np.float32), all_ids

    features = np.vstack(all_features)

    return features, all_ids


#applying on training and testing
# Pooled ResNet features and matching frame ids for both splits.
(resnet_train_feats, resnet_train_ids) = extract_resnet_features(train_data)
(resnet_test_feats, resnet_test_ids) = extract_resnet_features(test_data)





In [None]:
#cell 5 computing anomaly scores based on the 512-d ResNet features
from sklearn.cluster import KMeans

# Cluster the training features; fit() returns the fitted estimator itself.
kmeans = KMeans(
    n_clusters=5,
    random_state=42,
    n_init=10,
).fit(resnet_train_feats)

#computing distance to the nearest cluster
def compute_distance_to_nearest_cluster(features, kmeans_model):
    """Return, for each row of ``features``, the smallest value in the
    corresponding row of ``kmeans_model.transform(features)``.

    With a fitted KMeans model this is the distance to the closest
    cluster centre.
    """
    # One row per sample, one column per cluster: [N, k].
    distance_matrix = kmeans_model.transform(features)
    return np.min(distance_matrix, axis=1)

# Distance to the nearest training cluster, for each split.
deep_train_scores, deep_test_scores = (
    compute_distance_to_nearest_cluster(split_feats, kmeans)
    for split_feats in (resnet_train_feats, resnet_test_feats)
)

# Aliases read by the fusion cell.
final_deep_train_scores, final_deep_test_scores = deep_train_scores, deep_test_scores




In [None]:
#cell 6 fusing both branches and calculating anomaly scores


#sigmoid of flow features
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

# Isolation Forest test scores passed through the sigmoid act as a gate.
if_gate_test = sigmoid(final_if_test_scores)

# anomaly = semantic novelty × motion unreliability
raw_fused_scores = if_gate_test * final_deep_test_scores


# Global min-max normalisation; the epsilon guards against a zero range.
score_min, score_max = raw_fused_scores.min(), raw_fused_scores.max()

final_test_scores = (raw_fused_scores - score_min) / ((score_max - score_min) + 1e-8)



In [None]:
#cell 7 csv generation

TOTAL_EXPECTED_ROWS = 11706
CSV_PATH = SUBMISSION_PATH

# zip() would silently drop trailing entries on a length mismatch, which
# would shift or lose predictions without any error.
if len(final_test_ids) != len(final_test_scores):
    raise ValueError(
        f"ids/scores length mismatch: {len(final_test_ids)} ids vs "
        f"{len(final_test_scores)} scores"
    )

# Parse every id once into (video, frame, score) so sorting is numeric.
parsed_rows = []
for fid, score in zip(final_test_ids, final_test_scores):
    vid_str, frame_str = fid.split("_")
    parsed_rows.append((int(vid_str), int(frame_str), float(score)))

# Numerical sort by (video, frame); the score never takes part in ordering.
parsed_rows.sort(key=lambda row: row[:2])

# Ids are rebuilt from the parsed ints, which drops any zero padding.
submission_rows = [
    (f"{vid_num}_{frame_num}", score) for vid_num, frame_num, score in parsed_rows
]

# TOTAL_EXPECTED_ROWS was declared but never checked. Warn instead of
# failing so a file is still produced for inspection.
if len(submission_rows) != TOTAL_EXPECTED_ROWS:
    print(
        "Warning: submission has", len(submission_rows),
        "rows, expected", TOTAL_EXPECTED_ROWS,
    )

# writing csv
with open(CSV_PATH, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Id", "Predicted"])
    writer.writerows(submission_rows)





In [None]:
import pandas as pd
import numpy as np

INPUT_CSV = "submission.csv"
OUTPUT_CSV = "boosted_submission.csv"

TOP_PCT = 0.01  # fraction of the highest-scoring rows that get boosted
BOOST = 0.15    # constant added to those rows before re-normalising

df = pd.read_csv(INPUT_CSV)
scores = df["Predicted"].values.copy()

N = len(scores)
k = max(1, int(TOP_PCT * N))

# argsort is ascending, so the last k indices are the k largest scores.
top_idx = np.argsort(scores)[-k:]
scores[top_idx] = scores[top_idx] + BOOST

# Min-max rescale after the boost; the epsilon guards a zero range.
scores = (scores - scores.min()) / ((scores.max() - scores.min()) + 1e-8)

# Same frame with the Predicted column replaced by the boosted scores.
out = df.assign(Predicted=scores)
out.to_csv(OUTPUT_CSV, index=False)

