In [1]:
# -------------------------------------------------------------
# Import Required Libraries
# -------------------------------------------------------------
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import alexnet
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F


In [2]:

class STN(nn.Module):
    """Spatial transformer that resamples its input through a predicted affine map.

    A small convolutional localization network summarizes the input; a linear
    layer (``fc_loc``) maps that summary to six numbers that are reshaped into a
    2x3 affine matrix per sample. The matrix is converted into a sampling grid
    and the input is resampled on that grid.

    ``fc_loc`` is created lazily during the first forward pass because its input
    width depends on the spatial size of the incoming tensor. It starts with
    zero weights and a bias of ``[1, 0, 0, 0, 1, 0]``, so the predicted
    transform is the identity until the layer is trained or other weights are
    loaded.

    Caveats of the lazy construction:
        * Until the first forward pass ``fc_loc`` is a placeholder whose shape
          differs from the real layer, so its parameters (and a ``state_dict``
          taken at that point) are not meaningful.
        * The input width of ``fc_loc`` is fixed by the first input; later
          inputs must yield the same flattened localization size.

    Args:
        input_channels: Number of channels of the tensors passed to ``forward``.
    """

    def __init__(self, input_channels):
        super(STN, self).__init__()

        self.localization = nn.Sequential(
            nn.Conv2d(input_channels, 8, kernel_size=7),
            nn.MaxPool2d(2, 2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, 2),
            nn.ReLU(True)
        )

        # Placeholder only: the real regression layer is built in forward(),
        # once the flattened localization size is known.
        self.fc_loc = nn.Linear(1, 32)
        self.fc_initialized = False

    def forward(self, x):
        """Warp ``x`` with the affine transform predicted from ``x`` itself.

        Args:
            x: 4-D input tensor; its size is passed unchanged to
                ``F.affine_grid``, so the output has the same size.

        Returns:
            The resampled tensor, same size as ``x``.
        """
        xs = self.localization(x)
        xs = xs.view(xs.size(0), -1)

        if not self.fc_initialized:
            # Build the layer on the same device AND dtype as the features it
            # will consume; a float32-only layer breaks double/half modules.
            fc_loc = nn.Linear(xs.size(1), 6).to(device=x.device, dtype=xs.dtype)
            with torch.no_grad():
                # Zero weights + identity bias => theta is the identity map.
                fc_loc.weight.zero_()
                fc_loc.bias.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))
            self.fc_loc = fc_loc
            self.fc_initialized = True

        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size(), align_corners=False)
        x_transformed = F.grid_sample(x, grid, align_corners=False)
        return x_transformed


class FeatureExtractor(nn.Module):
    """Image feature extractor: optional STN, then AlexNet convolutional features.

    The input is optionally warped by an ``STN``, passed through the ``features``
    part of an ImageNet-pretrained AlexNet, globally average-pooled to a 1x1
    map, and flattened to one value per channel of the final feature map.

    Args:
        use_stn: When true, an ``STN`` for 3-channel input is applied before
            the backbone; when false, the input goes straight to the backbone.
    """

    def __init__(self, use_stn=True):
        super().__init__()
        self.use_stn = use_stn
        if use_stn:
            self.stn = STN(input_channels=3)
        else:
            self.stn = None

        # Only the convolutional stack of AlexNet is kept; its classifier
        # head is discarded.
        backbone = alexnet(weights="IMAGENET1K_V1")
        self.alexnet_model = backbone.features
        # Global average pooling collapses each channel to a single value.
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        """Return a 2-D tensor of pooled features, one row per input sample."""
        warped = self.stn(x) if self.use_stn else x
        pooled = self.pooling(self.alexnet_model(warped))
        batch_size = pooled.size(0)
        return pooled.view(batch_size, -1)




In [3]:
# -------------------------------------------------------------
# Initialize Model and Device
# -------------------------------------------------------------
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the extractor, place it on the chosen device and switch it to
# inference mode (Module.to / Module.eval act on the module itself).
feature_extractor = FeatureExtractor(use_stn=True)
feature_extractor.to(device)
feature_extractor.eval()

# -------------------------------------------------------------
# Image Preprocessing
# -------------------------------------------------------------
# Resize to 224x224, convert to a tensor, then normalize each channel with the
# statistics conventionally paired with ImageNet-pretrained torchvision models.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


def extract_features(image_path, transform, device=device):
    """Compute the feature vector of a single image file.

    The image is loaded as RGB, preprocessed with ``transform``, given a batch
    dimension, and passed through the module-level ``feature_extractor`` with
    gradient tracking disabled.

    Args:
        image_path: Path of the image file to load.
        transform: Callable applied to the RGB PIL image; its result must
            support ``.unsqueeze(0)`` and ``.to(device)``.
        device: Device the input tensor is moved to. Defaults to the
            module-level ``device``. The module-level ``feature_extractor`` is
            used as-is, so this should be the device that model lives on.

    Returns:
        A NumPy array with the extractor output for this image (batch
        dimension removed), or ``None`` if loading or inference failed. On
        failure a "Skipping frame" message is printed instead of raising.
    """
    try:
        # The context manager releases the underlying file handle as soon as
        # the pixel data has been copied out by convert().
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        batch = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            # squeeze(0) drops only the batch axis added above, leaving any
            # other size-1 axis of the feature output intact.
            features = feature_extractor(batch).squeeze(0).cpu().numpy()

        return features
    except Exception as e:
        # Deliberate best-effort: unreadable or non-image files are reported
        # and skipped so one bad frame does not abort a whole run.
        print(f"Skipping frame: {image_path} due to error: {e}")
        return None




In [9]:
# -------------------------------------------------------------
# Paths
# -------------------------------------------------------------
# Single source of truth for the dataset location and the video being
# processed, so the three derived paths below cannot drift apart.
MECCANO_ROOT = "D:/Datasets/Datasets/MECCANO"
VIDEO_ID = "0005"

# Directory holding the RGB frames of the selected video.
rgb_path = f"{MECCANO_ROOT}/RGB_Frames_Original/{VIDEO_ID}"
# CSV with the labelled intervals for the selected video.
label_csv_path = f"{MECCANO_ROOT}/Labels_ExcelFile(1-6)/{VIDEO_ID}_csv.csv"
# Destination of the extracted features (relative to the working directory).
output_csv = f"Feature_{VIDEO_ID}_Meccano_AlexNet.csv"

labels_df = pd.read_csv(label_csv_path)


In [11]:

# -------------------------------------------------------------
# Extract RGB Features Only and Save
# -------------------------------------------------------------
S = 10  # stride: keep every S-th entry of the sorted directory listing
features_list = []
all_frames_rgb = sorted(os.listdir(rgb_path))[::S]

for frame in tqdm(all_frames_rgb, desc="Extracting RGB Features Only"):
    frame_path = os.path.join(rgb_path, frame)
    rgb_features = extract_features(frame_path, transform)

    # extract_features already printed the reason when it returns None.
    if rgb_features is None:
        continue

    # The frame number is the text after the last '_' and before the next '.'.
    try:
        frame_number = int(frame.split('_')[-1].split('.')[0])
    except ValueError:
        # Skip instead of aborting the whole run on one unexpected file name.
        print(f"Skipping frame: {frame_path} due to error: cannot parse a frame number from the file name")
        continue

    # Rows whose [StartFrame, EndFrame] interval contains this frame.
    label_row = labels_df[(labels_df['StartFrame'] <= frame_number) & (labels_df['EndFrame'] >= frame_number)]

    if not label_row.empty:
        # If several intervals overlap, the first matching row wins.
        action_class = label_row.iloc[0]['ActionLabel']
    else:
        # NOTE(review): unlabelled frames get 0 - confirm this cannot collide
        # with a real ActionLabel value.
        action_class = 0  # default fallback

    features_list.append([frame, action_class] + rgb_features.tolist())

# ----------------- Save to CSV -----------------
if len(features_list) == 0:
    raise ValueError("No valid features extracted. Check paths and formats.")

# Take the feature width from a stored row rather than from the loop variable,
# which is None whenever the last processed frame failed to load.
num_features = len(features_list[0]) - 2  # minus the Frame and Action_class cells
columns = ["Frame", "Action_class"] + [f"Feature_{i}" for i in range(num_features)]
df = pd.DataFrame(features_list, columns=columns)

# Create the destination directory only when output_csv names one.
output_dir = os.path.dirname(output_csv)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

df.to_csv(output_csv, index=False)
print(f"\n✅ Feature extraction completed. Saved to: {output_csv}")


Extracting RGB Features Only: 100%|██████████████████████████████████████████████████| 676/676 [00:23<00:00, 28.71it/s]



✅ Feature extraction completed. Saved to: Feature_0005_Meccano_AlexNet.csv
