In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.models import resnet50
from PIL import Image
from tqdm import tqdm


In [2]:

###########################################################
# Define DEFT Module Components
###########################################################

class LocalizationNetwork(nn.Module):
    """Small CNN that regresses one 2x3 affine matrix per input sample.

    Two conv + max-pool stages feed a two-layer fully connected head. The
    output layer is initialised with zero weights and the bias
    ``[1, 0, 0, 0, 1, 0]``, so a freshly constructed network returns the
    identity transform for every input.

    Args:
        input_channels: Number of channels of the 4-D input passed to
            ``forward`` (the ``in_channels`` of the first convolution).
    """

    def __init__(self, input_channels):
        super(LocalizationNetwork, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 8, kernel_size=7)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(8, 10, kernel_size=5)

        # The input width of fc1 depends on the spatial size of the data, which
        # is only known at the first forward pass. LazyLinear infers it at that
        # point. Unlike replacing a dummy nn.Linear inside forward() behind a
        # plain Python flag, the materialised layer is a regular registered
        # submodule, so state_dict()/load_state_dict() round-trip correctly.
        self.fc1 = nn.LazyLinear(32)
        self.fc2 = nn.Linear(32, 6)  # 6 affine parameters

        # Initialize weights for identity transformation
        self.fc2.weight.data.zero_()
        self.fc2.bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def forward(self, x):
        """Predict affine parameters for ``x``.

        Args:
            x: 4-D tensor ``(batch, input_channels, H, W)``; H and W must be
                large enough to survive the 7x7 and 5x5 convolutions and the
                two 2x2 poolings.

        Returns:
            Tensor of shape ``(batch, 2, 3)``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = x.flatten(start_dim=1)  # (batch, 10 * H' * W')
        x = F.relu(self.fc1(x))
        theta = self.fc2(x)
        return theta.view(-1, 2, 3)

class WeightingModule(nn.Module):
    """Gaussian emphasis map computed from the first two grid coordinates.

    For every grid location the weight is
    ``1 + lambda * exp(-(g0^2 + g1^2) / (2 * sigma^2))`` where ``g0``/``g1``
    are the first two entries of the last grid dimension and ``lambda`` is a
    learnable scalar initialised to 0.5.

    Args:
        sigma: Standard deviation of the Gaussian (fixed, not learned).
    """

    def __init__(self, sigma=0.5):
        super().__init__()
        self.lambda_param = nn.Parameter(torch.tensor(0.5))
        self.sigma = sigma

    def forward(self, grid):
        """Return per-location weights with a trailing singleton dimension."""
        first, second = grid[..., 0], grid[..., 1]
        squared_radius = first ** 2 + second ** 2
        two_sigma_sq = 2 * self.sigma ** 2
        gaussian = torch.exp(-squared_radius / two_sigma_sq)
        emphasis = 1 + self.lambda_param * gaussian
        return emphasis.unsqueeze(-1)

class DEFTModule(nn.Module):
    """Affine-warp the input, then scale it by a grid-dependent weight map.

    A ``LocalizationNetwork`` predicts the affine parameters, the input is
    resampled on the resulting grid, and a ``WeightingModule`` evaluated on
    that same grid supplies a multiplicative weight for every output pixel.

    Args:
        input_channels: Channel count of the tensors passed to ``forward``.
        sigma: Forwarded to ``WeightingModule``.
    """

    def __init__(self, input_channels, sigma=0.5):
        super().__init__()
        self.localization = LocalizationNetwork(input_channels)
        self.weighting = WeightingModule(sigma)

    def forward(self, x):
        """Return the warped and weighted tensor, same shape as ``x``."""
        affine_params = self.localization(x)
        sampling_grid = F.affine_grid(affine_params, x.size(), align_corners=False)
        warped = F.grid_sample(x, sampling_grid, align_corners=False)

        # The weighting module yields (N, H, W, 1); move the singleton to the
        # channel axis so it lines up with the (N, C, H, W) image tensor.
        gain = self.weighting(sampling_grid).permute(0, 3, 1, 2)
        channels = x.shape[1]
        if channels > 1:
            gain = gain.expand(-1, channels, -1, -1)

        return warped * gain






In [3]:
###########################################################
# Load ResNet50 Model for Feature Extraction
###########################################################
# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ImageNet-pretrained ResNet50 with its last child module (the classification
# layer) dropped, so the network outputs pooled features instead of logits.
# NOTE(review): `pretrained=` is the older torchvision keyword; newer releases
# prefer `weights=` - confirm against the installed version.
resnet_model = resnet50(pretrained=True)
backbone_layers = list(resnet_model.children())[:-1]
resnet_model = nn.Sequential(*backbone_layers)
resnet_model = resnet_model.to(device).eval()

# Load DEFT Module
# No checkpoint is loaded in this cell, so the module runs with the parameters
# it was constructed with.
deft_model = DEFTModule(input_channels=3).to(device).eval()  # Input channels = 3 (RGB)

###########################################################
# Define Image Transformation
###########################################################
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


###########################################################
# Feature Extraction Function with DEFT
###########################################################
def extract_features(image_path, transform, device=device):
    """Compute a ResNet50 feature vector for one image after the DEFT step.

    The image is opened as RGB, preprocessed with ``transform``, passed through
    the module-level ``deft_model`` and then ``resnet_model``.

    Args:
        image_path: Path of the image file to read.
        transform: Callable turning a PIL image into a tensor.
        device: Device the input batch is moved to; defaults to the
            module-level ``device``.

    Returns:
        NumPy array holding the squeezed backbone output, or ``None`` when
        anything in the pipeline raises (the error is printed, not re-raised).
    """
    try:
        rgb_image = Image.open(image_path).convert('RGB')
        # Add a leading batch dimension of 1 and move the tensor to `device`.
        batch = transform(rgb_image).unsqueeze(0).to(device)

        # Inference only: no autograd bookkeeping needed.
        with torch.no_grad():
            warped = deft_model(batch)
            embedding = resnet_model(warped)
            features = embedding.squeeze().cpu().numpy()
    except Exception as e:
        print(f"Skipping frame: {image_path} due to error: {e}")
        return None
    return features





In [9]:
###########################################################
# Paths to RGB Directory and Label File
###########################################################
rgb_path = "D:/Datasets/Datasets/MECCANO/RGB_Frames_Original/0005"
label_csv_path = "D:/Datasets/Datasets/MECCANO/Labels_ExcelFile(1-6)/0005_CSV.csv"
labels_df = pd.read_csv(label_csv_path)

# Output CSV for Features
output_csv = "../SavedFeatures/Feature_0005_RGB_Meccano_Sampled.csv"

# Make sure the destination folder exists before the (long) extraction loop,
# so the final to_csv() cannot fail on a missing directory after all the work.
os.makedirs(os.path.dirname(output_csv) or ".", exist_ok=True)

###########################################################
# Extract Features with DEFT & Save
###########################################################
S = 6  # Sampling stride: keep every S-th entry of the sorted directory listing
features_list = []

all_frames = sorted(os.listdir(rgb_path))[::S]

for frame in tqdm(all_frames, desc="Extracting Features with DEFT (RGB only)"):
    rgb_frame_path = os.path.join(rgb_path, frame)

    # Extract only RGB features (None means the frame could not be processed)
    rgb_features = extract_features(rgb_frame_path, transform, device)

    if rgb_features is not None:
        # Frame number = digits between the last '_' and the first following '.'
        frame_number = int(frame.split('_')[-1].split('.')[0])

        # Match frame to the action interval(s) that contain it
        label_row = labels_df[(labels_df['StartFrame'] <= frame_number) & (labels_df['EndFrame'] >= frame_number)]

        if not label_row.empty:
            # If several intervals overlap, the first matching row wins.
            action_label = label_row.iloc[0]['ActionLabel']
            action_name = label_row.iloc[0]['ActionName']
        else:
            # NOTE(review): 0 is used as the "no label" id - confirm it does
            # not collide with a real ActionLabel in the label file.
            action_label, action_name = 0, "Unknown"  # Default if no label found

        # Add to feature list
        features_list.append([frame, action_label, action_name] + rgb_features.tolist())


# Handle case where no valid features were extracted
if len(features_list) == 0:
    raise ValueError("No valid features extracted. Please check the dataset paths and feature extraction function.")

# Create DataFrame and Save to CSV
# The feature count is taken from a stored row rather than from the loop
# variable `rgb_features`, which is None whenever the last sampled frame failed.
n_features = len(features_list[0]) - 3  # minus Frame, ActionLabel, ActionName
columns = ["Frame", "ActionLabel", "ActionName"] + [f"Feature_{i}" for i in range(n_features)]
df = pd.DataFrame(features_list, columns=columns)
df.to_csv(output_csv, index=False)

print(f"Feature extraction completed! Saved to {output_csv}")


Extracting Features with DEFT (RGB only): 100%|████████████████████████████████████| 1127/1127 [01:14<00:00, 15.13it/s]


Feature extraction completed! Saved to ../SavedFeatures/Feature_0005_RGB_Meccano_Sampled.csv


In [8]:
# Read the saved feature table back from disk and show it as the cell output.
# NOTE(review): the frame names in the stored output below step by 10 and this
# cell's execution count precedes the extraction cell's, so the displayed table
# may come from an earlier run - re-run after extraction to confirm.
saved_features_csv = "../SavedFeatures/Feature_0005_RGB_Meccano_Sampled.csv"
f = pd.read_csv(saved_features_csv)
f

Unnamed: 0,Frame,ActionLabel,ActionName,Feature_0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,...,Feature_2038,Feature_2039,Feature_2040,Feature_2041,Feature_2042,Feature_2043,Feature_2044,Feature_2045,Feature_2046,Feature_2047
0,frame_0000.jpg,0,Unknown,0.626021,0.136648,0.814924,0.467652,0.565238,0.912501,0.086076,...,0.266693,0.116132,0.126604,0.168727,0.834818,0.282505,0.234308,0.292932,0.270182,0.421659
1,frame_0010.jpg,0,Unknown,0.600925,0.144724,0.688657,0.136680,0.162919,0.251208,0.199814,...,0.307471,0.230512,0.073927,0.356993,0.075708,0.372889,0.093797,0.163520,0.477284,0.304520
2,frame_0020.jpg,0,Unknown,0.502192,0.098959,0.482710,0.209834,0.375789,0.450500,0.325713,...,0.497056,0.097544,0.044455,0.381480,0.124072,0.355168,0.208703,0.139895,0.364601,0.255063
3,frame_0030.jpg,0,Unknown,0.546299,0.094240,0.416382,0.145025,0.483093,0.228572,0.199605,...,0.266392,0.208721,0.060479,0.397812,0.159045,0.472079,0.311867,0.140286,0.398266,0.180622
4,frame_0040.jpg,0,Unknown,0.380077,0.540423,0.586455,0.184319,0.406975,0.140169,0.600791,...,0.284387,0.352718,0.165158,0.353295,0.318298,0.420781,0.062481,0.087273,0.487244,0.218091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
671,frame_6710.jpg,0,Unknown,0.700987,1.577711,1.548751,1.206610,0.535822,0.612624,0.249038,...,0.450356,0.053699,0.142810,0.902073,0.192972,0.389225,0.236971,0.380975,0.113422,0.633159
672,frame_6720.jpg,26,put_partial_model,0.739097,1.715547,0.850013,1.197490,0.228857,0.896092,0.370398,...,0.489609,0.085884,0.227601,0.816067,0.222830,0.677138,0.065650,0.307095,0.133619,0.348472
673,frame_6730.jpg,0,Unknown,0.175734,1.064635,0.723253,0.582385,0.246102,0.702970,0.404153,...,0.489417,0.149197,0.029018,0.776966,0.242713,0.302856,0.199337,0.266891,0.775237,0.444381
674,frame_6740.jpg,0,Unknown,0.378772,0.535044,0.378519,0.592495,0.274768,0.439652,0.246924,...,0.307797,0.139914,0.020380,0.556032,0.307466,0.397254,0.051450,0.323076,0.378952,0.267345
