In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.models import resnet50
from PIL import Image
from tqdm import tqdm

In [2]:
###########################################################
# Define DEFT Module Components
###########################################################

class LocalizationNetwork(nn.Module):
    """Small CNN that regresses one 2x3 affine matrix per input sample.

    Two conv -> ReLU -> max-pool stages feed a two-layer MLP whose 6 outputs
    are reshaped to ``(-1, 2, 3)``. The last layer starts with zero weights
    and bias ``[1, 0, 0, 0, 1, 0]``, so a freshly constructed network outputs
    the identity affine matrix for every input.

    Args:
        input_channels: Number of channels of the tensor passed to ``conv1``.
    """

    def __init__(self, input_channels):
        super(LocalizationNetwork, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 8, kernel_size=7)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(8, 10, kernel_size=5)

        # The flattened conv output size depends on the spatial size of the
        # input, which is not known here. LazyLinear infers in_features on the
        # first forward pass, so fc1 is a single registered submodule for the
        # whole lifetime of the network instead of a placeholder layer that
        # gets swapped out inside forward(). This also lets a saved state_dict
        # be loaded into a freshly constructed instance.
        self.fc1 = nn.LazyLinear(32)
        self.fc2 = nn.Linear(32, 6)  # 6 affine parameters

        # Initialize the last layer so the network starts at the identity transform
        self.fc2.weight.data.zero_()
        self.fc2.bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def forward(self, x):
        """Return affine parameters of shape ``(-1, 2, 3)`` for the batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = torch.flatten(x, start_dim=1)  # keep the batch dim, flatten the rest
        x = F.relu(self.fc1(x))
        theta = self.fc2(x)
        return theta.view(-1, 2, 3)

class WeightingModule(nn.Module):
    """Gaussian-shaped weight computed from the first two grid coordinates.

    For a grid whose last dimension holds coordinates, the weight is
    ``1 + lambda_param * exp(-(g0**2 + g1**2) / (2 * sigma**2))`` where ``g0``
    and ``g1`` are the entries at index 0 and 1 of that last dimension.

    Args:
        sigma: Width of the Gaussian term (plain float, not learned).

    Attributes:
        lambda_param: Learnable scalar, initialised to 0.5, scaling the
            Gaussian term.
    """

    def __init__(self, sigma=0.5):
        super().__init__()
        self.sigma = sigma
        self.lambda_param = nn.Parameter(torch.tensor(0.5))

    def forward(self, grid):
        """Return the weight for ``grid`` with a trailing singleton dimension."""
        first_coord = grid[..., 0]
        second_coord = grid[..., 1]
        squared_radius = first_coord ** 2 + second_coord ** 2
        gaussian = torch.exp(-squared_radius / (2 * self.sigma ** 2))
        scaled = 1 + self.lambda_param * gaussian
        return scaled.unsqueeze(-1)

class DEFTModule(nn.Module):
    """Affine-warps the input and rescales it with a grid-dependent weight.

    ``LocalizationNetwork`` predicts one affine matrix per sample, the input
    is resampled on the resulting grid, and the resampled tensor is multiplied
    element-wise by the ``WeightingModule`` output evaluated on that same grid.

    Args:
        input_channels: Channel count forwarded to ``LocalizationNetwork``.
        sigma: Forwarded to ``WeightingModule``.
    """

    def __init__(self, input_channels, sigma=0.5):
        super().__init__()
        self.localization = LocalizationNetwork(input_channels)
        self.weighting = WeightingModule(sigma)

    def forward(self, x):
        """Return the warped and weighted tensor, same size as ``x``."""
        affine_params = self.localization(x)
        sampling_grid = F.affine_grid(affine_params, x.size(), align_corners=False)

        # The weighting output has one trailing channel: (N, H, W, 1).
        grid_weight = self.weighting(sampling_grid)
        warped = F.grid_sample(x, sampling_grid, align_corners=False)

        # Move that channel to dim 1 and repeat it (as a view) over every input
        # channel; for single-channel inputs expand_as leaves the shape as is.
        channel_weight = grid_weight.permute(0, 3, 1, 2).expand_as(warped)

        return warped * channel_weight


In [4]:
###########################################################
# Load ResNet50 Model for Feature Extraction
###########################################################
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet50 with its last child module dropped, so the network
# returns features instead of class scores.
resnet_model = nn.Sequential(
    *list(resnet50(pretrained=True).children())[:-1]  # Remove classification layer
)
resnet_model = resnet_model.to(device).eval()

# DEFT module for 3-channel (RGB) input. It is constructed fresh here; this
# cell does not load any saved weights into it.
deft_model = DEFTModule(input_channels=3).to(device).eval()

###########################################################
# Define Image Transformation
###########################################################
# Resize to 224x224, convert to a tensor, then normalise each channel with the
# mean / std values listed below.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


###########################################################
# Feature Extraction Function with DEFT
###########################################################
def extract_features(image_path, transform, device=device):
    """Run one image through ``deft_model`` and ``resnet_model``.

    Args:
        image_path: Path of the image file to open.
        transform: Callable applied to the RGB PIL image; its result must
            support ``.unsqueeze(0).to(device)``.
        device: Device the input batch is moved to. Defaults to the
            module-level ``device`` as it was when this function was defined.

    Returns:
        The squeezed ``resnet_model`` output as a NumPy array, or ``None`` when
        any step raised an exception (a message is printed in that case).
    """
    try:
        rgb_image = Image.open(image_path).convert('RGB')
        # Add a leading batch dimension of size 1 before moving to the device.
        batch = transform(rgb_image).unsqueeze(0).to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            warped = deft_model(batch)
            backbone_out = resnet_model(warped)
            features = backbone_out.squeeze().cpu().numpy()

        return features
    except Exception as err:
        # Best effort: report the problem and let the caller skip this frame.
        print(f"Skipping frame: {image_path} due to error: {err}")
        return None



###########################################################
# Paths to RGB Directory and Label File
###########################################################
# Directory containing the RGB frames to process.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
rgb_path = "D:/Datasets/Datasets/GTEA/GTEA_NEW/S2_Cheese_C1"
# CSV with the action annotations. The extraction loop further down reads its
# StartFrame, EndFrame, ActionLabel and ActionName columns.
label_csv_path = "D:/Datasets/Datasets/GTEA/GTEA_NEW/Label_CSV/S2_Cheese_C1.csv"
labels_df = pd.read_csv(label_csv_path)

# Output CSV for Features (relative to the notebook's working directory)
output_csv = "../SavedFeatures/Feature_0005_RGB_GTEA_Sampled.csv"



In [5]:
###########################################################
# Extract Features with DEFT & Save
###########################################################
S = 1  # Sampling stride: keep every S-th frame (1 keeps every frame)
features_list = []

# Sort first so the stride is applied in filename order.
all_frames = sorted(os.listdir(rgb_path))[::S]

for frame in tqdm(all_frames, desc="Extracting Features with DEFT (RGB only)"):
    rgb_frame_path = os.path.join(rgb_path, frame)

    # Extract only RGB features; None means the frame could not be processed.
    rgb_features = extract_features(rgb_frame_path, transform, device)
    if rgb_features is None:
        continue

    # Frame number = text after the last '_' up to the first '.', e.g.
    # "frame_0012.jpg" -> 12.
    frame_number = int(frame.split('_')[-1].split('.')[0])

    # Match frame to the action segment(s) whose [StartFrame, EndFrame] range contains it
    label_row = labels_df[(labels_df['StartFrame'] <= frame_number) & (labels_df['EndFrame'] >= frame_number)]

    if not label_row.empty:
        # If several segments match, the first one wins.
        action_label = label_row.iloc[0]['ActionLabel']
        action_name = label_row.iloc[0]['ActionName']
    else:
        action_label, action_name = 0, "Unknown"  # Default if no label found

    # One row per frame: metadata followed by the flattened feature vector
    features_list.append([frame, action_label, action_name] + rgb_features.tolist())

# Handle case where no valid features were extracted
if len(features_list) == 0:
    raise ValueError("No valid features extracted. Please check the dataset paths and feature extraction function.")

# Derive the feature count from a stored row rather than from the loop variable
# `rgb_features`, which is None whenever the last processed frame failed.
metadata_columns = ["Frame", "ActionLabel", "ActionName"]
num_features = len(features_list[0]) - len(metadata_columns)
columns = metadata_columns + [f"Feature_{i}" for i in range(num_features)]

# Create DataFrame and Save to CSV
df = pd.DataFrame(features_list, columns=columns)

# Make sure the target directory exists so the extraction work is not lost
# to a missing-folder error at the very end.
output_dir = os.path.dirname(output_csv)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_csv, index=False)

print(f"Feature extraction completed! Saved to {output_csv}")



Extracting Features with DEFT (RGB only): 100%|██████████████████████████████████████| 634/634 [00:27<00:00, 23.12it/s]


Feature extraction completed! Saved to ../SavedFeatures/Feature_0005_RGB_GTEA_Sampled.csv


In [7]:
# Reload the feature CSV written by the extraction cell and display it; the
# bare `f` on the last line makes the notebook render the DataFrame.
f=pd.read_csv("../SavedFeatures/Feature_0005_RGB_GTEA_Sampled.csv")
f

Unnamed: 0,Frame,ActionLabel,ActionName,Feature_0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,...,Feature_2038,Feature_2039,Feature_2040,Feature_2041,Feature_2042,Feature_2043,Feature_2044,Feature_2045,Feature_2046,Feature_2047
0,frame_0000.jpg,0,Unknown,0.672702,0.632684,0.429582,0.172300,1.890879,0.322251,0.223466,...,0.404167,0.187437,0.267517,0.284633,0.419673,0.094880,0.295313,0.259225,1.177268,0.728167
1,frame_0001.jpg,0,Unknown,0.608917,0.663221,0.323042,0.136267,1.778407,0.197399,0.200829,...,0.314054,0.209137,0.377932,0.319331,0.493963,0.117412,0.372072,0.305326,1.236573,0.678703
2,frame_0002.jpg,0,Unknown,0.534393,0.691548,0.324666,0.227510,1.681171,0.223980,0.209489,...,0.274337,0.210229,0.358059,0.256999,0.597587,0.104436,0.269609,0.328924,1.068935,0.578926
3,frame_0003.jpg,0,Unknown,0.721208,0.560013,0.398591,0.315795,1.725095,0.292977,0.222457,...,0.335489,0.186857,0.390643,0.177100,0.463020,0.078972,0.368051,0.190583,0.886433,0.599541
4,frame_0004.jpg,0,Unknown,0.676643,0.479319,0.451283,0.290430,1.577147,0.169950,0.198903,...,0.328434,0.189764,0.275102,0.221035,0.644674,0.105612,0.304131,0.190870,1.007153,0.557135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
629,frame_0629.jpg,0,Unknown,0.771166,0.864095,1.185811,0.121982,2.136732,0.342709,0.803857,...,0.165685,0.480680,0.722815,1.006872,1.112097,0.064192,1.113890,0.282670,1.572460,0.619470
630,frame_0630.jpg,0,Unknown,0.614610,0.761205,0.839294,0.104549,2.176075,0.312590,0.658513,...,0.233371,0.624845,0.976592,1.014247,1.166884,0.078205,0.777822,0.258261,1.840842,0.646813
631,frame_0631.jpg,0,Unknown,0.630541,0.756575,0.977241,0.125201,1.928457,0.328043,0.607180,...,0.238999,0.722437,1.128379,0.767957,1.193334,0.054957,0.671840,0.309554,1.993572,0.744509
632,frame_0632.jpg,0,Unknown,0.514133,0.496670,0.608805,0.137088,2.252193,0.185065,0.711322,...,0.289810,0.377427,0.944381,0.887915,0.894849,0.044464,0.765889,0.377734,1.967176,0.818945
