In [54]:
import torch
import numpy as np
import os

def load_and_prepare_tensor(file_path):
    data = np.load(file_path)
    if data.shape != (224, 224, 32):
        raise ValueError(f"Unexpected shape of data: {data.shape}")
    tensor = torch.from_numpy(data).permute(2, 0, 1).unsqueeze(0)
    return tensor

def extract_activations(model, input_tensor):
    # Forward pass the input tensor through the model
    activations = {}
    x = input_tensor
    for name, layer in model.named_children():
        x = layer(x)
        print(f"Layer name: {name}, Output shape: {x.shape}")  # Add this line for debugging
        if name == "layer4" and isinstance(layer, nn.Sequential):
            activations["layer4.2"] = x  # Last layer of "layer4"
        elif name == "avgpool" and isinstance(layer, nn.AdaptiveAvgPool2d):
            activations["avgpool"] = x
    return activations

def process_folder(folder_path, activations_dir, max_files=1000):
    files = [os.path.join(folder_path, f) for f in sorted(os.listdir(folder_path)) if f.endswith('.npy')]
    files = files[:max_files]

    # Move the model to GPU if CUDA is available
    if torch.cuda.is_available():
        model = flow_resnet50(pretrained=True).to('cuda')
    else:
        model = flow_resnet50(pretrained=True)

    for file in files:
        # Load and prepare the tensor
        tensor = load_and_prepare_tensor(file)

        # Move the tensor to GPU if CUDA is available
        if torch.cuda.is_available():
            tensor = tensor.to('cuda')

        # Extract activations from layer 4 and avg_pool
        activations = extract_activations(model, tensor)

        # Save activations for this file
        file_name = os.path.splitext(os.path.basename(file))[0]
        for layer_name, activation in activations.items():
            save_path = os.path.join(activations_dir, f"{file_name}_{layer_name}.npy")
            np.save(save_path, activation.data.cpu().numpy().ravel())

In [66]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torchvision.models.resnet import ResNet, Bottleneck

model_urls = {
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
}

def modify_first_conv_layer(model, new_in_channels=32, pretrained=True):
    if pretrained:
        # Load the pre-trained weights
        state_dict = model_zoo.load_url(model_urls['resnet50'])
        old_weights = state_dict['conv1.weight']

        # Calculate the new weights
        new_weights = torch.mean(old_weights, dim=1, keepdim=True)
        new_weights = new_weights.repeat(1, new_in_channels, 1, 1)

        # Update the first convolutional layer
        model.conv1 = nn.Conv2d(new_in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        model.conv1.weight.data = new_weights

        # Remove the original conv1 weights from the state_dict and load the modified state_dict
        del state_dict['conv1.weight']
        model.load_state_dict(state_dict, strict=False)
    else:
        # If not using pretrained weights, just modify the input channels
        model.conv1 = nn.Conv2d(new_in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)

    return model

def remove_fc_weights(state_dict):
    # Remove the fully connected layer weights from the state_dict
    new_state_dict = {}
    for key, value in state_dict.items():
        if not key.startswith('fc.'):
            new_state_dict[key] = value
    return new_state_dict

class Identity(nn.Module):
    def forward(self, x):
        return x

def remove_fc_layer(model):
    # Replace the fully connected layer with an identity layer
    model.fc = Identity()
    return model

def flow_resnet50(pretrained=True, num_classes=1000, **kwargs):
    """Constructs a modified ResNet-50 model for optical flow with changed input channels."""
    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, **kwargs)
    model = modify_first_conv_layer(model, new_in_channels=32, pretrained=False)
    
    if pretrained:
        # Load the pretrained weights, excluding the fully connected layer weights
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
        pretrained_state_dict = remove_fc_weights(pretrained_state_dict)
        # Remove 'conv1.weight' from the pre-trained state_dict
        pretrained_state_dict.pop('conv1.weight', None)
        model.load_state_dict(pretrained_state_dict, strict=False)
    
    model = remove_fc_layer(model)
    
    return model

def modify_and_load_model():
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    model = modify_first_conv_layer(model, new_in_channels=32, pretrained=True)

    # Load the pre-trained state dictionary, excluding 'conv1.weight'
    state_dict = model_zoo.load_url(model_urls['resnet50'])
    del state_dict['conv1.weight']
    model.load_state_dict(state_dict, strict=False)

    model = remove_fc_layer(model)
    return model

In [67]:
model = flow_resnet50(pretrained=True, num_classes=1000)
model = remove_fc_layer(model)
model = modify_and_load_model()
folder_path = r"C:\Users\andre\OneDrive\Documents\GitHub\Brainvision_Project\Motion_files\Optical_flow\stacked_img"
activations_dir = r"C:\Users\andre\OneDrive\Documents\GitHub\Brainvision_Project\Motion_files\Layers-features\REsnet_50_stackedflow"
process_folder(folder_path, activations_dir)

Layer name: conv1, Output shape: torch.Size([1, 64, 112, 112])
Layer name: bn1, Output shape: torch.Size([1, 64, 112, 112])
Layer name: relu, Output shape: torch.Size([1, 64, 112, 112])
Layer name: maxpool, Output shape: torch.Size([1, 64, 56, 56])
Layer name: layer1, Output shape: torch.Size([1, 256, 56, 56])
Layer name: layer2, Output shape: torch.Size([1, 512, 28, 28])
Layer name: layer3, Output shape: torch.Size([1, 1024, 14, 14])
Layer name: layer4, Output shape: torch.Size([1, 2048, 7, 7])
Layer name: avgpool, Output shape: torch.Size([1, 2048, 1, 1])
Layer name: fc, Output shape: torch.Size([1, 2048, 1, 1])
Layer name: conv1, Output shape: torch.Size([1, 64, 112, 112])
Layer name: bn1, Output shape: torch.Size([1, 64, 112, 112])
Layer name: relu, Output shape: torch.Size([1, 64, 112, 112])
Layer name: maxpool, Output shape: torch.Size([1, 64, 56, 56])
Layer name: layer1, Output shape: torch.Size([1, 256, 56, 56])
Layer name: layer2, Output shape: torch.Size([1, 512, 28, 28])
Lay

In [79]:
import os
import glob
import numpy as np
from tqdm import tqdm
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

def do_PCA_and_save(activations_dir, save_dir, layers, train_range, test_range, val_range, n_components=100):
    """
    This function preprocesses Neural Network features using PCA and saves the results
    in a specified directory.

    Parameters
    ----------
    activations_dir : str
        Path for extracted features.
    save_dir : str
        Path for extracted PCA features.
    layers : list
        List of strings with layer names to perform PCA.
    train_range : tuple
        Range of indices for training data (start, end).
    test_range : tuple
        Range of indices for testing data (start, end).
    val_range : tuple
        Range of indices for validation data (start, end).
    n_components : int, optional
        Number of components for PCA, default is 100.
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    for layer in tqdm(layers):
        activations_file_list = glob.glob(os.path.join(activations_dir, f"*{layer}*"))
        activations_file_list.sort()
        print(activations_file_list)
        
        # Load all activations into a numpy array
        feature_dim = np.load(activations_file_list[0]).shape[0]
        x = np.array([np.load(f) for f in activations_file_list])

        # Split data into train, test, and validation sets
        x_train = x[train_range[0]:train_range[1], :]
        x_test = x[test_range[0]:test_range[1], :]
        x_val = x[val_range[0]:val_range[1], :]

        # Standardize features by removing the mean and scaling to unit variance
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)
        x_val = scaler.transform(x_val)

        # Apply PCA
        pca = PCA(n_components=n_components)
        pca.fit(x_train)

        # Transform data using PCA
        x_train_pca = pca.transform(x_train)
        x_test_pca = pca.transform(x_test)
        x_val_pca = pca.transform(x_val)

        # Save the PCA transformed data
        np.save(os.path.join(save_dir, f"train_{layer}.npy"), x_train_pca)
        np.save(os.path.join(save_dir, f"test_{layer}.npy"), x_test_pca)
        np.save(os.path.join(save_dir, f"val_{layer}.npy"), x_val_pca)

# Example usage
activations_dir = r"C:\Users\andre\OneDrive\Documents\GitHub\Brainvision_Project\Motion_files\Layers-features\REsnet_50_stackedflow"
save_dir = r"C:\Users\andre\OneDrive\Documents\GitHub\Brainvision_Project\Motion_files\PCA"
layers = ["layer4", "avgpool"]
do_PCA_and_save(activations_dir, save_dir, layers, (1, 800), (801, 900), (901, 1000))


  0%|          | 0/2 [00:00<?, ?it/s]

['C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0001_stackedopticalflow_layer4.2.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0002_stackedopticalflow_layer4.2.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0003_stackedopticalflow_layer4.2.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0004_stackedopticalflow_layer4.2.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0005_stackedopticalflow_layer4.2.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0006_stackedopticalflow_layer4.2.npy', 'C:\\Users\\andre\\OneDrive

 50%|█████     | 1/2 [00:03<00:03,  3.37s/it]

['C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0001_stackedopticalflow_avgpool.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0002_stackedopticalflow_avgpool.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0003_stackedopticalflow_avgpool.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0004_stackedopticalflow_avgpool.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0005_stackedopticalflow_avgpool.npy', 'C:\\Users\\andre\\OneDrive\\Documents\\GitHub\\Brainvision_Project\\Motion_files\\Layers-features\\REsnet_50_stackedflow\\0006_stackedopticalflow_avgpool.npy', 'C:\\Users\\andre\\OneDrive\\Docu

100%|██████████| 2/2 [00:03<00:00,  1.91s/it]


In [82]:
pca_look = np.load(r"C:\Users\andre\OneDrive\Documents\GitHub\Brainvision_Project\Motion_files\PCA\train_avgpool.npy", allow_pickle=True)
pca_look.shape

(799, 100)