Channel-wise PCA captures the most significant patterns across frames for each channel, potentially highlighting the most prominent changes or features in the video content for that specific channel.
**Do we want that, or are there better ways to capture spatial features per frame?**

In [76]:
import os
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tqdm import tqdm

In [77]:
def find_repo_root(path='.'):
    """Return the closest ancestor of ``path`` (inclusive) that contains ``.git``.

    Args:
        path: Location to start searching from; it is made absolute first.
            Defaults to the current working directory.

    Returns:
        The absolute path of the repository root, or ``None`` when the top of
        the filesystem is reached without finding a ``.git`` entry.
    """
    path = os.path.abspath(path)
    # ``.git`` is a directory in a regular clone but a plain file in linked
    # worktrees and submodules, so test for existence rather than isdir.
    while not os.path.exists(os.path.join(path, '.git')):
        parent = os.path.dirname(path)
        if parent == path:
            # dirname() no longer changes the path: filesystem root reached.
            return None
        path = parent
    return path

# Resolve the repository root once, searching upwards from the current working directory.
repo_root = find_repo_root()
print("Repository Root:", repo_root)

Repository Root: C:\Users\marce\PycharmProjects\Brainvision_Project


In [78]:
def get_full_path(relative_path, repo_root):
    """Join ``relative_path`` onto ``repo_root``.

    Args:
        relative_path: Path expressed relative to the repository root.
        repo_root: Repository root directory.

    Returns:
        ``os.path.join(repo_root, relative_path)``.

    Raises:
        ValueError: If ``repo_root`` is falsy (for example ``None`` or ``""``).
    """
    if repo_root:
        return os.path.join(repo_root, relative_path)
    raise ValueError("Repository root not found. Ensure you're inside a Git repository.")


In [84]:
# step 1 function
def load_and_combine_tensors(stage_name, input_folder, num_videos):
    """Load the pickled per-video tensors of one stage and stack them along axis 0.

    Files are looked up as ``<input_folder>/<stage_name>/<NNNN>_<stage_name>.pkl``
    for video numbers ``1..num_videos`` (zero-padded to four digits). Missing
    files are skipped silently.

    Args:
        stage_name: Stage name; used as sub-folder name and as file-name suffix.
        input_folder: Folder that contains one sub-folder per stage.
        num_videos: Highest video number to look for.

    Returns:
        A tuple ``(combined_tensor, video_indices)`` where ``combined_tensor``
        is the concatenation of all loaded tensors along axis 0 and
        ``video_indices`` maps each zero-padded video ID to the
        ``(start, end)`` row range it occupies in ``combined_tensor``.
        ``(None, None)`` is returned when no file was found.
    """
    combined_tensor = []
    video_indices = {}
    # Running row offset; avoids re-summing every loaded tensor per iteration.
    next_start = 0

    for video_id in range(1, num_videos + 1):
        video_key = str(video_id).zfill(4)
        file_path = os.path.join(input_folder, stage_name, f"{video_key}_{stage_name}.pkl")

        if not os.path.exists(file_path):
            continue

        # NOTE: pickle.load can execute arbitrary code; only load files that
        # were produced by this project's own preprocessing.
        with open(file_path, 'rb') as file:
            tensor = pickle.load(file)

        combined_tensor.append(tensor)
        # Track start and end rows for each video.
        end_index = next_start + tensor.shape[0]
        video_indices[video_key] = (next_start, end_index)
        next_start = end_index

    if not combined_tensor:
        print("No tensors found to combine.")
        return None, None

    combined_tensor = np.concatenate(combined_tensor, axis=0)
    print(combined_tensor.shape)
    return combined_tensor, video_indices


In [85]:
# Step 2: globalized standardization (only based on training set)
def standardize_tensors(combined_tensor, video_indices, training_end_id='0005'):
    """Standardize every feature using statistics from the training rows only.

    The tensor is flattened to ``(rows, features)``, a ``StandardScaler`` is
    fitted on the rows up to the end of video ``training_end_id``, and the
    fitted scaler is then applied to all rows.

    Args:
        combined_tensor: Array whose first axis indexes rows (frames).
        video_indices: Mapping of video ID to its ``(start, end)`` row range.
        training_end_id: ID of the last video that belongs to the training set.

    Returns:
        The standardized data, reshaped back to ``combined_tensor.shape``.
    """
    original_shape = combined_tensor.shape
    flat = combined_tensor.reshape(original_shape[0], -1)
    scaler = StandardScaler()

    # Rows before this index belong to the training set.
    train_stop = video_indices[training_end_id][1]

    # Fit on training rows only, then transform training and test rows alike.
    scaler.fit(flat[:train_stop])
    standardized = scaler.transform(flat).reshape(original_shape)

    print(standardized.shape)
    return standardized

In [86]:
# Step 3: Separate the standardized tensor back into individual tensors
def separate_standardized_tensor(standardized_tensor, video_indices):
    """Slice the stacked tensor back into one piece per video.

    Args:
        standardized_tensor: Array whose first axis holds the stacked rows of
            all videos.
        video_indices: Mapping of video ID to its ``(start, end)`` row range.

    Returns:
        Dict mapping each video ID to ``standardized_tensor[start:end, :]``.
        The shape of every slice is printed as it is produced.
    """
    per_video = {}
    for key in video_indices:
        lo, hi = video_indices[key]
        chunk = standardized_tensor[lo:hi, :]
        per_video[key] = chunk
        print(chunk.shape)
    return per_video

In [103]:
# To get the same number of PCs for every video: (1) fit a PCA per video and channel with the given
# variance threshold, (2) take the maximum number of components needed across all of those fits,
# (3) refit and transform every video/channel with that maximum number of components.
# -> This makes sure the variance captured for each video and channel is >= variance_ratio.

def apply_fmpca_and_save(separated_tensors, stage_name, output_folder, variance_ratio):
    """Fit a separate PCA per video and channel, then pickle the stacked scores.

    The work is done in two passes:

    1. For every video and channel a PCA is fitted with
       ``n_components=variance_ratio`` and the largest resulting number of
       components is recorded.
    2. Every video/channel is fitted again with exactly that number of
       components, so all videos end up with the same number of columns.

    Per video, the per-channel scores are concatenated along axis 1 and the
    resulting dict ``{video_id: array}`` is pickled to
    ``<output_folder>/PCA_channel_<variance_ratio>/<stage_name>/pca_results.pkl``.

    Args:
        separated_tensors: Mapping of video ID to that video's feature maps.
            The last axis is treated as the channel axis and axes 2 and 3 as
            the spatial axes (assumed layout ``(frames, 1, H, W, channels)``;
            confirm against the preprocessing step).
        stage_name: Stage name; used as the innermost output folder.
        output_folder: Base folder under which the results are written.
        variance_ratio: Passed to ``PCA(n_components=...)`` in the first pass
            and embedded in the output folder name.

    Returns:
        None. The results are written to disk.

    Raises:
        ValueError: If ``separated_tensors`` is empty.
    """
    if not separated_tensors:
        # Previously this surfaced as a bare StopIteration from next(iter(...)).
        raise ValueError("separated_tensors is empty; nothing to run PCA on.")

    # Final PCA results, keyed by video ID.
    final_pca_results = {}

    # Number of values per channel and frame, taken from the first video
    # (assumes the spatial dimensions sit at axes 2 and 3).
    first_feature_map = next(iter(separated_tensors.values()))
    spatial_dims_product = np.prod(first_feature_map.shape[2:4])

    # Step 1: determine the maximum number of components needed anywhere.
    max_components = 0
    for feature_maps in tqdm(separated_tensors.values(), desc="Finding max. number of PCs..."):
        for channel in range(feature_maps.shape[-1]):
            data_for_channel = feature_maps[..., channel].reshape(-1, spatial_dims_product)
            pca = PCA(n_components=variance_ratio)
            pca.fit(data_for_channel)
            max_components = max(max_components, pca.n_components_)

    print(f"Max. number of PCs: {max_components}")

    # Step 2: refit with the common number of components and keep the scores.
    # NOTE: max_components comes from the largest fit in step 1; a video with
    # fewer rows than that would make PCA reject the value here.
    for video_id, feature_maps in tqdm(separated_tensors.items(), desc="Performing PCA..."):
        pca_results = []

        for channel in range(feature_maps.shape[-1]):
            # One row per frame, one column per spatial position of this channel.
            data_for_pca = feature_maps[..., channel].reshape(-1, spatial_dims_product)
            pca = PCA(n_components=max_components)
            pca_results.append(pca.fit_transform(data_for_pca))

        # Stack the per-channel scores side by side for this video.
        final_result = np.concatenate(pca_results, axis=1)
        final_pca_results[video_id] = final_result
        print(final_pca_results[video_id].shape)

    pca_folder = os.path.join(output_folder, f"PCA_channel_{variance_ratio}", stage_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(pca_folder, exist_ok=True)

    file_path = os.path.join(pca_folder, 'pca_results.pkl')
    with open(file_path, 'wb') as f:
        pickle.dump(final_pca_results, f)

    print(f"{stage_name} PCs stored in: {file_path}")

In [104]:
def process_stage_for_pca(input_folder, output_folder, stage_name, num_components=30,
                          variance_ratio=None, num_videos=5):
    """
    Process all videos of a given stage: load, standardize, apply per-channel PCA, and save.

    Args:
    - input_folder: Folder containing the pre-processed videos, resolved
      against the current working directory.
    - output_folder: Folder to save PCA results.
    - stage_name: Name of the stage to process.
    - num_components: Unused; kept only so existing callers keep working.
    - variance_ratio: Variance threshold handed to apply_fmpca_and_save. When
      None, the module-level ``variance_ratio`` variable is used, which is
      what this function always read before the parameter existed.
    - num_videos: Highest video number to look for (defaults to the
      previously hard-coded 5).
    Returns:
    - None. Results are written to disk by apply_fmpca_and_save. The function
      returns early when the stage folder is missing or no tensors were found.
    Raises:
    - NameError: if ``variance_ratio`` is None and no module-level
      ``variance_ratio`` is defined.
    """
    # Use the current working directory as the base for the input folder.
    current_working_directory = os.getcwd()
    stage_folder = os.path.join(current_working_directory, input_folder, stage_name)
    print("Attempting to access:", stage_folder)

    if not os.path.exists(stage_folder):
        print("Directory not found:", stage_folder)
        return None

    if variance_ratio is None:
        # Backward-compatible fallback to the notebook-level setting. Resolved
        # here so a missing value fails before any expensive work is done.
        try:
            variance_ratio = globals()["variance_ratio"]
        except KeyError:
            raise NameError(
                "variance_ratio was not passed and no module-level 'variance_ratio' is defined"
            ) from None

    print(f"Number of videos found: {num_videos}")

    # Step 1: Load and combine tensors
    combined_tensor, video_indices = load_and_combine_tensors(stage_name, input_folder, num_videos)
    if combined_tensor is None:
        # Nothing was loaded (already reported by the loader); standardizing
        # None would crash with an AttributeError.
        return None
    print("Step 1 done.")
    # Step 2: Globally standardize the tensor
    standardized_tensor = standardize_tensors(combined_tensor, video_indices)
    print("Step 2 done.")
    # Step 3: Separate the standardized tensor back into individual tensors
    separated_tensors = separate_standardized_tensor(standardized_tensor, video_indices)
    print("Step 3 done.")
    # Step 4: Apply PCA to each tensor and save the result
    apply_fmpca_and_save(separated_tensors, stage_name, output_folder, variance_ratio)
    print("Step 4 done.")

In [105]:
# Sanity check: process_stage_for_pca resolves the input folder against the
# working directory, so show it next to the detected repository root.
print(os.getcwd())
print(repo_root)

C:\Users\marce\PycharmProjects\Brainvision_Project
C:\Users\marce\PycharmProjects\Brainvision_Project


In [106]:
# Example usage: run the standardize + per-channel PCA pipeline for every stage.
input_folder = 'preprocessed_videos_30frames'
output_folder = repo_root
stages = ["stage_1", "stage_2", "stage_3", "stage_4", "stage_5"]
# NOTE: this is not passed in the call below; process_stage_for_pca picks it up
# as a module-level variable, so it must be defined before the loop runs.
variance_ratio = 0.95

# Iterate over each stage and process it
for stage in stages:
    print(f"Processing {stage}...")
    process_stage_for_pca(input_folder, output_folder, stage)


Processing stage_1...
Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_1
Number of videos found: 5
(150, 1, 56, 56, 64)
Step 1 done.
(150, 1, 56, 56, 64)
Step 2 done.
(30, 1, 56, 56, 64)
(30, 1, 56, 56, 64)
(30, 1, 56, 56, 64)
(30, 1, 56, 56, 64)
(30, 1, 56, 56, 64)
Step 3 done.


Finding max. number of PCs...: 100%|██████████| 5/5 [00:04<00:00,  1.07it/s]


Max. number of PCs: 26


Performing PCA...:  20%|██        | 1/5 [00:00<00:03,  1.23it/s]

(30, 1664)


Performing PCA...:  40%|████      | 2/5 [00:01<00:02,  1.18it/s]

(30, 1664)


Performing PCA...:  60%|██████    | 3/5 [00:02<00:01,  1.10it/s]

(30, 1664)


Performing PCA...:  80%|████████  | 4/5 [00:03<00:00,  1.04it/s]

(30, 1664)


Performing PCA...: 100%|██████████| 5/5 [00:04<00:00,  1.06it/s]

(30, 1664)
stage_1 PCs stored in: C:\Users\marce\PycharmProjects\Brainvision_Project\PCA_channel_0.95\stage_1\pca_results.pkl
Step 4 done.
Processing stage_2...
Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_2
Number of videos found: 5





(150, 1, 56, 56, 256)
Step 1 done.
(150, 1, 56, 56, 256)
Step 2 done.
(30, 1, 56, 56, 256)
(30, 1, 56, 56, 256)
(30, 1, 56, 56, 256)
(30, 1, 56, 56, 256)
(30, 1, 56, 56, 256)
Step 3 done.


Finding max. number of PCs...: 100%|██████████| 5/5 [00:19<00:00,  3.96s/it]


Max. number of PCs: 28


Performing PCA...:  20%|██        | 1/5 [00:04<00:16,  4.11s/it]

(30, 7168)


Performing PCA...:  40%|████      | 2/5 [00:07<00:11,  3.93s/it]

(30, 7168)


Performing PCA...:  60%|██████    | 3/5 [00:11<00:07,  3.98s/it]

(30, 7168)


Performing PCA...:  80%|████████  | 4/5 [00:15<00:03,  3.83s/it]

(30, 7168)


Performing PCA...: 100%|██████████| 5/5 [00:19<00:00,  3.94s/it]

(30, 7168)
stage_2 PCs stored in: C:\Users\marce\PycharmProjects\Brainvision_Project\PCA_channel_0.95\stage_2\pca_results.pkl
Step 4 done.
Processing stage_3...
Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_3
Number of videos found: 5





(150, 1, 28, 28, 512)
Step 1 done.
(150, 1, 28, 28, 512)
Step 2 done.
(30, 1, 28, 28, 512)
(30, 1, 28, 28, 512)
(30, 1, 28, 28, 512)
(30, 1, 28, 28, 512)
(30, 1, 28, 28, 512)
Step 3 done.


Finding max. number of PCs...: 100%|██████████| 5/5 [00:34<00:00,  6.90s/it]


Max. number of PCs: 27


Performing PCA...:  20%|██        | 1/5 [00:05<00:23,  5.80s/it]

(30, 13824)


Performing PCA...:  40%|████      | 2/5 [00:12<00:19,  6.45s/it]

(30, 13824)


Performing PCA...:  60%|██████    | 3/5 [00:21<00:14,  7.34s/it]

(30, 13824)


Performing PCA...:  80%|████████  | 4/5 [00:28<00:07,  7.17s/it]

(30, 13824)


Performing PCA...: 100%|██████████| 5/5 [00:31<00:00,  6.31s/it]

(30, 13824)
stage_3 PCs stored in: C:\Users\marce\PycharmProjects\Brainvision_Project\PCA_channel_0.95\stage_3\pca_results.pkl
Step 4 done.
Processing stage_4...
Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_4
Number of videos found: 5





(150, 1, 14, 14, 1024)
Step 1 done.
(150, 1, 14, 14, 1024)
Step 2 done.
(30, 1, 14, 14, 1024)
(30, 1, 14, 14, 1024)
(30, 1, 14, 14, 1024)
(30, 1, 14, 14, 1024)
(30, 1, 14, 14, 1024)
Step 3 done.


Finding max. number of PCs...: 100%|██████████| 5/5 [00:12<00:00,  2.46s/it]


Max. number of PCs: 25


Performing PCA...:  20%|██        | 1/5 [00:02<00:11,  2.83s/it]

(30, 25600)


Performing PCA...:  40%|████      | 2/5 [00:05<00:08,  2.76s/it]

(30, 25600)


Performing PCA...:  60%|██████    | 3/5 [00:08<00:05,  2.77s/it]

(30, 25600)


Performing PCA...:  80%|████████  | 4/5 [00:11<00:03,  3.03s/it]

(30, 25600)


Performing PCA...: 100%|██████████| 5/5 [00:14<00:00,  2.92s/it]


(30, 25600)
stage_4 PCs stored in: C:\Users\marce\PycharmProjects\Brainvision_Project\PCA_channel_0.95\stage_4\pca_results.pkl
Step 4 done.
Processing stage_5...
Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_5
Number of videos found: 5
(150, 1, 7, 7, 2048)
Step 1 done.
(150, 1, 7, 7, 2048)
Step 2 done.
(30, 1, 7, 7, 2048)
(30, 1, 7, 7, 2048)
(30, 1, 7, 7, 2048)
(30, 1, 7, 7, 2048)
(30, 1, 7, 7, 2048)
Step 3 done.


Finding max. number of PCs...: 100%|██████████| 5/5 [00:08<00:00,  1.75s/it]


Max. number of PCs: 20


Performing PCA...:  20%|██        | 1/5 [00:01<00:07,  1.98s/it]

(30, 40960)


Performing PCA...:  40%|████      | 2/5 [00:03<00:06,  2.00s/it]

(30, 40960)


Performing PCA...:  60%|██████    | 3/5 [00:05<00:03,  1.86s/it]

(30, 40960)


Performing PCA...:  80%|████████  | 4/5 [00:07<00:01,  1.81s/it]

(30, 40960)


Performing PCA...: 100%|██████████| 5/5 [00:09<00:00,  1.84s/it]

(30, 40960)
stage_5 PCs stored in: C:\Users\marce\PycharmProjects\Brainvision_Project\PCA_channel_0.95\stage_5\pca_results.pkl
Step 4 done.





In [None]:
-----------------------------------------------------------------------

In [74]:
# Show the current working directory.
os.getcwd()

'C:\\Users\\marce\\PycharmProjects\\Brainvision_Project'

In [109]:
# Reload each stage's saved PCA results and report the array shape per video.
for stage in stages:
    # Build the path from the same settings the results were written with,
    # instead of hard-coding the ratio and relying on the working directory.
    path = os.path.join(output_folder, f"PCA_channel_{variance_ratio}", stage, 'pca_results.pkl')

    # NOTE: pickle.load can execute arbitrary code; only open files written by this notebook.
    with open(path, 'rb') as f:
        loaded_dict = pickle.load(f)

    # Display the shape of each value in the dictionary
    shapes = {key: value.shape for key, value in loaded_dict.items()}
    print(stage, shapes)


stage_1 {'0001': (30, 1664), '0002': (30, 1664), '0003': (30, 1664), '0004': (30, 1664), '0005': (30, 1664)}
stage_2 {'0001': (30, 7168), '0002': (30, 7168), '0003': (30, 7168), '0004': (30, 7168), '0005': (30, 7168)}
stage_3 {'0001': (30, 13824), '0002': (30, 13824), '0003': (30, 13824), '0004': (30, 13824), '0005': (30, 13824)}
stage_4 {'0001': (30, 25600), '0002': (30, 25600), '0003': (30, 25600), '0004': (30, 25600), '0005': (30, 25600)}
stage_5 {'0001': (30, 40960), '0002': (30, 40960), '0003': (30, 40960), '0004': (30, 40960), '0005': (30, 40960)}
