Channel PCA captures the most significant patterns across frames for each channel, potentially highlighting the most prominent changes or features in the video content for that specific channel.
**Do we want that or are there better ways to capture spatial features per frame?**

In [1]:
import os
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tqdm import tqdm

In [2]:
def find_repo_root(path='.'):
    """
    Walk upwards from `path` until a directory containing a `.git` entry is found.

    Args:
    - path: Directory to start searching from (defaults to the current directory).
    Returns:
    - Absolute path of the first ancestor (or `path` itself) that contains `.git`,
      or None if the file-system root is reached without finding one.
    """
    current = os.path.abspath(path)
    while True:
        # `.git` is a directory in a normal clone but a plain file in linked
        # worktrees and submodules, so test for existence rather than isdir.
        if os.path.exists(os.path.join(current, '.git')):
            return current
        parent = os.path.dirname(current)
        if parent == current:
            # We've reached the root of the file system without finding '.git'
            return None
        current = parent

# Resolve the repository root starting from the current working directory
# (find_repo_root defaults to path='.'); the result is None when no '.git' is found.
repo_root = find_repo_root()
print("Repository Root:", repo_root)

Repository Root: C:\Users\marce\PycharmProjects\Brainvision_Project


In [3]:
def get_full_path(relative_path, repo_root):
    """
    Join `relative_path` onto `repo_root`.

    Args:
    - relative_path: Path relative to the repository root.
    - repo_root: Repository root directory; must be truthy.
    Returns:
    - The joined path.
    Raises:
    - ValueError: if `repo_root` is falsy (e.g. None or an empty string).
    """
    if repo_root:
        return os.path.join(repo_root, relative_path)

    raise ValueError("Repository root not found. Ensure you're inside a Git repository.")


In [4]:
# step 1 function
def load_and_combine_tensors(stage_name, input_folder, num_videos):
    """
    Load the pickled tensor of every video for one stage and stack them along axis 0.

    Files are expected at `<input_folder>/<stage_name>/<NNNN>_<stage_name>.pkl`, where
    NNNN is the video id zero-padded to four digits, for ids 1..num_videos. Missing
    files are skipped silently.

    Args:
    - stage_name: Name of the stage (used as sub-folder name and file-name suffix).
    - input_folder: Folder containing one sub-folder per stage.
    - num_videos: Highest video id to look for (ids start at 1).
    Returns:
    - (combined_tensor, video_indices): the concatenated array and a dict mapping each
      loaded video's zero-padded id to its (start, end) row range in that array
      (end exclusive). Returns (None, None) if no file was found.
    """
    tensors = []
    video_indices = {}
    next_row = 0  # running row offset; avoids re-summing all loaded tensors per video

    for video_id in range(1, num_videos + 1):
        video_key = str(video_id).zfill(4)
        file_path = os.path.join(input_folder, stage_name, f"{video_key}_{stage_name}.pkl")

        if not os.path.exists(file_path):
            continue

        # NOTE: pickle can execute arbitrary code on load; only read files you produced yourself.
        with open(file_path, 'rb') as file:
            tensor = pickle.load(file)

        tensors.append(tensor)
        # Track start and end indices for each video
        start_row = next_row
        next_row += tensor.shape[0]
        video_indices[video_key] = (start_row, next_row)

    if not tensors:
        print("No tensors found to combine.")
        return None, None

    return np.concatenate(tensors, axis=0), video_indices


In [5]:
# Step 2: globalized standardization (only based on training set)
def standardize_tensors(combined_tensor, video_indices, training_end_id='0005'):
    """
    Standardize every feature using statistics estimated on the training rows only.

    Args:
    - combined_tensor: Array whose first axis indexes samples; all remaining axes are
      flattened into features for scaling.
    - video_indices: Dict mapping video id -> (start, end) row range in `combined_tensor`.
    - training_end_id: Id of the last training video; rows before that video's end
      index form the training set.
    Returns:
    - The standardized data, reshaped back to the shape of `combined_tensor`.
    """
    original_shape = combined_tensor.shape
    flat_features = combined_tensor.reshape(original_shape[0], -1)

    # Rows [0, train_stop) belong to the training set
    train_stop = video_indices[training_end_id][1]

    # Fit on the training rows only, then apply the same scaling to all rows
    fitted_scaler = StandardScaler().fit(flat_features[:train_stop])
    scaled = fitted_scaler.transform(flat_features)

    return scaled.reshape(original_shape)

In [7]:
# Manual, cell-level run of steps 1 and 2 for a single stage.
# These are notebook-level globals; n_components is forwarded to PCA(n_components=...)
# further down (presumably a target explained-variance fraction — confirm in the sklearn docs).
input_folder = 'preprocessed_videos_30frames'
output_folder = os.getcwd()
stages = ["stage_5"] # ["stage_2", "stage_3", "stage_4", "stage_5"] # success: ["stage_1"]
n_components = 0.95
stage_name = "stage_5"


# The stage folder is resolved against the current working directory.
current_working_directory = os.getcwd()
stage_folder = os.path.join(current_working_directory, input_folder, stage_name)
print("Attempting to access:", stage_folder)

# Only reports a missing directory; the cell keeps running afterwards.
if not os.path.exists(stage_folder):
    print("Directory not found:", stage_folder)

# Number of videos is hard-coded to 5; the trailing comment keeps the
# file-count expression that would derive it from the folder contents.
num_videos = 5 # len([f for f in os.listdir(stage_folder) if os.path.isfile(os.path.join(stage_folder, f))])
print(f"Number of videos found: {num_videos}")

# Step 1: Load and combine tensors
combined_tensor, video_indices = load_and_combine_tensors(stage_name, input_folder, num_videos)
print("Step 1 done.")

# Step 2: Globally standardize the tensor
# (uses the default training_end_id of standardize_tensors)
standardized_tensor = standardize_tensors(combined_tensor, video_indices)

Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_5
Number of videos found: 5
Step 1 done.


In [8]:
# Inspect the shape of the standardized tensor produced by the previous cell.
standardized_tensor.shape

(150, 1, 7, 7, 2048)

In [17]:
def apply_fpca_and_save(standardized_tensor, stage_name, output_folder, n_components, training_end_id=800, seed=42):
    """
    Apply PCA separately to batches of channels (last axis) and save the concatenated scores.

    The last axis is split into up to 8 contiguous batches. Each batch is flattened to 2-D
    with `shape[2] * shape[3] * batch_width` columns, a PCA is fitted on the first
    `training_end_id` rows and then applied to all rows. The per-batch scores are
    concatenated along axis 1 and pickled to
    `<output_folder>/PCA_filter_<n_components>/<stage_name>/layer_<X>_pca.pkl`, where X is
    the last character of `stage_name`.

    Args:
    - standardized_tensor: Array whose last axis holds the channels/filters.
      Assumes a layout like (samples, 1, H, W, channels) — TODO confirm for other stages.
    - stage_name: Name of the stage; its last character is used in the output file name
      (so a multi-digit stage number would be truncated to its final digit).
    - output_folder: Base folder under which the PCA results are stored.
    - n_components: Forwarded unchanged to `PCA(n_components=...)`.
    - training_end_id: Despite the name, a ROW index (not a video id): rows
      `[:training_end_id]` are used for fitting. If it exceeds the number of rows,
      all rows are used for fitting.
    - seed: Forwarded to `PCA(random_state=...)`.
    Raises:
    - ValueError: if the tensor has no channels on its last axis.
    """
    n_channels = standardized_tensor.shape[-1]
    if n_channels == 0:
        raise ValueError("standardized_tensor has no channels on its last axis.")

    # create filter batches for pca; never request more batches than channels
    num_batches = min(8, n_channels)
    # Calculate the product of the spatial dimensions
    spatial_dims_prod = int(np.prod(standardized_tensor.shape[2:4]))

    pca_results = []

    # array_split spreads any remainder over the leading batches, so no channel is
    # dropped when the channel count is not divisible by num_batches.
    channel_batches = np.array_split(standardized_tensor, num_batches, axis=-1)
    for i, batch in enumerate(channel_batches):
        # Reshape the batch for PCA: flattening the spatial dimensions and channels
        reshaped_batch = batch.reshape(-1, spatial_dims_prod * batch.shape[-1])
        print(f"Shape of flattened filter batch {i}: {reshaped_batch.shape}")

        # Fit on the training rows only, then project every row
        pca = PCA(n_components=n_components, random_state=seed)
        pca.fit(reshaped_batch[:training_end_id, :])
        pca_result = pca.transform(reshaped_batch)
        print(f"Number of PCs in filter batch {i}: {pca_result.shape}")
        pca_results.append(pca_result)

    # Concatenate the PCA results from all batches
    final_pca_results = np.concatenate(pca_results, axis=1)
    print(final_pca_results.shape)

    # save PCA results
    pca_folder = os.path.join(output_folder, f"PCA_filter_{n_components}", stage_name)
    os.makedirs(pca_folder, exist_ok=True)

    stage_number = stage_name[-1]
    file_path = os.path.join(pca_folder, f'layer_{stage_number}_pca.pkl')
    with open(file_path, 'wb') as f:
        pickle.dump(final_pca_results, f)

    print(f"{stage_name} PCs stored in: {file_path}")

In [19]:
def process_stage_for_pca(input_folder, output_folder, stage_name):
    """
    Process all videos of a given stage: standardize, apply PCA, and save the PCA-transformed tensors.
    Args:
    - input_folder: Folder containing the pre-processed videos (resolved against the
      current working directory, one sub-folder per stage).
    - output_folder: Folder to save PCA results.
    - stage_name: Name of the stage to process.
    Returns:
    - None. Results are written to disk by apply_fpca_and_save. The function also returns
      None early when the stage folder does not exist or no tensors could be loaded.

    Note: reads the notebook-level global `n_components`, which must be defined before
    calling. apply_fpca_and_save is called without `training_end_id`, so its default applies.
    """
    # Use the current working directory as the base for the input folder
    stage_folder = os.path.join(os.getcwd(), input_folder, stage_name)
    print("Attempting to access:", stage_folder)

    if not os.path.exists(stage_folder):
        print("Directory not found:", stage_folder)
        return None

    # Number of videos is hard-coded; deriving it from the folder would be:
    # len([f for f in os.listdir(stage_folder) if os.path.isfile(os.path.join(stage_folder, f))])
    num_videos = 5
    print(f"Number of videos found: {num_videos}")

    # Step 1: Load and combine tensors
    combined_tensor, video_indices = load_and_combine_tensors(stage_name, input_folder, num_videos)
    if combined_tensor is None:
        # The loader has already reported that nothing was found; stop here instead of
        # failing later with an AttributeError on None.
        return None
    print("Step 1 done.")

    # Step 2: Globally standardize the tensor
    # (the standardized tensor is not persisted; only the PCA scores are saved)
    standardized_tensor = standardize_tensors(combined_tensor, video_indices)
    print("Step 2 done.")

    # Step 3: Apply PCA to each tensor and save the result
    apply_fpca_and_save(standardized_tensor, stage_name, output_folder, n_components)
    print("Step 3 done.")
    return None

In [21]:
# Debug aid: show the current working directory and the repository root detected earlier
# (repo_root is None when no '.git' was found above the working directory).
print(os.getcwd())
print(repo_root)

/work
None


In [20]:
# Example usage
# NOTE: n_components is not passed to process_stage_for_pca; that function reads it
# as a global, so it must be assigned here before the loop runs.
input_folder = 'preprocessed_videos_30frames'
output_folder = os.getcwd()
stages = ["stage_5"] # ["stage_2", "stage_3", "stage_4", "stage_5"] # success: ["stage_1"]
n_components = 0.95

# Iterate over each stage and process it (load -> standardize -> PCA -> save)
for stage in stages:
    print(f"Processing {stage}...")
    process_stage_for_pca(input_folder, output_folder, stage)


Processing stage_5...
Attempting to access: C:\Users\marce\PycharmProjects\Brainvision_Project\preprocessed_videos_30frames\stage_5
Number of videos found: 5
Step 1 done.
Step 2 done.
Shape of flattened filter batch 0: (150, 12544)
Number of PCs in filter batch 0: (150, 112)
Shape of flattened filter batch 1: (150, 12544)
Number of PCs in filter batch 1: (150, 112)
Shape of flattened filter batch 2: (150, 12544)
Number of PCs in filter batch 2: (150, 113)
Shape of flattened filter batch 3: (150, 12544)
Number of PCs in filter batch 3: (150, 113)
Shape of flattened filter batch 4: (150, 12544)
Number of PCs in filter batch 4: (150, 111)
Shape of flattened filter batch 5: (150, 12544)
Number of PCs in filter batch 5: (150, 113)
Shape of flattened filter batch 6: (150, 12544)
Number of PCs in filter batch 6: (150, 114)
Shape of flattened filter batch 7: (150, 12544)
Number of PCs in filter batch 7: (150, 113)
(150, 901)
stage_5 PCs stored in: C:\Users\marce\PycharmProjects\Brainvision_Pro

In [None]:
#-----------------------------------------------------------------------

In [None]:
# zip pca folder
import shutil  # imported here because it is not among the imports in the first cell

# NOTE(review): "PCA_slice_" looks like an unfinished placeholder; the PCA step in this
# notebook writes to f"PCA_filter_{n_components}" — set this to the folder you want to archive.
directory_to_zip = "PCA_slice_"  # Replace with your directory name
output_filename = "PCA_channel_0.95_dataset"  # Replace with your desired output name
output_path = os.path.join(os.getcwd(), output_filename)
# Creates "<output_path>.zip" containing the contents of directory_to_zip.
shutil.make_archive(output_path, 'zip', directory_to_zip)

In [13]:
# Load previously saved PCA scores for one stage and print their shape as a sanity check.
# NOTE(review): this reads from "PCA_slice_0.95", whereas apply_fpca_and_save writes to
# "PCA_filter_<n_components>" — presumably output of a different notebook version; confirm.
stage = "stage_1"
stage_nr = stage[-1]  # last character of the stage name, same convention as the saved file name
path = f"PCA_slice_0.95/{stage}/layer_{stage_nr}_pca.pkl"
# pickle can execute arbitrary code on load; only open files produced by this project.
with open(path, 'rb') as f:
    pcs = pickle.load(f)

print(pcs.shape)

(1000, 187)
