In [42]:
import os
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pandas as pd

In [43]:
def find_repo_root(path='.'):
    path = os.path.abspath(path)
    while not os.path.isdir(os.path.join(path, '.git')):
        parent = os.path.dirname(path)
        if parent == path:
            # We've reached the root of the file system without finding '.git'
            return None
        path = parent
    return path

repo_root = find_repo_root()
print("Repository Root:", repo_root)

Repository Root: C:\Users\celin\PycharmProjects\Brainvision_Project


In [44]:
def get_full_path(relative_path, repo_root):
    if not repo_root:
        raise ValueError("Repository root not found. Ensure you're inside a Git repository.")

    return os.path.join(repo_root, relative_path)


In [70]:
def max_pca_components(stage_folder):
    max_components_per_video = {}
    for filename in os.listdir(stage_folder):
        if filename.endswith('.pkl'):
            file_path = os.path.join(stage_folder, filename)
            with open(file_path, 'rb') as file:
                tensor = pickle.load(file)
                num_samples, num_features = tensor.shape[0], tensor.shape[1]  # Assuming 2D tensor
                max_components = min(num_samples, num_features)
                max_components_per_video[filename] = max_components
    return max_components_per_video

# Example usage
stage_folder = 'C:/Users/celin/PycharmProjects/Brainvision_Project/preprocessed_videos/stage_1'  # Replace with your actual path
max_components = max_pca_components(stage_folder)
for video, components in max_components.items():
    print(f"{video}: Maximum {components} PCA components")

0001_stage_1.pkl: Maximum 1 PCA components
0002_stage_1.pkl: Maximum 1 PCA components
0003_stage_1.pkl: Maximum 1 PCA components
0004_stage_1.pkl: Maximum 1 PCA components
0005_stage_1.pkl: Maximum 1 PCA components
0006_stage_1.pkl: Maximum 1 PCA components
0007_stage_1.pkl: Maximum 1 PCA components
0008_stage_1.pkl: Maximum 1 PCA components
0009_stage_1.pkl: Maximum 1 PCA components
0010_stage_1.pkl: Maximum 1 PCA components


In [45]:
# step 1 function
def load_and_combine_tensors(stage_name, relative_input_folder, num_videos=10):
    repo_root = find_repo_root()
    if not repo_root:
        raise ValueError("Repository root not found. Ensure you're inside a Git repository.")

    combined_tensor = []
    video_indices = {}

    for video_id in range(1, num_videos + 1):
        filename = f"{str(video_id).zfill(4)}_{stage_name}.pkl"
        # Construct the full path
        file_path = os.path.join(repo_root, relative_input_folder, stage_name, filename)

        if os.path.exists(file_path):
            with open(file_path, 'rb') as file:
                tensor = pickle.load(file)
                combined_tensor.append(tensor)
                
                end_index = sum(t.shape[0] for t in combined_tensor)
                video_indices[str(video_id).zfill(4)] = (end_index - tensor.shape[0], end_index)
        else:
            print(f"File not found: {file_path}")  # Optional: Print a message if a file is not found

    if combined_tensor:
        combined_tensor = np.concatenate(combined_tensor, axis=0)
    else:
        print("No tensors found to combine.")
        return None, None

    return combined_tensor, video_indices


In [46]:
test_path = 'Brainvision_Project/preprocessed_videos/stage_1/0001_stage_1.pkl'
print(os.path.exists(test_path))

False


In [63]:
import os
import pickle
import numpy as np

def load_and_combine_tensors(stage_name, input_folder, num_videos=10):
    combined_tensor = []
    video_indices = {}

    for video_id in range(1, num_videos + 1):
        filename = f"{str(video_id).zfill(4)}_{stage_name}.pkl"
        file_path = os.path.join(repo_root, input_folder, stage_name, filename)

        if os.path.exists(file_path):
            print(f"Loading tensor from: {file_path}")  # Print the file being loaded
            with open(file_path, 'rb') as file:
                tensor = pickle.load(file)
                combined_tensor.append(tensor)
                end_index = sum(t.shape[0] for t in combined_tensor)
                video_indices[str(video_id).zfill(4)] = (end_index - tensor.shape[0], end_index)
        else:
            print(f"File not found: {file_path}")  # Indicate if a file is not found

    if combined_tensor:
        combined_tensor = np.concatenate(combined_tensor, axis=0)
    else:
        print("No tensors found to combine.")
        return None, None

    return combined_tensor, video_indices

# Example usage
input_folder = 'preprocessed_videos'  # Replace with the correct path
stage_name = 'stage_1'
combined_tensor, video_indices = load_and_combine_tensors(stage_name, input_folder)


Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0001_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0002_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0003_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0004_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0005_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0006_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0007_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0008_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_

In [64]:
# Step 2: globalized standardization
def standardize_tensor(tensor):
    reshaped_tensor = tensor.reshape(tensor.shape[0], -1)
    scaler = StandardScaler()
    standardized_data = scaler.fit_transform(reshaped_tensor)
    return standardized_data.reshape(tensor.shape)

In [65]:
# Step 3: Separate the standardized tensor back into individual tensors
def separate_standardized_tensor(standardized_tensor, video_indices):
    separated_tensors = {}
    for video_id, (start, end) in video_indices.items():
        separated_tensors[video_id] = standardized_tensor[start:end, :]
    return separated_tensors

In [74]:
 # Step 4: Apply PCA to each tensor and save the result
def apply_pca_and_save(tensors, stage_name, output_folder, num_components=30):
    pca_folder = os.path.join(output_folder, 'PCA', stage_name)
    if not os.path.exists(pca_folder):
        os.makedirs(pca_folder)

    metadata = []
    for video_id, tensor in tensors.items():
        pca = PCA(n_components=num_components)
        tensor_reshaped = tensor.reshape(tensor.shape[0], -1)
        tensor_pca = pca.fit_transform(tensor_reshaped)

        # Save PCA tensor
        pca_filename = os.path.join(pca_folder, f"{video_id}_pca.pkl")
        with open(pca_filename, 'wb') as file:
            pickle.dump(tensor_pca, file)

        # Capture variance and metadata
        variance = np.sum(pca.explained_variance_ratio_)
        metadata.append({'video_id': video_id, 'variance_captured': variance})

    return pd.DataFrame(metadata)

In [75]:

def process_stage_for_pca(input_folder, output_folder, stage_name, num_components=100):
    """
    Process all videos of a given stage: standardize, apply PCA, and save the PCA-transformed tensors.
    Args:
    - input_folder: Folder containing the pre-processed videos.
    - output_folder: Folder to save PCA results.
    - stage_name: Name of the stage to process.
    - num_components: Number of PCA components.
    Returns:
    - DataFrame containing metadata (video ID and variance captured).
    """

    repo_root = find_repo_root()
    if not repo_root:
        raise ValueError("Repository root not found. Ensure you're inside a Git repository.")

    stage_folder = os.path.join(repo_root, input_folder, stage_name)
    print("Attempting to access:", stage_folder)

    if not os.path.exists(stage_folder):
        print("Directory not found:", stage_folder)
        return None
    
    # Calculate the number of video files in the folder
    num_videos = len([f for f in os.listdir(stage_folder) if os.path.isfile(os.path.join(stage_folder, f))])
    print(f"Number of videos found: {num_videos}")

    # Step 1: Load and combine tensors
    combined_tensor, video_indices = load_and_combine_tensors(stage_name, input_folder, num_videos)

    # Step 2: Globally standardize the tensor
    standardized_tensor = standardize_tensor(combined_tensor)

    # Step 3: Separate the standardized tensor back into individual tensors
    separated_tensors = separate_standardized_tensor(standardized_tensor, video_indices)

    # Step 4: Apply PCA to each tensor and save the result
    metadata = apply_pca_and_save(separated_tensors, stage_name, output_folder, num_components)

    return metadata

In [76]:
# Example usage
input_folder = 'preprocessed_videos'
output_folder = 'Brainvision_Project'
stage_name = 'stage_1'

metadata_df = process_stage_for_pca(input_folder, output_folder, stage_name)

Attempting to access: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1
Number of videos found: 10
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0001_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0002_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0003_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0004_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0005_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0006_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\Brainvision_Project\preprocessed_videos\stage_1\0007_stage_1.pkl
Loading tensor from: C:\Users\celin\PycharmProjects\B

ValueError: n_components=100 must be between 0 and min(n_samples, n_features)=45 with svd_solver='full'

-----------------------------------------------------------------------

In [1]:
from sklearn.decomposition import PCA

def dimension_reduction_pca(video_tensor, n_components=0.95):
    """
    Applies PCA to reduce the dimensions of the video tensor.

    Parameters:
    video_tensor (numpy.ndarray): The video tensor to be reduced in dimension.
    n_components (float or int): Number of components to keep. 
                                 If 0 < n_components < 1, select the number of components such that 
                                 the cumulative explained variance ratio is greater than n_components.

    Returns:
    numpy.ndarray: The transformed video tensor with reduced dimensions.
    """

    # Reshape the video tensor for PCA
    # Assuming the shape of the video_tensor is (num_frames, 1, num_features)
    # We need to reshape it to (num_frames, num_features)
    reshaped_tensor = video_tensor.reshape(video_tensor.shape[0], -1)

    # Initialize PCA
    pca = PCA(n_components=n_components)

    # Apply PCA
    reduced_tensor = pca.fit_transform(reshaped_tensor)

    return reduced_tensor

In [None]:
pca_video_tensor = dimension_reduction_pca(video_tensor)

In [None]:
pca_video_tensor

In [None]:
# Define the function to apply PCA and save reduced videos
def save_reduced_videos(source_folder, target_folder, n_components=0.95):

    # Process each tensor in the source folder
    for file_name in os.listdir(source_folder):
        file_path = os.path.join(source_folder, file_name)
        if file_path.endswith('.pkl'):
            # Load the video tensor
            with open(file_path, 'rb') as file:
                video_tensor = pickle.load(file)

            # Reduce dimensionality
            reduced_tensor = dimension_reduction_pca(video_tensor, n_components)

            # Save the reduced data
            reduced_file_path = os.path.join(target_folder, file_name.replace('.pkl', '_reduced.pkl'))
            with open(reduced_file_path, 'wb') as reduced_file:
                pickle.dump(reduced_tensor, reduced_file)
                

In [None]:
source_folder = 'preprocessed_videos'
target_folder = 'reduced_videos'
save_reduced_videos(source_folder, target_folder)