In [None]:
import pandas as pd
import os
import pickle
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.random_projection import GaussianRandomProjection
from umap import UMAP
from sklearn.preprocessing import StandardScaler

def apply_dimensionality_reduction(pkl_folder, n_components=2, random_state=None):
    """
    Apply Random Projections, UMAP, PCA, and t-SNE to each .pkl file in the specified folder.

    Every entry directly inside ``pkl_folder`` whose name ends with ``.pkl`` and does
    not contain ``'label'`` is loaded, standardized with ``StandardScaler`` and reduced
    with each technique. Each result is pickled as a raw array to
    ``<pkl_folder>/<technique>/<n_components>/<n_components>_components_<technique>_<file>``,
    where ``<technique>`` is one of ``rp``, ``umap``, ``pca`` or ``tsne``.

    t-SNE is run with at most 3 components. Its output is still written under the
    requested ``n_components`` folder and file name, so for ``n_components > 3`` the
    stored t-SNE array has 3 columns, not ``n_components``.

    :param pkl_folder: Folder containing .pkl files.
    :param n_components: Number of dimensions to project to for each method.
    :param random_state: Optional seed forwarded to every technique so that repeated
        runs can produce the same embeddings. ``None`` (default) keeps each
        library's own unseeded behaviour.
    :return: None. Results are written to disk and progress is printed.
    """
    # scikit-learn's default Barnes-Hut t-SNE only supports fewer than 4 output
    # dimensions, hence the cap. Only the estimator is capped; paths keep n_components.
    tsne_components = min(n_components, 3)
    if tsne_components != n_components:
        print(f"Note: t-SNE is limited to {tsne_components} components; its output is "
              f"still saved under the {n_components}-component folder and file name.")

    # Sorted so that the processing order does not depend on the file system.
    for file in sorted(os.listdir(pkl_folder)):
        # Skip the label file and anything that is not a .pkl file
        if 'label' in file or not file.endswith('.pkl'):
            continue

        # Load the data
        # NOTE: unpickling executes arbitrary code; only point this at trusted folders.
        file_path = os.path.join(pkl_folder, file)
        data = pd.read_pickle(file_path)
        # Values stored as strings with a decimal comma (e.g. "1,5") are converted to floats.
        data = np.array([[float(x.replace(',', '.')) if isinstance(x, str) else x for x in row] for row in data])

        # Standardizing the data before applying transformations
        scaled_data = StandardScaler().fit_transform(data)

        # Fresh estimators for every file so that no fitted state is carried over
        techniques = {
            "rp": GaussianRandomProjection(n_components=n_components, random_state=random_state),
            "umap": UMAP(n_components=n_components, random_state=random_state),
            "pca": PCA(n_components=n_components, random_state=random_state),
            "tsne": TSNE(n_components=tsne_components, random_state=random_state),
        }

        for technique_name, technique in techniques.items():
            save_filename = f"{n_components}_components_{technique_name}_{file}"
            save_dir = os.path.join(pkl_folder, technique_name, str(n_components))
            save_file_path = os.path.join(save_dir, save_filename)

            # exist_ok avoids the check-then-create race of a separate exists() test
            os.makedirs(save_dir, exist_ok=True)

            transformed_data = technique.fit_transform(scaled_data)

            # Save the data in raw array format as a .pkl file
            with open(save_file_path, 'wb') as f:
                pickle.dump(transformed_data, f)

            print(f"Applied {technique_name.upper()} to {file} and saved as {save_filename} in {pkl_folder}/{technique_name}/{n_components} folder.")

In [None]:
### Run every dimensionality reduction technique over the .pkl files of one folder ###

# Folder holding the input .pkl files; point this at your own data directory
path_to_your_pkl_folder = './data/MSL/'
# Number of dimensions requested from each technique
n_components = 8

apply_dimensionality_reduction(
    pkl_folder=path_to_your_pkl_folder,
    n_components=n_components,
)