## Idea for clustering and detecting

In [None]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy as np
import matplotlib.pyplot as plt
import math

# Step 1: Dimensionality Reduction
# Project the data onto its first 8 principal components.
pca = PCA(n_components=8)  # Reduce dimensionality to 8 dimensions
X_low_dim = pca.fit_transform(X)  # X is your high-dimensional data

# Step 2: Clustering
kmeans = KMeans(n_clusters=3)  # Example: clustering into 3 clusters
kmeans.fit(X_low_dim)
cluster_centers = kmeans.cluster_centers_

# Step 3: Distance Calculation
# pairwise_distances_argmin_min returns (argmin, distances) in that order:
# first the index of the closest center, then the distance to it. The
# distances are the SECOND element, so unpack accordingly; otherwise the
# threshold below would be computed on cluster indices.
_, distances = pairwise_distances_argmin_min(X_low_dim, cluster_centers)

# Step 4: Anomaly Detection
# Points farther from their closest center than the 95th percentile of all
# such distances are flagged as anomalies.
threshold = np.percentile(distances, 95)  # Example: 95th percentile
anomalies = X_low_dim[distances > threshold]

# Step 5: Visualization
# Only the first two principal components are shown.
plt.scatter(X_low_dim[:, 0], X_low_dim[:, 1], c='b', alpha=0.5, label='Normal Data')
plt.scatter(anomalies[:, 0], anomalies[:, 1], c='r', label='Anomalies')
plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1], c='g', marker='s', s=100, label='Cluster Centers')
plt.title('Anomaly Detection in Low-Dimensional Space')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.legend()
plt.show()


### Changing num trials

In [1]:
""" Simulating Neural Datasets

This script uses the variable "both_rates", which contains the firing rates of
both M1 and PdM areas, and generates a number X of simulated versions of that
variable, each containing a different perturbation.

"""

## Imports
### Imports
import pandas as pd
import numpy as np
import pickle
import argparse
import math

# Imports from other modules and packages in the project
import os
import sys

# Add the grandparent directory to sys.path
sys.path.append('../')
from src.helpers import *
from Task_Detector_Hnet_PoC.helpers_task_detector import *

# `random` is imported explicitly here so that seeding does not depend on the
# name leaking in through one of the star-imports above.
import random
import time

# Get the current time as an integer
current_time = int(time.time())

# Set the random seed using the current time
random.seed(current_time)

# Name of the target variable handed to get_dataset when building the splits
target_variable = 'vel'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Recording session and experiment settings
name, date = 'Chewie', '1007'
fold = 0
data_name = 'Experiment7'
num_trials = 5

## Load pre-processed data
data_path = f'../Data/Processed_Data/Tidy_{name}_{date}.pkl'

with open(data_path, 'rb') as file:
    tidy_df = pickle.load(file)

# Keep only the rows whose 'epoch' column equals 'BL'
baseline_df = tidy_df.loc[tidy_df['epoch'] == 'BL']

In [3]:
def get_reduced_sets(data, fold, num_trials = 'all'):
    """Build two datasets from the train/validation/test splits of ``data``.

    The splits are obtained from ``get_dataset`` for the requested ``fold``
    (using the module-level ``target_variable``).

    * ``num_trials == 'all'`` (case-insensitive): every split is cut in half;
      the first halves form the first dataset and the second halves the
      second one.
    * ``num_trials`` is an integer: ``2 * num_trials`` training trials and
      ``2 * max(1, ceil(num_trials / 4))`` validation/test trials are drawn
      at random. Trials are drawn independently with ``random.randint``, so
      the same trial can be picked more than once. The first half of the
      draws goes to the first dataset, the rest to the second.

    The random draws use the global ``random`` state as-is; the generator is
    not re-seeded here, so a seed set by the caller is honoured.

    Args:
        data: Data container forwarded unchanged to ``get_dataset``.
        fold: Fold identifier forwarded unchanged to ``get_dataset``.
        num_trials: ``'all'`` or the number of training trials per dataset.

    Returns:
        A tuple ``(data1, data2)``. Each element is a list
        ``[x_train, y_train, x_val, y_val, x_test, y_test]``.
    """
    (x_train, y_train, x_val, y_val,
     x_test, y_test, info_train, info_val,
     info_test, list_mins_base,
     list_maxs_base) = get_dataset(data,
                                   fold,
                                   target_variable=target_variable,
                                   no_outliers=False,
                                   force_data=True,
                                   std=False)

    # Accept 'all' in any letter case (the command-line default is 'All').
    use_all = isinstance(num_trials, str) and num_trials.lower() == 'all'

    if use_all:
        # Split the full splits themselves. The previous code sliced the
        # *_reduced arrays here, which are only created in the other branch,
        # so this path always raised an UnboundLocalError.
        size_train = x_train.shape[0] // 2
        size_val = x_val.shape[0] // 2
        size_test = x_test.shape[0] // 2

        splits = (x_train, y_train, x_val, y_val, x_test, y_test)
        cuts = (size_train, size_train,
                size_val, size_val,
                size_test, size_test)
    else:
        num_test_trials = max(1, math.ceil(num_trials/4))

        # Draw twice the requested amount: one half per returned dataset.
        trials_train = [random.randint(0, x_train.shape[0]-1)
                        for _ in range(num_trials*2)]

        trials_val = []
        trials_test = []
        for _ in range(num_test_trials*2):
            trials_val.append(random.randint(0, x_val.shape[0]-1))
            trials_test.append(random.randint(0, x_test.shape[0]-1))

        # x and y share the same trial indices within each split.
        splits = (np.array([x_train[i,:,:] for i in trials_train]),
                  np.array([y_train[i,:,:] for i in trials_train]),
                  np.array([x_val[i,:,:] for i in trials_val]),
                  np.array([y_val[i,:,:] for i in trials_val]),
                  np.array([x_test[i,:,:] for i in trials_test]),
                  np.array([y_test[i,:,:] for i in trials_test]))
        cuts = (num_trials, num_trials,
                num_test_trials, num_test_trials,
                num_test_trials, num_test_trials)

    data1 = [split[:cut,:,:] for split, cut in zip(splits, cuts)]
    data2 = [split[cut:,:,:] for split, cut in zip(splits, cuts)]

    return data1, data2


In [4]:
def generate_data(data, fold, num_trials):
    """Create the unperturbed and perturbed dataset pairs, in random order.

    For the original data and for each of four perturbed copies of its
    'both_rates' column, ``get_reduced_sets`` produces two datasets. The idea
    is that the model is exposed to the second dataset of a task and
    recognises the task it has already been trained on with the first.

    Perturbations (index used in the dictionary key):
        0: none (original data)
        1: remove_neurons(..., 30) followed by add_offset(..., 50)
        2: shuffle_neurons(..., 60)
        3: add_gain(..., 50)
        4: add_offset(..., 50)

    The shuffle uses the global ``random`` state as-is; the generator is not
    re-seeded here, so a seed set by the caller is honoured.

    Args:
        data: DataFrame-like object with a 'both_rates' column; it is also
            forwarded to ``get_reduced_sets``.
        fold: Fold identifier forwarded to ``get_reduced_sets``.
        num_trials: Forwarded to ``get_reduced_sets``.

    Returns:
        A dict mapping 'Data_<i>_1' / 'Data_<i>_2' to the corresponding
        dataset, with the keys inserted in shuffled order.
    """
    datasets = {}

    datasets['Data_0_1'], datasets['Data_0_2'] = get_reduced_sets(data, fold, num_trials = num_trials)

    for i in range(1,5):
        # Work on the `data` argument rather than a module-level DataFrame, so
        # the function perturbs what the caller actually passed in. The matrix
        # is rebuilt on every iteration so each perturbation starts fresh.
        data_matrix = np.vstack(data['both_rates'])
        data_sim = data.copy()
        if i == 1:
            # Previously the remove_neurons result was overwritten by
            # add_offset(data_matrix, 50), making this case identical to i == 4.
            # NOTE(review): chaining assumes "remove then offset" was the
            # intent and that add_offset accepts remove_neurons' output - confirm.
            sim_data = remove_neurons(data_matrix, 30)
            sim_data = add_offset(sim_data, 50)
        elif i == 2:
            sim_data = shuffle_neurons(data_matrix, 60)
        elif i == 3:
            sim_data = add_gain(data_matrix, 50)
        elif i == 4:
            sim_data = add_offset(data_matrix, 50)

        data_sim['both_rates'] = sim_data.tolist()

        datasets[f'Data_{i}_1'], datasets[f'Data_{i}_2'] = get_reduced_sets(data_sim, fold, num_trials = num_trials)

    # Shuffle the dictionary keys to check the importance of the task order.
    keys_list = list(datasets.keys())
    random.shuffle(keys_list)
    shuffled_sets = {key: datasets[key] for key in keys_list}

    return shuffled_sets


In [5]:
def main(args=None):
    """Load the pre-processed recording and generate the simulated datasets.

    Args:
        args: Optional namespace (e.g. from ``argparse``) that may provide
            ``name``, ``date``, ``fold``, ``data_name`` and ``num_trials``.
            Any attribute that is missing, or all of them when ``args`` is
            None, falls back to the defaults below, so ``main()`` behaves as
            before while the script entry point can call ``main(args)``.

    Returns:
        The dictionary of datasets produced by ``generate_data``.
    """
    # getattr on None simply yields the default, which covers main() with no args.
    name = getattr(args, 'name', 'Chewie')
    date = getattr(args, 'date', '1007')
    fold = getattr(args, 'fold', 0)
    # Not used yet: only needed by the currently disabled save-to-disk step.
    data_name = getattr(args, 'data_name', 'Experiment7')
    num_trials = getattr(args, 'num_trials', 5)

    # Normalise spellings such as 'All' to the 'all' keyword used downstream.
    if isinstance(num_trials, str) and num_trials.lower() == 'all':
        num_trials = 'all'

    ## Load pre-processed data
    data_path = '../Data/Processed_Data/Tidy_'+name+'_'+date+'.pkl'

    with open(data_path, 'rb') as file:
        tidy_df = pickle.load(file)
    # Keep only the rows whose 'epoch' column equals 'BL'
    baseline_df = tidy_df.loc[tidy_df['epoch'] == 'BL']

    sim_data = generate_data(baseline_df, fold, num_trials)
    return sim_data

"""     data_dir = './'
    path_to_save_data = os.path.join(data_dir, 'Sim_Data_'+data_name+'.pkl')

    # Pickle the data and save it to file
    with open(path_to_save_data, 'wb') as handle:
        pickle.dump(sim_data, handle, protocol=4)

    print("Saving data...") """

'     data_dir = \'./\'\n    path_to_save_data = os.path.join(data_dir, \'Sim_Data_\'+data_name+\'.pkl\')\n\n    # Pickle the data and save it to file\n    with open(path_to_save_data, \'wb\') as handle:\n        pickle.dump(sim_data, handle, protocol=4)\n\n    print("Saving data...") '

In [6]:
# Run the pipeline with main()'s built-in default settings
main()

Train trials 109
Test trials  34
Val trials 27
We are testing the optimization method on fold  0
Train trials 109
Test trials  34
Val trials 27
We are testing the optimization method on fold  0
Train trials 109
Test trials  34
Val trials 27
We are testing the optimization method on fold  0
Train trials 109
Test trials  34
Val trials 27
We are testing the optimization method on fold  0
Train trials 109
Test trials  34
Val trials 27
We are testing the optimization method on fold  0


{'Data_0_1': [array([[[1.51380167e+01, 6.56242847e+00, 1.48557577e+01, ...,
           1.58309117e-01, 8.91070843e+00, 1.94683704e+01],
          [1.35491705e+01, 7.23014116e+00, 1.51941977e+01, ...,
           4.76817861e-02, 6.41552687e+00, 1.81734600e+01],
          [1.20083122e+01, 9.25305748e+00, 1.39940872e+01, ...,
           1.22380443e-02, 4.13847256e+00, 1.69299240e+01],
          ...,
          [3.28690605e+01, 3.31315207e+00, 1.58141928e+01, ...,
           7.81329727e+00, 7.19302750e+00, 1.90961609e+01],
          [3.28154068e+01, 1.58763051e+00, 1.10880156e+01, ...,
           6.87365389e+00, 4.83992338e+00, 1.84512062e+01],
          [3.22976761e+01, 8.12190890e-01, 8.52823544e+00, ...,
           6.10214090e+00, 3.51914334e+00, 1.80302219e+01]],
  
         [[6.06199920e-01, 6.10225296e+00, 8.63625813e+00, ...,
           3.88372278e+00, 1.31604481e+01, 8.23097992e+00],
          [6.06199920e-01, 9.67763615e+00, 8.37758923e+00, ...,
           5.79383373e+00, 1.50738049

In [None]:
if __name__ == "__main__":
    def _num_trials_arg(value):
        """Convert the --num_trials argument: 'all' (any case) or a positive integer."""
        if value.lower() == 'all':
            return 'all'
        try:
            count = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "expected 'all' or a positive integer, got %r" % value
            ) from None
        if count < 1:
            raise argparse.ArgumentTypeError(
                "expected 'all' or a positive integer, got %r" % value
            )
        return count

    # Parse the command-line options; every option has a default.
    parser = argparse.ArgumentParser(
    description="Main script to run experiments"
    )

    parser.add_argument(
        "--name",
        type=str,
        default='Chewie',
        help="Name of the participant from whom the data was recorded",
    )
    parser.add_argument(
        "--date",
        type=str,
        default='1007',
        help="Date when the data was recorded",
    )
    parser.add_argument(
        "--fold",
        type=int,
        default=0,
        help="Data fold to use (from 0 to 4)",
    )
    parser.add_argument(
        "--data_name",
        type=str,
        default='Test',
        help="How the data should be saved, usually name of the experiment",
    )
    parser.add_argument(
        "--num_trials",
        # `int/str` is a division of two types and raises TypeError when this
        # line is evaluated; argparse needs a single callable converter.
        type=_num_trials_arg,
        default='all',
        help="Number of trials to include for the experiment. Default All.",
    )

    args = parser.parse_args()
    main(args)