In [None]:
# Mount Google Drive at /content/gdrive (Colab-only); the dataset paths used
# by the later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
!pip install torch torchvision mat73 pymatreader matplotlib tensorboard mne numpy scipy numba scikit-learn PyWavelets pandas mne-features

In [None]:
import scipy.io as scipy
import string
import os
import torch
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
import zipfile
import io
from zipfile import ZipFile
import numpy as np
import mat73
import mne
from enum import Enum
import matplotlib.pyplot as plt
import random
import sys
from sklearn.preprocessing import MinMaxScaler
import mne_features
import joblib

In [None]:
#@title General File Operations
def file_saving(current_dir, filenames, tensor_list, dir_name = None, key=None, feature=None):
  """
  Save each object in tensor_list with torch.save under the matching file name.

  Args:
    current_dir: base directory for the output.
    filenames: file names, one per entry of tensor_list (lengths must match).
    tensor_list: objects to save; each entry is a tensor or a list of tensors.
    dir_name: optional sub-directory of current_dir to write into.
    key, feature: only used to build the progress message.

  The target directory is created when it does not exist yet.
  """
  assert len(filenames) == len(tensor_list)

  # Progress banner; only the key+feature variant ends with a newline.
  if key is not None and feature is not None:
    print(f'Saving {key}_{feature} Tensor(s)....')
  elif key is not None:
    print(f'Saving {key} Tensors....',end='')
  else:
    print(f'Saving EEG and ECoG Tensors....',end='')

  target_dir = current_dir if dir_name is None else current_dir + '/'+dir_name
  if not os.path.exists(target_dir):
    os.makedirs(target_dir)

  for file_name, payload in zip(filenames, tensor_list):
    # A list entry is reported as the list of its members' shapes.
    if type(payload) is list:
      reported_shape = [member.shape for member in payload]
    else:
      reported_shape = payload.shape
    print(f'\tFile {file_name} shape ={reported_shape}')
    torch.save(payload, os.path.join(target_dir, file_name))
  print(f'Finished')

                                #|     dataset_dirs         |        dataset_names              |
                                #checkpoint4/normalized_data/powerset,zeroset,raw_dataset,meanset
def save_datasets(current_dir, dataset_dirs, feature_names, tensor_list, key, specific_index = -1):
  """
  Save train/validation/test splits for every feature via file_saving.

  Args:
    current_dir: base directory.
    dataset_dirs: list of nested directory names created under current_dir,
      e.g. ["checkpoint3", "normalized_eeg_01"].
    feature_names: one name per entry of tensor_list (e.g. data, power, ...).
    tensor_list: tensor_list[i] holds the (train, validation, test) splits
      of feature_names[i].
    key: prefix used in the generated file names.
    specific_index: when >= 0, only that split (0=train, 1=validation,
      2=test) of every feature is saved, into current_dir/<key>_<split>.

  With specific_index < 0 each feature is written to
  <nested dir>/<feature>_dir/<key>_<feature>_<split>.pt.
  """
  # Create the innermost directory (and any missing parents) in one call.
  inner_directory = os.path.join(current_dir, *dataset_dirs)
  os.makedirs(inner_directory, exist_ok=True)

  assert len(feature_names) == len(tensor_list)
  dataset_type = ['train','validation','test']
  if len(feature_names) == 0:
    return

  if specific_index >= 0:
    # This branch does not depend on the individual feature, so it runs once
    # instead of once per feature (which rewrote identical files each time).
    # NOTE(review): it writes under current_dir rather than the nested
    # directory created above; kept as in the original - confirm intent.
    split_name = dataset_type[specific_index]
    file_names = [f'{key}_{dataset_name}_{split_name}.pt' for dataset_name in feature_names]
    to_save_tensors = [tensor[specific_index] for tensor in tensor_list]
    file_saving(current_dir,file_names,to_save_tensors,dir_name=f'{key}_{split_name}')
    return

  for feature_name, split_tensors in zip(feature_names, tensor_list):
    file_names = [f"{key}_{feature_name}_{split_name}.pt" for split_name in dataset_type]
    print(f'tensor_list[index] length = {len(split_tensors)}')
    file_saving(inner_directory,file_names,split_tensors,dir_name=feature_name+"_dir")

In [None]:
#@title File Extraction Driver

def find_key(diction, key_tag):
  """
  Return the first key of a loaded .mat dictionary that contains key_tag.

  Keys are scanned in the dictionary's iteration order. When no key
  contains key_tag the literal "WaveData" is returned as the fallback.
  """
  matching_keys = (name for name in diction if key_tag in name)
  return next(matching_keys, "WaveData")

def separated_data_files(fileList):
  """
  Split archive member names into sorted EEG and ECoG file lists.

  Only the last path component (after the final "/") is inspected, so
  directory names never influence the classification. A name containing
  "EEG" is treated as EEG; otherwise a name containing "ECoG" is treated
  as ECoG; anything else is ignored. Both lists are sorted so that the
  i-th EEG file lines up with the i-th ECoG file.

  Raises AssertionError when the two lists differ in length.
  """
  named_entries = [(entry, entry.rsplit("/", 1)[-1]) for entry in fileList]
  eegfiles = sorted(entry for entry, base in named_entries if "EEG" in base)
  ecogfiles = sorted(
    entry for entry, base in named_entries
    if "EEG" not in base and "ECoG" in base
  )
  # Input and target recordings must come in pairs.
  assert len(eegfiles) == len(ecogfiles)
  return eegfiles, ecogfiles

def extract_file_data(data_dir,file_name, data_key,zipObj):
  """
  Load one .mat recording as a torch tensor, extracting it from the zip first
  when it is not already present under data_dir.

  Args:
    data_dir: directory holding (or receiving) the extracted file.
    file_name: member name inside the archive.
    data_key: selects the loader: a value containing "EEG" uses
      scipy.io.loadmat, one containing "ECoG" uses mat73.loadmat. It is also
      the tag passed to find_key to locate the data array.
    zipObj: open ZipFile used for extraction when the file is missing.

  Returns:
    torch.Tensor built from the array stored under the located key.

  Raises:
    ValueError: when data_key contains neither "EEG" nor "ECoG".
  """
  # Choose the loader before touching the disk so an unsupported key fails
  # immediately instead of as an unbound-variable error after extraction.
  if "EEG" in data_key:
    load_mat = scipy.loadmat
  elif "ECoG" in data_key:
    load_mat = mat73.loadmat
  else:
    raise ValueError(f'Unsupported data_key {data_key!r}: expected it to contain "EEG" or "ECoG"')

  extracted_file = os.path.join(data_dir,file_name)
  if not os.path.exists(extracted_file):
    print(f'\t\tExtracting {file_name}....',end='')
    extracted_file = zipObj.extract(file_name,path=data_dir)

  loaded_mat = load_mat(extracted_file)
  keyname = find_key(loaded_mat,data_key)
  final_form = torch.tensor(loaded_mat[keyname])
  print(f'Finished Extracting {file_name}_{data_key}: Key={keyname}\tShape={final_form.shape}')
  return final_form
  
def extract_directory(location):
  """
  'Top level' extraction of one zip archive.

  Every EEG/ECoG file pair in the archive at `location` is extracted into a
  sibling directory named <archive path without extension>_Extracted_Data
  and loaded as a tensor.

  Returns:
    (EEG tensors, ECoG tensors): two lists where the i-th entries belong to
    the same recording pair.
  """
  print(f"\tExtracting files in {location}...")
  # Drop the extension (whatever its length) and append _Extracted_Data.
  archive_root, _extension = os.path.splitext(location)
  data_directory = archive_root + "_Extracted_Data"
  os.makedirs(data_directory, exist_ok=True)

  EEG_dir_Tensors = []
  ECoG_dir_Tensors = []
  with ZipFile(location, 'r') as zipObj :
    # Sorted lists of EEG and ECoG member names, aligned by position.
    eeg_files, ecog_files = separated_data_files(zipObj.namelist())
    assert len(eeg_files) == len(ecog_files)
    for eeg_name, ecog_name in zip(eeg_files, ecog_files):
      EEG_dir_Tensors.append(extract_file_data(data_directory,eeg_name,"EEG",zipObj))
      ECoG_dir_Tensors.append(extract_file_data(data_directory,ecog_name,"ECoG",zipObj))
  print(f"Finished Extracting\n")
  return EEG_dir_Tensors, ECoG_dir_Tensors

def extraction_driver(current_directory,zipnames, checkpoint = False):
  """
  Driver for extracting data from the zip files.

  Args:
    current_directory: directory that contains the archives.
    zipnames: archive file names, processed in the given order.
    checkpoint: when True, both tensor lists are saved under
      current_directory/checkpoint1 via file_saving.

  Returns:
    (EEG tensors, ECoG tensors) accumulated over all archives.
  """
  EEG_Tensors, ECoG_Tensors = [], []
  print('Beginning Extraction Process....')
  for archive_name in zipnames:
    archive_eeg, archive_ecog = extract_directory(current_directory + "/" + archive_name)
    # Keep the per-archive results in archive order.
    EEG_Tensors.extend(archive_eeg)
    ECoG_Tensors.extend(archive_ecog)
  print(f'Finished Extracting {len(EEG_Tensors)} Sequences')

  if checkpoint:
    checkpoint_names = ["raw_unfiltered_ecog_list.pt","raw_unfiltered_eeg_list.pt"]
    file_saving(current_directory,checkpoint_names,[ECoG_Tensors, EEG_Tensors],dir_name="checkpoint1")

  return EEG_Tensors, ECoG_Tensors

In [None]:
# Extract every EEG/ECoG recording pair from the three subject archives.
# `checkpoint` keeps its default (False), so nothing is saved to checkpoint1.
extracted_eeg, extracted_ecog = extraction_driver("/content/gdrive/MyDrive/BCI_Project/Datasets", ["S1_EEGandECoG.zip","S2_EEGandECoG.zip","S3_EEGandECoG.zip"])

Beginning Extraction Process....
	Extracting files in /content/gdrive/MyDrive/BCI_Project/Datasets/S1_EEGandECoG.zip...
Finished Extracting 20110607S1_EEGECoG_Su_Oosugi-Naoya+Nagasaka-Yasuo+Hasegawa+Naomi_ECoG128-EEG18_mat/EEG01.mat_EEG: Key=EEG2	Shape=torch.Size([19, 327815])
Finished Extracting 20110607S1_EEGECoG_Su_Oosugi-Naoya+Nagasaka-Yasuo+Hasegawa+Naomi_ECoG128-EEG18_mat/ECoG01.mat_ECoG: Key=WaveData	Shape=torch.Size([129, 318156])
Finished Extracting 20110607S1_EEGECoG_Su_Oosugi-Naoya+Nagasaka-Yasuo+Hasegawa+Naomi_ECoG128-EEG18_mat/EEG02.mat_EEG: Key=EEG2	Shape=torch.Size([19, 326744])
Finished Extracting 20110607S1_EEGECoG_Su_Oosugi-Naoya+Nagasaka-Yasuo+Hasegawa+Naomi_ECoG128-EEG18_mat/ECoG02.mat_ECoG: Key=WaveData	Shape=torch.Size([129, 317907])
Finished Extracting

	Extracting files in /content/gdrive/MyDrive/BCI_Project/Datasets/S2_EEGandECoG.zip...
Finished Extracting 20110607S2_EEGECoG_Su_Oosugi-Naoya+Nagasaka-Yasuo+Hasegawa+Naomi_ECoG128-EEG18_mat/EEG01_anesthesia.mat_EE

In [None]:
#@title Resampling Data Driver
def calc_frequency(old_points, new_points,orig_frequency):
  """
  Compute the resampling factor and the resulting sampling frequency.

  The recording duration is old_points / orig_frequency; the new frequency
  is the one at which new_points samples span that same duration.

  Returns:
    (factor, new_frequency) where factor is orig/new when shrinking
    (old_points > new_points) and new/orig otherwise.
  """
  duration = old_points/orig_frequency
  new_freq = (new_points/duration)
  shrinking = old_points > new_points
  factor = orig_frequency / new_freq if shrinking else new_freq/orig_frequency
  return factor, new_freq

def resample_eeg(eeg_tensor,ecog_seq_len,original_frequency, id, mode = "ndarray"):
  """
  Resample one EEG recording so that it has ecog_seq_len samples.

  Args:
    eeg_tensor: 2-D tensor indexed as (channel, sample).
    ecog_seq_len: target number of samples.
    original_frequency: sampling frequency of eeg_tensor.
    id: sequence number, used only in the progress message.
    mode: "ndarray" resamples the array with mne.filter.resample; "raw"
      wraps the data in an mne RawArray and resamples that.

  Returns:
    (resampled tensor, new frequency). When the recording already has the
    target length it is returned unchanged with original_frequency.
    None is returned for an unrecognised mode.
  """
  # Exact comparison: the former `mode in "ndarray"` was a substring test,
  # so values such as "" or "ray" selected a branch by accident.
  if mode not in ("ndarray", "raw"):
    return None

  eeg_data = eeg_tensor.data.numpy()
  # Sample count from the array shape; indexing row 1 broke on one channel.
  eeg_seq_len = eeg_data.shape[1]
  if eeg_seq_len == ecog_seq_len:
    # Nothing to resample; still return a (tensor, frequency) pair.
    return eeg_tensor, original_frequency

  frequency_factor, new_frequency = calc_frequency(eeg_seq_len,ecog_seq_len,original_frequency)
  print(f'\tResampling Sequence {id}: {original_frequency} => {new_frequency}')

  if mode == "ndarray":
    if eeg_seq_len > ecog_seq_len: # down-sample branch
      resampled = mne.filter.resample(eeg_data,down = frequency_factor, pad="constant")
    else: # up-sample branch
      resampled = mne.filter.resample(eeg_data,up = frequency_factor,pad="constant")
    return torch.tensor(resampled), new_frequency

  # mode == "raw": describe the data with its actual sampling frequency
  # rather than a hard-coded 1000.0.
  eeg_info = mne.create_info(ch_names=len(eeg_data),sfreq=original_frequency,ch_types='eeg',verbose=None)
  eeg_raw_array = mne.io.RawArray(eeg_data,eeg_info)
  eeg_data2,times = eeg_raw_array.resample(new_frequency)[:]
  return torch.tensor(eeg_data2), new_frequency

def resampling_driver(current_directory,eeg_data, ecog_lens, eeg_freqs, overwrite_unfiltered = False):
  """
  Resample every EEG recording to the length of its ECoG counterpart.

  Args:
    current_directory: base directory, used only when overwrite_unfiltered.
    eeg_data: list of 2-D EEG tensors indexed as (channel, sample).
    ecog_lens: target sample count for each EEG tensor.
    eeg_freqs: one sampling frequency for all recordings, or a list/tuple
      with one frequency per recording.
    overwrite_unfiltered: when True, save the resampled list into the
      existing current_directory/checkpoint1 directory.

  Returns:
    (resampled EEG tensors, per-recording frequencies). A recording that
    already has the target length is passed through with its own frequency.
  """
  is_freq_list = isinstance(eeg_freqs, (list, tuple))
  if is_freq_list:
    assert len(eeg_freqs) == len(eeg_data)
  assert len(ecog_lens) == len(eeg_data)

  resampled_eeg_list = []
  frequency_list = []
  print('Beginning Resampling Process....')
  for tensor_id, (eeg_tensor, target_len) in enumerate(zip(eeg_data, ecog_lens)):
    source_freq = eeg_freqs[tensor_id] if is_freq_list else eeg_freqs
    if eeg_tensor.size(1) != target_len:
      resampled, new_freq = resample_eeg(eeg_tensor, target_len, source_freq, tensor_id)
      resampled_eeg_list.append(resampled)
      frequency_list.append(new_freq)
    else:
      resampled_eeg_list.append(eeg_tensor)
      # Record this recording's frequency, not the whole frequency list.
      frequency_list.append(source_freq)

  if overwrite_unfiltered:
    print('Overwriting Unfiltered EEG Data with Resampled EEG Data....',end='')
    # Check the directory that is actually written to below (the extraction
    # checkpoint is stored in "checkpoint1").
    checkpoint1_dir = os.path.join(current_directory, 'checkpoint1')
    if not os.path.exists(checkpoint1_dir):
      print("\tUnable to save data as the checkpoint1 extraction step is not saved")
    else:
      file_saving(current_directory,["resampled_unfiltered_eeg_tensor_list.pt"],[resampled_eeg_list],dir_name="checkpoint1")
  return resampled_eeg_list, frequency_list

In [None]:
# Each ECoG recording's sample count (dimension 1) is the resampling target
# for the EEG recording at the same list position. 1000.0 is passed as the
# original EEG sampling frequency for every recording.
ecog_seq_lens = [ecog_tens.size(1) for ecog_tens in extracted_ecog]
resampled_eeg_tensors, frequency_tensors = resampling_driver("/content/gdrive/MyDrive/BCI_Project/Datasets",extracted_eeg,ecog_seq_lens,1000.0,overwrite_unfiltered=False)

Beginning Resampling Process....
	Resampling Sequence 0: 1000.0 => 970.5352104083096
	Resampling Sequence 1: 1000.0 => 972.9543618245476
	Resampling Sequence 2: 1000.0 => 1065.819450806432
	Resampling Sequence 3: 1000.0 => 991.5698696592785
	Resampling Sequence 4: 1000.0 => 968.4353125735831
	Resampling Sequence 5: 1000.0 => 955.7968617071556
	Resampling Sequence 6: 1000.0 => 987.5395190279093
	Resampling Sequence 7: 1000.0 => 984.1045149312752


In [None]:
# Show the per-recording frequencies returned by resampling_driver.
print(frequency_tensors)

[970.5352104083096, 972.9543618245476, 1065.819450806432, 991.5698696592785, 968.4353125735831, 955.7968617071556, 987.5395190279093, 984.1045149312752]


In [None]:
#@title Truncated, Spatially Filtered Sequences

def find_minlen(tensor_list):
  """
  Return the smallest size along dimension 1 among the given tensors.

  sys.maxsize is returned for an empty list.
  """
  return min((entry.size(1) for entry in tensor_list), default=sys.maxsize)

def truncate_tensors(tensor_list, min_len, key, specific_channels = None):
  """
  Truncate every recording to min_len samples, optionally keep only the
  requested channels, apply CAR_signal_removal and stack the results.

  Args:
    tensor_list: list of 2-D tensors indexed as (channel, sample).
    min_len: number of leading samples to keep.
    key: label used in the progress messages.
    specific_channels: optional channel index selection (used for EEG,
      which may need a subset of channels); None keeps all channels.

  Returns:
    The per-recording outputs of CAR_signal_removal combined with
    torch.stack.
  """
  print(f'Beginning Trunc_Process for {key}\n')
  processed = []
  for seq_id, sequence in enumerate(tensor_list):
    print(f'\tProc. Sequence {seq_id}...',end='')

    if specific_channels is None:
      window = sequence.data[:,0:min_len]
    else:
      window = sequence.data[specific_channels,0:min_len]
      print(f'Filtered Channels...',end='')

    processed.append(CAR_signal_removal(window))
    print(f"Shape={processed[-1].shape}")

  print('Combining Trunc_Sequences...',end='')
  stacked_tensor = torch.stack(processed)
  print(f'Combined {key} Shape={stacked_tensor.shape}\n')
  return stacked_tensor

def CAR_signal_removal(tensor):
  """
  Spatially filter a recording by removing the common average reference
  (CAR) and return the result transposed.

  Args:
    tensor: 2-D tensor indexed as (channel, sample).

  Returns:
    A new tensor indexed as (sample, channel) in which the mean across
    channels has been subtracted from every channel at each sample. The
    input tensor is not modified.
  """
  mean_tensor = torch.mean(tensor,dim=0)
  print(f'Removing CAR({mean_tensor.shape})...',end='')
  assert mean_tensor.size(0) == tensor.size(1)
  # Broadcasting subtracts the per-sample mean from every channel row and
  # allocates a new tensor. The former in-place `tensor[row] -= mean` wrote
  # through to the caller's data whenever `tensor` was a slice view.
  centered = tensor - mean_tensor
  print(f'Transposed...Finished')
  return torch.transpose(centered,0,1)

def truncate_driver(current_directory,eeg_tensors, ecog_tensors, eeg_filtered_channels = None, save_checkpoint2=False):
  """
  Truncate and spatially filter the paired EEG and ECoG recordings.

  Both lists must have the same number of recordings and the same minimum
  length along dimension 1; every recording is cut to that length.

  Args:
    current_directory: base directory, used only when save_checkpoint2.
    eeg_tensors, ecog_tensors: lists of 2-D recordings.
    eeg_filtered_channels: optional channel selection applied to EEG only.
    save_checkpoint2: when True, both results are saved under
      current_directory/checkpoint2.

  Returns:
    (cleaned EEG tensor, cleaned ECoG tensor).
  """
  assert len(eeg_tensors) == len(ecog_tensors)
  shortest_ecog = find_minlen(ecog_tensors)
  shortest_eeg = find_minlen(eeg_tensors)
  assert shortest_eeg == shortest_ecog

  cleaned_eeg_tensor = truncate_tensors(eeg_tensors,shortest_ecog,"EEG",specific_channels=eeg_filtered_channels)
  cleaned_ecog_tensor = truncate_tensors(ecog_tensors,shortest_ecog,"ECoG")
  # Both stacks must hold the same number of recordings.
  assert cleaned_eeg_tensor.shape[0:1] == cleaned_ecog_tensor.shape[0:1]

  if not save_checkpoint2:
    return cleaned_eeg_tensor, cleaned_ecog_tensor

  # The file name records whether a channel subset was selected.
  if eeg_filtered_channels is None:
    eeg_file_name = "cleaned_eeg_data_tensor.pt"
  else:
    eeg_file_name = "cleaned_LEFT_eeg_data_tensor.pt"
  file_saving(current_directory,[eeg_file_name,"cleaned_ecog_data_tensor.pt"],[cleaned_eeg_tensor,cleaned_ecog_tensor],dir_name="checkpoint2")
  return cleaned_eeg_tensor, cleaned_ecog_tensor

In [None]:
# Truncate and spatially filter the resampled EEG and the extracted ECoG.
# No channel filter is passed and save_checkpoint2 keeps its default (False).
cleaned_eeg, cleaned_ecog = truncate_driver("/content/gdrive/MyDrive/BCI_Project/Datasets",resampled_eeg_tensors,extracted_ecog)

Beginning Trunc_Process for EEG

	Proc. Sequence 0...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 1...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 2...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 3...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 4...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 5...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 6...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
	Proc. Sequence 7...Removing CAR(torch.Size([316689]))...Transposed...Finished
Shape=torch.Size([316689, 19])
Combining Trunc_Sequences...Combined EEG Shape=torch.Size([8, 316689, 19])

Beginning T

In [None]:
#@title Feature Extraction
import numpy as np
import torch
import mne_features

def power_spectral_calc(data, arg_tuple):
  """
  Adapter around mne_features.univariate.compute_spect_entropy.

  The window function callers pass (data, extra argument); this wrapper
  swaps the order so that arg_tuple becomes the first positional argument
  and data the second. In this file arg_tuple receives a single sampling
  frequency value.
  """
  sampling_frequency = arg_tuple
  return mne_features.univariate.compute_spect_entropy(sampling_frequency, data)

def univariate_window_calculation(operation, tensor, window_size,function, *var_args):
  """
  Apply `function` to consecutive non-overlapping windows of each sequence.

  Args:
    operation: label used in the progress message.
    tensor: indexable collection of 2-D arrays indexed as (channel, sample).
    window_size: number of samples per window; trailing samples that do
      not fill a whole window are dropped.
    function: called as function(window) or, when var_args is given, as
      function(window, frequency); its result fills one column of the
      per-sequence result.
    *var_args: optional; var_args[0] is a list of frequencies. Sequence i
      uses entry i, or the last entry when the list is shorter.

  Returns:
    torch tensor indexed as (sequence, window, channel).
  """
  print(f'\tExtracting Features for {operation}...',end='')
  per_sequence = []
  for sequence_id in range(len(tensor)):
    sequence = tensor[sequence_id]
    print(f'Sequence {sequence_id}...',end='')
    total_windows = int(sequence.shape[1]/window_size)
    result = np.zeros((sequence.shape[0],total_windows))

    # Extra positional argument handed to `function`, if any.
    extra_args = ()
    if var_args:
      freq_list = var_args[0]
      frequency = freq_list[min(sequence_id,len(freq_list)-1)]
      print(f'Sampling at {frequency}...',end='')
      extra_args = (frequency,)

    for window_id in range(total_windows):
      start = window_id*window_size
      stop = start + window_size
      result[:,window_id] = function(sequence[:,start:stop], *extra_args)
    per_sequence.append(torch.from_numpy(result).transpose(0,1))

  result_torch = torch.stack(per_sequence)
  print(f'Shape={result_torch.shape}')
  return result_torch

def feature_extraction(tensor_1, sampling_freq, steps_per_points, key):
  """
  Compute three windowed features for a stacked recording tensor.

  Args:
    tensor_1: 3-D torch tensor; dimensions 1 and 2 are swapped before the
      data is handed to the window calculation.
    sampling_freq: list of sampling frequencies forwarded to the power
      spectral entropy calculation.
    steps_per_points: window size in samples.
    key: label used in the progress message.

  Returns:
    [power spectral entropy, mean, zero crossings], all of equal shape.
  """
  print(f'Extracting Features for {key}...',end='')
  numpy_tensor = tensor_1.transpose(1,2).data.numpy()
  print(f'Converted from torch to numpy; newshape={numpy_tensor.shape}')

  # (label, window function, extra positional arguments), in output order.
  feature_specs = [
    ('power spectral entropy', power_spectral_calc, (sampling_freq,)),
    ('mean', mne_features.univariate.compute_mean, ()),
    ('zero crossings', mne_features.univariate.compute_zero_crossings, ()),
  ]
  features = [
    univariate_window_calculation(label, numpy_tensor, steps_per_points, window_fn, *extra)
    for label, window_fn, extra in feature_specs
  ]
  assert features[0].shape == features[1].shape and features[1].shape == features[2].shape
  return features

#good checkpoint to save the raw data -- will load data from there now on
def feature_extraction_driver(current_directory,eeg_tensor, ecog_tensor, sampling_freq=[1000.0], steps_per_point=10, expected_len = 4,  save_checkpoint2=True):
  """
  Build the [raw data, feature 1, feature 2, feature 3] lists for ECoG and
  EEG and optionally save them.

  Args:
    current_directory: base directory for the checkpoint.
    eeg_tensor, ecog_tensor: stacked recordings passed to feature_extraction.
    sampling_freq: EEG sampling frequencies (ECoG always uses [1000.0]).
    steps_per_point: window size in samples.
    expected_len: asserted length of each resulting list.
    save_checkpoint2: when True, both lists are saved under
      current_directory/checkpoint2.

  Returns:
    The EEG list only; the ECoG list is available through the checkpoint.
  """
  # ECoG is processed first, then EEG.
  ecog_all_data = [ecog_tensor, *feature_extraction(ecog_tensor,[1000.0],steps_per_point,'ECoG')]
  eeg_all_data = [eeg_tensor, *feature_extraction(eeg_tensor,sampling_freq,steps_per_point,"EEG")]
  assert len(eeg_all_data) == expected_len and len(ecog_all_data) == expected_len
  if save_checkpoint2:
    checkpoint_names = ["mod_eeg_features_list.pt","mod_ecog_features_list.pt"]
    file_saving(current_directory,checkpoint_names,[eeg_all_data,ecog_all_data], dir_name="checkpoint2")
  return eeg_all_data

In [None]:
# Compute windowed features using the per-recording EEG frequencies from the
# resampling step. save_checkpoint2 defaults to True, so both feature lists
# are written to checkpoint2; only the EEG list is returned.
eeg_full_data = feature_extraction_driver("/content/gdrive/MyDrive/BCI_Project/Datasets",cleaned_eeg, cleaned_ecog, sampling_freq = frequency_tensors)

Extracting Features for ECoG...Converted from torch to numpy; newshape=(8, 129, 316689)
	Extracting Features for power spectral entropy...Sequence 0...Sampling at 1000.0...Sequence 1...Sampling at 1000.0...Sequence 2...Sampling at 1000.0...Sequence 3...Sampling at 1000.0...Sequence 4...Sampling at 1000.0...Sequence 5...Sampling at 1000.0...Sequence 6...Sampling at 1000.0...Sequence 7...Sampling at 1000.0...Shape=torch.Size([8, 31668, 129])
	Extracting Features for mean...Sequence 0...Sequence 1...Sequence 2...Sequence 3...Sequence 4...Sequence 5...Sequence 6...Sequence 7...Shape=torch.Size([8, 31668, 129])
	Extracting Features for zero crossings...Sequence 0...Sequence 1...Sequence 2...Sequence 3...Sequence 4...Sequence 5...Sequence 6...Sequence 7...Shape=torch.Size([8, 31668, 129])
Extracting Features for EEG...Converted from torch to numpy; newshape=(8, 19, 316689)
	Extracting Features for power spectral entropy...Sequence 0...Sampling at 970.5352104083096...Sequence 1...Sampling at 

In [None]:
#@title Normalize Driver (Checkpt 3)- Normalizes numbers between (-1,1) and Splits data stored in files to train, test, and validation sets

#normalizes a 3D tensor
def normalize_data(data_tensor, key, feature, model_range=(-1,1),debug=True,transform = True):
  """
  Fit one MinMaxScaler per sequence of a 3-D tensor and optionally apply it.

  Args:
    data_tensor: 3-D torch tensor; each data_tensor[i] is a 2-D matrix that
      gets its own scaler.
    key, feature: labels used in the progress message.
    model_range: target feature_range of the scalers.
    debug: when True, print the shape of each scaler's min/max arrays.
    transform: when truthy, also return the scaled data.

  Returns:
    (normalized tensor, scalers). The first element is None when
    transform is falsy, in which case only the scalers are fitted.
  """
  scalers = []
  normalized_sequences = []
  numpy_data = data_tensor.data.numpy()
  print(f'\tNormalizing {key}\'s {feature}: {numpy_data.shape}')

  # One scaler per sequence.
  for sequence in numpy_data:
    scaler = MinMaxScaler(feature_range=model_range).fit(sequence)
    scalers.append(scaler)
    if debug:
      print(f"\t\tScaler Shapes=(Min={scaler.data_min_.shape};Max={scaler.data_max_.shape})")
    if transform:
      normalized_sequences.append(torch.tensor(scaler.transform(sequence)))

  if not transform:
    # Fit-only mode: there is nothing to stack (this used to crash).
    return None, scalers

  finalized_tensor = torch.stack(normalized_sequences)
  print(f'\t\tNormalized Shape={finalized_tensor.shape}')
  return finalized_tensor, scalers

def load_dataset(current_dir, dataset_dir, dataset_name, key, expected_size = 4):
  """
  Load a saved list with torch.load from current_dir/dataset_dir/dataset_name.

  Args:
    current_dir: base directory.
    dataset_dir: sub-directory that holds the file.
    dataset_name: file name to load.
    key: not used by this function.
    expected_size: asserted length of the loaded object.

  Raises:
    Exception: when the directory or the file does not exist.
  """
  dir_loc = os.path.join(current_dir,dataset_dir)
  if not os.path.exists(dir_loc):
    raise Exception(f'Directory {dataset_dir} does not exist in {current_dir}\ndownload from github again')

  data_loc = os.path.join(dir_loc,dataset_name)
  if not os.path.exists(data_loc):
    raise Exception(f'File {dataset_name} does not exist in {dir_loc}\nDownload from github')

  tensor_data = torch.load(data_loc)
  assert len(tensor_data) == expected_size
  return tensor_data

def normalize_dataset(current_dir,dataset_dir,key, tensor_data,norm_key,def_range = (-1,1),debug_top=True):
  """
  Normalize every feature tensor of one dataset with normalize_data.

  Args:
    current_dir, dataset_dir: location where the ECoG scalers are dumped.
    key: dataset label; when it contains "ECoG" the fitted scalers are
      collected and saved with joblib.
    tensor_data: list of feature tensors, ordered as
      data, power, mean, zero.
    norm_key: suffix of the scaler file name.
    def_range: target range passed to normalize_data.
    debug_top: forwarded to normalize_data as its debug flag.

  Returns:
    List of normalized tensors in the same order as tensor_data.
  """
  feature_type = ["data","power","mean", "zero"]
  keep_scalers = "ECoG" in key
  ecog_scalers = []
  dir_loc = os.path.join(current_dir,dataset_dir)
  normalized_data = []
  print(f'Beginning to Normalize {key} Data in Range {def_range}; Key={norm_key}')
  for dataset_id, dataset in enumerate(tensor_data):
    # The same normalization applies to every dataset; only ECoG keeps the
    # scalers (a key with neither tag no longer hits an unbound variable).
    norm_dataset, scalers = normalize_data(dataset,key,feature_type[dataset_id],model_range=def_range,debug=debug_top)
    if keep_scalers:
      ecog_scalers.append(scalers)
    normalized_data.append(norm_dataset)
  if keep_scalers:
    scalers_per_feature = len(ecog_scalers[0]) if ecog_scalers else 0
    print(f'Saving ECoG Scalers: Rows={len(ecog_scalers)}; Cols={scalers_per_feature}')
    # Sanity checks derived from the input instead of the fixed 4 x 8 grid:
    # one scaler row per feature, every row covering the same sequences.
    assert len(ecog_scalers) == len(tensor_data)
    assert all(len(row) == scalers_per_feature for row in ecog_scalers)
    joblib.dump(ecog_scalers,os.path.join(dir_loc,f"ecog_feature_scalers_{norm_key}.save"))
  print(f'Finished Normalizing {key}')
  return normalized_data

def define_lengths(proportions, data_len, feature_len):
  """
  Turn split proportions into integer section sizes for two lengths.

  Each size is int(length * proportion). When truncation makes the sizes
  miss the full length, the last section is adjusted so that the sizes
  add up to the length exactly.

  Returns:
    [sizes for data_len, sizes for feature_len].
  """
  print(f'\tFinding Splits for lengths {data_len} and {feature_len}...',end='')
  totals = [data_len, feature_len]
  split_nums = [[int(total * prop) for prop in proportions] for total in totals]
  for i, (total, parts) in enumerate(zip(totals, split_nums)):
    summed_parts = sum(parts)
    if summed_parts != total:
      print(f'Adjusting for Length {i}...',end='')
      # The last section receives whatever the other sections leave over.
      parts[-1] = total - (summed_parts - parts[-1])
      assert sum(parts) == total
  print('Determined Lengths: ')
  print(f'\tLong Seq: {split_nums[0]}')
  print(f'\tShort Seq: {split_nums[1]}')
  print('Finished')
  return split_nums
  
def divide_tensors(tensor_list,splits, key, feature_set):
  """
  Split every tensor along dimension 1 into the given section sizes.

  Args:
    tensor_list: tensors to split; the first one uses splits[0], all
      others use splits[1].
    splits: [sizes for the first tensor, sizes for the remaining tensors].
    key: label used in the progress message.
    feature_set: one name per tensor, used in the progress message.

  Returns:
    List with one tuple of sections (from torch.split) per input tensor.
  """
  assert len(feature_set) == len(tensor_list)
  list_of_split_tensor = []
  for dataset_id, (feature_name, dataset) in enumerate(zip(feature_set, tensor_list)):
    section_sizes = splits[0] if dataset_id == 0 else splits[1]
    sections = torch.split(dataset, section_sizes, dim=1)
    section_shapes = [section.shape for section in sections]
    print(f'\tSplit {key} Tensor {feature_name} into {section_shapes}')
    list_of_split_tensor.append(sections)
  print('Finished')
  return list_of_split_tensor

def normalize_subdriver(current_directory,checkpt2_keys,load_file_names,checkpt3_keys,subdir_names,norm_key,my_range = (-1,1)):
  """
  Load, normalize, split and save each requested dataset.

  For every dataset the feature list is loaded from checkpoint2 and
  normalized, then both the original and the normalized tensors are split
  70/20/10 along dimension 1 and saved under checkpoint3.

  Args:
    current_directory: base directory of the checkpoints.
    checkpt2_keys: dataset labels (e.g. "EEG", "ECoG").
    load_file_names: checkpoint2 file name for each dataset.
    checkpt3_keys, subdir_names: output labels and sub-directories laid
      out as [original for each dataset..., normalized for each dataset...].
    norm_key: range suffix forwarded to normalize_dataset.
    my_range: normalization range.
  """
  features = ["data","power", "mean", "zero"]
  assert len(checkpt2_keys) == len(load_file_names)
  n_datasets = len(checkpt2_keys)
  # Two output labels (original + normalized) are needed per dataset.
  assert len(checkpt3_keys) >= 2 * n_datasets and len(subdir_names) >= 2 * n_datasets
  for dataset_ind in range(n_datasets):
    mod_list = load_dataset(current_directory,"checkpoint2",load_file_names[dataset_ind],checkpt2_keys[dataset_ind])
    norm_list = normalize_dataset(current_directory,"checkpoint2",checkpt2_keys[dataset_ind],mod_list,norm_key,def_range=my_range,debug_top=False)
    print(f'Dividing {checkpt2_keys[dataset_ind]} into Training,Validation, Test Sets')
    split_lengths = define_lengths([0.7,0.2,0.1],norm_list[0].size(1), norm_list[1].size(1))
    stages = [mod_list,norm_list]
    for stage_id, stage_tensors in enumerate(stages):
      # Select the label of THIS dataset within the [orig..., norm...]
      # layout. Indexing by stage alone stored normalized EEG under the
      # ECoG_Orig label whenever two datasets were processed.
      label_ind = stage_id * n_datasets + dataset_ind
      print(f'Saving {subdir_names[label_ind]}...')
      divided_list = divide_tensors(stage_tensors,split_lengths,checkpt3_keys[label_ind],features)
      save_datasets(current_directory,["checkpoint3",subdir_names[label_ind]],features,divided_list,checkpt3_keys[label_ind])
  
def normalize_driver(current_directory, dataset_ind=-1, def_range = (-1,1), checkpoint3 = True):
  """
  Entry point of the normalization step.

  Args:
    current_directory: base directory of the checkpoints.
    dataset_ind: 0 for EEG only, 1 for ECoG only, negative for both.
    def_range: normalization range; its lower bound must be -1 or 0 and
      selects the '11' / '01' suffix used in file and directory names.
    checkpoint3: currently unused.

  Raises:
    ValueError: when the lower bound of def_range is neither -1 nor 0.
  """
  load_file_names = ["mod_eeg_features_list.pt","mod_ecog_features_list.pt"]
  keys = ["EEG","ECoG"]
  if def_range[0] == -1:
    norm_key='11'
  elif def_range[0] == 0:
    norm_key='01'
  else:
    # Fail with a clear message instead of an unbound norm_key further down.
    raise ValueError(f'Unsupported normalization range {def_range}: lower bound must be -1 or 0')
  if dataset_ind >= 0:
    assert dataset_ind < len(keys)
    checkpt2_keys = [keys[dataset_ind]]
    files_load = [load_file_names[dataset_ind]]
    checkpt3_keys = [f"{checkpt2_keys[0]}_Orig_{norm_key}",f"{checkpt2_keys[0]}_Norm_{norm_key}"]
    subdir_names = [f"divided_{checkpt2_keys[0].lower()}_{norm_key}",f"normalized_{checkpt2_keys[0].lower()}_{norm_key}"]
  else:
    checkpt2_keys = keys
    files_load = load_file_names
    checkpt3_keys = [f"EEG_Orig_{norm_key}",f"ECoG_Orig_{norm_key}",f"EEG_Norm_{norm_key}",f"ECoG_Norm_{norm_key}"]
    subdir_names = [f"divided_eeg_{norm_key}",f"divided_ecog_{norm_key}",f"normalized_eeg_{norm_key}",f"normalized_ecog_{norm_key}"]
  normalize_subdriver(current_directory,checkpt2_keys,files_load,checkpt3_keys,subdir_names,norm_key,my_range=def_range)

In [None]:
# Normalize only the EEG dataset (dataset_ind=0) into the (0, 1) range; the
# input is read from mod_eeg_features_list.pt in checkpoint2.
normalize_driver("/content/gdrive/MyDrive/BCI_Project/Datasets",dataset_ind=0,def_range=(0,1))

Beginning to Normalize EEG Data in Range (0, 1); Key=01
	Normalizing EEG's data: (8, 316689, 19)
		Normalized Shape=torch.Size([8, 316689, 19])
	Normalizing EEG's power: (8, 31668, 19)
		Normalized Shape=torch.Size([8, 31668, 19])
	Normalizing EEG's mean: (8, 31668, 19)
		Normalized Shape=torch.Size([8, 31668, 19])
	Normalizing EEG's zero: (8, 31668, 19)
		Normalized Shape=torch.Size([8, 31668, 19])
Finished Normalizing EEG
Dividing EEG into Training,Validation, Test Sets
	Finding Splits for lengths 316689 and 31668...Adjusting for Length 0...Adjusting for Length 1...Determined Lengths: 
	Long Seq: [221682, 63337, 31670]
	Short Seq: [22167, 6333, 3168]
Finished
Saving divided_eeg_01...
	Split EEG_Orig_01 Tensor data into [torch.Size([8, 221682, 19]), torch.Size([8, 63337, 19]), torch.Size([8, 31670, 19])]
	Split EEG_Orig_01 Tensor power into [torch.Size([8, 22167, 19]), torch.Size([8, 6333, 19]), torch.Size([8, 3168, 19])]
	Split EEG_Orig_01 Tensor mean into [torch.Size([8, 22167, 19])