<a href="https://colab.research.google.com/github/laurenneal/capstone-visual-neuroscience/blob/Lauren/Feature_Engineering_from_Raw_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import sys
from numpy.ma.core import ceil, floor
from more_itertools import sliced

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# FUNCTIONS (Just for setting up functions, not running code)
This section contains functions that can be used on datasets for feature engineering

## Train Test Split on Stacks

In [None]:
def trainTestSplit(df, splt = [.7,.2,.1]):
  # splits on roi, rois accross stacks will have the same group (important if stacks are from the same video)
  # split is the train test valuidation split (proportions given in that order)
  if sum(splt) < 0.9999 or sum(splt) > 1.0001:
    sys.exit('Splt must add to 1')
  
  num_rois = len(df.index.unique(level='roi'))

  tr_ind = int(ceil(num_rois*splt[0]))
  ts_ind = int(ceil(num_rois*splt[1])) + tr_ind

  ar = np.arange(num_rois)
  np.random.shuffle(ar)
  ar = ar+1 # roi id's start from 1

  train = ar[:tr_ind]
  test = ar[tr_ind:ts_ind]
  val = ar[ts_ind:]

  for i in train:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'train'

  for i in test:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'test'

  for i in val:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'validate'

  return(df)

In [None]:
# df_temp = trainTestSplit(raw_df,splt=[.5,.3,.3]) # should give error
df_temp = trainTestSplit(raw_df,splt=[.5,.3,.2]) # need to pull in sample data
df_temp.sample(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,training
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
9,tm1,7,5083,210815_0_9_stackRaw_mc_mix1_syt_result_2022021...,0.128334,0.250706,165.740607,0.082373,train
5,tm2,10,4188,210815_0_5_stackRaw_mc_mix1_syt_result_2022021...,0.609664,-0.08391,-0.541267,0.050309,test
9,tm2,4,1616,210815_0_9_stackRaw_mc_mix1_syt_result_2022021...,-0.138847,0.252253,-90.414106,0.040229,train
10,t5,12,2259,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,-0.003006,-0.108252,-26.373534,0.024327,train
3,tm4,9,2677,210815_0_3_stackRaw_mc_mix1_syt_result_2022021...,0.22833,0.528554,100.160078,0.058583,test
10,tm4,15,2142,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,-0.178556,0.414275,0.0,0.0,test
7,t5,13,4342,210815_0_7_stackRaw_mc_mix1_syt_result_2022021...,0.034298,0.069427,-102.343741,0.038521,train
4,tm1,7,2294,210815_0_4_stackRaw_mc_mix1_syt_result_2022021...,0.675795,-0.067587,56.443104,0.009296,train
5,t5,13,2891,210815_0_5_stackRaw_mc_mix1_syt_result_2022021...,-0.077628,-0.11301,-106.558525,0.04597,train
10,t5,12,3215,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.030358,-0.156187,-84.624545,0.005174,train


## Combine Frames into Temporal Chunks - NOT Working

In [None]:
# select frame size
temporal_period_length = 200

In [None]:

def groupFrames(df, period):
  # inputs are data frame and temporal period length
  # df needs to be sorted correctly, the indexing should take care of that

  # check that all rois are the same size
  # if this is a problem we can write a more computationally heavy workthrough
  if (raw_df.groupby(['stack','roi']).count()['filename'].max() != raw_df.groupby(['stack','roi']).count()['filename'].min()):
    sys.exit('ROIs need to be the same length')

  num_stacks = len(df.index.unique(level='stack'))
  num_roi = len(df.index.unique(level='roi'))

  # get first label
  lbl_1 = raw_df.head(1).index.values[0][1]
  # use it to find the number of frames in a roi
  num_frames = raw_df.loc[(1,lbl_1,1),'filename'].count()
  # how many groups will ther be in each roi?
  num_pds = int(floor(num_frames/period))
  # how many left over
  remainder_pds = num_frames%period

  # iterate a column for one roi
  lst = [[li + 1] * period for li in range(0,num_pds)]
  lst = [li for sublist in lst for li in sublist] # https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-a-list-of-lists
  lst = lst + [lst[-1] + 1] * remainder_pds
  lst

  new_col = np.array(lst * num_stacks * num_roi)
  
  df['frame_group'] = new_col

  return(df)

In [None]:
# df_temp2 = groupFrames(raw_df,temporal_period_length)
# df_temp2

## Stim 1 feature engineering

In [None]:
from operator import setitem
def stimResponse(df, stim, response, zero_point = 0):
  """
  Takes a df, the name of the col holding the stimulus, the name of the col holding the response, and the value to use as the zero point

  Returns the df with the relationship type and the relation value added

  A different function will calculate the relationship between time periods
  """


  # takes a stim column and response column and returns two new feature column
  # PR is Positive Response
  # NR is Negative Response
  # PS is Positive Stimulus
  # NS is Negative Stimulus
  df_in = df.copy()
  #
  PR = df_in[response] > zero_point
  NR = df_in[response] <= zero_point
  PS = df_in[stim] > zero_point
  NS = df_in[stim] <= zero_point

  PR = PR.rename('PR')
  NR = NR.rename('NR')
  PS = PS.rename('PS')
  NS = NS.rename('NS')

  # combine into df
  df = pd.concat([df_in[stim], df_in[response], PR,NR,PS,NS], axis=1)
  #set the data types back to boolean
  df = df.astype({'PR': bool,
                'PS': bool,
                'NR': bool,
                'PR': bool})

  # get min and max values for normalizing
  S_min = df[stim].min()
  S_max = df[stim].max()
  R_min = df[response].min()
  R_max = df[response].max()

  #add a column normalizing the stim and response
  df['stim_norm'] = df[stim].apply(lambda x: x/S_max if x>0 else x/S_min)
  df['resp_norm'] = df[response].apply(lambda x: x/R_max if x>0 else x/R_min)


  # multiply stim and resp
  df['relation'] = df['stim_norm'] * df['resp_norm']

  #Set a new column with the category each row's relationship falls under
  df.loc[(df['PR'] & df['PS']),'relation_type'] = 'PRPS'
  df.loc[(df['NR'] & df['NS']),'relation_type'] = 'NRNS'
  df.loc[(df['PR'] & df['NS']),'relation_type'] = 'PRNS'
  df.loc[(df['NR'] & df['PS']),'relation_type'] = 'NRPS'

  #UNCOMMENT this to create the len-4 matrix for the current time period directly

  # #one-hot encode the four categories
  # df = pd.get_dummies(df, columns=['relation_type'], prefix = stim)

  # #get a list of the columns created
  # dummy_cols = [col for col in df.columns if 'relation_type_' in col]

  # #multiply the dummy columns by the relation value to distribute the value to the appropriate column
  # for col in dummy_cols:
  #   df[col] = df[col] * df['relation']
  
  df_in['relation_type_' + stim] = df['relation_type']
  df_in['relation_' + stim] = df['relation']
  return(df_in)

In [None]:
#pass in the raw df, the name of the stimulus we want to use, the response column, and the zero-value (in this case zero)
df_temp_3 = stimResponse(raw_df, 'stim1', 'resp', 0)

In [None]:
df_temp_3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,training,relation_type_stim1,relation_stim1
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,tm2,1,0,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,-0.106575,-0.395877,160.44796,0.110063,train,NRNS,0.044529
1,tm2,1,1,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.244535,-0.395877,0.0,0.0,train,PRNS,0.023493
1,tm2,1,2,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.398458,-0.395877,0.0,0.0,train,PRNS,0.03828
1,tm2,1,3,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.61929,-0.138699,170.252676,0.03079,train,PRNS,0.020845
1,tm2,1,4,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.627615,0.890013,-62.485104,0.122134,train,PRPS,0.13475


In [None]:
def get_future_stim_and_relationship(df_in, stim, time_window): 
  """
  takes a df, the stimulus name we're comparing, and the time window we're looking to in the future (in frames)

  returns a new df with two extra columns containing the relationship type and the value at the future time period
  """

  #make copy of the df to avoid editing the one in memory
  df = df_in.copy()

  #procedurally generate the column names we need to reference
  current_relation_colname = 'relation_'+stim
  current_relation_type_colname = 'relation_type_'+stim

  #and the column names we're creating
  future_relation_colname = 'relation_'+stim+'_+'+str(time_window)
  future_relation_type_colname = 'relation_type_'+stim+'_+'+str(time_window)

  #reset the index so that we can join the new columns later
  df = df.reset_index()


  # for each of the new columns, do four steps

  #get the existing array of relation values or relation types

  #cut off the first x values, where x is the time window we want to look in the future (done by indexing [time_window:])
        #this will drop the time x values in the future to line up with the current time

  #pad the end of the array with zeros by the same number of values so that the lengths still match (done by concatenating with np.zeros(time_window))

  #set the new array as the new 'future' column
  df[future_relation_colname] = pd.concat([df[current_relation_colname] \
                                           .reset_index(drop=True)[time_window:], \
                                           pd.Series((np.zeros(time_window)))], ignore_index = True)
  
  #repeat for the relation type
  df[future_relation_type_colname] = pd.concat([df[current_relation_type_colname] \
                                           .reset_index(drop=True)[time_window:], \
                                           pd.Series((np.zeros(time_window)))], ignore_index = True)

  #need to stop the end of one stack from referencing the beginning of another

  #get the index of any row where the frame # is high enough that x-frames in the future would run into another stack
  #by checking that the frame number is greater than the max frame number - (time window-1) (corrects for index number)
  end_of_stack_index = df.loc[df['frame'] >= max(df['frame'])-(time_window-1)].index

  #use .loc to set the two new columns to None for those rows
  df.loc[end_of_stack_index, future_relation_colname] = None
  df.loc[end_of_stack_index, future_relation_type_colname] = None

  #set the index back to the way it was
  df = df.set_index(['stack','label','roi','frame'])

  return df

In [None]:
#check that the values match the records 10 in the future and that the last 10 rows in each stack are null for future values
df_temp_4 = get_future_stim_and_relationship(df_temp_3, 'stim1', 10)
df_temp_4.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,training,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10,tm4,15,5483,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.410775,-0.92895,0.0,0.0,test,PRNS,0.092603,0.000778,PRNS
10,tm4,15,5484,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.597176,-0.92895,0.0,0.0,test,PRNS,0.134624,0.034858,PRPS
10,tm4,15,5485,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.691033,-0.385567,127.344289,0.04152338,test,PRNS,0.064659,0.030826,PRPS
10,tm4,15,5486,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.783472,0.157816,102.290206,0.0503442,test,PRPS,0.029827,0.026053,PRPS
10,tm4,15,5487,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.6817,0.157816,0.0,0.0,test,PRPS,0.025953,0.003851,NRNS
10,tm4,15,5488,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.645437,0.157816,0.0,0.0,test,PRPS,0.024572,0.009391,PRNS
10,tm4,15,5489,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.621711,-0.099867,-62.608303,0.008519235,test,PRNS,0.015067,0.002789,PRNS
10,tm4,15,5490,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.477412,-0.35755,66.36129,0.0074747,test,PRNS,0.041425,0.030966,PRNS
10,tm4,15,5491,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.302191,-0.35755,14.624876,0.0001330373,test,PRNS,0.026221,0.020704,PRNS
10,tm4,15,5492,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.278669,-0.345466,-12.361882,0.001833777,test,PRNS,0.023363,0.031913,PRNS


In [None]:
def current_future_relationship(df, stim, time_window):
  """
  Takes df with a column for stim and a column for stim + a given time window. Returns the relationship.
  """
  # column names for current row
  current_relation_colname = 'relation_'+stim
  current_relation_type_colname = 'relation_type_'+stim

  # column names for rows in the future
  future_relation_colname = 'relation_'+stim+'_+'+str(time_window)
  future_relation_type_colname = 'relation_type_'+stim+'_+'+str(time_window)

  #with this approach, we need to drop the last x frames from each stack, so dropna
  df = df.dropna()

  #make a new column with the combo of current relation type and future relation type
  df['full_relation_type'] = df[current_relation_type_colname] + '-' + df[future_relation_type_colname]

  #one-hot encode the overall relationship, then multiply the new columns by the product of the relation values
  df = pd.get_dummies(df, columns=['full_relation_type'], prefix = current_relation_colname)

  #get a list of the columns created
  dummy_cols = [col for col in df.columns if current_relation_colname in col]

  #multiply the dummy columns by the relation value to distribute the value to the appropriate column
  for col in dummy_cols:
    df[col] = df[col] * (df[current_relation_colname]*df[future_relation_colname])

  return df

In [None]:
df = df_temp_4

In [None]:
# # #make a new column with the combo of current relation type and future relation type
# df['full_relation_type'] = df['relation_type_stim1'] + '-' + df['relation_type_stim1_+10']
# df.head()

In [None]:
# #one-hot encode the overall relationship, then multiply the new columns by the product of the relation values

# df = pd.get_dummies(df, columns=['full_relation_type'], prefix = 'relation_stim1')

# #get a list of the columns created
# dummy_cols = [col for col in df.columns if 'relation_stim1' in col]

# #multiply the dummy columns by the relation value to distribute the value to the appropriate column
# for col in dummy_cols:
#   df[col] = df[col] * (df['relation_stim1']*df['relation_stim1_+10'])

# df.head(10) 

In [None]:
# df.to_csv('drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/210815_0_20220213T070259_sparse_stim1_draft.csv')

In [None]:
df_temp_5 = current_future_relationship(df_temp_4, 'stim1', 10)
df_temp_5.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,training,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm2,1,0,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,-0.106575,-0.395877,160.44796,0.110063,train,NRNS,1e-06,3.629198e-13,PRNS,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,tm2,1,1,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.244535,-0.395877,0.0,0.0,train,PRNS,2e-05,2.510677e-08,PRPS,...,0.0,0.0,0.0,0.0,0.0,4.946459e-13,0.0,0.0,0.0,0.0
1,tm2,1,2,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.398458,-0.395877,0.0,0.0,train,PRNS,0.000188,3.087067e-06,PRPS,...,0.0,0.0,0.0,0.0,0.0,5.799064e-10,0.0,0.0,0.0,0.0
1,tm2,1,3,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.61929,-0.138699,170.252676,0.03079,train,PRNS,4.2e-05,3.889907e-07,PRPS,...,0.0,0.0,0.0,0.0,0.0,1.628974e-11,0.0,0.0,0.0,0.0
1,tm2,1,4,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.627615,0.890013,-62.485104,0.122134,train,PRPS,0.001595,1.23045e-05,PRPS,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.962413e-08


In [None]:
! ls drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/

# RUN FUNCTIONS ON DATA

In [None]:
# raw_df = pd.read_csv('drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/210815_0__20220213T070259RAW_stimulus_data.csv', index_col=['stack','label','roi','frame']).drop(columns='Unnamed: 0')
# raw_df

In [None]:
# get all raw DF's
from os import listdir
root = 'drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/'
raw_fnames = listdir(root)
raw_fnames = [x for x in raw_fnames if 'RAW_stimulus' in x]
raw_fnames

['210815_0__20220213T070259RAW_stimulus_data.csv',
 '210816_0_20220304T112124RAW_stimulus_data.csv',
 '210816_1_20220304T113821RAW_stimulus_data.csv',
 '210728_0_20220304T003321RAW_stimulus_data.csv',
 '210731_0_20220304T005413RAW_stimulus_data.csv']

In [None]:
# Dictionary of df's. One for each video. You can call each df my name
raw_dfs = {}
for p in raw_fnames:
  df = pd.read_csv(root + p, index_col=['stack','label','roi','frame']).drop(columns='Unnamed: 0')
  raw_dfs[p] = df
raw_dfs[raw_fnames[-1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,t5,1,0,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.713073,-0.388456,-178.575598,0.076285
1,t5,1,1,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.540231,-0.388456,0.000000,0.000000
1,t5,1,2,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.612379,-0.388456,0.000000,0.000000
1,t5,1,3,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.690993,0.342613,74.915548,0.049884
1,t5,1,4,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.564123,0.829992,45.791075,0.076806
...,...,...,...,...,...,...,...,...
11,t5,7,5508,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.306624,0.240081,0.000000,0.000000
11,t5,7,5509,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.209693,0.483895,-98.917787,0.026947
11,t5,7,5510,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.594176,0.581421,-61.715702,0.009356
11,t5,7,5511,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.361334,0.581421,0.000000,0.000000


In [None]:
raw_dfs.keys()

dict_keys(['210815_0__20220213T070259RAW_stimulus_data.csv', '210816_0_20220304T112124RAW_stimulus_data.csv', '210816_1_20220304T113821RAW_stimulus_data.csv', '210728_0_20220304T003321RAW_stimulus_data.csv', '210731_0_20220304T005413RAW_stimulus_data.csv'])

## For each df, run through pipeline

In [None]:
stim = 'stim1'
response = 'resp'
time_window = 10
for p in raw_fnames:
  df = raw_dfs[p]
  df = stimResponse(df, stim, response, zero_point = 0)
  df = get_future_stim_and_relationship(df, stim, time_window)
  df = current_future_relationship(df, stim, time_window)
  raw_dfs[p] = df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
raw_dfs[raw_fnames[0]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,relation_stim1_NRNS-NRNS,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm2,1,0,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,-0.106575,-0.395877,160.447960,1.100629e-01,NRNS,7.379508e-08,1.989011e-15,PRNS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,tm2,1,1,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.244535,-0.395877,0.000000,0.000000e+00,PRNS,4.763116e-07,5.075006e-11,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,2.417284e-17,0.0,0.0,0.0,0.000000e+00
1,tm2,1,2,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.398458,-0.395877,0.000000,0.000000e+00,PRNS,4.541501e-06,6.240104e-09,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,2.833944e-14,0.0,0.0,0.0,0.000000e+00
1,tm2,1,3,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.619290,-0.138699,170.252676,3.079016e-02,PRNS,1.012424e-06,7.862941e-10,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,7.960630e-16,0.0,0.0,0.0,0.000000e+00
1,tm2,1,4,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.627615,0.890013,-62.485104,1.221339e-01,PRPS,3.855797e-05,2.487194e-08,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,9.590115e-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,tm4,15,5498,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.072787,-0.531665,-140.057655,3.599874e-02,PRNS,3.389000e-08,7.158503e-13,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,2.426017e-20,0.0,0.0,0.0,0.000000e+00
10,tm4,15,5499,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.021619,-0.531665,81.091849,9.244374e-08,PRNS,5.524556e-09,3.984088e-13,PRNS,0.0,...,0.0,0.0,0.0,0.0,2.201031e-21,0.000000e+00,0.0,0.0,0.0,0.000000e+00
10,tm4,15,5500,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.240001,-0.531665,0.000000,0.000000e+00,PRNS,6.760862e-07,4.808052e-11,PRNS,0.0,...,0.0,0.0,0.0,0.0,3.250657e-17,0.000000e+00,0.0,0.0,0.0,0.000000e+00
10,tm4,15,5501,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.191335,-0.445888,-136.187404,9.534815e-03,PRNS,2.377727e-07,1.046580e-11,PRNS,0.0,...,0.0,0.0,0.0,0.0,2.488482e-18,0.000000e+00,0.0,0.0,0.0,0.000000e+00


In [None]:
raw_dfs[raw_fnames[1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,relation_stim1_NRNS-NRNS,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm2,1,0,210816_0_1_stackRaw_mc_result_20220304T112124.h5,-0.326919,-0.308124,-89.396360,0.136451,NRNS,3.932784e-04,1.192360e-06,NRNS,4.689294e-10,...,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0
1,tm2,1,1,210816_0_1_stackRaw_mc_result_20220304T112124.h5,-0.207673,-0.308124,0.000000,0.000000,NRNS,9.050242e-05,8.923287e-08,NRPS,0.000000e+00,...,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0
1,tm2,1,2,210816_0_1_stackRaw_mc_result_20220304T112124.h5,-0.135514,-0.308124,0.000000,0.000000,NRNS,5.765411e-05,1.272394e-07,NRPS,0.000000e+00,...,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0
1,tm2,1,3,210816_0_1_stackRaw_mc_result_20220304T112124.h5,-0.299841,-0.020208,159.307519,0.018253,NRNS,1.056250e-06,1.764626e-09,NRPS,0.000000e+00,...,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0
1,tm2,1,4,210816_0_1_stackRaw_mc_result_20220304T112124.h5,-0.127388,0.267709,115.439141,0.071810,NRPS,3.287368e-05,5.406300e-08,NRPS,0.000000e+00,...,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,tm2,12,5498,210816_0_10_stackRaw_mc_result_20220304T112124.h5,0.852713,-0.146144,-26.531723,0.014386,PRNS,3.772792e-05,2.523875e-07,NRPS,0.000000e+00,...,0.0,0.0,0.0,9.522057e-12,0.0,0.0,0.0,0.0,0.0,0.0
10,tm2,12,5499,210816_0_10_stackRaw_mc_result_20220304T112124.h5,0.361943,-0.146144,107.561423,0.000018,PRNS,1.219673e-06,2.627008e-10,NRPS,0.000000e+00,...,0.0,0.0,0.0,3.204092e-16,0.0,0.0,0.0,0.0,0.0,0.0
10,tm2,12,5500,210816_0_10_stackRaw_mc_result_20220304T112124.h5,0.189078,-0.146144,32.019898,0.000015,PRNS,9.083170e-07,1.456939e-09,NRPS,0.000000e+00,...,0.0,0.0,0.0,1.323363e-15,0.0,0.0,0.0,0.0,0.0,0.0
10,tm2,12,5501,210816_0_10_stackRaw_mc_result_20220304T112124.h5,0.042800,-0.292261,120.708901,0.020317,PRNS,1.136693e-07,6.799551e-11,NRPS,0.000000e+00,...,0.0,0.0,0.0,7.729000e-18,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
raw_dfs[raw_fnames[2]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,relation_stim1_NRNS-NRNS,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm9,1,0,210816_1_1_stackRaw_mc_result_20220304T113821.h5,0.262507,-0.317137,-66.979573,0.150225,PRNS,1.922948e-06,2.835790e-11,PRNS,0.000000e+00,...,0.0,0.0,0.000000e+00,0.000000e+00,5.453075e-17,0.000000e+00,0.0,0.0,0.0,0.0
1,tm9,1,1,210816_1_1_stackRaw_mc_result_20220304T113821.h5,0.259463,-0.317137,0.000000,0.000000,PRNS,8.043972e-08,2.174934e-15,NRPS,0.000000e+00,...,0.0,0.0,0.000000e+00,1.749511e-22,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
1,tm9,1,2,210816_1_1_stackRaw_mc_result_20220304T113821.h5,0.185675,-0.317137,0.000000,0.000000,PRNS,1.807876e-07,9.415279e-14,PRPS,0.000000e+00,...,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,1.702166e-20,0.0,0.0,0.0,0.0
1,tm9,1,3,210816_1_1_stackRaw_mc_result_20220304T113821.h5,0.195478,-0.306574,-169.115598,0.003716,PRNS,1.648514e-06,6.653735e-11,PRPS,0.000000e+00,...,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,1.096878e-16,0.0,0.0,0.0,0.0
1,tm9,1,4,210816_1_1_stackRaw_mc_result_20220304T113821.h5,0.123776,-0.296012,163.928414,0.007392,PRNS,1.759987e-06,5.795208e-10,NRPS,0.000000e+00,...,0.0,0.0,0.000000e+00,1.019949e-15,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,tm1,10,5498,210816_1_10_stackRaw_mc_result_20220304T113821.h5,0.380766,-0.275614,-54.001499,0.023867,PRNS,4.137477e-05,1.118648e-07,NRNS,0.000000e+00,...,0.0,0.0,4.628379e-12,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
10,tm1,10,5499,210816_1_10_stackRaw_mc_result_20220304T113821.h5,0.135014,-0.275614,0.000000,0.000000,PRNS,8.867442e-07,6.966195e-11,NRNS,0.000000e+00,...,0.0,0.0,6.177233e-17,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
10,tm1,10,5500,210816_1_10_stackRaw_mc_result_20220304T113821.h5,0.218645,-0.275614,0.000000,0.000000,PRNS,4.824334e-08,1.631072e-15,NRNS,0.000000e+00,...,0.0,0.0,7.868838e-23,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0
10,tm1,10,5501,210816_1_10_stackRaw_mc_result_20220304T113821.h5,-0.019655,-0.130126,125.837828,0.013383,NRNS,1.882561e-08,5.208446e-12,NRNS,9.805217e-20,...,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0


In [None]:
raw_dfs[raw_fnames[3]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,relation_stim1_NRNS-NRNS,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm9,1,0,210728_0_1_stackRaw_mc_tm2_tm9_syt_result_2022...,-0.021039,0.600920,-110.553431,0.333161,NRPS,3.849098e-08,7.779102e-15,PRNS,0.0,...,2.994253e-22,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00
1,tm9,1,1,210728_0_1_stackRaw_mc_tm2_tm9_syt_result_2022...,-0.041107,0.239804,16.216478,0.033097,NRPS,6.516208e-08,1.021142e-13,NRNS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00
1,tm9,1,2,210728_0_1_stackRaw_mc_tm2_tm9_syt_result_2022...,-0.042191,-0.382424,111.599505,0.052631,NRNS,5.614675e-08,9.102391e-15,PRPS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00
1,tm9,1,3,210728_0_1_stackRaw_mc_tm2_tm9_syt_result_2022...,-0.008145,-0.450328,84.015116,0.007321,NRNS,3.736903e-08,1.004772e-12,PRNS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00
1,tm9,1,4,210728_0_1_stackRaw_mc_tm2_tm9_syt_result_2022...,0.000000,-0.408975,135.214141,0.012863,NRNS,0.000000e+00,0.000000e+00,PRPS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,tm4,12,5498,210728_0_9_stackRaw_mc_tm2_tm9_syt_result_2022...,0.304771,0.482281,-124.617985,0.029550,PRPS,3.606231e-05,6.716463e-09,PRNS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,2.422112e-13,0.000000e+00
9,tm4,12,5499,210728_0_9_stackRaw_mc_tm2_tm9_syt_result_2022...,0.572298,0.613607,-3.163166,0.001918,PRPS,5.428081e-05,7.030173e-10,PRPS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,3.816035e-14
9,tm4,12,5500,210728_0_9_stackRaw_mc_tm2_tm9_syt_result_2022...,0.460452,0.332815,-95.416102,0.015904,PRPS,3.546345e-06,5.405920e-12,PRPS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,1.917126e-17
9,tm4,12,5501,210728_0_9_stackRaw_mc_tm2_tm9_syt_result_2022...,0.626407,0.016469,136.837402,0.043392,PRPS,5.749383e-08,1.121622e-12,PRNS,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,6.448637e-20,0.000000e+00


In [None]:
raw_dfs[raw_fnames[4]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,relation_stim1_NRNS-NRNS,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,t5,1,0,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.713073,-0.388456,-178.575598,0.076285,PRNS,2.269073e-04,1.770255e-06,PRNS,0.0,...,0.0,0.000000e+00,0.0,0.0,4.016837e-10,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,t5,1,1,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.540231,-0.388456,0.000000,0.000000,PRNS,6.462188e-05,1.241214e-07,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,8.020957e-12,0.0,0.0,0.0,0.000000e+00
1,t5,1,2,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.612379,-0.388456,0.000000,0.000000,PRNS,3.932846e-04,1.694598e-05,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,6.664593e-09,0.0,0.0,0.0,0.000000e+00
1,t5,1,3,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.690993,0.342613,74.915548,0.049884,PRPS,3.524002e-04,1.284749e-05,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,4.527459e-09
1,t5,1,4,210731_0_1_stackRaw_mc_mix2_syt_result_2022030...,0.564123,0.829992,45.791075,0.076806,PRPS,1.284561e-03,4.067193e-05,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,5.224557e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11,t5,7,5498,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.691645,0.202075,62.322086,0.038643,PRPS,8.592916e-06,1.533412e-09,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,1.317648e-14
11,t5,7,5499,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,-0.234363,0.203980,87.566001,0.005355,NRPS,2.592433e-06,8.789540e-10,PRPS,0.0,...,0.0,2.278630e-15,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.000000e+00
11,t5,7,5500,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.841037,0.201777,165.228125,0.005023,PRPS,5.945178e-05,2.336503e-07,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,1.389093e-11
11,t5,7,5501,210731_0_11_stackRaw_mc_mix2_syt_result_202203...,0.686215,-0.109233,156.838327,0.049430,PRNS,7.171723e-06,1.042349e-08,PRPS,0.0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,7.475436e-14,0.0,0.0,0.0,0.000000e+00


In [None]:
# train test split on movie (when saving out csv file)

## Remove outlier from 210815_0__20220213T070259RAW_stimulus_data.csv

In [None]:
#access the 11th roi in the first stack and drop all frames for that roi
raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv'] = raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv'].drop(raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv'].loc[1,:,11].index)
raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,relation_type_stim1,relation_stim1,relation_stim1_+10,relation_type_stim1_+10,relation_stim1_NRNS-NRNS,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm2,1,0,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,-0.106575,-0.395877,160.447960,1.100629e-01,NRNS,7.379508e-08,1.989011e-15,PRNS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,tm2,1,1,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.244535,-0.395877,0.000000,0.000000e+00,PRNS,4.763116e-07,5.075006e-11,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,2.417284e-17,0.0,0.0,0.0,0.000000e+00
1,tm2,1,2,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.398458,-0.395877,0.000000,0.000000e+00,PRNS,4.541501e-06,6.240104e-09,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,2.833944e-14,0.0,0.0,0.0,0.000000e+00
1,tm2,1,3,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.619290,-0.138699,170.252676,3.079016e-02,PRNS,1.012424e-06,7.862941e-10,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,7.960630e-16,0.0,0.0,0.0,0.000000e+00
1,tm2,1,4,210815_0_1_stackRaw_mc_mix1_syt_result_2022021...,0.627615,0.890013,-62.485104,1.221339e-01,PRPS,3.855797e-05,2.487194e-08,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,9.590115e-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,tm4,15,5498,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.072787,-0.531665,-140.057655,3.599874e-02,PRNS,3.389000e-08,7.158503e-13,PRPS,0.0,...,0.0,0.0,0.0,0.0,0.000000e+00,2.426017e-20,0.0,0.0,0.0,0.000000e+00
10,tm4,15,5499,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.021619,-0.531665,81.091849,9.244374e-08,PRNS,5.524556e-09,3.984088e-13,PRNS,0.0,...,0.0,0.0,0.0,0.0,2.201031e-21,0.000000e+00,0.0,0.0,0.0,0.000000e+00
10,tm4,15,5500,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.240001,-0.531665,0.000000,0.000000e+00,PRNS,6.760862e-07,4.808052e-11,PRNS,0.0,...,0.0,0.0,0.0,0.0,3.250657e-17,0.000000e+00,0.0,0.0,0.0,0.000000e+00
10,tm4,15,5501,210815_0_10_stackRaw_mc_mix1_syt_result_202202...,0.191335,-0.445888,-136.187404,9.534815e-03,PRNS,2.377727e-07,1.046580e-11,PRNS,0.0,...,0.0,0.0,0.0,0.0,2.488482e-18,0.000000e+00,0.0,0.0,0.0,0.000000e+00


## Save out DF's with train/test labels

In [None]:
# SET THESE VALUES EACH TIME

folder_name = '10_frames_stim1' # create a new folder name for saving out these files (make sure this folder already exists)
num_train = 3
num_test = 2
num_val = 0

In [None]:
arr = np.array(['train'] * num_train + ['test'] * num_test + ['val'] * num_val)
np.random.shuffle(arr)

# save out each DF with a label indicating train, test or validation
for i, p in enumerate(raw_fnames):
  p_split = p.split('.')[-2] # chop off csv
  
  raw_dfs[p].to_csv(root + folder_name + '/' + p_split + '_' + arr[i] + '.csv') #saves to new folder

In [None]:
listdir(root + folder_name)

['210815_0__20220213T070259RAW_stimulus_data_test.csv',
 '210816_0_20220304T112124RAW_stimulus_data_train.csv',
 '210816_1_20220304T113821RAW_stimulus_data_train.csv',
 '210728_0_20220304T003321RAW_stimulus_data_test.csv',
 '210731_0_20220304T005413RAW_stimulus_data_train.csv']