<a href="https://colab.research.google.com/github/laurenneal/capstone-visual-neuroscience/blob/main/Feature_Engineering_from_Raw_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [107]:
import pandas as pd
import numpy as np
import sys
from numpy.ma.core import ceil, floor
from more_itertools import sliced

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# FUNCTIONS (Just for setting up functions, not running code)
This section contains functions that can be used on datasets for feature engineering

## Train Test Split on Stacks

In [81]:
def trainTestSplit(df, splt = [.7,.2,.1]):
  # splits on roi, rois accross stacks will have the same group (important if stacks are from the same video)
  # split is the train test valuidation split (proportions given in that order)
  if sum(splt) < 0.9999 or sum(splt) > 1.0001:
    sys.exit('Splt must add to 1')
  
  num_rois = len(df.index.unique(level='roi'))

  tr_ind = int(ceil(num_rois*splt[0]))
  ts_ind = int(ceil(num_rois*splt[1])) + tr_ind

  ar = np.arange(num_rois)
  np.random.shuffle(ar)
  ar = ar+1 # roi id's start from 1

  train = ar[:tr_ind]
  test = ar[tr_ind:ts_ind]
  val = ar[ts_ind:]

  for i in train:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'train'

  for i in test:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'test'

  for i in val:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'validate'

  return(df)

## Combine Frames into Temporal Chunks - DO THIS LAST

In [98]:
def groupFrames(df, period):
  # inputs are data frame and number of periods per stack

  df = df.reset_index()
  # set a unique id for each roi stack combo
  df['long_id'] = df['movie_ID'] + '+' + df['stack'].astype('str') + '+' + df['roi'].astype('str')
  df = df.set_index('long_id')
  long_ids = df.index.unique()

  # for each unique roi we calculate a grouping index
  new_col = []
  for id in long_ids:
    # number of rows for each unique id
    num_rows = df.loc[id].shape[0]
    
    # how many frames per division to split the roi into number of periods
    frm_pd = int(floor(num_rows/period))

    # iterate a column for one roi
    lst = [[li + 1] * frm_pd for li in range(0,period)]
    lst = [li for sublist in lst for li in sublist] # https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-a-list-of-lists

    if len(lst) > num_rows:
      lst = lst[:num_rows] # remove some on the back if needed
    elif len(lst) < num_rows:
      lst = lst + [lst[-1]] * (num_rows - len(lst)) # add a few extra on the end if needed

    # checks to make sure the list has the correct number of rows
    if len(lst) != num_rows:
      sys.exit('Error 1: Somethings wrong with this code')

    new_col += lst

  # new col length should match the column length
  if len(new_col) != df.shape[0]:
    sys.exit('Error 2: Somethings wrong with this code')

  df['frame_group'] = new_col

  return(df)

## Stim 1 feature engineering

In [84]:
from operator import setitem
def stimResponse(df, stim, response, zero_point = 0):
  """
  Takes a df, the name of the col holding the stimulus, the name of the col holding the response, and the value to use as the zero point

  Returns the df with the relationship type and the relation value added

  A different function will calculate the relationship between time periods
  """


  # takes a stim column and response column and returns two new feature column
  # PR is Positive Response
  # NR is Negative Response
  # PS is Positive Stimulus
  # NS is Negative Stimulus
  df_in = df.copy()
  #
  PR = df_in[response] > zero_point
  NR = df_in[response] <= zero_point
  PS = df_in[stim] > zero_point
  NS = df_in[stim] <= zero_point

  PR = PR.rename('PR')
  NR = NR.rename('NR')
  PS = PS.rename('PS')
  NS = NS.rename('NS')

  # combine into df
  df = pd.concat([df_in[stim], df_in[response], PR,NR,PS,NS], axis=1)
  #set the data types back to boolean
  df = df.astype({'PR': bool,
                'PS': bool,
                'NR': bool,
                'PR': bool})

  # get min and max values for normalizing
  S_min = df[stim].min()
  S_max = df[stim].max()
  R_min = df[response].min()
  R_max = df[response].max()

  #add a column normalizing the stim and response
  df['stim_norm'] = df[stim].apply(lambda x: x/S_max if x>0 else x/S_min)
  df['resp_norm'] = df[response].apply(lambda x: x/R_max if x>0 else x/R_min)


  # multiply stim and resp
  df['relation'] = df['stim_norm'] * df['resp_norm']

  #Set a new column with the category each row's relationship falls under
  df.loc[(df['PR'] & df['PS']),'relation_type'] = 'PRPS'
  df.loc[(df['NR'] & df['NS']),'relation_type'] = 'NRNS'
  df.loc[(df['PR'] & df['NS']),'relation_type'] = 'PRNS'
  df.loc[(df['NR'] & df['PS']),'relation_type'] = 'NRPS'

  #UNCOMMENT this to create the len-4 matrix for the current time period directly

  # #one-hot encode the four categories
  # df = pd.get_dummies(df, columns=['relation_type'], prefix = stim)

  # #get a list of the columns created
  # dummy_cols = [col for col in df.columns if 'relation_type_' in col]

  # #multiply the dummy columns by the relation value to distribute the value to the appropriate column
  # for col in dummy_cols:
  #   df[col] = df[col] * df['relation']
  
  df_in['relation_type_' + stim] = df['relation_type']
  df_in['relation_' + stim] = df['relation']
  return(df_in)

In [85]:
def get_future_stim_and_relationship(df_in, stim, time_window): 
  """
  takes a df, the stimulus name we're comparing, and the time window we're looking to in the future (in frames)

  returns a new df with two extra columns containing the relationship type and the value at the future time period
  """

  #make copy of the df to avoid editing the one in memory
  df = df_in.copy()

  #procedurally generate the column names we need to reference
  current_relation_colname = 'relation_'+stim
  current_relation_type_colname = 'relation_type_'+stim

  #and the column names we're creating
  future_relation_colname = 'relation_'+stim+'_+'+str(time_window)
  future_relation_type_colname = 'relation_type_'+stim+'_+'+str(time_window)

  #reset the index so that we can join the new columns later
  df = df.reset_index()


  # for each of the new columns, do four steps

  #get the existing array of relation values or relation types

  #cut off the first x values, where x is the time window we want to look in the future (done by indexing [time_window:])
        #this will drop the time x values in the future to line up with the current time

  #pad the end of the array with zeros by the same number of values so that the lengths still match (done by concatenating with np.zeros(time_window))

  #set the new array as the new 'future' column
  df[future_relation_colname] = pd.concat([df[current_relation_colname] \
                                           .reset_index(drop=True)[time_window:], \
                                           pd.Series((np.zeros(time_window)))], ignore_index = True)
  
  #repeat for the relation type
  df[future_relation_type_colname] = pd.concat([df[current_relation_type_colname] \
                                           .reset_index(drop=True)[time_window:], \
                                           pd.Series((np.zeros(time_window)))], ignore_index = True)

  #need to stop the end of one stack from referencing the beginning of another

  #get the index of any row where the frame # is high enough that x-frames in the future would run into another stack
  #by checking that the frame number is greater than the max frame number - (time window-1) (corrects for index number)
  end_of_stack_index = df.loc[df['frame'] >= max(df['frame'])-(time_window-1)].index

  #use .loc to set the two new columns to None for those rows
  df.loc[end_of_stack_index, future_relation_colname] = None
  df.loc[end_of_stack_index, future_relation_type_colname] = None

  #set the index back to the way it was
  df = df.set_index(['stack','label','roi','frame'])

  return df

In [86]:
def current_future_relationship(df, stim, time_window):
  """
  Takes df with a column for stim and a column for stim + a given time window. Returns the relationship.
  """
  # column names for current row
  current_relation_colname = 'relation_'+stim
  current_relation_type_colname = 'relation_type_'+stim

  # column names for rows in the future
  future_relation_colname = 'relation_'+stim+'_+'+str(time_window)
  future_relation_type_colname = 'relation_type_'+stim+'_+'+str(time_window)

  #with this approach, we need to drop the last x frames from each stack, so dropna
  df = df.dropna()

  #make a new column with the combo of current relation type and future relation type
  df['full_relation_type'] = df[current_relation_type_colname] + '-' + df[future_relation_type_colname]

  #one-hot encode the overall relationship, then multiply the new columns by the product of the relation values
  df = pd.get_dummies(df, columns=['full_relation_type'], prefix = current_relation_colname)

  #get a list of the columns created
  dummy_cols = [col for col in df.columns if current_relation_colname in col]

  #multiply the dummy columns by the relation value to distribute the value to the appropriate column
  for col in dummy_cols:
    df[col] = df[col] * (df[current_relation_colname]*df[future_relation_colname])

  return df

In [87]:
def directionalResponse(df, stim, response):
  """
  Takes a df, the name of the col holding the direction of motion stimulus, the name of the col holding the response
  Returns the df with the stim broken out into four cols based on direction of motion

  A different function will calculate the relationship between response and direction
  """


  # add a column that categorizes the direction into four quadrants (segments can be changed)
  df_in = df.copy()

  #set conditions checking the direction of the stimulus - there has to be a better way but this is it for now
  conditions = [
    ((-45 <= df_in[stim]) & (df_in[stim] < 0)), #northwest
    ((0 < df_in[stim]) & (df_in[stim] < 45)), #northeast
    ((45 <= df_in[stim]) & (df_in[stim] < 135)), #east
    ((135 <= df_in[stim]) & (df_in[stim] <= 180)),#southeast
    ((-180 <= df_in[stim]) & (df_in[stim] < -135)), #southwest
    ((-135 <= df_in[stim]) & (df_in[stim] < -45)), #west
    (df_in[stim] == 0) #no movement
    ]

  #set the names we assign to those conditions
  values = ['up', 'up', 'right', 'down', 'down', 'left', 'no_motion']

  #add a column to the df holding the direction category
  df['direction'] = np.select(conditions, values)

  #one-hot encode the quadrants
  df = pd.get_dummies(df, columns=['direction'], prefix = 'direction')

  #get the names of the 4 direction columns we just added
  direction_cols = [col for col in df.columns if 'direction' in col]

  #TODO - TEST SOME OPTIONS AND SEE HOW WE WANT TO REPRESENT THE RESPONSE TO THESE
  #STOPPING HERE FOR NOW WITH JUST THE DIRECTION ENCODED

  return(df)

# RUN FUNCTIONS ON DATA

In [None]:
# raw_df = pd.read_csv('drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/210815_0__20220213T070259RAW_stimulus_data.csv', index_col=['stack','label','roi','frame']).drop(columns='Unnamed: 0')
# raw_df

In [108]:
# get all raw DF's
from os import listdir
root = 'drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/'
raw_fnames = listdir(root + 'pre-feature extract/')
raw_fnames = [x for x in raw_fnames if 'RAW_extracted_data' in x]
raw_fnames

['210809_2_RAW_extracted_data.csv',
 '210728_0_RAW_extracted_data.csv',
 '210802_0_RAW_extracted_data.csv',
 '210808_0_RAW_extracted_data.csv',
 '210731_0_RAW_extracted_data.csv',
 '210815_1_RAW_extracted_data.csv',
 '210816_0_RAW_extracted_data.csv',
 '210815_0_RAW_extracted_data.csv',
 '210816_1_RAW_extracted_data.csv']

In [122]:
# Dictionary of df's. One for each video. You can call each df my name
raw_dfs = {}
for p in raw_fnames:
  df = pd.read_csv(root + 'pre-feature extract/' + p, index_col=['stack','label','roi','frame']).drop(columns='Unnamed: 0')
  raw_dfs[p] = df
raw_dfs[raw_fnames[-1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,movie_ID,resp,stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,tm4,1,0,210816_1,0.153797,-2.195615,-1.954008,0.382234,0.0000,0.0,0.000699,0.000000,0.000000
1,tm4,1,1,210816_1,0.280574,-2.195615,0.777023,-0.001856,0.0000,0.0,-0.077014,0.000000,0.000000
1,tm4,1,2,210816_1,0.493612,-2.195615,0.777023,-0.001856,0.0000,0.0,-0.340523,0.000000,0.000000
1,tm4,1,3,210816_1,0.667897,-1.966966,-0.498707,1.864190,0.0000,0.0,-0.878509,0.000000,0.000000
1,tm4,1,4,210816_1,0.497233,-1.738318,-1.713994,1.108878,0.0000,0.0,-1.669368,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,tm1,10,5508,210816_1,0.336650,1.353177,0.772721,0.007225,0.0000,,0.451827,-0.353452,0.084494
9,tm1,10,5509,210816_1,0.297701,1.411397,-0.710081,-1.866442,-3.0862,,1.437190,-0.353452,0.084494
9,tm1,10,5510,210816_1,0.438173,1.423041,-0.597663,-1.871432,0.0000,,1.767656,-0.353452,0.321161
9,tm1,10,5511,210816_1,0.337997,1.423041,0.772721,0.007225,0.0000,,1.726848,-0.353452,0.321161


In [110]:
raw_dfs.keys()

dict_keys(['210809_2_RAW_extracted_data.csv', '210728_0_RAW_extracted_data.csv', '210802_0_RAW_extracted_data.csv', '210808_0_RAW_extracted_data.csv', '210731_0_RAW_extracted_data.csv', '210815_1_RAW_extracted_data.csv', '210816_0_RAW_extracted_data.csv', '210815_0_RAW_extracted_data.csv', '210816_1_RAW_extracted_data.csv'])

## For each df, run through pipeline

In [123]:
stim = 'stim1'
response = 'resp'
time_window = 5
for p in raw_fnames:
  df = raw_dfs[p]
  df = directionalResponse(df, 'stim2', response)

  #one movie has no zero values in the direction, which means it's missing that column after breaking it up
  #add the column in if it doesn't exist and fill it with zeros
  if 'direction_no_motion' not in df.columns:
    df['direction_no_motion'] = 0
  df.insert(9, 'direction_no_motion', df.pop('direction_no_motion'))
  
  df = stimResponse(df, stim, response, zero_point = 0)
  df = get_future_stim_and_relationship(df, stim, time_window)
  df = df.dropna()
  df = current_future_relationship(df, stim, time_window)
  df = groupFrames(df, 10) # 10 splits per roi/stack
  # df = df.set_index(['movie_ID','stack','roi','label'])
  df = df.groupby(['movie_ID','stack', 'roi', 'label', 'frame_group']).agg({
       'resp': 'mean', 'stim1': 'mean', 'stim2': 'mean',
       'stim3': 'mean', 'stim4': 'mean', 'stim5': 'mean', 'stim6': 'mean', 'stim7': 'mean', 'direction_no_motion': 'max',
       'stim8': 'max', 'direction_up': 'max', 'relation_type_stim1': 'max', 'relation_stim1': 'max',
       'relation_stim1_+5': 'max', 'relation_type_stim1_+5': 'max',
       'relation_stim1_NRNS-NRNS': 'max', 'relation_stim1_NRNS-NRPS': 'max',
       'relation_stim1_NRNS-PRNS': 'max', 'relation_stim1_NRNS-PRPS': 'max',
       'relation_stim1_NRPS-NRNS': 'max', 'relation_stim1_NRPS-NRPS': 'max',
       'relation_stim1_NRPS-PRNS': 'max', 'relation_stim1_NRPS-PRPS': 'max',
       'relation_stim1_PRNS-NRNS': 'max', 'relation_stim1_PRNS-NRPS': 'max',
       'relation_stim1_PRNS-PRNS': 'max', 'relation_stim1_PRNS-PRPS': 'max',
       'relation_stim1_PRPS-NRNS': 'max', 'relation_stim1_PRPS-NRPS': 'max',
       'relation_stim1_PRPS-PRNS': 'max', 'relation_stim1_PRPS-PRPS': 'max',})

  raw_dfs[p] = df

In [None]:
'stack': 'max', 'label': 'max', 'roi': 'max', 'frame': 'max', 'movie_ID': 'max', 'resp': 'max', 'stim1': 'max', 'stim2': 'max',
       'stim3': 'max', 'stim4': 'max', 'stim5': 'max', 'stim6': 'max', 'stim7': 'max', 'direction_no_motion': 'max',
       'stim8': 'max', 'direction_up': 'max', 'relation_type_stim1': 'max', 'relation_stim1': 'max',
       'relation_stim1_+5': 'max', 'relation_type_stim1_+5': 'max',
       'relation_stim1_NRNS-NRNS': 'max', 'relation_stim1_NRNS-NRPS': 'max',
       'relation_stim1_NRNS-PRNS': 'max', 'relation_stim1_NRNS-PRPS': 'max',
       'relation_stim1_NRPS-NRNS': 'max', 'relation_stim1_NRPS-NRPS': 'max',
       'relation_stim1_NRPS-PRNS': 'max', 'relation_stim1_NRPS-PRPS': 'max',
       'relation_stim1_PRNS-NRNS': 'max', 'relation_stim1_PRNS-NRPS': 'max',
       'relation_stim1_PRNS-PRNS': 'max', 'relation_stim1_PRNS-PRPS': 'max',
       'relation_stim1_PRPS-NRNS': 'max', 'relation_stim1_PRPS-NRPS': 'max',
       'relation_stim1_PRPS-PRNS': 'max', 'relation_stim1_PRPS-PRPS': 'max', 

In [124]:
raw_dfs[raw_fnames[0]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,resp,stim1,stim2,stim3,stim4,stim5,stim6,stim7,direction_no_motion,stim8,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
movie_ID,stack,roi,label,frame_group,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
210809_2,1,6,tm1,1,0.796548,0.073295,-0.021459,-0.024604,-0.341384,0.0,0.004752,0.0,0,0.0,...,6.394488e-09,1.817008e-13,8.203747e-18,5.947579e-14,1.636119e-05,3.412350e-06,1.467567e-12,2.133861e-12,1.870456e-07,2.353902e-08
210809_2,1,6,tm1,2,0.753909,0.053414,0.014347,-0.002635,-0.346981,0.0,0.020120,0.0,0,0.0,...,1.591302e-09,2.132790e-13,2.146371e-14,5.492159e-14,6.094265e-07,5.371710e-06,2.054573e-14,8.801675e-13,1.452705e-08,4.574572e-09
210809_2,1,6,tm1,3,0.666328,0.010441,0.037009,-0.052629,-0.397349,0.0,-0.012918,0.0,0,0.0,...,1.246678e-08,4.965334e-13,4.693035e-13,1.450184e-14,5.557104e-06,5.561223e-05,9.447062e-14,6.283123e-11,7.883205e-08,1.419442e-08
210809_2,1,6,tm1,4,0.375412,-0.034179,0.186046,0.110923,-0.279823,0.0,-0.006564,0.0,0,0.0,...,9.956835e-11,3.014529e-13,4.872117e-12,5.663351e-13,2.067170e-07,2.065395e-07,7.985681e-15,6.763816e-13,1.014385e-07,1.506953e-11
210809_2,1,6,tm1,5,0.366044,0.026595,-0.018464,-0.082227,-0.296613,0.0,0.045676,0.0,0,0.0,...,2.926810e-09,2.415381e-15,1.652753e-12,3.096439e-12,3.168164e-08,3.413050e-09,2.064920e-15,2.780606e-11,1.012629e-09,1.283999e-11
210809_2,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210809_2,10,7,tm1,6,0.244625,0.110964,-0.009384,0.055806,-0.319817,0.0,0.069987,0.0,0,0.0,...,2.850714e-12,6.581836e-14,3.571431e-13,1.359321e-14,1.159174e-11,7.446375e-10,4.652901e-11,4.473077e-11,5.135283e-11,1.062375e-10
210809_2,10,7,tm1,7,0.402618,0.065707,0.114733,0.028576,-0.353482,0.0,0.033067,0.0,0,0.0,...,3.798071e-10,4.975317e-10,6.157552e-09,2.835019e-10,2.076704e-11,7.214490e-07,9.678265e-14,8.870883e-11,1.178396e-10,2.174477e-09
210809_2,10,7,tm1,8,0.203319,-0.006067,0.088341,0.026044,-0.319817,0.0,-0.005687,0.0,0,0.0,...,3.035020e-12,2.792210e-12,1.732362e-10,1.983554e-09,5.143451e-11,5.627874e-08,3.287177e-12,6.679080e-13,4.242542e-12,2.766867e-13
210809_2,10,7,tm1,9,0.275736,0.101272,-0.111413,-0.045991,-0.359093,0.0,0.021802,0.0,0,0.0,...,1.420929e-11,2.312305e-12,2.111479e-12,8.796103e-12,3.107928e-10,2.559207e-09,2.122014e-13,1.443192e-11,8.196087e-11,7.272978e-11


In [None]:
# train test split on movie (when saving out csv file)

## Remove outlier from 210815_0__20220213T070259RAW_stimulus_data.csv

What movie is this in now??

In [126]:
#access the 11th roi in the first stack and drop all frames for that roi 
# raw_dfs['210815_0_20220308T134319RAW_stimulus_data.csv'] = raw_dfs['210815_0_20220308T134319RAW_stimulus_data.csv'].drop(raw_dfs['210815_0_20220308T134319RAW_stimulus_data.csv'].loc[1,:,11].index)
# raw_dfs['210815_0_20220308T134319RAW_stimulus_data.csv']

## Save out DF's with train/test labels

In [None]:
# # SET THESE VALUES EACH TIME

# folder_name = '5_frames_stim1' # create a new folder name for saving out these files (make sure this folder already exists)
# num_train = 3
# num_test = 2
# num_val = 0

In [None]:
# arr = np.array(['train'] * num_train + ['test'] * num_test + ['val'] * num_val)
# np.random.shuffle(arr)

# # save out each DF with a label indicating train, test or validation
# for i, p in enumerate(raw_fnames):
#   p_split = p.split('.')[-2] # chop off csv
  
#   raw_dfs[p].to_csv(root + folder_name + '/' + p_split + '_' + arr[i] + '.csv') #saves to new folder

In [127]:
#skipped train/test because it was giving me trouble

folder_name = '5_frames_stim1'

for i, p in enumerate(raw_fnames):
  p_split = p.split('.')[-2] # chop off csv
  
  raw_dfs[p].to_csv(root + folder_name + '/' + p_split + '_features.csv') #saves to new folder

In [128]:
listdir(root + folder_name)

['210809_2_RAW_extracted_data_features.csv',
 '210728_0_RAW_extracted_data_features.csv',
 '210802_0_RAW_extracted_data_features.csv',
 '210808_0_RAW_extracted_data_features.csv',
 '210731_0_RAW_extracted_data_features.csv',
 '210815_1_RAW_extracted_data_features.csv',
 '210816_0_RAW_extracted_data_features.csv',
 '210815_0_RAW_extracted_data_features.csv',
 '210816_1_RAW_extracted_data_features.csv']