<a href="https://colab.research.google.com/github/laurenneal/capstone-visual-neuroscience/blob/main/Feature_Engineering_from_Raw_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import pandas as pd
import numpy as np
import sys
from numpy.ma.core import ceil, floor
from more_itertools import sliced

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# FUNCTIONS (Just for setting up functions, not running code)
This section contains functions that can be used on datasets for feature engineering

## Train Test Split on Stacks

In [22]:
def trainTestSplit(df, splt = [.7,.2,.1]):
  # splits on roi, rois accross stacks will have the same group (important if stacks are from the same video)
  # split is the train test valuidation split (proportions given in that order)
  if sum(splt) < 0.9999 or sum(splt) > 1.0001:
    sys.exit('Splt must add to 1')
  
  num_rois = len(df.index.unique(level='roi'))

  tr_ind = int(ceil(num_rois*splt[0]))
  ts_ind = int(ceil(num_rois*splt[1])) + tr_ind

  ar = np.arange(num_rois)
  np.random.shuffle(ar)
  ar = ar+1 # roi id's start from 1

  train = ar[:tr_ind]
  test = ar[tr_ind:ts_ind]
  val = ar[ts_ind:]

  for i in train:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'train'

  for i in test:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'test'

  for i in val:
    df.loc[pd.IndexSlice[:, :, i],'training'] = 'validate'

  return(df)

In [23]:
# # df_temp = trainTestSplit(raw_df,splt=[.5,.3,.3]) # should give error
# df_temp = trainTestSplit(raw_df,splt=[.5,.3,.2]) # need to pull in sample data
# df_temp.sample(15)

## Combine Frames into Temporal Chunks - NOT Working

In [24]:
# select frame size
temporal_period_length = 200

In [25]:

def groupFrames(df, period):
  # inputs are data frame and temporal period length
  # df needs to be sorted correctly, the indexing should take care of that

  # check that all rois are the same size
  # if this is a problem we can write a more computationally heavy workthrough
  if (raw_df.groupby(['stack','roi']).count()['filename'].max() != raw_df.groupby(['stack','roi']).count()['filename'].min()):
    sys.exit('ROIs need to be the same length')

  num_stacks = len(df.index.unique(level='stack'))
  num_roi = len(df.index.unique(level='roi'))

  # get first label
  lbl_1 = raw_df.head(1).index.values[0][1]
  # use it to find the number of frames in a roi
  num_frames = raw_df.loc[(1,lbl_1,1),'filename'].count()
  # how many groups will ther be in each roi?
  num_pds = int(floor(num_frames/period))
  # how many left over
  remainder_pds = num_frames%period

  # iterate a column for one roi
  lst = [[li + 1] * period for li in range(0,num_pds)]
  lst = [li for sublist in lst for li in sublist] # https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-a-list-of-lists
  lst = lst + [lst[-1] + 1] * remainder_pds
  lst

  new_col = np.array(lst * num_stacks * num_roi)
  
  df['frame_group'] = new_col

  return(df)

In [26]:
# df_temp2 = groupFrames(raw_df,temporal_period_length)
# df_temp2

## Stim 1 feature engineering

In [27]:
from operator import setitem
def stimResponse(df, stim, response, zero_point = 0):
  """
  Takes a df, the name of the col holding the stimulus, the name of the col holding the response, and the value to use as the zero point

  Returns the df with the relationship type and the relation value added

  A different function will calculate the relationship between time periods
  """


  # takes a stim column and response column and returns two new feature column
  # PR is Positive Response
  # NR is Negative Response
  # PS is Positive Stimulus
  # NS is Negative Stimulus
  df_in = df.copy()
  #
  PR = df_in[response] > zero_point
  NR = df_in[response] <= zero_point
  PS = df_in[stim] > zero_point
  NS = df_in[stim] <= zero_point

  PR = PR.rename('PR')
  NR = NR.rename('NR')
  PS = PS.rename('PS')
  NS = NS.rename('NS')

  # combine into df
  df = pd.concat([df_in[stim], df_in[response], PR,NR,PS,NS], axis=1)
  #set the data types back to boolean
  df = df.astype({'PR': bool,
                'PS': bool,
                'NR': bool,
                'PR': bool})

  # get min and max values for normalizing
  S_min = df[stim].min()
  S_max = df[stim].max()
  R_min = df[response].min()
  R_max = df[response].max()

  #add a column normalizing the stim and response
  df['stim_norm'] = df[stim].apply(lambda x: x/S_max if x>0 else x/S_min)
  df['resp_norm'] = df[response].apply(lambda x: x/R_max if x>0 else x/R_min)


  # multiply stim and resp
  df['relation'] = df['stim_norm'] * df['resp_norm']

  #Set a new column with the category each row's relationship falls under
  df.loc[(df['PR'] & df['PS']),'relation_type'] = 'PRPS'
  df.loc[(df['NR'] & df['NS']),'relation_type'] = 'NRNS'
  df.loc[(df['PR'] & df['NS']),'relation_type'] = 'PRNS'
  df.loc[(df['NR'] & df['PS']),'relation_type'] = 'NRPS'

  #UNCOMMENT this to create the len-4 matrix for the current time period directly

  # #one-hot encode the four categories
  # df = pd.get_dummies(df, columns=['relation_type'], prefix = stim)

  # #get a list of the columns created
  # dummy_cols = [col for col in df.columns if 'relation_type_' in col]

  # #multiply the dummy columns by the relation value to distribute the value to the appropriate column
  # for col in dummy_cols:
  #   df[col] = df[col] * df['relation']
  
  df_in['relation_type_' + stim] = df['relation_type']
  df_in['relation_' + stim] = df['relation']
  return(df_in)

In [28]:
# #pass in the raw df, the name of the stimulus we want to use, the response column, and the zero-value (in this case zero)
# df_temp_3 = stimResponse(raw_df, 'stim1', 'resp', 0)

In [29]:
# df_temp_3.head()

In [30]:
def get_future_stim_and_relationship(df_in, stim, time_window): 
  """
  takes a df, the stimulus name we're comparing, and the time window we're looking to in the future (in frames)

  returns a new df with two extra columns containing the relationship type and the value at the future time period
  """

  #make copy of the df to avoid editing the one in memory
  df = df_in.copy()

  #procedurally generate the column names we need to reference
  current_relation_colname = 'relation_'+stim
  current_relation_type_colname = 'relation_type_'+stim

  #and the column names we're creating
  future_relation_colname = 'relation_'+stim+'_+'+str(time_window)
  future_relation_type_colname = 'relation_type_'+stim+'_+'+str(time_window)

  #reset the index so that we can join the new columns later
  df = df.reset_index()


  # for each of the new columns, do four steps

  #get the existing array of relation values or relation types

  #cut off the first x values, where x is the time window we want to look in the future (done by indexing [time_window:])
        #this will drop the time x values in the future to line up with the current time

  #pad the end of the array with zeros by the same number of values so that the lengths still match (done by concatenating with np.zeros(time_window))

  #set the new array as the new 'future' column
  df[future_relation_colname] = pd.concat([df[current_relation_colname] \
                                           .reset_index(drop=True)[time_window:], \
                                           pd.Series((np.zeros(time_window)))], ignore_index = True)
  
  #repeat for the relation type
  df[future_relation_type_colname] = pd.concat([df[current_relation_type_colname] \
                                           .reset_index(drop=True)[time_window:], \
                                           pd.Series((np.zeros(time_window)))], ignore_index = True)

  #need to stop the end of one stack from referencing the beginning of another

  #get the index of any row where the frame # is high enough that x-frames in the future would run into another stack
  #by checking that the frame number is greater than the max frame number - (time window-1) (corrects for index number)
  end_of_stack_index = df.loc[df['frame'] >= max(df['frame'])-(time_window-1)].index

  #use .loc to set the two new columns to None for those rows
  df.loc[end_of_stack_index, future_relation_colname] = None
  df.loc[end_of_stack_index, future_relation_type_colname] = None

  #set the index back to the way it was
  df = df.set_index(['stack','label','roi','frame'])

  return df

In [31]:
# #check that the values match the records 10 in the future and that the last 10 rows in each stack are null for future values
# df_temp_4 = get_future_stim_and_relationship(df_temp_3, 'stim1', 10)
# df_temp_4.tail(30)

In [32]:
def current_future_relationship(df, stim, time_window):
  """
  Takes df with a column for stim and a column for stim + a given time window. Returns the relationship.
  """
  # column names for current row
  current_relation_colname = 'relation_'+stim
  current_relation_type_colname = 'relation_type_'+stim

  # column names for rows in the future
  future_relation_colname = 'relation_'+stim+'_+'+str(time_window)
  future_relation_type_colname = 'relation_type_'+stim+'_+'+str(time_window)

  #with this approach, we need to drop the last x frames from each stack, so dropna
  df = df.dropna()

  #make a new column with the combo of current relation type and future relation type
  df['full_relation_type'] = df[current_relation_type_colname] + '-' + df[future_relation_type_colname]

  #one-hot encode the overall relationship, then multiply the new columns by the product of the relation values
  df = pd.get_dummies(df, columns=['full_relation_type'], prefix = current_relation_colname)

  #get a list of the columns created
  dummy_cols = [col for col in df.columns if current_relation_colname in col]

  #multiply the dummy columns by the relation value to distribute the value to the appropriate column
  for col in dummy_cols:
    df[col] = df[col] * (df[current_relation_colname]*df[future_relation_colname])

  return df

In [50]:
def directionalResponse(df, stim, response):
  """
  Takes a df, the name of the col holding the direction of motion stimulus, the name of the col holding the response
  Returns the df with the stim broken out into four cols based on direction of motion

  A different function will calculate the relationship between response and direction
  """


  # add a column that categorizes the direction into four quadrants (segments can be changed)
  df_in = df.copy()

  #set conditions checking the direction of the stimulus - there has to be a better way but this is it for now
  conditions = [
    ((-45 <= df_in[stim]) & (df_in[stim] < 0)), #northwest
    (df_in[stim] == 0), #no movement
    ((0 < df_in[stim]) & (df_in[stim] < 45)), #northeast
    ((45 <= df_in[stim]) & (df_in[stim] < 135)), #east
    ((135 <= df_in[stim]) & (df_in[stim] <= 180)),#southeast
    ((-180 <= df_in[stim]) & (df_in[stim] < -135)), #southwest
    ((-135 <= df_in[stim]) & (df_in[stim] < -45)) #west
    ]

  #set the names we assign to those conditions
  values = ['up', 'no_motion', 'up', 'right', 'down', 'down', 'left']

  #add a column to the df holding the direction category
  df['direction'] = np.select(conditions, values)

  #one-hot encode the quadrants
  df = pd.get_dummies(df, columns=['direction'], prefix = 'direction')

  #get the names of the 4 direction columns we just added
  direction_cols = [col for col in df.columns if 'direction' in col]

  #TODO - TEST SOME OPTIONS AND SEE HOW WE WANT TO REPRESENT THE RESPONSE TO THESE
  #STOPPING HERE FOR NOW WITH JUST THE DIRECTION ENCODED

  return(df)

In [34]:
# df = df_temp_4

In [None]:
# # #make a new column with the combo of current relation type and future relation type
# df['full_relation_type'] = df['relation_type_stim1'] + '-' + df['relation_type_stim1_+10']
# df.head()

In [None]:
# #one-hot encode the overall relationship, then multiply the new columns by the product of the relation values

# df = pd.get_dummies(df, columns=['full_relation_type'], prefix = 'relation_stim1')

# #get a list of the columns created
# dummy_cols = [col for col in df.columns if 'relation_stim1' in col]

# #multiply the dummy columns by the relation value to distribute the value to the appropriate column
# for col in dummy_cols:
#   df[col] = df[col] * (df['relation_stim1']*df['relation_stim1_+10'])

# df.head(10) 

In [None]:
# df.to_csv('drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/210815_0_20220213T070259_sparse_stim1_draft.csv')

In [None]:
# df_temp_5 = current_future_relationship(df_temp_4, 'stim1', 10)
# df_temp_5.head()

In [35]:
! ls drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/

10_frames_stim1
210728_0_20220309T002951RAW_stimulus_data.csv
210728_0_allStacks_20220309T002951_rawExtracts
210731_0_20220308T120131RAW_stimulus_data.csv
210731_0_allStacks_20220308T120131_rawExtracts
210802_0_20220308T122044RAW_stimulus_data.csv
210802_0_allStacks_20220308T122044_rawExtracts
210808_0_20220309T012702RAW_stimulus_data.csv
210808_0_allStacks_20220309T012702_rawExtracts
210809_2_20220308T131617RAW_stimulus_data.csv
210809_2_allStacks_20220308T131617_rawExtracts
210815_0_20220308T134319RAW_stimulus_data.csv
210815_0_allStacks_20220308T134319_rawExtracts
210815_0_allStacks_mc_mix1_syt_rawExtracts
210815_1_20220308T140826RAW_stimulus_data.csv
210815_1_allStacks_20220308T140826_rawExtracts
210816_0_20220308T143121RAW_stimulus_data.csv
210816_0_allStacks_20220308T14312120_rawExtracts
210816_1_20220308T150735RAW_stimulus_data.csv
210816_1_allStacks_20220308T150735_rawExtracts


# RUN FUNCTIONS ON DATA

In [36]:
# raw_df = pd.read_csv('drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/210815_0__20220213T070259RAW_stimulus_data.csv', index_col=['stack','label','roi','frame']).drop(columns='Unnamed: 0')
# raw_df

In [37]:
# get all raw DF's
from os import listdir
root = 'drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Extracted_Features/'
raw_fnames = listdir(root)
raw_fnames = [x for x in raw_fnames if 'RAW_stimulus' in x]
raw_fnames

['210815_1_20220308T140826RAW_stimulus_data.csv',
 '210816_1_20220308T150735RAW_stimulus_data.csv',
 '210816_0_20220308T143121RAW_stimulus_data.csv',
 '210809_2_20220308T131617RAW_stimulus_data.csv',
 '210815_0_20220308T134319RAW_stimulus_data.csv',
 '210808_0_20220309T012702RAW_stimulus_data.csv',
 '210731_0_20220308T120131RAW_stimulus_data.csv',
 '210728_0_20220309T002951RAW_stimulus_data.csv',
 '210802_0_20220308T122044RAW_stimulus_data.csv']

In [38]:
# Dictionary of df's. One for each video. You can call each df my name
raw_dfs = {}
for p in raw_fnames:
  df = pd.read_csv(root + p, index_col=['stack','label','roi','frame']).drop(columns='Unnamed: 0')
  raw_dfs[p] = df
raw_dfs[raw_fnames[-1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,tm9,1,0,210802_0_1_stackRaw_mc_result_20220308T122044.h5,0.296236,0.203364,-103.861223,0.097277
1,tm9,1,1,210802_0_1_stackRaw_mc_result_20220308T122044.h5,0.265434,0.203364,0.000000,0.000000
1,tm9,1,2,210802_0_1_stackRaw_mc_result_20220308T122044.h5,0.229053,0.203364,0.000000,0.000000
1,tm9,1,3,210802_0_1_stackRaw_mc_result_20220308T122044.h5,0.184726,-0.248205,-103.453969,0.044159
1,tm9,1,4,210802_0_1_stackRaw_mc_result_20220308T122044.h5,0.259741,-0.473989,-104.693881,0.040879
...,...,...,...,...,...,...,...,...
11,t5,14,5508,210802_0_11_stackRaw_mc_result_20220308T122044.h5,0.019915,0.186905,68.869163,0.011823
11,t5,14,5509,210802_0_11_stackRaw_mc_result_20220308T122044.h5,0.043048,0.045419,110.558529,0.021267
11,t5,14,5510,210802_0_11_stackRaw_mc_result_20220308T122044.h5,0.019737,-0.041530,124.799748,0.014075
11,t5,14,5511,210802_0_11_stackRaw_mc_result_20220308T122044.h5,-0.038230,-0.041530,45.087250,0.002800


In [39]:
raw_dfs.keys()

dict_keys(['210815_1_20220308T140826RAW_stimulus_data.csv', '210816_1_20220308T150735RAW_stimulus_data.csv', '210816_0_20220308T143121RAW_stimulus_data.csv', '210809_2_20220308T131617RAW_stimulus_data.csv', '210815_0_20220308T134319RAW_stimulus_data.csv', '210808_0_20220309T012702RAW_stimulus_data.csv', '210731_0_20220308T120131RAW_stimulus_data.csv', '210728_0_20220309T002951RAW_stimulus_data.csv', '210802_0_20220308T122044RAW_stimulus_data.csv'])

## For each df, run through pipeline

In [51]:
stim = 'stim1'
response = 'resp'
time_window = 5
for p in raw_fnames:
  df = raw_dfs[p]
  df = directionalResponse(df, 'stim2', response)
  df = stimResponse(df, stim, response, zero_point = 0)
  df = get_future_stim_and_relationship(df, stim, time_window)
  df = current_future_relationship(df, stim, time_window)
  raw_dfs[p] = df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [52]:
raw_dfs[raw_fnames[1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,filename,resp,stim1,stim2,stim3,direction_down,direction_left,direction_no_motion,direction_right,direction_up,...,relation_stim1_NRPS-PRNS,relation_stim1_NRPS-PRPS,relation_stim1_PRNS-NRNS,relation_stim1_PRNS-NRPS,relation_stim1_PRNS-PRNS,relation_stim1_PRNS-PRPS,relation_stim1_PRPS-NRNS,relation_stim1_PRPS-NRPS,relation_stim1_PRPS-PRNS,relation_stim1_PRPS-PRPS
stack,label,roi,frame,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,tm4,1,0,210816_1_1_stackRaw_mc_result_20220308T150735.h5,0.153797,-0.426309,-3.620520,0.123511,0,0,0,0,1,...,0.0,0.000000e+00,0.0,0.0,1.227192e-10,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,tm4,1,1,210816_1_1_stackRaw_mc_result_20220308T150735.h5,0.280574,-0.426309,0.000000,0.000000,0,0,1,0,0,...,0.0,0.000000e+00,0.0,0.0,3.289960e-10,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,tm4,1,2,210816_1_1_stackRaw_mc_result_20220308T150735.h5,0.493612,-0.426309,0.000000,0.000000,0,0,1,0,0,...,0.0,0.000000e+00,0.0,0.0,2.345242e-10,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,tm4,1,3,210816_1_1_stackRaw_mc_result_20220308T150735.h5,0.667897,-0.520911,-176.994026,0.013512,1,0,0,0,0,...,0.0,0.000000e+00,0.0,0.0,1.537879e-08,0.000000e+00,0.0,0.0,0.0,0.000000e+00
1,tm4,1,4,210816_1_1_stackRaw_mc_result_20220308T150735.h5,0.497233,-0.615513,-145.677535,0.009002,1,0,0,0,0,...,0.0,0.000000e+00,0.0,0.0,2.869498e-08,0.000000e+00,0.0,0.0,0.0,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,tm1,10,5503,210816_1_10_stackRaw_mc_result_20220308T150735.h5,0.077264,-0.325382,0.000000,0.000000,0,0,1,0,0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,1.723740e-13,0.0,0.0,0.0,0.000000e+00
10,tm1,10,5504,210816_1_10_stackRaw_mc_result_20220308T150735.h5,0.235186,-0.325382,0.000000,0.000000,0,0,1,0,0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,1.122748e-11,0.0,0.0,0.0,0.000000e+00
10,tm1,10,5505,210816_1_10_stackRaw_mc_result_20220308T150735.h5,-0.201205,0.210543,-137.807470,0.027125,1,0,0,0,0,...,0.0,1.718149e-10,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.000000e+00
10,tm1,10,5506,210816_1_10_stackRaw_mc_result_20220308T150735.h5,0.062201,0.478506,-101.804298,0.044868,0,1,0,0,0,...,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,4.350690e-13


In [None]:
raw_dfs[raw_fnames[2]]

In [None]:
raw_dfs[raw_fnames[3]]

In [None]:
raw_dfs[raw_fnames[4]]

In [None]:
# train test split on movie (when saving out csv file)

## Remove outlier from 210815_0__20220213T070259RAW_stimulus_data.csv

In [53]:
#access the 11th roi in the first stack and drop all frames for that roi
raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv'] = raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv'].drop(raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv'].loc[1,:,11].index)
raw_dfs['210815_0__20220213T070259RAW_stimulus_data.csv']

KeyError: ignored

## Save out DF's with train/test labels

In [54]:
# # SET THESE VALUES EACH TIME

# folder_name = '5_frames_stim1' # create a new folder name for saving out these files (make sure this folder already exists)
# num_train = 3
# num_test = 2
# num_val = 0

In [None]:
# arr = np.array(['train'] * num_train + ['test'] * num_test + ['val'] * num_val)
# np.random.shuffle(arr)

# # save out each DF with a label indicating train, test or validation
# for i, p in enumerate(raw_fnames):
#   p_split = p.split('.')[-2] # chop off csv
  
#   raw_dfs[p].to_csv(root + folder_name + '/' + p_split + '_' + arr[i] + '.csv') #saves to new folder

In [58]:
raw_fnames

['210815_1_20220308T140826RAW_stimulus_data.csv',
 '210816_1_20220308T150735RAW_stimulus_data.csv',
 '210816_0_20220308T143121RAW_stimulus_data.csv',
 '210809_2_20220308T131617RAW_stimulus_data.csv',
 '210815_0_20220308T134319RAW_stimulus_data.csv',
 '210808_0_20220309T012702RAW_stimulus_data.csv',
 '210731_0_20220308T120131RAW_stimulus_data.csv',
 '210728_0_20220309T002951RAW_stimulus_data.csv',
 '210802_0_20220308T122044RAW_stimulus_data.csv']

In [59]:
#skipped train/test because it was giving me trouble

folder_name = '5_frames_stim1'

for i, p in enumerate(raw_fnames):
  p_split = p.split('.')[-2] # chop off csv
  
  raw_dfs[p].to_csv(root + folder_name + '/' + p_split + '_features.csv') #saves to new folder

In [60]:
listdir(root + folder_name)

['210815_1_20220308T140826RAW_stimulus_data_features.csv',
 '210816_1_20220308T150735RAW_stimulus_data_features.csv',
 '210816_0_20220308T143121RAW_stimulus_data_features.csv',
 '210809_2_20220308T131617RAW_stimulus_data_features.csv',
 '210815_0_20220308T134319RAW_stimulus_data_features.csv',
 '210808_0_20220309T012702RAW_stimulus_data_features.csv',
 '210731_0_20220308T120131RAW_stimulus_data_features.csv',
 '210728_0_20220309T002951RAW_stimulus_data_features.csv',
 '210802_0_20220308T122044RAW_stimulus_data_features.csv']