# ACQ to CSV 

Before running this notebook, make sure all the acq files are organized, named properly, and stored on the NasDrive (or another accessible location). To read from the NasDrive, you must be connected to Psychsecure or the campus VPN. 

This script assumes that the subject and run are in the file name like this: 
fmcc_sub47_task_0001.acq 

This is Subject 47, Run 1. The runs go from 0 to 8; later in the pipeline this will be converted to Run 1 to 9.  

The full ReadMe and How-To Guide is located here: 
https://docs.google.com/document/d/1hfkKuHdc5htsAZjkRZWYzMZev2CSRcb4WBH9agrphzE/edit?usp=sharing 

In [1]:
# Import these libraries (make sure they are installed before)
import bioread
import pandas as pd
from scipy.io import savemat
import numpy as np
import os


In [2]:
# Optional first step: load a single acq file and look at its structure.
# Knowing the structure lets us adapt the rest of the script, because each
# ACQ template may use different physio channels, which means different
# columns in the DataFrame built later.

# Path of the one example file that is read with bioread.
sample_acq_path = "/Volumes/labshare/Nadu/fmcc_winter_spring_quarter/sub49/fmcc_sub49_task_0000.acq"
acq_dataset = bioread.read_file(sample_acq_path)


In [4]:
# Count the channels (future DataFrame columns) in the sample dataset.
# This number matters later, when the channels are combined into a DataFrame.
channel_list = acq_dataset.channels
num_cols = len(channel_list)
print(num_cols)

# 12

12


In [11]:
# Explore the sample file: one DataFrame column per channel,
# with the channel names as column headers.
data = {chan.name: chan.data for chan in acq_dataset.channels}
df = pd.DataFrame(data)

# Summary statistics for every channel
print(df.describe())

# Optional: export this structure to a CSV file
#output_path = "/Users/nadezhdabarbashova/Library/CloudStorage/Dropbox/LEAP_Neuro_Lab/researchProjects/nadu/fmcc/data/fmcc_w25/fmcc_csv"
#df.to_csv(output_path, index=False, header=True)
#print(f"DataFrame successfully exported to {output_path}") 

       GSR - EDA100C  ECG - ECG100C  Feedback Cable - CBLCFMA - Current Feed  \
count   1.226013e+06   1.226013e+06                             1.226013e+06   
mean    1.009395e+01  -4.319720e-02                             3.139719e-01   
std     9.381568e+00   8.430103e-01                             7.183593e-02   
min    -1.556398e+00  -1.000000e+01                            -1.231689e+01   
25%     1.524809e-03  -1.492310e-01                             3.021240e-01   
50%     1.784058e+01  -2.410889e-02                             3.143311e-01   
75%     1.848144e+01  -1.098633e-02                             3.265381e-01   
max     2.221069e+01   9.999695e+00                             1.189880e+01   

       Stim - Custom, AMI / HLT - A16  Digital (STP Input 0)  \
count                    1.226013e+06           1.226013e+06   
mean                    -2.007456e-01           5.632077e-03   
std                      1.175066e-01           1.677161e-01   
min                    

In [12]:
# Preview the first 10 rows of the sample DataFrame
df.head(n=10)

Unnamed: 0,GSR - EDA100C,ECG - ECG100C,Feedback Cable - CBLCFMA - Current Feed,"Stim - Custom, AMI / HLT - A16",Digital (STP Input 0),Digital (STP Input 1),Digital (STP Input 2),Digital (STP Input 3),Digital (STP Input 4),Digital (STP Input 5),Digital (STP Input 6),Digital (STP Input 7)
0,22.062682,-0.148926,0.299072,-0.202938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,22.062682,-0.149841,0.305176,-0.210872,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22.064208,-0.150452,0.311279,-0.202327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22.064208,-0.152283,0.268555,-0.201412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,22.065734,-0.153198,0.314331,-0.202633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,22.064208,-0.155029,0.289917,-0.203548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,22.062682,-0.15625,0.302124,-0.206905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,22.064208,-0.158386,0.332642,-0.204464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,22.062682,-0.161133,0.305176,-0.2066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,22.065734,-0.16449,0.341797,-0.204159,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# Folder locations and subject list used by the conversion loop.

# Input: the NasDrive folder where all the acq files are located
# (the conversion loop looks for a "sub<ID>" folder inside it for each subject).
biopacpath = "/Volumes/labshare/Nadu/fmcc_winter_spring_quarter"

# Output: folder that receives the csv files.
# Do not add a final slash - os.path.join is used later and expects none.
# Previous output location, kept for reference:
# outputdir = '/Users/nadezhdabarbashova/Library/CloudStorage/Dropbox/LEAP_Neuro_Lab/researchProjects/nadu/fmcc/data/fmcc_w25/fmcc_csv'
outputdir = "/Users/nadezhdabarbashova/Documents/fmcc_heart_rate/raw_csv"

# Subject IDs, stored as strings.
# Check naming conventions (e.g. whether a leading 0 is used) before editing.
all_subjects = [
    "49", "50", "51", "52", "54", "55", "56", "57", "58", "61",
    "62", "63", "65", "67", "68", "69", "70", "71", "72", "73",
    "74", "76", "78", "81", "82", "84", "85", "87", "88", "89",
    "91", "93", "98", "99", "100", "104", "107", "109", "110",
]

# Subjects actually converted in this run (a subset of all_subjects).
subjects = [
    "91", "93", "98", "99", "100", "104",
    "107", "109", "110",
]

# remove this entry if it exists in the list:
# subjects.remove(".DS_Store")


In [19]:

# Convert every task-related .acq file of each subject into a CSV file.
# Each CSV has one column per channel, named "1" to "12"
# (channel index + 1, so columns go from 1 to 12 instead of 0 to 11).
NUM_CHANNELS = 12  # number of channels read from every acq file

for sub in subjects:
    inputdir = os.path.join(biopacpath, "sub" + sub)

    # Check the directory BEFORE listing it: os.listdir raises on a missing
    # folder, which would stop the whole run instead of skipping the subject.
    if not os.path.isdir(inputdir):
        print(f"Directory does not exist: {inputdir}")
        continue  # Skip this subject if directory doesn't exist

    print(f"Processing subject: {sub}")  # Debugging: confirm subject being processed

    # Task-related files only: the name must end with .acq and contain "task".
    # Sorted so the files are processed (and logged) in a stable order.
    taskfnli = sorted(
        item for item in os.listdir(inputdir)
        if item.endswith(".acq") and "task" in item
    )
    # print(f"Task-related files: {taskfnli}")  # Debugging: check filtered files

    # Output sub-directory for this subject: main output directory + sub##
    outputsubdir = os.path.join(outputdir, "sub" + sub)

    for inputfn in taskfnli:
        print(f"Processing file: {inputfn}")

        # Full path = subject directory + file name of this acq file
        fn = os.path.join(inputdir, inputfn)

        # Use bioread to extract the data of each channel and combine the
        # channels into a DataFrame with columns "1".."12".
        acq_dataset = bioread.read_file(fn)
        df = pd.DataFrame(
            {
                str(idx + 1): acq_dataset.channels[idx].data
                for idx in range(NUM_CHANNELS)
            }
        )

        # To see the structure at this stage, print the first 10 rows:
        # df.head(10)

        # Create the subject's output folder the first time it is needed
        if not os.path.exists(outputsubdir):
            print("folder not found - will be created")
            os.makedirs(outputsubdir)

        # Same file name as the acq file, with only the extension swapped to .csv
        csv_fn = os.path.splitext(inputfn)[0] + ".csv"
        csv_file = os.path.join(outputsubdir, csv_fn)

        # Optional MATLAB export instead of CSV (requires a .mat output path):
        # savemat(mat_file, {'data': df.to_numpy()})

        # Write the DataFrame to the csv output directory, without the index column
        df.to_csv(csv_file, index=False)
        print(f"done creating csv for {inputfn}")


print("done processing all subjects")


Processing subject: 91
Processing file: fmcc_sub91_task_0000.acq
done creating csv for fmcc_sub91_task_0000.acq
Processing file: fmcc_sub91_task_0001.acq
done creating csv for fmcc_sub91_task_0001.acq
Processing file: fmcc_sub91_task_0002.acq
done creating csv for fmcc_sub91_task_0002.acq
Processing file: fmcc_sub91_task_0003.acq
done creating csv for fmcc_sub91_task_0003.acq
Processing file: fmcc_sub91_task_0004.acq
done creating csv for fmcc_sub91_task_0004.acq
Processing file: fmcc_sub91_task_0005.acq
done creating csv for fmcc_sub91_task_0005.acq
Processing file: fmcc_sub91_task_0006.acq
done creating csv for fmcc_sub91_task_0006.acq
Processing file: fmcc_sub91_task_0007.acq
done creating csv for fmcc_sub91_task_0007.acq
Processing file: fmcc_sub91_task_0008.acq
done creating csv for fmcc_sub91_task_0008.acq
Processing subject: 93
Processing file: fmcc_sub93_task_0000.acq
folder not found - will be created
done creating csv for fmcc_sub93_task_0000.acq
Processing file: fmcc_sub93_t

In [16]:
# Below just to get the sample rate  
# EDASampleRate.append(acq_dataset.channels[0].samples_per_second)
# TrigerSampleRate.append(acq_dataset.channels[2].samples_per_second)
# make dataframe and save
# sampleRdf = pd.DataFrame({"participant": subli, "run": runli, "EDASampleRate": EDASampleRate, "TrigerSampleRate":TrigerSampleRate})
# sampleRdf.to_csv("/Users/jingyiwang/Dropbox/LEAP_Neuro_Lab/researchProjects/jingyi_documents/emotion_motor_grant/AA_fMRIAnalyses/EDAAnalysis/SampleRate.csv", index=False)

/Users/nadezhdabarbashova/Library/CloudStorage/Dropbox/LEAP_Neuro_Lab/researchProjects/nadu/fmcc/data/fmcc_w25/acq_data
