In [201]:
import os
import sys
import numpy as np
import pandas as pd


In [214]:
def load_data(folder_dir):
    all_files = os.listdir(os.path.normpath(sys.path[3] + "/" + folder_dir)) #check that the system path is correct
    
    #check the number of files with the tags 
    note_files = list(filter(lambda f: f.endswith('_notes.txt'), all_files))
    qcm_txt_files = list(filter(lambda f: f.endswith('_slb.txt'), all_files))
    num_note_files = len(note_files)
    num_qcm_files = len(qcm_txt_files)
    print("==== Found {} QCM-D and {} note file(s) in {} folder! ====".format(num_qcm_files, num_note_files, folder_dir))

    qcm_dir = {} #tag slb is for the support lipid bilayer 
    note_dir = {} #tag note is for the notes taken during the experiments

    #goes through the files in the data folder to add them to the empty directories above
    for file_name in all_files:
        if file_name.endswith("_slb.csv"):
            data_table = pd.read_csv(os.path.join(folder_dir, file_name), delimiter=",") 
            qcm_dir[file_name] = data_table
            for key in qcm_dir.keys():
                print(key)
                print(qcm_dir[key].head(1))
            print()

        if file_name.endswith("_notes.csv"):
            data_table = pd.read_csv(os.path.join(folder_dir, file_name), delimiter=",")
            note_dir[file_name] = data_table
            for key in note_dir.keys():
                print(key)
                print(note_dir[key].head(1))
            print()

    #checks the number of files in the directories to be able to compare to the number of files in the data folder
    num_qcm_dir = len(qcm_dir)
    num_note_dir = len(note_dir)
    print("==== Found {} QCM-D and {} note file(s) in the directories! ====".format(num_qcm_dir, num_note_dir))
    return qcm_dir, note_dir


In [215]:
print(load_data("data"))

==== Found 0 QCM-D and 0 note file(s) in data folder! ====
20230718_qcm_sept_notes.csv
   Unnamed: 0  time (min)  pump stirrer solution
0           0           0    20    none       fb

20230718_qcm_sept_slb.csv
   Unnamed: 0  Time_1 [s]     f1_1 [Hz]  D1_1 [ppm]     f3_1 [Hz]  D3_1 [ppm]  \
0           0    0.216997  4.955102e+06  356.316176  1.484775e+07  177.763463   

      f5_1 [Hz]  D5_1 [ppm]     f7_1 [Hz]  D7_1 [ppm]  ...     f5_4 [Hz]  \
0  2.474229e+07  136.374282  3.463544e+07  111.583842  ...  2.474404e+07   

   D5_4 [ppm]     f7_4 [Hz]  D7_4 [ppm]     f9_4 [Hz]  D9_4 [ppm]  \
0  135.566046  3.463789e+07  111.140838  4.453265e+07   99.470117   

     f11_4 [Hz]  D11_4 [ppm]    f13_4 [Hz]  D13_4 [ppm]  
0  5.442725e+07     90.99574  6.432258e+07     84.50777  

[1 rows x 61 columns]

==== Found 1 QCM-D and 1 note file(s) in the directories! ====
({'20230718_qcm_sept_slb.csv':       Unnamed: 0    Time_1 [s]     f1_1 [Hz]  D1_1 [ppm]     f3_1 [Hz]  \
0              0      0.2

In [216]:
qcm_dir, note_dir = load_data("data")

==== Found 0 QCM-D and 0 note file(s) in data folder! ====
20230718_qcm_sept_notes.csv
   Unnamed: 0  time (min)  pump stirrer solution
0           0           0    20    none       fb

20230718_qcm_sept_slb.csv
   Unnamed: 0  Time_1 [s]     f1_1 [Hz]  D1_1 [ppm]     f3_1 [Hz]  D3_1 [ppm]  \
0           0    0.216997  4.955102e+06  356.316176  1.484775e+07  177.763463   

      f5_1 [Hz]  D5_1 [ppm]     f7_1 [Hz]  D7_1 [ppm]  ...     f5_4 [Hz]  \
0  2.474229e+07  136.374282  3.463544e+07  111.583842  ...  2.474404e+07   

   D5_4 [ppm]     f7_4 [Hz]  D7_4 [ppm]     f9_4 [Hz]  D9_4 [ppm]  \
0  135.566046  3.463789e+07  111.140838  4.453265e+07   99.470117   

     f11_4 [Hz]  D11_4 [ppm]    f13_4 [Hz]  D13_4 [ppm]  
0  5.442725e+07     90.99574  6.432258e+07     84.50777  

[1 rows x 61 columns]

==== Found 1 QCM-D and 1 note file(s) in the directories! ====


In [233]:
def filter_data(dataset: pd.DataFrame, overtone: int, chamber: int) -> pd.DataFrame:
    """Takes a csv file to seperate out the time(Time), frequency (f), and dissipation (D) for a chamber (c) at the Harmonic of interest (n) to give an array with these columns"""
    
    n = overtone
    c = chamber
    desired_columns = [
        f"Time_{c} [s]",
        f"f{n}_{c} [Hz]",
        f"D{n}_{c} [ppm]"
    ]
    filtered_table = dataset[desired_columns]    
    return filtered_table

# # can filter out an overtone, chamber for dataset
# # TODO: this only works for chamber 1
# # set the filtered data for overtone and channel to the working data for math (dm). Overtones are only odd. 
working_file = qcm_dir['20230718_qcm_sept_slb.csv']
filter_c = filter_data(working_file, 5, 1)
print(filter_c)


        Time_1 [s]     f5_1 [Hz]  D5_1 [ppm]
0         0.216997  2.474229e+07  136.374282
1         1.729103  2.474229e+07  136.330911
2         3.370784  2.474229e+07  136.358563
3         5.273353  2.474229e+07  136.384499
4         7.417136  2.474229e+07  136.370432
...            ...           ...         ...
9337  18494.308498  2.474215e+07  138.132795
9338  18496.279786  2.474215e+07  138.057634
9339  18498.234589  2.474215e+07  138.152295
9340  18500.395085  2.474215e+07  138.133736
9341  18502.246921  2.474215e+07  138.106987

[9342 rows x 3 columns]


In [236]:
def ave_slb_baseline(dataset: pd.DataFrame, t_base_s: float = 6., t_base_e: float = 8.):
    """Takes the filtered dataset to convert the time from seconds to minutes and average the frequency and dissipation from the SLB baseline."""
    
    time_sec = dataset.iloc[:, 0] #takes the first row from the filter_data, which is always time
    time_min = (time_sec / 60).round(2) #convert the time into minutes
    #set a range to find the average to normalize the frequency and dissipation channels across the entire dataset
    rows_in_range = (time_min >= t_base_s) & (time_min <= t_base_e)
    data_in_range = dataset.loc[rows_in_range]
    
    #TODO: Set a if and else statement for if the data is properly selected to find the averages/no data if found
    # if data_in_range[]:
   
    f_avg = data_in_range.iloc[:, 1].mean()  # averaging over frequency
    d_avg = data_in_range.iloc[:, 2].mean()  # averaging over dissipation
       # else:
    # #     print ("zero rows in range")
    norm_f = (dataset.iloc[:, 1] - f_avg) / 5 # 5 is the scaling for overtone 5
    norm_d = dataset.iloc[:, 2] - d_avg
    
    norm_table = pd.DataFrame(data={
        'time (min)': time_min,
        'f (Hz)': norm_f,
        'd (ppm)': norm_d
    })

    return norm_table

# # normalize the dataset that was selected for 
# 
# normalized_data(filter_data(file_d["n_20221006_slb.csv"], 5, 1))
# norm_data = normalized_data(filter_c)
print(ave_slb_baseline(filter_data(qcm_dir['20230718_qcm_sept_slb.csv'], 5, 1)))


      time (min)     f (Hz)   d (ppm)
0           0.00  -0.631021  0.053867
1           0.03  -0.553655  0.010496
2           0.06  -0.556431  0.038148
3           0.09  -0.543055  0.064085
4           0.12  -0.643289  0.050017
...          ...        ...       ...
9337      308.24 -28.457011  1.812380
9338      308.27 -28.387018  1.737219
9339      308.30 -28.459543  1.831880
9340      308.34 -28.467026  1.813322
9341      308.37 -28.455242  1.786572

[9342 rows x 3 columns]
