In [3]:
import os
import pandas as pd

In [2]:
def get_file_paths_by_extension(directory, extension):
    """Return the paths of all files under ``directory`` with the given extension.

    The directory is walked recursively with ``os.walk``.

    Parameters
    ----------
    directory : str
        Root folder to search.
    extension : str
        File extension, with or without the leading dot (``'asc'`` and
        ``'.asc'`` are equivalent). An empty string matches every file.

    Returns
    -------
    list of str
        Paths (``root`` joined with the file name) of the matching files, in
        the order ``os.walk`` yields them.
    """
    # Without the leading dot, 'asc' would also match names such as 'basc'
    # that merely end in the same letters.
    if extension and not extension.startswith('.'):
        suffix = '.' + extension
    else:
        suffix = extension

    file_paths = []

    # Recursively traverse the directory and its subdirectories
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(suffix):
                file_paths.append(os.path.join(root, file))

    return file_paths

# Folder to scan, and the extension to look for (e.g., 'txt', 'csv', 'pdf', etc.)
directory_path = '$base_dir$'
file_extension = 'asc'

# Collect every file below the folder that carries that extension
file_paths = get_file_paths_by_extension(
    directory=directory_path,
    extension=file_extension,
)

# List the matches, one path per line
for file_path in file_paths:
    print(file_path)

In [4]:

# Base directory
base_directory = '$base_dir$'

# Columns of the per-experiment table, in the order they are written to CSV
DATA_COLUMNS = [
    'SweepNumber', 'SweepTime(s)', 'SweepTime(24h)', 'Experiment', 'Sweep_Index',
    'Time[s](Within Sweep)', 'Time[s](Total)', 'I-mon[A]',
    'Exp_Conditions', 'Molecule_ID', 'Experiment_ID',
]


def new_data_store():
    """Return a fresh dict mapping every column in DATA_COLUMNS to an empty list."""
    return {column: [] for column in DATA_COLUMNS}


def split_experiment_path(folder_path):
    """Split a folder path into (molecule_id, Exp_Conditions, experiment_id).

    The three values are the last three components of ``folder_path``.
    Spaces in the conditions component are replaced with underscores.

    Raises
    ------
    ValueError
        If the path has fewer than three components.
    """
    # normpath + os.sep handles both '/' and '\\' instead of assuming Windows paths
    components = os.path.normpath(folder_path).split(os.sep)[-3:]
    if len(components) != 3:
        raise ValueError(
            f"Expected at least three path components in {folder_path!r}"
        )
    molecule_id, exp_conditions, experiment_id = components
    # str.replace returns a new string; the result must be kept
    exp_conditions = exp_conditions.replace(' ', '_')
    return molecule_id, exp_conditions, experiment_id


def parse_asc_file(file_path, data, experiment, molecule_id, exp_conditions, experiment_id):
    """Append the data rows of one file to ``data`` (modified in place).

    Lines are handled as follows:
    * ``Sweep_...`` lines are split on commas; the sweep number comes from the
      fourth ``_``-separated piece of the first field, the sweep time from the
      second field, and the 24h time string from the third.
    * A line starting with ``Online`` stops the parsing of this file.
    * Lines starting with a digit are data rows ``index, time, I-mon``; each row
      is stored together with the most recent sweep header of the same file.
    * Every other line is ignored.

    Data rows that appear before any sweep header in the file are skipped,
    because there is no sweep to attach them to.
    """
    # Reset per file so a header from a previous file is never reused
    current_sweep = None

    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()

            # Extract sweep information
            if line.startswith('Sweep_'):
                parts = line.split(',')
                current_sweep = {
                    'SweepNumber': int(parts[0].split('_')[3]),
                    'SweepTime(s)': float(parts[1]),
                    'SweepTime(24h)': parts[2].strip(),
                }

            # 'Online' marks the endpoint for this file type; may need to be changed
            elif line.startswith('Online'):
                print('Last run at ' + str(current_sweep))
                break

            # Extract data rows
            elif line and line[0].isdigit():
                if current_sweep is None:
                    continue
                parts = line.split(',')
                index, time_in_sweep, i_mon = int(parts[0]), float(parts[1]), float(parts[2])

                data['Sweep_Index'].append(index)
                data['Time[s](Within Sweep)'].append(time_in_sweep)
                data['Time[s](Total)'].append(time_in_sweep + current_sweep['SweepTime(s)'])
                data['I-mon[A]'].append(i_mon)
                data['SweepNumber'].append(current_sweep['SweepNumber'])
                data['SweepTime(s)'].append(current_sweep['SweepTime(s)'])
                data['SweepTime(24h)'].append(current_sweep['SweepTime(24h)'])
                data['Experiment'].append(experiment)
                data['Exp_Conditions'].append(exp_conditions)
                data['Molecule_ID'].append(molecule_id)
                data['Experiment_ID'].append(experiment_id)


def build_experiment_frame(data):
    """Turn a filled data store into a DataFrame with a relative-time column.

    'Time[s](Relative to experiment start)' is 'Time[s](Total)' minus the
    smallest 'Time[s](Total)' within the same 'Experiment' group.
    """
    frame = pd.DataFrame(data)
    start_times = frame.groupby('Experiment')['Time[s](Total)'].transform('min')
    frame['Time[s](Relative to experiment start)'] = frame['Time[s](Total)'] - start_times
    return frame


# Initialize lists to store data
data = new_data_store()

# Traverse through the base directory and process each folder
for root, dirs, files in os.walk(base_directory):
    for folder_name in dirs:
        folder_path = os.path.join(root, folder_name)
        print(folder_path)

        # Only folders that directly contain .asc files are experiments;
        # skipping the rest avoids parsing with missing or stale metadata.
        asc_files = [
            f for f in os.listdir(folder_path)
            if f.endswith('.asc') and os.path.isfile(os.path.join(folder_path, f))
        ]
        if not asc_files:
            continue

        # Extract molecule_id, Exp_Conditions and experiment_id from folder structure
        molecule_id, Exp_Conditions, experiment_id = split_experiment_path(folder_path)
        print('molecule_id =',molecule_id,' conc =', Exp_Conditions,'experiment_id=',experiment_id)

        # Process each .asc file in the folder
        for file_name in asc_files:
            parse_asc_file(
                os.path.join(folder_path, file_name),
                data, folder_name, molecule_id, Exp_Conditions, experiment_id,
            )

        # Nothing parsed: start the next folder with a clean store
        if not data['Sweep_Index']:
            data = new_data_store()
            continue

        data_df = build_experiment_frame(data)

        # Reset the data for the next folder's dataframe
        data = new_data_store()

        processed_data_path = folder_path.replace("data_curated_for_analysis", "processed_data")
        print(processed_data_path)
        os.makedirs(processed_data_path, exist_ok=True)
        # os.path.join instead of a literal '\' inside the f-string
        # (an invalid escape sequence and a Windows-only separator)
        output_name = f"molecule_id_{molecule_id}_exp_cond_{Exp_Conditions}_exp_{experiment_id}.csv"
        data_df.to_csv(os.path.join(processed_data_path, output_name), encoding='utf-8', index=False)
            
        
        
