# ambr250 Data Export

This is a tool to reformat ambr250 data into an EDD-importable format.

# 1. Specify name of project

Enter the name of the ambr250 data folder below.

Example: for a folder named FCIC_AMBR_05, set the project_name variable to 'FCIC_AMBR_05'.

In [55]:
# Name of the ambr250 data folder; it is also used as the prefix of every
# line name and export file name.
project_name = 'FCIC_AMBR_05'

In [60]:
# Folder that holds one sub-folder per bioreactor (two-digit names such as '01').
data_directory = project_name + '/Data/00001/B/'
# Text file listing the parameters to export, one per line.
export_params_file = 'export_parameters.txt'
# JSON file mapping each parameter name to the regex that matches its data file name.
library_file = "param_library.json"
# strptime format used to parse the timestamps read from the data files.
datetime_format = "%Y-%m-%dT%H:%M:%S.%f"

# 2. Pick export parameters

Pick your desired export parameters and enter them into export_parameters.txt, one per line and spelled exactly as they appear in the list. A complete list of parameters can be found below:

In [61]:
# Load the parameter library and print every available parameter name,
# one per line, in sorted order.
with open(library_file) as library_handle:
    param_patterns = json.load(library_handle)

# Iterating a dictionary yields its keys, so it can be sorted directly.
sorted_params = sorted(param_patterns)
for param in sorted_params:
    print(param)

(1 per Total dry gas flow) (smoothed) (min per mL)
06a17834-55e3-453d-9642-69e05b5031d8.IsEnabled_SP
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.ActiveLoopIndex
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.ActiveLoopStringKey
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.IntegralError
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.IsEnabled_SP
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.LastDifferentialError
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.LastError
1f6daeba-0d4a-4cb9-bbfd-36f23e5ea028.LastError (raw)
5ceb56a6-95dd-4582-a154-1bfa70fb7a49_SP
Acid flow rate (mL per h)
Acid flow rate (raw) (mL per h)
Acid flow rate multiplier_SP
Acid flow rate_SP (mL per h)
Acid reference volume pumped (mL)
Acid volume pumped (mL)
Acid volume since reset (mL)
Acid volume since reset reference (mL)
Air (headspace) added flow_SP (mL per min)
Air (headspace) correction factor
Air (headspace) flow (mL per min)
Air (headspace) flow_SP (mL per min)
Air (headspace) mix_SP (%)
Air (headspace) primary flow_SP (mL per min)
Air (headspace) valve open 

# 3. Generate EDD Import Files

Go to Cell > Run All to generate the EDD import files for the chosen parameters.

In [None]:
import pandas as pd
import numpy as np
import os
import re
import json
import datetime

In [31]:
def write_to_csv(filename, patterns, earliest_time_in_microseconds):
    '''
    Exports the selected parameters of one bioreactor as EDD-importable .csv files.

    For every parameter listed in export_params_file, each .csv file in the
    bioreactor's folder whose name matches the parameter's regex is read and
    rewritten with the columns "Line Name", "Measurement Type", "Time", "Value"
    and "Units". "Time" is the number of seconds elapsed since the reference
    timestamp. The result is written to the current working directory as
    <project_name>_HT<n>_<parameter>.csv.

    Relies on the module-level settings data_directory, project_name,
    export_params_file and datetime_format.

    string filename: name of the bioreactor folder inside data_directory; it must be
        convertible with int() (e.g. '01' gives the worksheet name 'HT1')
    dictionary patterns: dictionary with pattern names and their corresponding regex patterns
    string earliest_time_in_microseconds: reference timestamp, in datetime_format,
        that all exported times are measured from

    Raises KeyError if an export parameter is not present in patterns.
    '''
    reactor_data = os.path.join(data_directory, filename)
    worksheet_name = 'HT' + str(int(filename))
    line_name = project_name + "_" + worksheet_name
    order = ["Line Name", "Measurement Type", "Time", "Value", "Units"]
    # The text inside the last pair of parentheses of a file name is used as the unit.
    unit_pattern = re.compile(r'\(([^)]+)\)')
    # The reference time is the same for every file, so parse it only once.
    earliest_time = datetime.datetime.strptime(earliest_time_in_microseconds, datetime_format)

    # Read the parameter list up front so the file handle is closed promptly.
    with open(export_params_file) as params_file:
        parameters = [line.rstrip('\n') for line in params_file]
    sheet_names = os.listdir(reactor_data)

    # Iterate through export parameters
    for parameter in parameters:
        # Blank lines (e.g. at the end of the file) are not parameters.
        if not parameter.strip():
            continue
        # Get pattern for export parameter from library
        try:
            pattern = patterns[parameter]
        except KeyError:
            raise KeyError("Export parameter %r from %s is not in the parameter library"
                           % (parameter, export_params_file)) from None
        measurement_type = parameter.rstrip()
        export_filename = line_name + "_" + measurement_type + ".csv"

        # Iterate through .csv files and check for pattern match
        for sheet_name in sheet_names:
            if not re.match(pattern, sheet_name):
                continue
            sheet_path = os.path.join(reactor_data, sheet_name)
            df = pd.read_csv(sheet_path, usecols=[0, 1], header=2)
            # Reformat column header
            df.rename(columns={"VariableKey": "Time"}, inplace=True)

            unit = unit_pattern.findall(sheet_name)
            units = unit[-1].strip() if unit else 'n/a'

            # The final character of each timestamp is dropped before parsing,
            # presumably because datetime_format cannot parse it -- TODO confirm.
            time_differences = [
                (datetime.datetime.strptime(stamp[:-1], datetime_format)
                 - earliest_time).total_seconds()
                for stamp in df["Time"]
            ]

            row_count = len(df)
            reformatted_df = pd.DataFrame(data={
                "Line Name": [line_name] * row_count,
                "Measurement Type": [measurement_type] * row_count,
                "Time": time_differences,
                "Value": df.iloc[:, 1],
                "Units": [units] * row_count,
            })
            reformatted_df = reformatted_df[order].dropna()

            # Export completed dataframe as .csv file
            # NOTE: the export name does not depend on the matched file, so if
            # several files match one parameter only the last one written is kept.
            reformatted_df.to_csv(export_filename, index=False)
            
def compile_bioreactors(directory):
    '''
    Iterates through HT1-HT12 and calls export function

    string directory: filepath to directory containing HT1-HT12 data.

    NOTE: write_to_csv resolves the bioreactor folders against the module-level
    data_directory, so directory is expected to be that same path.
    '''
    # The final character of the returned timestamp is dropped so that it can be
    # parsed with datetime_format by write_to_csv.
    earliest_time_in_microseconds = find_earliest_time(directory)[:-1]
    with open(library_file) as param_library:
        # Load parameter pattern library
        param_patterns = json.load(param_library)
    # Sorted so that bioreactors are always processed in the same order.
    for filename in sorted(os.listdir(directory)):
        # Match for bioreactor name (HT1-HT12): exactly two digits, so entries such
        # as '01_backup' are not passed on to int() inside write_to_csv.
        if re.fullmatch(r'\d{2}', filename):
            write_to_csv(filename, param_patterns, earliest_time_in_microseconds)
    print("All data exported.")

def find_earliest_time(directory, reactor='01'):
    '''
    Returns the smallest first-row timestamp among one bioreactor's .csv files.

    Only the first data row of each file is inspected; files without any data
    row are skipped. The values are compared as raw strings, so the result is
    the chronologically earliest one only if all timestamps share one sortable
    text format (assumed here -- TODO confirm against the data files).

    string directory: filepath to directory containing the bioreactor folders
    string reactor: name of the bioreactor folder to scan (defaults to '01')

    Raises ValueError if no file in the folder contains a data row.
    '''
    times = []
    reactor_data = os.path.join(directory, reactor)
    for sheet_name in os.listdir(reactor_data):
        sheet_path = os.path.join(reactor_data, sheet_name)
        # Only the first data row is needed, so do not parse the whole file.
        df = pd.read_csv(sheet_path, usecols=[0, 1], header=2, nrows=1)
        if df.empty:
            continue
        times.append(df.iloc[0, 0])
    if not times:
        raise ValueError("No timestamps found in " + reactor_data)
    return min(times)

In [19]:
# Export the chosen parameters for every bioreactor folder in the data directory.
compile_bioreactors(data_directory)

All data exported.
