# Biolector Data Processor

Upload raw data files to the [Upload Raw Data Here](http://preston.zymergen.net:8892/tree/biolector_data_processor/Upload%20Raw%20Data%20Here) folder, then run the cell below.

In [None]:
import csv
import os
import pandas as pd
import numpy as np
import ipywidgets as widgets

# List the uploaded files, hiding Jupyter's checkpoint folder.
# os.listdir() returns entries in arbitrary order, so sort them to give the
# picker a stable, predictable ordering from one run to the next.
file_options = sorted(
    entry
    for entry in os.listdir('Upload Raw Data Here')
    if entry != '.ipynb_checkpoints'
)

# Multi-select list box showing ten rows at a time; the chosen file names
# are exposed as the tuple ``files.value``.
files = widgets.SelectMultiple(
    options=file_options,
    rows=10,
    description='Select File(s): ',
    disabled=False,
)

# Render the widget as this cell's output.
display(files)

## Once you have selected your file(s), go to the "Cell" menu above and select "Run All Below".

Processed data files can be downloaded from the [Download Processed Data Here](http://preston.zymergen.net:8892/tree/biolector_data_processor/Download%20Processed%20Data%20Here) folder.

In [None]:
# Snapshot the widget's current selection into a plain list of file names.
file_names = [*files.value]

In [None]:
def process_biolector_data(file_name):
    """Reshape one raw workbook into a long, per-well/per-timepoint table.

    Reads the ``raw_data`` sheet of ``file_name`` (skipping its first 22
    rows), then reorganises it so that each output row is one
    (well, description, time point) combination and each measurement
    channel is its own column.

    Parameters:
        file_name: Path of the workbook to read; it is passed through
            ``str()`` before being handed to ``pandas.read_excel``.

    Returns:
        A DataFrame whose leading columns are ``well_name``,
        ``description_1`` .. ``description_4``, ``time_h``,
        ``actual_temp_C``, ``actual_humidity_percent``,
        ``actual_O2_percent`` and ``actual_CO2_percent``, followed by one
        column per distinct channel value (integer channel labels are
        renamed to ``channel_<n>``).

    Notes:
        The description field must split on ``'|'`` into at least four
        parts, because ``description_1`` .. ``description_4`` are accessed
        unconditionally below.
        NOTE(review): the row/column offsets (22, 3, 4, 288) are tied to
        the layout of the instrument export — confirm against a sample file.
    """
    
    # No header row is used, so rows and columns both get 0-based integer
    # labels; the first 22 sheet rows are skipped.
    raw_data = pd.read_excel(str(file_name), sheet_name='raw_data', index_col=None, header=None, skiprows=22)

    # Transpose so that each original sheet column becomes a row and each
    # original sheet row becomes a column.
    rdt = raw_data.T
    # Drop the first four transposed rows (labels 0-3) and the first three
    # transposed columns, leaving only the block of readings.
    d1 = rdt.loc[4:]
    d2 = d1[d1.columns[3:]]
    # Transposed rows 0, 2 and 3 supply the labels for each reading column
    # (from position 3 onward, matching the column slice above).
    well_names = list((rdt.loc[0]).loc[3:])
    descriptions = list((rdt.loc[2]).loc[3:])
    channel = list((rdt.loc[3]).loc[3:])

    # Three parallel label lists become a three-level column index.
    d2.columns = [well_names, descriptions, channel]
    d2.columns.names = ['well_name', 'description', 'channel']

    # Build the per-time-point side table: transposed column 2 first
    # (named time_h below), followed by the last four transposed columns.
    cols = rdt.columns.tolist()
    new_cols = cols[-4:]
    new_cols.append(cols[2])
    # Rotate the appended column from the end to the front.
    new_cols = new_cols[-1:] + new_cols[:-1]
    actuals = rdt[new_cols].loc[4:]
    actuals.columns = ['time_h', 'ACT.TEMP', 'ACT.HUMIDITY', 'ACT.O2', 'ACT.CO2']
    actuals.reset_index(drop=True, inplace=True)
    # NOTE(review): a whole DataFrame is assigned as the index here; the
    # code further down treats every index entry as a 5-item record
    # (see ``temp_index.tolist()``). How pandas builds an index from a
    # DataFrame may differ between versions — confirm on the deployed one.
    d2.index = actuals
    d2.index.name = 'temp_index'
    # Keep only the first 288 reading columns.
    # NOTE(review): 288 is hard-coded; presumably wells x channels for the
    # plate layout, and presumably what excludes the trailing four
    # "actual" columns used above — confirm.
    d2 = d2[d2.columns[:288]]

    # Wide -> long: one row per (well, description, channel, time point).
    d3 = d2.unstack().reset_index()
    d3.columns = ['well_name', 'description', 'channel', 'temp_index', 'vals']
    # Split the '|'-delimited description into description_1, description_2, ...
    d4 = d3.description.str.split('|', expand=True).rename(columns = lambda x: "description_" + str(x+1))
    d5 = d3.join(d4)
    # Expand each temp_index record into its five named columns.
    temp_index = pd.DataFrame(d5.temp_index.tolist(), index=d3.index)
    temp_index.columns = ['time_h', 'actual_temp_C', 'actual_humidity_percent', 'actual_O2_percent', 'actual_CO2_percent']
    d6 = d5.join(temp_index)

    # Pack every identifying field into a single tuple so it can serve as
    # the one-column pivot key below.
    d6['index_tuple'] = list(zip(d6.well_name,
                                 d6.description_1,
                                 d6.description_2,
                                 d6.description_3,
                                 d6.description_4,
                                 d6.time_h, 
                                 d6.actual_temp_C, 
                                 d6.actual_humidity_percent, 
                                 d6.actual_O2_percent, 
                                 d6.actual_CO2_percent))

    # Long -> wide on channel: one column per channel value, keyed by the tuple.
    d7 = d6.pivot(index='index_tuple', columns='channel', values=['vals'])

    # Flatten the ('vals', channel) column index to plain channel labels,
    # prefixing integer labels with 'channel_'.
    # NOTE(review): the exact-type comparison only matches built-in ``int``
    # labels; any other numeric type is left unchanged.
    d7_cols = list(d7.columns.levels[1])
    for x in range(len(d7_cols)):
        if type(d7_cols[x]) == int:
            d7_cols[x] = 'channel_' + str(d7_cols[x])
        else:
            continue

    d7.columns = d7_cols

    # Unpack the tuple key back into its ten named columns, keeping the
    # tuple itself as a join key.
    d8 = d7.reset_index()
    index_tuple = pd.DataFrame(d8.index_tuple.tolist(), index=d8.index_tuple)
    index_tuple.reset_index(inplace=True)
    index_tuple.columns = ['index_tuple',
                           'well_name',
                           'description_1',
                           'description_2',
                           'description_3',
                           'description_4',
                           'time_h',
                           'actual_temp_C',
                           'actual_humidity_percent',
                           'actual_O2_percent',
                           'actual_CO2_percent']

    # Re-attach the channel columns to the unpacked fields, then discard
    # the temporary tuple key.
    new_data = index_tuple.merge(d8, on='index_tuple').drop(['index_tuple'], axis=1)
    return new_data

In [None]:
# Convert every selected workbook to a CSV in the download folder.
#
# Explicit joined paths are used rather than os.chdir() so that a failure
# while processing one file cannot leave the notebook's working directory
# changed, which would break every later run of these cells.
upload_dir = 'Upload Raw Data Here'
download_dir = 'Download Processed Data Here'

for x in file_names:
    new_data = process_biolector_data(os.path.join(upload_dir, x))

    # Swap whatever extension the upload has (.xlsx, .xls, ...) for .csv;
    # trimming a fixed number of characters is only right for ".xlsx".
    csv_name = os.path.splitext(x)[0] + '.csv'
    new_data.to_csv(os.path.join(download_dir, csv_name), index=False)