In [1]:
import os
import sys
import json
import numpy as np
import pandas as pd
import xarray as xr
import math

In [2]:
from typing import List

In [4]:
def all_of_strain(strain_name: str, files: List[str]) -> pd.DataFrame:
    """Collect the rows for one strain from several CSV files.

    Every file is read with its first column as the index, reduced to the
    rows whose ``strain_name`` column equals *strain_name*, and the reduced
    frames are concatenated into a single frame.

    Args:
        strain_name: Value to match against the ``strain_name`` column.
        files: CSV paths (anything ``pd.read_csv`` accepts) to scan. The
            list is only read; it is never modified.

    Returns:
        The matching rows of all files. Rows from the last entry of *files*
        come first, followed by the other files in list order. Row labels
        are taken from the files unchanged, so the resulting index can
        contain duplicate labels.

    Raises:
        IndexError: If *files* is empty.
    """
    # Work on a private copy so the caller's list keeps all of its entries.
    paths = list(files)
    if not paths:
        raise IndexError("all_of_strain() needs at least one file to read")

    # The last file is read first; this keeps the row order callers have
    # been getting from this function.
    read_order = [paths[-1], *paths[:-1]]

    matching_frames = []
    for path in read_order:
        frame = pd.read_csv(path, index_col=0)
        # Keep only the filtered rows so the full file can be released.
        matching_frames.append(frame[frame['strain_name'] == strain_name])

    # One concat over all pieces instead of re-copying a growing frame.
    return pd.concat(matching_frames)

In [5]:
def _int_component(column: pd.Series, sep: str, position: int) -> np.ndarray:
    """Return ``int(value.split(sep)[position])`` for each value of *column*."""
    # An explicit dtype keeps the result an integer array even when the
    # column is empty.
    return np.array([int(value.split(sep)[position]) for value in column], dtype=int)


def canonical_data_frame(df: pd.DataFrame) -> pd.DataFrame:
    """Derive helper columns and re-index a frame of measurements.

    The input frame is not modified; all work happens on a copy. Its row
    labels may contain duplicates, and it may be empty.

    Columns written on the copy:

    * ``media``: ``growth_media_1`` as a categorical whose categories are
      the sorted unique values of that column.
    * ``inc_temp_degrees``: integer taken from the second ``_``-separated
      piece of ``inc_temp``.
    * ``inc_time_1_hrs`` / ``inc_time_2_hrs``: integer taken from the part
      of ``inc_time_1`` / ``inc_time_2`` before the first ``:``.
    * ``well``: the text after the last ``_`` in ``id``.
    * ``replicate``: group number of each ``(lab_id, plate_id, well)``
      combination.
    * ``event``: running count of the row within its
      ``(lab_id, plate_id, well)`` group.

    The ``lab`` and ``plan`` columns are then dropped.

    Args:
        df: Frame providing the columns ``strain_name``, ``growth_media_1``,
            ``inc_temp``, ``inc_time_1``, ``inc_time_2``, ``id``, ``lab_id``,
            ``plate_id``, ``od``, ``lab`` and ``plan``.

    Returns:
        A new frame indexed by ``strain_name``, ``inc_temp_degrees``,
        ``inc_time_2_hrs``, ``od``, ``lab_id``, ``plate_id``, ``well``,
        ``replicate`` and ``event``.
    """
    df = df.copy()

    media_categories = sorted(df['growth_media_1'].unique())
    df['media'] = df['growth_media_1'].astype(pd.CategoricalDtype(media_categories))

    # Whole-column assignment: the scalar accessor ``.at`` is not meant for
    # slices and fails when the row labels are not unique.
    df['inc_temp_degrees'] = _int_component(df['inc_temp'], "_", 1)
    df['inc_time_1_hrs'] = _int_component(df['inc_time_1'], ":", 0)
    df['inc_time_2_hrs'] = _int_component(df['inc_time_2'], ":", 0)
    df['well'] = [value.split("_")[-1] for value in df['id']]

    # Both numberings come from the same grouping, so compute them before
    # adding either result column.
    per_well = df.groupby(['lab_id', 'plate_id', 'well'])
    replicate = per_well.ngroup()
    event = per_well.cumcount()
    df['replicate'] = replicate
    df['event'] = event

    df = df.drop(columns=['lab', 'plan'])
    return df.set_index(['strain_name', 'inc_temp_degrees', 'inc_time_2_hrs', 'od', 'lab_id', 'plate_id', 'well', 'replicate', 'event'])

In [6]:
def files_with_strain(strain_name: str, filename: str = 'accuracy_set.csv') -> List[str]:
    """List the distinct ``lab_id`` values recorded for a strain.

    Args:
        strain_name: Value to match against the ``strain_name`` column. It
            is compared as plain data, so quotes or other special characters
            in the name are handled like any other text.
        filename: CSV file with at least the columns ``strain_name`` and
            ``lab_id``.

    Returns:
        The unique ``lab_id`` values of the matching rows, in order of first
        appearance; an empty list when no row matches.
        NOTE(review): the function name suggests these values are file
        names or paths -- confirm against the callers.
    """
    table = pd.read_csv(filename)
    # A boolean mask avoids building a query expression out of the strain
    # name, which broke on names containing a double quote.
    matching = table[table['strain_name'] == strain_name]
    return matching['lab_id'].unique().tolist()