In [55]:
from modules import data

# Input file and quality threshold used by this cell.
EXPOSURE_JSON = './data/json/exp.json'
MIN_QUALITY = 90

records = data.from_json(EXPOSURE_JSON)
# To restrict the analysis to a single trial, filter `records` on
# `record.trial` here (e.g. keep only `record.trial == 1`).
# NOTE(review): `min_quality` presumably filters each record by match quality
# in place (its return value is ignored) - confirm in `modules.data`.
for record in records:
    record.min_quality(MIN_QUALITY)

In [56]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

def to_df(records, sig_path='./data/sigs.xlsx', scaler=None) -> pd.DataFrame:
    """
    Build a scaled sample-by-compound feature table from `records`.

    Each row is one record, indexed by
    ``'<envr> <medium> <species> <time>h <trial>'``. Each feature column is one
    compound named in the first column of the spreadsheet at `sig_path`; its
    value is the record's summed `Area` for that compound (0 when the compound
    is absent), scaled afterwards. Feature columns are sorted by name and the
    label columns `Envr`, `Medium` and `Species` are appended last.

    Parameters
    ----------
    records : iterable
        Objects exposing `envr`, `medium`, `species`, `time`, `trial` and a
        `data` DataFrame containing at least `Compound` and `Area` columns.
    sig_path : str, optional
        Excel file whose first column lists the compounds to keep.
    scaler : object with a `transform` method, optional
        An already fitted scaler. When omitted, a new `StandardScaler` is
        fitted on this table, which is the original behaviour.

    Returns
    -------
    pd.DataFrame
        One row per record: scaled feature columns followed by the labels.

    Raises
    ------
    ValueError
        If `records` is empty.
    """
    # `records` is walked once for the features and three more times for the
    # labels; materialise it so a generator cannot be exhausted in between.
    records = list(records)
    if not records:
        raise ValueError('to_df() requires at least one record')

    synonyms = {
        'Ethyl Alcohol': 'Ethanol',
        'Limonene': 'D-Limonene',
    }

    per_record = []
    for record in records:
        column_name = f'{record.envr} {record.medium} {record.species} {record.time}h {record.trial}'
        # Only `Compound` and `Area` are needed, so select them directly rather
        # than dropping a hard-coded list of every other column.
        areas = (
            record.data.set_index('Compound')['Area']
            .rename(index=synonyms)   # merge synonymous compound names
            .groupby(level=0)         # then sum the areas that now share a name
            .sum()
            .rename(column_name)
        )
        per_record.append(areas)

    # Concatenate once instead of growing a DataFrame inside the loop.
    output = pd.concat(per_record, axis=1).fillna(0.0).T

    # Filtering out significant compounds
    sig_list = pd.read_excel(sig_path).iloc[:, 0].to_list()
    output = output.reindex(columns=sig_list, fill_value=0).sort_index(axis=1)

    # Standard Scaling
    # NOTE(review): fitting a fresh scaler here scales the data with its own
    # statistics. If the downstream model was trained on features scaled with a
    # different scaler, pass that fitted scaler via `scaler` - TODO confirm.
    if scaler is None:
        scaled = StandardScaler().fit_transform(output)
    else:
        scaled = scaler.transform(output)
    output = pd.DataFrame(scaled, index=output.index, columns=output.columns)

    output['Envr'] = [record.envr for record in records]
    output['Medium'] = [record.medium for record in records]
    output['Species'] = [record.species for record in records]

    return output

# Build the scaled feature/label table for every loaded record and display it.
records_df = to_df(records)
records_df

Compound,"1,1'-Biphenyl, 2,2',5,5'-Tetramethyl","1,1-Bis(P-Tolyl)Ethane","1-Hexanol, 2-Ethyl",1-Octanol,1R-.Alpha.-Pinene,"2,4-Dimethyl-1-Heptene",2-Octanone,"2-Propenoic Acid, 2-Ethylhexyl Ester","2-Tert-Butyl-3,4,5,6-Tetrahydropyridine","2H-Pyran, 2-Ethoxy-3,4-Dihydro",...,Tetradecanoic Acid,Toluene,"Undecane, 2,6-Dimethyl","Undecane, 2,8-Dimethyl","Undecane, 2-Methyl","Undecane, 3-Methyl","Undecane, 4-Methyl",Envr,Medium,Species
Ae LB BS1 0h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,0.269411,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,1.045143,0.0,-0.293811,Ae,LB,BS1
Ae LB BS1 12h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,0.023551,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1
Ae LB BS1 18h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,-0.577697,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1
Ae LB BS1 24h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,-0.577697,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1
Ae LB BS1 30h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,0.697515,0.0,0.0,0.0,...,0.0,-0.099504,0.168914,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
An LB SA 12h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.302707,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA
An LB SA 18h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.181110,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA
An LB SA 24h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.169370,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA
An LB SA 30h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.947549,0.0,0.0,0.0,...,0.0,-0.099504,-0.271708,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA


In [57]:
from sklearn.preprocessing import LabelEncoder

# Encoder used to turn predicted class indices back into `Envr` strings.
# NOTE(review): the classes (and their order) are derived from the labels that
# happen to be present in `records_df`, not from the labels the model was
# trained on. If this dataset lacks a class, the index-to-label mapping will be
# shifted or incomplete - confirm against the training label set.
envr_encoder = LabelEncoder().fit(records_df['Envr'])
                                    

In [58]:
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

mlp = load_model('./data/ffn/multitask.keras')

# Build the model input from the feature columns only. Besides the true labels,
# the prediction columns written at the end of this cell are excluded too, so
# re-running the cell does not feed string columns to `mlp.predict`.
label_cols = ['Envr', 'Medium', 'Species']
prediction_cols = ['Predicted Envr.', 'Predicted Medium', 'Predicted Species']
feature_df = (
    records_df
    .drop(columns=label_cols)                         # must exist, as before
    .drop(columns=prediction_cols, errors='ignore')   # only present on a re-run
)
mlp_pred = mlp.predict(feature_df.values)

# Class names per output head, indexed by the predicted class id.
# NOTE(review): the order of these lists must match the class order used when
# the model was trained - TODO confirm.
media = ['LB', 'MSA', 'TSA', 'TSB']
species = ['B. subtilis', 'Control', 'E. coli', 'S. aureus']

# `mlp_pred` is indexed per output head: 0 -> environment, 1 -> medium, 2 -> species.
records_df['Predicted Envr.'] = envr_encoder.inverse_transform(np.argmax(mlp_pred[0], axis=1))
records_df['Predicted Medium'] = [media[idx] for idx in np.argmax(mlp_pred[1], axis=1)]
records_df['Predicted Species'] = [species[idx] for idx in np.argmax(mlp_pred[2], axis=1)]



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


In [59]:
# Write `records_df` as it currently stands (including the prediction columns
# once the previous cell has run) to an Excel file, then display it.
records_df.to_excel('./data/exposure.xlsx')
records_df

Compound,"1,1'-Biphenyl, 2,2',5,5'-Tetramethyl","1,1-Bis(P-Tolyl)Ethane","1-Hexanol, 2-Ethyl",1-Octanol,1R-.Alpha.-Pinene,"2,4-Dimethyl-1-Heptene",2-Octanone,"2-Propenoic Acid, 2-Ethylhexyl Ester","2-Tert-Butyl-3,4,5,6-Tetrahydropyridine","2H-Pyran, 2-Ethoxy-3,4-Dihydro",...,"Undecane, 2,8-Dimethyl","Undecane, 2-Methyl","Undecane, 3-Methyl","Undecane, 4-Methyl",Envr,Medium,Species,Predicted Envr.,Predicted Medium,Predicted Species
Ae LB BS1 0h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,0.269411,0.0,0.0,0.0,...,-0.099504,1.045143,0.0,-0.293811,Ae,LB,BS1,An,TSB,Control
Ae LB BS1 12h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,0.023551,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1,Ae,LB,B. subtilis
Ae LB BS1 18h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,-0.577697,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1,An,TSB,B. subtilis
Ae LB BS1 24h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,-0.577697,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1,Ae,TSA,B. subtilis
Ae LB BS1 30h 1,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,0.697515,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,Ae,LB,BS1,Ae,TSA,Control
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
An LB SA 12h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.302707,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA,An,LB,Control
An LB SA 18h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.181110,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA,Ae,TSB,E. coli
An LB SA 24h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.169370,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA,An,TSA,B. subtilis
An LB SA 30h 4,0.0,0.0,-0.245523,0.0,-0.136803,-0.2308,1.947549,0.0,0.0,0.0,...,-0.099504,-0.352940,0.0,-0.293811,An,LB,SA,An,TSA,Control
