In [1]:
import pandas as pd
import numpy as np
from copy import deepcopy
from glv_inference import compute_ddt_log_phi, generate_XY_matrics, write_stan_input_file, parse_stan_output

# Generate input files for the Stan program (mouse experiments under inulin intervention)

In [2]:
# Sample metadata: discard the two unused columns, assign fixed column names
# (this relies on exactly four columns remaining, in this order), then index by sample id.
meta_raw = pd.read_csv('meta_data.csv')
df_meta = meta_raw.drop(columns=['Vendor','RandomizedGroup'])
df_meta.columns = ['sample_id','subject_id','time_point','group']
df_meta = df_meta.set_index('sample_id')

# Per-character substitutions applied to taxon names, because the Stan program
# does not allow special characters. Each character is mapped independently;
# none of the replacement strings contains a character that is itself mapped.
# NOTE(review): the '(' replacement has no trailing underscore, unlike every
# other entry. It is kept as-is in case downstream parsing depends on this
# exact string -- confirm before normalizing.
stan_safe_table = str.maketrans({
    '/': '_slash_',
    ' ': '_space_',
    '[': '_leftsquarebracket_',
    ']': '_rightsquarebracket_',
    '-': '_dash_',
    '.': '_dot_',
    '(': '_leftroundbracket',
    ')': '_rightroundbracket_',
})

# Row mask selecting only the samples from the Control and Inulin groups.
# It does not depend on the abundance table, so it is computed once.
in_control_or_inulin = df_meta['group'].isin(['Control', 'Inulin'])

# Repeat the same pipeline for each abundance table (species level, then OTU level).
for seq in ['species','otu']:

    # Abundance table for this level; the first CSV column becomes the index.
    df_abun = pd.read_csv("quantitative_abundance_%s.csv" % seq, index_col=0)

    # Rewrite every column (taxon) name using the substitution table above.
    df_abun.columns = [taxon_name.translate(stan_safe_table) for taxon_name in df_abun.columns]

    # Log-derivatives, computed from the Control/Inulin samples only.
    df_selected_meta = df_meta[in_control_or_inulin]
    df_output = compute_ddt_log_phi(df_selected_meta, df_abun)

    # Regression matrices X and Y, with 'Control' as the reference group.
    (Xmat,
     Ymat,
     simulated_samples,
     simulated_taxa,
     simulated_groups) = generate_XY_matrics(df_output, reference_group='Control')

    # Write the Stan input files; the same prefix is passed for both leading arguments.
    prefix = 'inulin_'+seq
    write_stan_input_file(prefix, prefix, Xmat, Ymat, simulated_taxa, simulated_groups)