<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Simulation-attempt-round-1" data-toc-modified-id="Simulation-attempt-round-1-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Simulation attempt round 1</a></span></li><li><span><a href="#Simulation-attempt-round-2" data-toc-modified-id="Simulation-attempt-round-2-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Simulation attempt round 2</a></span></li></ul></div>

In [1]:
import biom
from biom.util import biom_open
import numpy as np
import pandas as pd
import arviz as az
from util import extract_differentials, ranking, btest, read_kegg_dict
import dask
from dask.distributed import Client
from arviz.utils import Dask
%matplotlib inline

In [2]:
# Point dask's temporary/spill directory at the scratch volume.
dask.config.set(temporary_directory='/scratch')

# Have arviz route its computations through dask.
arviz_dask_kwargs = {"dask": "parallelized", "output_dtypes": [float]}
Dask.enable_dask(dask_kwargs=arviz_dask_kwargs)

# Distributed client: 20 workers with 2 threads each and a 20GB memory limit.
client = Client(n_workers=20, threads_per_worker=2, memory_limit="20GB")

# Simulation attempt round 1

In [3]:
# Load the stored posterior (netCDF) from the Kang2017 directory.
kang_directory = '../sfari/data/sra/Kang2017'
posterior_file = kang_directory + '/week0_ogu/differentials-v8.nc'
posterior = az.from_netcdf(posterior_file)

In [4]:
# Display the loaded posterior object to inspect its groups and variables.
posterior

In [5]:
# Use one posterior-predictive draw (chain 0, draw 0) as the simulated count
# table. Note: despite its name, `med_table` is a single draw, not a median.
single_draw = posterior['posterior_predictive']['y_predict'].loc[dict(chain=0, draw=0)]
long_counts = single_draw.to_dataframe()['y_predict'].reset_index()

# Reshape to features x samples, then round to integer counts.
med_table = (
    pd.pivot(data=long_counts, index='features', columns='samples')
    .round()
    .astype(np.int64)
)

with biom_open('../sfari/data/sra/Combined/simulated.biom', 'w') as f:
    # Pivoting without `values=` yields two-level columns (value name, sample);
    # only the sample level is wanted as BIOM sample IDs.
    sample_ids = list(med_table.columns.get_level_values(1))
    t = biom.Table(med_table.values, med_table.index, sample_ids)
    t.to_hdf5(f, 'simulated')

In [6]:
# Total simulated count per feature (row sums across all sample columns).
med_table.sum(axis=1)

features
1000569       1456
1002367      75190
1006000       1620
1007096      46805
1008459         57
            ...   
997894      408891
999386          49
999411         224
999413       30389
999419     1044673
Length: 878, dtype: int64

# Simulation attempt round 2

In [7]:
from q2_matchmaker._stan import _case_control_negative_binomial_sim

In [8]:
# Collapse the chain and draw dimensions to per-parameter posterior means.
params = posterior.mean(dim=['chain', 'draw'])
posterior_means = params['posterior']

# Convert each parameter needed by the simulator into a long-form DataFrame.
diff, disp, batch_mu, batch_disp, control_mu, control_sigma = (
    posterior_means[name].to_dataframe()
    for name in (
        'diff', 'disp', 'batch_mu', 'batch_disp', 'control_mu', 'control_sigma',
    )
)

In [9]:
def _pivot_by_feature(name, dim):
    """Return posterior-mean parameter `name` as a features x `dim` table.

    The frame is rebuilt from `params` on every call rather than from the
    variable being assigned, so this cell can be re-run safely. Pivoting an
    already-pivoted frame would fail because `dim` is no longer a column.
    """
    long_form = params['posterior'][name].to_dataframe().reset_index()
    return pd.pivot(data=long_form, index='features', columns=dim)


# Wide tables: one row per feature, one column per entry of the extra dimension.
disp = _pivot_by_feature('disp', 'disp_dim_0')
batch_mu = _pivot_by_feature('batch_mu', 'batch_mu_dim_0')
batch_disp = _pivot_by_feature('batch_disp', 'batch_disp_dim_0')

In [10]:
# Inspect the posterior group after averaging over chain and draw.
params['posterior']

In [18]:
# Simulation dimensions passed positionally to the simulator.
# NOTE(review): presumably n = samples, b = batches, d = features (878 matches
# the feature count printed for `med_table`) -- confirm against
# `_case_control_negative_binomial_sim`.
n, b, d = 754, 8, 878

# Posterior-mean parameters; the pivoted tables are transposed before passing
# and the control parameters are flattened to 1-D.
sim_params = {
    'diff': diff.values,
    'disp': disp.values.T,
    'batch_mu': batch_mu.values.T,
    'batch_disp': batch_disp.values.T,
    'control_mu': control_mu.values.ravel(),
    'control_sigma': control_sigma.values.ravel(),
}

table, md, new_diff = _case_control_negative_binomial_sim(
    n, b, d, depth=10000, state=0, params=sim_params
)

In [19]:
# Observation IDs come from the posterior's `features` coordinate; sample IDs
# come from the simulated table's own index, not the posterior's `samples`.
feature_ids = posterior['posterior']['features']
sample_ids = table.index

with biom_open('../sfari/data/sra/Combined/simulated3.biom', 'w') as f:
    # `table` is transposed before being handed to BIOM.
    t = biom.Table(table.values.T, feature_ids, sample_ids)
    t.to_hdf5(f, 'simulated')

In [20]:
# Save the simulated sample metadata next to the BIOM table as tab-separated text.
md.to_csv('../sfari/data/sra/Combined/simulated3_metadata.txt', sep='\t')

In [None]:
# Display the metadata returned by the simulator.
md

In [None]:
import seaborn as sns

In [None]:
# Distribution of the posterior-mean `diff` values passed to the simulator.
# NOTE(review): `sns.distplot` is deprecated since seaborn 0.11; switch to
# `sns.histplot(..., kde=True)` once the installed seaborn version is confirmed.
ax = sns.distplot(diff.values.ravel())
ax.set(xlabel='diff', ylabel='density', title='Posterior mean: diff');

In [None]:
# Distribution of the posterior-mean `disp` values passed to the simulator.
# NOTE(review): `sns.distplot` is deprecated since seaborn 0.11; switch to
# `sns.histplot(..., kde=True)` once the installed seaborn version is confirmed.
ax = sns.distplot(disp.values.ravel())
ax.set(xlabel='disp', ylabel='density', title='Posterior mean: disp');

In [None]:
# Distribution of the posterior-mean `batch_mu` values passed to the simulator.
# NOTE(review): `sns.distplot` is deprecated since seaborn 0.11; switch to
# `sns.histplot(..., kde=True)` once the installed seaborn version is confirmed.
ax = sns.distplot(batch_mu.values.ravel())
ax.set(xlabel='batch_mu', ylabel='density', title='Posterior mean: batch_mu');

In [None]:
# Distribution of the posterior-mean `batch_disp` values passed to the simulator.
# NOTE(review): `sns.distplot` is deprecated since seaborn 0.11; switch to
# `sns.histplot(..., kde=True)` once the installed seaborn version is confirmed.
ax = sns.distplot(batch_disp.values.ravel())
ax.set(xlabel='batch_disp', ylabel='density', title='Posterior mean: batch_disp');

In [None]:
# Distribution of the posterior-mean `control_mu` values passed to the simulator.
# NOTE(review): `sns.distplot` is deprecated since seaborn 0.11; switch to
# `sns.histplot(..., kde=True)` once the installed seaborn version is confirmed.
ax = sns.distplot(control_mu.values.ravel())
ax.set(xlabel='control_mu', ylabel='density', title='Posterior mean: control_mu');

In [None]:
# Distribution of the posterior-mean `control_sigma` values passed to the simulator.
# NOTE(review): `sns.distplot` is deprecated since seaborn 0.11; switch to
# `sns.histplot(..., kde=True)` once the installed seaborn version is confirmed.
ax = sns.distplot(control_sigma.values.ravel())
ax.set(xlabel='control_sigma', ylabel='density', title='Posterior mean: control_sigma');

In [None]:
# Display the most recently built BIOM table.
t