In [None]:
# Thread-count configuration. These statements are deliberately placed ahead of
# the numeric imports (pandas / numpy / jax) that follow.
from threadpoolctl import threadpool_limits, threadpool_info
# NOTE(review): this is called as a plain function before numpy/jax have been
# imported in this cell. threadpoolctl presumably only adjusts thread pools of
# libraries that are already loaded, so on a fresh kernel this call may have no
# effect -- confirm. The value (8) also disagrees with OMP_NUM_THREADS (6) below.
threadpool_limits(limits=8)
import os
# Exported before the numeric libraries are imported below, so any runtime that
# reads OMP_NUM_THREADS at load time will see this value.
os.environ['OMP_NUM_THREADS'] = '6'

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from jax import random
import jax
import jax.numpy as jnp
import jax.nn as jnn
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS, Predictive, SVI, Trace_ELBO
from numpyro.infer.autoguide import AutoNormal
import optax
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
import statsmodels.formula.api as smf
import arviz as az
from sklearn.preprocessing import LabelEncoder
from scipy.spatial import cKDTree
import scipy.stats as stats
from jax.ops import segment_sum
from numba import njit
import matplotlib.gridspec as gridspec
import random as py_random
from scipy.stats import norm

from bayes.euclid_hbda_doMF import *

In [None]:
import glob
import os
import warnings

import numpy as np
from threadpoolctl import threadpool_limits, threadpool_info

warnings.filterwarnings("ignore", category=UserWarning)

# Load the full table and keep the spatial columns that main() receives.
sub_alldata = load_data()
coords      = sub_alldata[['x','y','SectionID']]

# The first N_LIPID_COLUMNS columns of the table are the candidate lipids.
N_LIPID_COLUMNS = 173
# File-name suffix of a fitted model; the analysis cell further down loads
# "<lipid>_model_params_MF.npy".
PARAMS_SUFFIX = "_model_params_MF.npy"

# Find lipids that already have a parameter file in the working directory.
# The glob uses the *same* suffix that is sliced off below, so an unrelated
# "*_MF.npy" file can no longer be truncated into a bogus lipid name.
# glob.escape protects against glob metacharacters in the directory path.
cwd     = os.getcwd()
pattern = os.path.join(glob.escape(cwd), '*' + PARAMS_SUFFIX)
files   = np.unique(glob.glob(pattern))

trained = [
    os.path.basename(f)[:-len(PARAMS_SUFFIX)]
    for f in files
]
# Only lipids without a parameter file still need to be fitted.
totrain = np.setdiff1d(sub_alldata.columns[:N_LIPID_COLUMNS].values, trained)
print(f"{len(trained)} parameter file(s) found, {len(totrain)} lipid(s) left to train")

# Lower the thread budget for the training run.
threadpool_limits(limits=2)
os.environ['OMP_NUM_THREADS'] = '2'

# Each dict lists attribute overrides applied on top of a default config;
# one training run is launched per dict.
config_overrides = [
    {"num_epochs": 2500},
]

for overrides in config_overrides:
    cfg = LipidAnalysisConfig()
    cfg.lipids_to_analyze     = totrain
    cfg.normalize_percentiles = (0.5, 99.5)
    for k, v in overrides.items():
        setattr(cfg, k, v)

    print("Running with config:", cfg_string(cfg))
    # `results` holds the output of the last run only.
    results = main(sub_alldata, coords, cfg)

## Analyze and export

In [None]:
from tqdm import tqdm
from jax.nn import sigmoid

# Sorted supertype labels, used as the row index of every per-lipid frame.
# NOTE(review): assumes the saved parameter arrays follow this same sorted
# supertype order -- confirm against the training code.
stindex = np.sort(sub_alldata['supertype'].unique())

shift_frames = []      # per-lipid 'alpha_supertype_susceptibility_loc'
baseline_frames = []   # per-lipid sigmoid('alpha_supertype_unconst_loc')
fitted_lipids = []     # lipids whose parameters were loaded successfully
skipped_lipids = {}    # lipid -> reason it could not be loaded

for lipid in tqdm(sub_alldata.columns.values[:173]):
    params_path = lipid + "_model_params_MF.npy"
    try:
        # allow_pickle=True can execute arbitrary code: only load files
        # produced by this project's own training run.
        params = np.load(params_path, allow_pickle=True).item()
        baseline_frame = pd.DataFrame(
            sigmoid(params['alpha_supertype_unconst_loc']), index=stindex
        )
        shift_frame = pd.DataFrame(
            params['alpha_supertype_susceptibility_loc'], index=stindex
        )
    except Exception as exc:
        # Still best-effort (a lipid that cannot be loaded is skipped), but the
        # reason is recorded instead of being silently discarded, and
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        skipped_lipids[lipid] = f"{type(exc).__name__}: {exc}"
        continue

    # Append only once both frames were built, so the three lists stay aligned.
    shift_frames.append(shift_frame)
    baseline_frames.append(baseline_frame)
    fitted_lipids.append(lipid)

if skipped_lipids:
    print(f"Skipped {len(skipped_lipids)} lipid(s) without usable parameters:")
    for lipid, reason in skipped_lipids.items():
        print(f"  {lipid}: {reason}")

if not fitted_lipids:
    raise ValueError(
        "No '*_model_params_MF.npy' file could be loaded from the working "
        "directory; run the training cell first."
    )

# The susceptibility is stored with flipped sign in `shift`.
# NOTE(review): presumably so that it points in the same direction as the
# empirical female-minus-other difference computed later -- confirm.
shift = -pd.concat(shift_frames, axis=1)
baseline = pd.concat(baseline_frames, axis=1)
shift.columns = fitted_lipids
baseline.columns = fitted_lipids

In [None]:
# Normalize every lipid column with the percentiles of the training config.
# The original read `config.normalize_percentiles`, but no cell in this notebook
# defines `config`; `cfg` is the configuration object built in the training
# cell, so it is used here.
# NOTE(review): if `config` was meant to come from the star import, confirm that
# it carries the same percentiles as `cfg`.
# NOTE(review): this rebinds `sub_alldata`, so re-running the cell presumably
# normalizes already-normalized data -- run it once per kernel session.
lower_pct, upper_pct = cfg.normalize_percentiles

for lipid_name in sub_alldata.columns.values[:173]:
    sub_alldata = normalize_lipid_column(
        sub_alldata,
        lipid_name,
        lower_percentile=lower_pct,
        upper_percentile=upper_pct
    )

In [None]:
# Per-supertype mean of each lipid column, computed separately for rows whose
# Condition is "female" and for all remaining rows.
lipid_columns = sub_alldata.columns.values[:173]
supertype_labels = sub_alldata["supertype"]
other_rows = sub_alldata['Condition'] != "female"
female_rows = sub_alldata['Condition'] == "female"

centroids = (
    sub_alldata.loc[other_rows, lipid_columns]
    .groupby(supertype_labels)
    .mean()
)
# Bring the female means into the row/column order of `centroids` so the
# subtraction below is element-wise over identical labels.
centroids2 = (
    sub_alldata.loc[female_rows, lipid_columns]
    .groupby(supertype_labels)
    .mean()
    .loc[centroids.index, centroids.columns]
)

# Empirical difference: female mean minus non-female mean.
delll = centroids2 - centroids

delll

In [None]:
# Visual sanity check on a few lipids: fitted parameters vs. empirical values.
# A seeded generator and replace=False make the selection reproducible and
# duplicate-free (the original could draw the same lipid more than once).
diagnostic_rng = np.random.default_rng(0)
n_examples = min(10, shift.shape[1])

for lipid in diagnostic_rng.choice(shift.columns.to_numpy(), size=n_examples, replace=False):
    fig, ax = plt.subplots()
    ax.scatter(shift[lipid], delll.loc[shift.index, lipid])
    ax.set(
        title=f"{lipid}: shift",
        xlabel="fitted shift",
        ylabel="female - other centroid difference",
    )
    plt.show()
    plt.close(fig)  # release the figure; many are created in this loop

for lipid in diagnostic_rng.choice(baseline.columns.to_numpy(), size=n_examples, replace=False):
    fig, ax = plt.subplots()
    ax.scatter(baseline[lipid], centroids.loc[baseline.index, lipid])
    ax.set(
        title=f"{lipid}: baseline",
        xlabel="fitted baseline",
        ylabel="non-female centroid",
    )
    plt.show()
    plt.close(fig)

# Relative shift: the shift expressed as a fraction of the fitted baseline.
foldchange = shift / baseline
shift.to_parquet("shift_MF.parquet")
baseline.to_parquet("baseline_MF.parquet")
foldchange.to_parquet("relshift_MF.parquet")

In [None]:
import os

import numpy as np
import pandas as pd
from threadpoolctl import threadpool_limits, threadpool_info

threadpool_limits(limits=8)
os.environ['OMP_NUM_THREADS'] = '6'

# Per-lipid CSV files in the working directory that end with this suffix.
pattern = '_MF_shifts_fdr5_vs0.csv'
# os.listdir returns entries in arbitrary order; sorting makes the column order
# of the exported table reproducible across runs and machines.
files = sorted(
    f
    for f in os.listdir('.')
    if f.endswith(pattern)
)
if not files:
    raise FileNotFoundError(
        f"No '*{pattern}' files found in {os.getcwd()!r}; nothing to summarize."
    )

# Rows are selected by the integer form of the foldchange index.
# NOTE(review): assumes the CSV index holds integer supertype labels -- confirm.
row_labels = foldchange.index.values.astype(int)

# Lipid name = file name with the trailing suffix removed (suffix only, not
# every occurrence of the substring).
lipid_dfs = {
    f[:-len(pattern)]: pd.read_csv(f, index_col=0).loc[row_labels, :]
    for f in files
}
lipid_names = list(lipid_dfs)

# Lower / upper interval bounds, one column per lipid, rows relabelled to match
# `foldchange`.
bottom = pd.concat([lipid_dfs[name]['ci_2.5%'] for name in lipid_names], axis=1)
bottom.index = foldchange.index.values
bottom.columns = lipid_names

top = pd.concat([lipid_dfs[name]['ci_97.5%'] for name in lipid_names], axis=1)
top.index = foldchange.index.values
top.columns = lipid_names

# An entry is flagged when both interval bounds lie strictly on the same side
# of zero.
significant = ((top > 0) & (bottom > 0)) | ((top < 0) & (bottom < 0))
significant.to_parquet("sign_significance_MF.parquet")
significant