In [13]:
from scipy.stats import f_oneway
import pandas as pd
import numpy as np
import tellurium as te

import statsmodels.api as sm
from statsmodels.formula.api import ols

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm


### Goal: show that repeated iterations on the same dataset are statistically indistinguishable (one-way ANOVA per elasticity)

In [2]:
import os
# Display the current working directory; the relative '../data/...' paths
# configured in the user-input section are resolved against it.
os.getcwd()


'c:\\Users\\user\\Documents\\research\\BMCA-pipeline\\notebooks'

In [23]:
######### USER-INPUT SECTION
# Antimony model file, and the folder holding one sub-folder per iteration.
MODEL_FILE_PATH = '../data/interim/Antimony/Simplified_Teusink_yeast.ant'
iteration_folder = '../data/interim/generated_data/simplTeusink-noReg/iterations/'
######### END OF USER-INPUT SECTION

# Load the Antimony model and flatten its scaled elasticity matrix.
# NOTE(review): `gt` presumably stands for "ground truth" -- confirm.
model = te.loada(MODEL_FILE_PATH)
gt = model.getScaledElasticityMatrix().flatten()

# Establish labels for metabolite and reaction names
m_labels = list(model.getFloatingSpeciesIds())
r_labels = list(model.getReactionIds())

# One label per (reaction, metabolite) pair, reaction-major order.
# The LaTeX prefix is a raw string: '\e' is not a valid escape sequence and a
# plain literal emits a DeprecationWarning/SyntaxWarning on current Python
# versions. The resulting text is unchanged.
ex_labels = np.array([[r'$\epsilon_{' + '{0},{1}'.format(rlabel, mlabel) + '}$'
                    for mlabel in m_labels] for rlabel in r_labels]).flatten()

# Plain-text variant of the same labels (same ordering), without LaTeX markup.
ex_file_labels = np.array([['E_' + '{0},{1}'.format(rlabel, mlabel)
                    for mlabel in m_labels] for rlabel in r_labels]).flatten()

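# Plan:
#   1. load the predicted Ex dataset of each iteration
#   2. reshape it into 1000 rows x 176 columns (one column per elasticity label)
#   3. compare the iterations column by column with a one-way ANOVA
#      (original idea: plot all data in each column as a scatter plot)
#   4. wrap the steps in a plotting method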
def run_ANOVA10(colNo, exs101):
    """Run a one-way ANOVA on one elasticity column across several DataFrames.

    Parameters
    ----------
    colNo : int
        Position in the module-level ``ex_labels`` of the column to compare.
    exs101 : iterable of pandas.DataFrame
        One frame per iteration; each must contain the column
        ``ex_labels[colNo]``.

    Returns
    -------
    The result object of ``scipy.stats.f_oneway`` (F statistic first,
    p-value second), with each frame's column treated as one group.
    """
    # One group of samples per iteration, all taken from the same column.
    groups = [frame.loc[:, ex_labels[colNo]] for frame in exs101]
    return f_oneway(*groups)


def _split_elasticity_label(label):
    r"""Return ``(reaction, metabolite)`` for a ``$\epsilon_{reaction,metabolite}$`` label.

    The LaTeX wrapper is removed as an exact prefix/suffix. ``str.strip`` is
    not suitable here: it treats its argument as a *set of characters* and
    would also remove leading/trailing ``e p s i l o n _`` characters that
    belong to the ids themselves (e.g. a metabolite id ending in ``i``).
    """
    prefix, suffix = r'$\epsilon_{', '}$'
    core = str(label)
    if core.startswith(prefix):
        core = core[len(prefix):]
    if core.endswith(suffix):
        core = core[:-len(suffix)]
    reaction, metabolite = core.split(',')[:2]
    return reaction, metabolite


def plot_heatmaps(pt_lvl):
    """Save a heatmap of per-elasticity ANOVA F-statistics across iterations.

    For every sub-folder of the module-level ``iteration_folder`` the file
    ``{pt_lvl}x_PredictedExs.csv`` is read and reshaped to one column per
    entry of ``ex_labels``. Each column is then compared across iterations
    with ``run_ANOVA10`` and the F statistics are drawn as a
    reaction x metabolite heatmap (log colour scale).

    Parameters
    ----------
    pt_lvl : int or float
        Perturbation level; formatted into the CSV file name, the plot title
        and the output file name ``anova_fscores_{pt_lvl}.png``.

    Raises
    ------
    ValueError
        If fewer than two iteration folders are found (ANOVA needs at least
        two groups).
    """
    n_labels = len(ex_labels)

    exs101 = []
    # sorted() makes the load order deterministic across platforms.
    for folder in sorted(os.listdir(iteration_folder)):
        run_dir = os.path.join(iteration_folder, folder)
        if not os.path.isdir(run_dir):
            # Skip stray files (e.g. .DS_Store) that are not iteration folders.
            continue
        csv_path = os.path.join(run_dir, f'{pt_lvl}x_PredictedExs.csv')
        values = pd.read_csv(csv_path, index_col=0).values
        # Column count follows the labels; the row count is inferred from the data.
        exs101.append(pd.DataFrame(values.reshape(-1, n_labels), columns=ex_labels))

    if len(exs101) < 2:
        raise ValueError(
            f'Need at least two iteration folders in {iteration_folder!r} '
            f'to run ANOVA; found {len(exs101)}.'
        )

    # One record per elasticity: its reaction, its metabolite and the F statistic.
    records = []
    for col_no, label in enumerate(ex_labels):
        reaction, metabolite = _split_elasticity_label(label)
        records.append({
            'reaction': reaction,
            'metabolite': metabolite,
            'f-stat': run_ANOVA10(col_no, exs101)[0],
        })
    fstats = pd.DataFrame(records)
    heat = pd.pivot(fstats, values='f-stat', index='reaction', columns='metabolite')

    sns.heatmap(heat, norm=LogNorm())
    plt.title(f'ANOVA f-scores for BMCA iterations of {pt_lvl}x perturbation of data')
    plt.savefig(f'anova_fscores_{pt_lvl}.png')
    # Close the figure so successive calls do not draw on top of each other.
    plt.close()

In [24]:
# Perturbation levels to process. Each value is formatted into the
# '{pt_lvl}x_PredictedExs.csv' file name that plot_heatmaps reads.
pt = [0.1, 0.2, 0.3, 0.4, 0.5, 1.01, 1.5, 3, 5, 7, 10]
for lvl in pt:
    # Writes one 'anova_fscores_{lvl}.png' image per level.
    plot_heatmaps(lvl)


In [25]:
# Display the working directory, i.e. where the relative
# 'anova_fscores_*.png' output files were written.
os.getcwd()

'c:\\Users\\user\\Documents\\research\\BMCA-pipeline\\notebooks'