# In [1]:
import cobra, os
import pandas as pd
import numpy as np

from collections import OrderedDict
from copy import deepcopy

from sklearn.linear_model import LinearRegression

# In [3]:
path = './output/'

stds_coeff = ['0.05', '0.1', '0.2', '0.3']

# Load dataframe
# One tab-separated table per entry of stds_coeff, keyed by that entry.
# The first CSV column becomes the row index and missing values are read as 0.
fname_template = 'ftype-dfFlux_CoeffStd-{}_added.csv'
dfs = OrderedDict()
for std in stds_coeff:
    fpath = os.path.join(path, fname_template.format(std))
    dfs[std] = pd.read_csv(fpath, sep='\t', index_col=0).fillna(0)

# Collect list of reactions
# Sorted union of the column labels of every loaded table, so all tables can
# share one column order.
rxns = sorted(set().union(*(df.columns for df in dfs.values())))

# Put every table on the same layout: rows 0..10000 and one column per
# reaction. reindex() returns a new frame that keeps the numeric dtype of the
# data (an empty frame built from index/columns alone is object-dtype), and
# every row/column a table does not have is filled with 0.
# NOTE(review): the row range is hard-coded; rows outside 0..10000 are dropped
# and missing rows count as all-zero samples -- confirm this matches the
# number of samples actually written to the CSV files.
temp = dfs  # the tables as loaded; reindex() below does not modify them
dfs = OrderedDict(
    (std, temp[std].reindex(index=range(0, 10001), columns=rxns).fillna(0))
    for std in stds_coeff
)

# Calculate mean and std
# Result table with one row per reaction. The two regression columns come
# first and are left empty here; they are followed by one Stdev_, Mean_ and
# Stdevnorm_ column for each entry of stds_coeff.
cols = ['Stdev_slope', 'R2']
for prefix in ('Stdev_', 'Mean_', 'Stdevnorm_'):
    cols.extend(prefix + std for std in stds_coeff)

dfFluxStat = pd.DataFrame(index=rxns, columns=cols)
for std in stds_coeff:
    fluxes = dfs[std]
    means = fluxes.mean(axis=0)
    stds_calc = fluxes.std(axis=0, ddof=0)  # ddof=0: population std

    dfFluxStat.loc[:, 'Mean_' + std] = means
    dfFluxStat.loc[:, 'Stdev_' + std] = stds_calc
    # Standard deviation relative to the magnitude of the mean.
    dfFluxStat.loc[:, 'Stdevnorm_' + std] = stds_calc / means.abs()

# Linear regression
# For every reaction, fit Stdevnorm_* against the numeric values of
# stds_coeff, with the point (0, 0) prepended, and store the fitted slope and
# R^2 in the 'Stdev_slope' and 'R2' columns.
# The regressor values are the same for every reaction, so build them once.
x = np.array([0.0] + [float(std) for std in stds_coeff]).reshape(-1, 1)
LRmodel = LinearRegression(fit_intercept=True)

for rxn in dfFluxStat.index:
    y = np.array(
        [0.0] + [dfFluxStat.loc[rxn, 'Stdevnorm_' + std] for std in stds_coeff],
        dtype=float,
    )

    # Leave 'Stdev_slope'/'R2' empty when any value is undefined. NaN was
    # already skipped; +/-inf (non-zero std divided by a zero mean) is skipped
    # too, because LinearRegression.fit raises ValueError on infinite input.
    if not np.isfinite(y).all():
        continue

    # Values within 1e-8 of zero are treated as exactly zero.
    y[np.abs(y) <= 1e-8] = 0.0
    y = y.reshape(-1, 1)

    LRmodel.fit(x, y)
    slope = LRmodel.coef_[0][0]  # y is 2-D, so coef_ has shape (1, 1)
    r2 = LRmodel.score(x, y)

    dfFluxStat.loc[rxn, 'Stdev_slope'] = slope
    dfFluxStat.loc[rxn, 'R2'] = r2

# Save
# Write the statistics table as a tab-separated file, row labels included.
# The target is built from `path` (the directory the flux tables were read
# from) instead of repeating the directory literal, so the two stay in sync.
dfFluxStat.to_csv(os.path.join(path, 'dfFluxStat.csv'), sep='\t', index=True)