# Assess the enrichment of metabolic reactions
Compares pairs (control, knockdown) on a reaction level based on flux samples. Calculates p-values indicating significance of difference between reaction activities in a healthy and an infected model with the two-sample Kolmogorov–Smirnov test. Also, calculates fold changes of a reaction between a reaction in a healthy ($R_{control}$) and a reaction in an knockdown model ($R_{kd}$):

$$FC = \frac{\overline{R_{kd}} - \overline{R_{control}}}{\left|\overline{R_{kd}} + \overline{R_{control}}\right|}$$

In [7]:
import pandas as pd
import numpy as np

from scipy.stats import ks_2samp
#import statsmodels.stats.multitest as multi
from helpers import bh

### Basic settings

In [8]:
require_biomass = False
folder_samples = "samples\\biomass" if require_biomass else "samples\\no_biomass"
folder_enrich = "enrichment\\biomass" if require_biomass else "enrichment\\no_biomass"

### Read the data

In [9]:
df_control = pd.read_csv(f"{folder_samples}\\samples_control.csv")
df_kd = pd.read_csv(f"{folder_samples}\\samples_kd.csv")

In [10]:
reactions = sorted(list(set(df_control.columns) | set(df_kd.columns)))
len(reactions)

2271

### Reaction enrichment analysis

In [11]:
df = pd.DataFrame(columns=['reaction', 'FC', 'p', 'q', 'enrichment', 'changed'])
df['reaction']=reactions

n_samples = df_control.shape[0]
for reaction in reactions:
    if reaction in df_control.columns:
        control = df_control[reaction].values
    else:
        control = np.zeros(n_samples)
        
    if reaction in df_kd.columns:
        kd = df_kd[reaction].values
    else:
        kd = np.zeros(n_samples)
        
    
    mean_control = np.mean(control)
    mean_kd = np.mean(kd)
    
    if mean_control != 0 or mean_kd != 0:
        FC = mean_kd-mean_control/(abs(mean_kd + mean_control))
        p = ks_2samp(control,kd)[1]
    else:
        FC = 0
        p = 1     
        
    df.loc[df['reaction']==reaction, 'FC'] = FC
    df.loc[df['reaction']==reaction, 'p'] = p
    
df['q'] = bh(df['p'])
df.loc[(df['FC'] >= 0.82) & (df['q'] < 0.05),'enrichment'] = 1
df.loc[(df['FC'] <= -0.82) & (df['q'] < 0.05),'enrichment'] = -1
df.loc[~df['enrichment'].isna(),'changed'] = 1
df = df.fillna(0)
    
    

In [12]:
df.to_csv(f"{folder_enrich}\\reactions.csv", index=False)

In [13]:
df[df.changed == 0]

Unnamed: 0,reaction,FC,p,q,enrichment,changed
15,2H3MVc,0.000000,1.000000e+00,1.000000e+00,0,0
29,2HBO,0.000000,1.000000e+00,1.000000e+00,0,0
30,2HIVc,0.000000,1.000000e+00,1.000000e+00,0,0
108,ACITL,10.814818,8.689937e-02,8.885568e-02,0,0
147,AHEXASE2ly,0.731360,6.723652e-197,1.295116e-196,0,0
...,...,...,...,...,...,...
1987,TRIODTHYt2,0.000000,1.000000e+00,1.000000e+00,0,0
2021,UGLCNACtg,-0.200915,2.883581e-192,5.259930e-192,0,0
2052,XYLR,0.000000,1.000000e+00,1.000000e+00,0,0
2062,r0173,0.000000,1.000000e+00,1.000000e+00,0,0
