# Differential activity of metabolic reactions
We will compare the reaction pairs (control, knock down) on the basis of flux samples.

We employ the two-sample Kolmogorov-Smirnov test, which does not assume that the flux samples are normally distributed. Statistical significance is evaluated using p-values adjusted for multiple testing (FDR correction), reported as q-values.

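Additionally, we will quantify the magnitude of each change with a normalised fold change (FC) computed from the mean fluxes; it lies in $[-1, 1]$ whenever both means have the same sign: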

$$FC = \frac{\overline{R_{kd}} - \overline{R_{control}}}{\left|\overline{R_{kd}} + \overline{R_{control}}\right|}$$

In [1]:
import pandas as pd
import numpy as np

from scipy.stats import ks_2samp
#import statsmodels.stats.multitest as multi

import os.path

from helpers import bh

### Basic settings

In [2]:
# NOTE(review): require_biomass is not used in the cells visible here;
# presumably it is consumed elsewhere in the workflow - TODO confirm.
require_biomass = True
# folder the flux-sample CSV files are read from
folder_samples = "samples"
# folder the differential-activity results are written to
folder_enrich = "enrichment"

### Importing the data

In [3]:
# Flux samples of both conditions: one column per reaction, one row per sample.
control_csv = os.path.join(folder_samples, 'samples_control.csv')
kd_csv = os.path.join(folder_samples, 'samples_kd.csv')
# Alternative input (presumably a control-vs-control sanity check - confirm):
# point kd_csv at 'samples_control2.csv' instead.

df_control = pd.read_csv(control_csv)
df_kd = pd.read_csv(kd_csv)

In [4]:
# every reaction that occurs in at least one of the two groups, in sorted order
reactions = sorted(set(df_control.columns).union(df_kd.columns))
len(reactions)  # number of reactions

2282

### Differential activity of metabolic reactions

In [5]:
# Differential activity per reaction: normalised fold change (FC) of the mean
# fluxes, two-sample Kolmogorov-Smirnov p-value, FDR-adjusted q-value and the
# resulting enrichment call (+1 up, -1 down) plus a 'changed' flag.

# Thresholds for calling a reaction significantly changed.
# |FC| >= 0.82 corresponds to at least a ~10-fold up-/down-regulation,
# because (10x - x) / (10x + x) = 9/11 ~ 0.82.
fc_threshold = 0.82
q_threshold = 0.05

n_samples = df_control.shape[0]
n_samples_kd = df_kd.shape[0]

# Collect one (reaction, FC, p) record per reaction and build the frame once,
# instead of writing row by row through a boolean mask (quadratic in the
# number of reactions and leaves the numeric columns as object dtype).
records = []
for reaction in reactions:
    # a reaction missing from a group is treated as inactive (all-zero flux) there
    if reaction in df_control.columns:
        control = df_control[reaction].values
    else:
        control = np.zeros(n_samples)

    if reaction in df_kd.columns:
        kd = df_kd[reaction].values
    else:
        # sized by the knock-down sample count, which need not equal the control count
        kd = np.zeros(n_samples_kd)

    # mean activities
    mean_control = np.mean(control)
    mean_kd = np.mean(kd)

    # FC - fold change and significance using the 2 sample Kolmogorov-Smirnov test
    if mean_control != 0 or mean_kd != 0:
        numerator = mean_kd - mean_control
        denominator = abs(mean_kd + mean_control)
        if denominator == 0:
            # The two means cancel exactly (opposite sign, equal magnitude).
            # The formula is unbounded here; keep the +/-inf the plain division
            # would yield, but without triggering a divide-by-zero warning.
            FC = np.inf if numerator > 0 else -np.inf
        else:
            FC = numerator / denominator
        p = ks_2samp(control, kd).pvalue
    else:
        # inactive in both groups: no change, nothing to test
        FC = 0.0
        p = 1.0

    records.append((reaction, FC, p))

df = pd.DataFrame(records, columns=['reaction', 'FC', 'p'])

# adjustment of p-values for multiple testing - FDR correction
# (bh comes from the project's helpers module; presumably Benjamini-Hochberg - confirm)
df['q'] = bh(df['p'])

# The flags stay NaN (object dtype) for reactions that are not significantly
# changed; the next cell replaces the remaining NaNs with 0.
df['enrichment'] = pd.Series(np.nan, index=df.index, dtype=object)
df['changed'] = pd.Series(np.nan, index=df.index, dtype=object)

# significance requires at least a 10-fold up-/down-regulation
significant = df['q'] < q_threshold
df.loc[(df['FC'] >= fc_threshold) & significant, 'enrichment'] = 1
df.loc[(df['FC'] <= -fc_threshold) & significant, 'enrichment'] = -1
df.loc[~df['enrichment'].isna(), 'changed'] = 1
    
    

In [6]:
# Reactions without a significant change have no enrichment/changed flag yet: mark them 0.
# Only the two flag columns are filled, so that a missing FC, p or q value is not
# silently turned into 0 (a q-value of 0 would read as maximally significant).
df = df.fillna({'enrichment': 0, 'changed': 0})

In [None]:
# Make sure the output folder exists; to_csv does not create missing directories.
os.makedirs(folder_enrich, exist_ok=True)
df.to_csv(os.path.join(folder_enrich, "reactions.csv"), index=False)

In [7]:
# display the final per-reaction table (FC, p, q, enrichment, changed)
df

Unnamed: 0,reaction,FC,p,q,enrichment,changed
0,10FTHF5GLUtl,1.000000,0.000000e+00,0.000000e+00,1,1
1,10FTHF5GLUtm,1.000000,0.000000e+00,0.000000e+00,1,1
2,10FTHFtl,-1.000000,0.000000e+00,0.000000e+00,-1,1
3,10FTHFtm,0.217584,5.205457e-12,5.981296e-12,0,0
4,12DHCHOLabc,-0.554093,3.094078e-138,5.473400e-138,0,0
...,...,...,...,...,...,...
2277,r2519,0.372597,1.352908e-72,2.033818e-72,0,0
2278,r2521,1.000000,0.000000e+00,0.000000e+00,1,1
2279,r2535m,-1.000000,0.000000e+00,0.000000e+00,-1,1
2280,r2537,-0.641050,0.000000e+00,0.000000e+00,0,0
