In [1]:
import pandas as pd
import numpy as np

from scipy.stats import ks_2samp
#import statsmodels.stats.multitest as multi

import os.path

from helpers import bh

In [2]:
# Directory holding the iMAT flux-sample CSV files (one file per sample).
# The location can be overridden through the IMAT_SAMPLES_DIR environment
# variable so the notebook is not tied to a single machine; the original
# absolute path is kept as the default.
folder = os.environ.get(
    'IMAT_SAMPLES_DIR',
    '/Users/zivaskof/Documents/MATLAB/mag/fluxSampels/iMATSampels',
)

In [7]:
# Compare the sampled flux distributions of every control / knock-down file
# pair found in `folder` and write one summary CSV per pair.

# Directory the per-pair result tables are written to.
output_path = '/Users/zivaskof/Documents/MATLAB/mag/reaction_dif/iMAT_reaction_diff'

# File-name endings that identify the two members of a pair.
# NOTE(review): inferred from the recorded output ("..._KM_001N.csv" /
# "..._KM_001T.csv") and from the [:-5] slice further down - confirm that
# every sample pair in the folder follows this convention.
CONTROL_SUFFIX = 'N.csv'
KD_SUFFIX = 'T.csv'

# With FC = (kd - control) / |kd + control|, a ten-fold change of two
# same-sign means gives (10 - 1) / (10 + 1) ~= 0.82.
FC_THRESHOLD = 0.82
Q_THRESHOLD = 0.05


def pair_sample_files(names):
    """Return sorted (control, kd) file-name pairs matched by their shared stem.

    A control file is any name ending in CONTROL_SUFFIX; its partner is the
    same name with that ending replaced by KD_SUFFIX. Control files without a
    partner are reported and skipped. Names that do not end in CONTROL_SUFFIX
    are never treated as controls, so stray files cannot shift the pairing.
    """
    available = set(names)
    pairs = []
    for name in sorted(names):
        if not name.endswith(CONTROL_SUFFIX):
            continue
        partner = name[:-len(CONTROL_SUFFIX)] + KD_SUFFIX
        if partner in available:
            pairs.append((name, partner))
        else:
            print("Skipping " + name + ": no matching " + partner)
    return pairs


# Hidden files (e.g. .DS_Store) are ignored.
filenames = [filename for filename in os.listdir(folder) if not filename.startswith(".")]
filenames.sort()

# Make sure the destination exists before any expensive work is done.
os.makedirs(output_path, exist_ok=True)

for file_control, file_kd in pair_sample_files(filenames):
    print(file_control)
    print(file_kd)

    df_control = pd.read_csv(os.path.join(folder, file_control))
    df_kd = pd.read_csv(os.path.join(folder, file_kd))

    # Union of the reactions (columns) present in either sample table.
    reactions = sorted(set(df_control.columns) | set(df_kd.columns))

    # Length of the all-zero vector substituted for a reaction that is
    # missing from one table (the control row count is used for both groups).
    n_samples = df_control.shape[0]

    fold_changes = []
    p_values = []

    # Iterate over all reactions.
    for reaction in reactions:
        if reaction in df_control.columns:
            control = df_control[reaction].values
        else:
            # Reaction absent from the control group: treat it as all zeros.
            control = np.zeros(n_samples)

        if reaction in df_kd.columns:
            kd = df_kd[reaction].values
        else:
            # Reaction absent from the kd group: treat it as all zeros.
            kd = np.zeros(n_samples)

        # Mean flux of each group.
        mean_control = np.mean(control)
        mean_kd = np.mean(kd)

        # Fold change (FC) and its significance from a two-sample
        # Kolmogorov-Smirnov test.
        if mean_control != 0 or mean_kd != 0:
            # NOTE(review): the denominator is |kd + control|, so |FC| exceeds 1
            # when the means have opposite signs, and the division yields
            # +/-inf when they are equal and opposite. Kept unchanged.
            FC = (mean_kd - mean_control) / (abs(mean_kd + mean_control))
            p = ks_2samp(control, kd)[1]
        else:
            FC = 0
            p = 1

        fold_changes.append(FC)
        p_values.append(p)

    # Build the table once, with numeric dtypes, instead of filling an
    # object-dtype frame one masked assignment at a time.
    df = pd.DataFrame({
        'reaction': reactions,
        'FC': np.asarray(fold_changes, dtype=float),
        'p': np.asarray(p_values, dtype=float),
    })

    # Correct the p-values for multiple testing (FDR correction).
    df['q'] = bh(df['p'])

    # Significance requires at least a ten-fold up-/down-regulation.
    significant = (df['q'] < Q_THRESHOLD).to_numpy()
    up = (df['FC'] >= FC_THRESHOLD).to_numpy() & significant
    down = (df['FC'] <= -FC_THRESHOLD).to_numpy() & significant
    df['enrichment'] = np.select([up, down], [1, -1], default=0)
    df['changed'] = (df['enrichment'] != 0).astype(int)
    # As before, any remaining missing value is reported as 0.
    df = df.fillna(0)

    minus_ena = df[df.enrichment == -1]
    print(minus_ena)
    ena = df[df.enrichment == 1]
    print(ena)
    nic = df[df.enrichment == 0]
    print(nic)

    # Keep the part of the name after the third underscore and drop the last
    # five characters (the one-letter group tag plus ".csv").
    extracted_string = file_control.split("_", maxsplit=3)[-1][:-5]
    new_file_name = 'iMAT_reaction_diff_' + extracted_string + ".csv"
    print(new_file_name)
    csv_file_path = os.path.join(output_path, new_file_name)
    df.to_csv(csv_file_path, index=False)
    print("------------------------------")

iMAT_flux_sample_KM_001N.csv
iMAT_flux_sample_KM_001T.csv
      reaction        FC    p    q  enrichment  changed
1     MAR00004 -1.000000  0.0  0.0          -1        1
2     MAR00005 -1.000000  0.0  0.0          -1        1
3     MAR00006 -1.000000  0.0  0.0          -1        1
4     MAR00009 -1.000000  0.0  0.0          -1        1
5     MAR00010 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
5115  MAR13034 -2.795375  0.0  0.0          -1        1
5117  MAR13037 -1.904412  0.0  0.0          -1        1
5137  MAR13057 -0.919726  0.0  0.0          -1        1
5145  MAR13067 -1.000000  0.0  0.0          -1        1
5146  MAR13075 -1.000000  0.0  0.0          -1        1

[1696 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
0     MAR00003  1.000000  0.0  0.0           1        1
8     MAR00018  1.000000  0.0  0.0           1        1
18    MAR00052  1.000000  0.0  0.0           1        1
20    MAR00061  1.000

      reaction        FC    p    q  enrichment  changed
3     MAR00020 -1.000000  0.0  0.0          -1        1
6     MAR00031 -1.000000  0.0  0.0          -1        1
7     MAR00032 -1.000000  0.0  0.0          -1        1
10    MAR00041 -1.000000  0.0  0.0          -1        1
12    MAR00047 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4467  MAR13054 -1.119417  0.0  0.0          -1        1
4473  MAR13060 -1.022948  0.0  0.0          -1        1
4478  MAR13067 -1.000000  0.0  0.0          -1        1
4479  MAR13075 -1.000000  0.0  0.0          -1        1
4480  MAR13077 -0.963239  0.0  0.0          -1        1

[1115 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
2     MAR00018  1.000000  0.0  0.0           1        1
4     MAR00025  1.000000  0.0  0.0           1        1
5     MAR00030  1.000000  0.0  0.0           1        1
11    MAR00045  1.000000  0.0  0.0           1        1
15    MAR00061  1.05188

      reaction   FC    p    q  enrichment  changed
2     MAR00018 -1.0  0.0  0.0          -1        1
4     MAR00025 -1.0  0.0  0.0          -1        1
5     MAR00030 -1.0  0.0  0.0          -1        1
8     MAR00035 -1.0  0.0  0.0          -1        1
10    MAR00040 -1.0  0.0  0.0          -1        1
...        ...  ...  ...  ...         ...      ...
4385  MAR13067 -1.0  0.0  0.0          -1        1
4386  MAR13075 -1.0  0.0  0.0          -1        1
4388  MAR13081 -1.0  0.0  0.0          -1        1
4389  MAR13084 -1.0  0.0  0.0          -1        1
4390  MAR13085 -1.0  0.0  0.0          -1        1

[1381 rows x 6 columns]
      reaction        FC              p              q  enrichment  changed
6     MAR00031  1.000000   0.000000e+00   0.000000e+00           1        1
7     MAR00032  1.000000   0.000000e+00   0.000000e+00           1        1
12    MAR00047  1.000000   0.000000e+00   0.000000e+00           1        1
13    MAR00048  1.000000   0.000000e+00   0.000000e+00     

      reaction        FC    p    q  enrichment  changed
3     MAR00020 -1.000000  0.0  0.0          -1        1
4     MAR00031 -1.000000  0.0  0.0          -1        1
5     MAR00032 -1.000000  0.0  0.0          -1        1
6     MAR00035 -1.000000  0.0  0.0          -1        1
18    MAR00089 -1.088813  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4461  MAR13051 -1.025584  0.0  0.0          -1        1
4473  MAR13063 -1.001967  0.0  0.0          -1        1
4478  MAR13081 -1.000000  0.0  0.0          -1        1
4479  MAR13084 -1.000000  0.0  0.0          -1        1
4480  MAR13085 -1.000000  0.0  0.0          -1        1

[1229 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
2     MAR00015  1.000000  0.0  0.0           1        1
7     MAR00036  1.000000  0.0  0.0           1        1
9     MAR00041  1.000000  0.0  0.0           1        1
12    MAR00054  1.000000  0.0  0.0           1        1
14    MAR00063  1.00000

      reaction        FC    p    q  enrichment  changed
5     MAR00031 -1.000000  0.0  0.0          -1        1
6     MAR00032 -1.000000  0.0  0.0          -1        1
12    MAR00054 -1.000000  0.0  0.0          -1        1
20    MAR00089 -1.024198  0.0  0.0          -1        1
23    MAR00096 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4646  MAR13042 -0.959250  0.0  0.0          -1        1
4650  MAR13046 -0.961910  0.0  0.0          -1        1
4652  MAR13048 -0.834013  0.0  0.0          -1        1
4661  MAR13057 -0.893579  0.0  0.0          -1        1
4662  MAR13058 -0.890331  0.0  0.0          -1        1

[1278 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
3     MAR00025  1.000000  0.0  0.0           1        1
4     MAR00030  1.000000  0.0  0.0           1        1
7     MAR00035  1.000000  0.0  0.0           1        1
8     MAR00036  1.000000  0.0  0.0           1        1
10    MAR00045  1.00000

      reaction        FC    p    q  enrichment  changed
4     MAR00018 -1.000000  0.0  0.0          -1        1
6     MAR00031 -1.000000  0.0  0.0          -1        1
7     MAR00032 -1.000000  0.0  0.0          -1        1
8     MAR00036 -1.000000  0.0  0.0          -1        1
10    MAR00045 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4365  MAR13055 -1.028956  0.0  0.0          -1        1
4370  MAR13060 -1.034360  0.0  0.0          -1        1
4378  MAR13078 -1.000000  0.0  0.0          -1        1
4379  MAR13084 -1.000000  0.0  0.0          -1        1
4380  MAR13085 -1.000000  0.0  0.0          -1        1

[1071 rows x 6 columns]
      reaction        FC              p              q  enrichment  changed
15    MAR00061  1.534951   0.000000e+00   0.000000e+00           1        1
23    MAR00096  1.000000   0.000000e+00   0.000000e+00           1        1
24    MAR00120  1.000000   0.000000e+00   0.000000e+00           1        1

      reaction        FC    p    q  enrichment  changed
10    MAR00040 -1.000000  0.0  0.0          -1        1
12    MAR00045 -1.000000  0.0  0.0          -1        1
13    MAR00047 -1.000000  0.0  0.0          -1        1
14    MAR00048 -1.000000  0.0  0.0          -1        1
22    MAR00090 -5.451032  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4429  MAR13055 -1.000008  0.0  0.0          -1        1
4434  MAR13060 -1.000003  0.0  0.0          -1        1
4440  MAR13067 -1.000000  0.0  0.0          -1        1
4441  MAR13075 -1.000000  0.0  0.0          -1        1
4443  MAR13078 -1.000000  0.0  0.0          -1        1

[939 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
16    MAR00054  0.914724  0.0  0.0           1        1
21    MAR00089  1.151303  0.0  0.0           1        1
23    MAR00097  1.000000  0.0  0.0           1        1
37    MAR00155  1.670942  0.0  0.0           1        1
38    MAR00156  1.522095

      reaction        FC              p              q  enrichment  changed
5     MAR00045 -1.000000   0.000000e+00   0.000000e+00          -1        1
7     MAR00054 -1.000000   0.000000e+00   0.000000e+00          -1        1
11    MAR00078 -4.786653  1.860063e-284  3.113752e-284          -1        1
25    MAR00165 -0.898910   0.000000e+00   0.000000e+00          -1        1
29    MAR00170 -0.898910   0.000000e+00   0.000000e+00          -1        1
...        ...       ...            ...            ...         ...      ...
4395  MAR13053 -0.972261   0.000000e+00   0.000000e+00          -1        1
4396  MAR13054 -0.920046   0.000000e+00   0.000000e+00          -1        1
4399  MAR13057 -0.943817   0.000000e+00   0.000000e+00          -1        1
4401  MAR13059 -0.827222   0.000000e+00   0.000000e+00          -1        1
4407  MAR13067 -1.000000   0.000000e+00   0.000000e+00          -1        1

[1141 rows x 6 columns]
      reaction         FC    p    q  enrichment  changed
8     

      reaction        FC    p    q  enrichment  changed
2     MAR00012 -1.000000  0.0  0.0          -1        1
3     MAR00015 -1.000000  0.0  0.0          -1        1
10    MAR00041 -1.000000  0.0  0.0          -1        1
22    MAR00103 -1.000000  0.0  0.0          -1        1
23    MAR00120 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4358  MAR13063 -1.027599  0.0  0.0          -1        1
4359  MAR13065 -1.000000  0.0  0.0          -1        1
4361  MAR13067 -1.000000  0.0  0.0          -1        1
4362  MAR13075 -1.000000  0.0  0.0          -1        1
4363  MAR13077 -3.109062  0.0  0.0          -1        1

[1284 rows x 6 columns]
      reaction          FC              p              q  enrichment  changed
5     MAR00025    1.000000   0.000000e+00   0.000000e+00           1        1
6     MAR00030    1.000000   0.000000e+00   0.000000e+00           1        1
12    MAR00061    1.000000  1.586001e-134  2.037811e-134           1 

      reaction        FC    p    q  enrichment  changed
0     MAR00003 -0.830883  0.0  0.0          -1        1
3     MAR00020 -0.918847  0.0  0.0          -1        1
6     MAR00036 -1.000000  0.0  0.0          -1        1
8     MAR00040 -1.000000  0.0  0.0          -1        1
15    MAR00061 -1.018983  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
3931  MAR13061 -1.212814  0.0  0.0          -1        1
3932  MAR13062 -1.000000  0.0  0.0          -1        1
3933  MAR13063 -1.018493  0.0  0.0          -1        1
3934  MAR13066 -2.615630  0.0  0.0          -1        1
3939  MAR13081 -1.000000  0.0  0.0          -1        1

[1019 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
2     MAR00018  1.000000  0.0  0.0           1        1
4     MAR00031  1.000000  0.0  0.0           1        1
5     MAR00032  1.000000  0.0  0.0           1        1
11    MAR00047  1.000000  0.0  0.0           1        1
12    MAR00048  1.00000

      reaction         FC    p    q  enrichment  changed
4     MAR00031  -1.000000  0.0  0.0          -1        1
5     MAR00032  -1.000000  0.0  0.0          -1        1
8     MAR00045  -1.000000  0.0  0.0          -1        1
10    MAR00054  -1.000000  0.0  0.0          -1        1
12    MAR00064  -1.000000  0.0  0.0          -1        1
...        ...        ...  ...  ...         ...      ...
4516  MAR13053  -1.459002  0.0  0.0          -1        1
4519  MAR13056 -29.787036  0.0  0.0          -1        1
4524  MAR13061  -1.428602  0.0  0.0          -1        1
4525  MAR13062  -0.960141  0.0  0.0          -1        1
4528  MAR13067  -1.000000  0.0  0.0          -1        1

[1423 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
2     MAR00018  1.000000  0.0  0.0           1        1
6     MAR00036  1.000000  0.0  0.0           1        1
11    MAR00061  1.150715  0.0  0.0           1        1
14    MAR00076  1.000000  0.0  0.0           1        1
18    MAR00

      reaction         FC    p    q  enrichment  changed
2     MAR00016  -1.000000  0.0  0.0          -1        1
9     MAR00037  -1.000000  0.0  0.0          -1        1
10    MAR00040  -1.000000  0.0  0.0          -1        1
11    MAR00045  -1.000000  0.0  0.0          -1        1
12    MAR00052  -1.000000  0.0  0.0          -1        1
...        ...        ...  ...  ...         ...      ...
3693  MAR13046  -0.937181  0.0  0.0          -1        1
3695  MAR13048  -0.966397  0.0  0.0          -1        1
3700  MAR13053 -12.523403  0.0  0.0          -1        1
3702  MAR13055  -1.057234  0.0  0.0          -1        1
3704  MAR13057  -0.863538  0.0  0.0          -1        1

[916 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
6     MAR00031  1.000000  0.0  0.0           1        1
7     MAR00032  1.000000  0.0  0.0           1        1
14    MAR00061  3.579392  0.0  0.0           1        1
21    MAR00103  1.000000  0.0  0.0           1        1
30    MAR001

      reaction        FC    p    q  enrichment  changed
4     MAR00031 -1.000000  0.0  0.0          -1        1
5     MAR00032 -1.000000  0.0  0.0          -1        1
9     MAR00045 -1.000000  0.0  0.0          -1        1
10    MAR00047 -0.968330  0.0  0.0          -1        1
11    MAR00048 -0.968330  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4589  MAR13046 -0.989568  0.0  0.0          -1        1
4593  MAR13050 -0.848305  0.0  0.0          -1        1
4595  MAR13052 -0.962240  0.0  0.0          -1        1
4597  MAR13054 -0.926584  0.0  0.0          -1        1
4612  MAR13078 -1.000000  0.0  0.0          -1        1

[1093 rows x 6 columns]
      reaction        FC              p              q  enrichment  changed
2     MAR00015  1.000000   0.000000e+00   0.000000e+00           1        1
22    MAR00097  1.000000   0.000000e+00   0.000000e+00           1        1
23    MAR00103  1.000000   0.000000e+00   0.000000e+00           1        1

      reaction        FC              p              q  enrichment  changed
6     MAR00025 -1.000000   0.000000e+00   0.000000e+00          -1        1
7     MAR00030 -1.000000   0.000000e+00   0.000000e+00          -1        1
11    MAR00045 -1.000000   0.000000e+00   0.000000e+00          -1        1
17    MAR00077 -1.238252   0.000000e+00   0.000000e+00          -1        1
37    MAR00155 -5.571866  8.728509e-307  1.714263e-306          -1        1
...        ...       ...            ...            ...         ...      ...
4130  MAR13056 -1.078038   0.000000e+00   0.000000e+00          -1        1
4134  MAR13060 -1.015002   0.000000e+00   0.000000e+00          -1        1
4137  MAR13063 -1.003668   0.000000e+00   0.000000e+00          -1        1
4139  MAR13067 -1.000000   0.000000e+00   0.000000e+00          -1        1
4140  MAR13075 -1.000000   0.000000e+00   0.000000e+00          -1        1

[804 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
2     MA

      reaction        FC    p    q  enrichment  changed
2     MAR00012 -1.000000  0.0  0.0          -1        1
3     MAR00015 -1.000000  0.0  0.0          -1        1
5     MAR00025 -1.000000  0.0  0.0          -1        1
6     MAR00030 -1.000000  0.0  0.0          -1        1
9     MAR00036 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4048  MAR13052 -0.992191  0.0  0.0          -1        1
4051  MAR13055 -1.023330  0.0  0.0          -1        1
4053  MAR13057 -0.976951  0.0  0.0          -1        1
4054  MAR13058 -4.336895  0.0  0.0          -1        1
4056  MAR13060 -1.007859  0.0  0.0          -1        1

[1103 rows x 6 columns]
      reaction        FC              p              q  enrichment  changed
16    MAR00054  1.849114  2.202534e-206  3.226471e-206           1        1
17    MAR00061  1.217166   0.000000e+00   0.000000e+00           1        1
24    MAR00094  1.000000   0.000000e+00   0.000000e+00           1        1

      reaction        FC    p    q  enrichment  changed
2     MAR00012 -1.000000  0.0  0.0          -1        1
4     MAR00025 -1.000000  0.0  0.0          -1        1
5     MAR00030 -1.000000  0.0  0.0          -1        1
6     MAR00036 -1.000000  0.0  0.0          -1        1
8     MAR00040 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4833  MAR13047 -0.961164  0.0  0.0          -1        1
4840  MAR13054 -0.971301  0.0  0.0          -1        1
4843  MAR13057 -0.947589  0.0  0.0          -1        1
4845  MAR13059 -0.887823  0.0  0.0          -1        1
4850  MAR13066 -1.000000  0.0  0.0          -1        1

[1556 rows x 6 columns]
      reaction        FC    p    q  enrichment  changed
10    MAR00045  1.000000  0.0  0.0           1        1
15    MAR00061  1.000000  0.0  0.0           1        1
20    MAR00081  1.000000  0.0  0.0           1        1
21    MAR00082  1.000000  0.0  0.0           1        1
28    MAR00127  1.00000

      reaction        FC    p    q  enrichment  changed
2     MAR00015 -1.000000  0.0  0.0          -1        1
3     MAR00016 -1.000000  0.0  0.0          -1        1
5     MAR00025 -1.000000  0.0  0.0          -1        1
6     MAR00030 -1.000000  0.0  0.0          -1        1
7     MAR00031 -1.000000  0.0  0.0          -1        1
...        ...       ...  ...  ...         ...      ...
4424  MAR13044 -1.012325  0.0  0.0          -1        1
4427  MAR13047 -1.095702  0.0  0.0          -1        1
4433  MAR13053 -1.012116  0.0  0.0          -1        1
4438  MAR13058 -1.310119  0.0  0.0          -1        1
4440  MAR13060 -1.010915  0.0  0.0          -1        1

[1255 rows x 6 columns]
      reaction         FC    p    q  enrichment  changed
13    MAR00047   1.000000  0.0  0.0           1        1
14    MAR00048   1.000000  0.0  0.0           1        1
16    MAR00054   0.851801  0.0  0.0           1        1
17    MAR00061   1.006303  0.0  0.0           1        1
22    MAR00079   1

      reaction         FC              p              q  enrichment  changed
6     MAR00036  -1.000000   0.000000e+00   0.000000e+00          -1        1
8     MAR00041  -1.000000   0.000000e+00   0.000000e+00          -1        1
9     MAR00045  -1.000000   0.000000e+00   0.000000e+00          -1        1
15    MAR00077 -21.190172   0.000000e+00   0.000000e+00          -1        1
16    MAR00078  -1.101705   0.000000e+00   0.000000e+00          -1        1
...        ...        ...            ...            ...         ...      ...
4446  MAR13020  -1.000000   0.000000e+00   0.000000e+00          -1        1
4452  MAR13034 -11.398864  2.021706e-151  2.738947e-151          -1        1
4460  MAR13044  -1.418553   0.000000e+00   0.000000e+00          -1        1
4464  MAR13048  -0.972736   0.000000e+00   0.000000e+00          -1        1
4481  MAR13067  -1.000000   0.000000e+00   0.000000e+00          -1        1

[1316 rows x 6 columns]
      reaction        FC    p    q  enrichment  cha