# GEM Reconstruction with LC M001-related Transcriptomics — Flux Comparison

***by Kengo Watanabe***  

Priyanka Baloni reconstructed mouse genome-scale metabolic models (GEMs; Khodaee, S. et al. Sci. Rep. 2020) with the preprocessed Longevity Consortium (LC) M001-related transcriptomics dataset (Tyshkovskiy, A. et al. Cell Metab. 2019; adjusted with sex and age), and calculated flux values using flux variability analysis (FVA).  
–> This Jupyter Notebook (with Python 3 kernel) compared the calculated flux values between the control and intervention groups.  

Input files:  
- Flux data (maximum): merged_file_MaxFlux_reactions_Intervention-ori.csv  
- Flux data (minimum): merged_file_MinFlux_reactions_Intervention-ori.csv  
- Model–sample metadata: Metadata_LC.xlsx  
- Sample–mouse metadata: 230215_LC-M001-related-TrOmics-DIRAC-ver3_Preprocessing_onWenc_sample-metadata.tsv  
- Reaction metadata (iMM1865): iMM1685_data.xlsx  
- Gene mapping metadata: iMM1685_data-Max.xlsx  
- Reaction list for the diagram: Reactions-in-diagram.csv  

Output figures and tables:  
- Figure 6a, d  
- Supplementary Data 8  

Original notebook (memo for my future tracing):  
- dalek:\[JupyterLab HOME\]/230315_LC-M001-related-TrOmics-GEM-ver3/230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis.ipynb  

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#For Arial font
#!conda install -c conda-forge -y mscorefonts
##-> The below was also needed in matplotlib 3.4.2
#import shutil
#import matplotlib
#shutil.rmtree(matplotlib.get_cachedir())
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import time
#For exporting .pdf file with editable text
import matplotlib
matplotlib.rcParams['pdf.fonttype']=42
matplotlib.rcParams['ps.fonttype']=42

from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from decimal import Decimal, ROUND_HALF_UP
from statsmodels.stats import multitest as multi
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
import re
import matplotlib.patches as mpatches
#!pip install venn
from venn import venn

!conda list

# packages in environment at /opt/conda/envs/arivale-py3:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                       1_gnu    conda-forge
analytics                 0.1                      pypi_0    pypi
argon2-cffi               21.1.0           py39h3811e60_0    conda-forge
arivale-data-interface    0.1.0                    pypi_0    pypi
async_generator           1.10                       py_0    conda-forge
atk-1.0                   2.36.0               h3371d22_4    conda-forge
attrs                     21.2.0             pyhd8ed1ab_0    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                        py_2    conda-forge
backports.functools_lru_cache 1.6.4              pyhd8ed1ab_0    conda-forge
biopython                 1.79             py39h3811e60_0    conda-forge
bleach 

## 1. Clean the original tables

### 1-1. Flux value data

In [None]:
#Load the FVA maximum/minimum flux tables, harmonize the model labels, and average them
fluxTableD = {}
for fluxType in ['MaxFlux', 'MinFlux']:
    #Import raw flux data and index it by reaction
    fileDir = './ImportData/230426_FVA-ver3-2/'
    fileName = 'merged_file_'+fluxType+'_reactions_Intervention-ori.csv'
    rawDF = pd.read_csv(fileDir+fileName)
    rawDF = rawDF.rename(columns={'Reaction':'ReactionID'}).set_index('ReactionID')

    #Clean the model label: keep the second '_'-separated token, zero-padded to two digits
    labelDF = rawDF.columns.to_series().str.split(pat='_', expand=True)
    rawDF.columns = ('Model_'+labelDF[1].str.zfill(2)).tolist()

    #Sort the columns as Model_01, Model_02, ...
    orderedL = ['Model_'+str(i).zfill(2) for i in range(1, len(rawDF.columns)+1)]
    rawDF = rawDF[orderedL]

    print(fluxType)
    display(rawDF)
    display(rawDF.reset_index().describe(include='all'))

    fluxTableD[fluxType] = rawDF

#Calculate the average of the maximum and minimum flux per reaction and model
maxDF = fluxTableD['MaxFlux']
minDF = fluxTableD['MinFlux']
##Check just in case (the counts should equal the table dimensions)
print('nMatching rows:', (maxDF.index==minDF.index).sum())
print('nMatching columns:', (maxDF.columns==minDF.columns).sum())
averageDF = (maxDF + minDF) / 2

print('Average')
display(averageDF)
display(averageDF.reset_index().describe(include='all'))

#Save
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'average-flux-data.tsv'
averageDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

fluxDF = averageDF

### 1-2. Sample metadata

In [None]:
#Import sample-model metadata (which sample each model was built from)
fileDir = './ImportData/230419_FVA-ver2/'
fileName = 'Metadata_LC.xlsx'
sheetName = 'Sheet1'
modelDF = pd.read_excel(fileDir+fileName, sheet_name=sheetName, engine='openpyxl')

#Clean column names and model labels (same 'Model_XX' format as the flux tables)
modelDF = modelDF.rename(columns={'Model':'ModelID', 'Data':'SampleID'})
labelDF = modelDF['ModelID'].str.split(pat='_', expand=True)
modelDF['ModelID'] = 'Model_'+labelDF[1].str.zfill(2)
modelDF = modelDF.set_index('ModelID')

#Import the cleaned sample-mouse metadata
fileDir = '../230224_LC-M001-PrOmics-vs-TrOmics-DIRAC-ver3/ImportData/'
ipynbName = '230215_LC-M001-related-TrOmics-DIRAC-ver3_Preprocessing_onWenc_'
fileName = 'sample-metadata.tsv'
mouseDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t').set_index('SampleID')

#Merge (only models whose sample has metadata are kept)
mergedDF = pd.merge(modelDF, mouseDF, left_on='SampleID', right_index=True, how='inner')

display(mergedDF)
display(mergedDF.reset_index().describe(include='all'))
display(mergedDF['Group'].value_counts().sort_index(ascending=True))

#Per-category breakdown of the intervention groups
sortedCategoryL = mergedDF.sort_values(by='Category', ascending=True)['Category'].unique()
print(' ->', len(sortedCategoryL), 'categories\n')
for category in sortedCategoryL:
    memberDF = mergedDF.loc[mergedDF['Category']==category]
    print(category+' category total:', len(memberDF))
    display(memberDF['Intervention'].value_counts())
    print('')

#Save
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'sample-metadata.tsv'
mergedDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

sampleDF = mergedDF
categoryL = sortedCategoryL

### 1-3. Reaction metadata

#### 1-3-1. iMM1865

In [None]:
#Import reaction metadata of the used model, indexed by reaction ID
fileDir = '../220606_LC-M001-related-transcriptomics-GEM/ImportData/'
fileName = 'iMM1685_data.xlsx'
sheetName = 'Sheet1'
rxnDF = pd.read_excel(fileDir+fileName, sheet_name=sheetName, engine='openpyxl')
#The unnamed fourth column is relabeled as 'Reaction'
rxnDF = rxnDF.rename(columns={'Reaction':'ReactionID', 'Unnamed: 3':'Reaction'}).set_index('ReactionID')

display(rxnDF)
print(' -> Unique rxn ID:', len(rxnDF.index.unique()))

In [None]:
#Check: number of reactions per subsystem, overall vs. those with calculated flux
overallS = rxnDF['Subsystem'].value_counts().rename('Overall')
calculatedS = rxnDF['Subsystem'].loc[fluxDF.index].value_counts().rename('FluxCalculated')
countDF = overallS.to_frame().join(calculatedS, how='left')
display(countDF.describe())
#Show only the large subsystems
display(countDF.loc[countDF['Overall']>100])

#### 1-3-2. Retrieve reaction name with BiGG Models API

In [None]:
#Download the latest file
#(IPython shell escape; writes the BiGG reaction namespace table to ./ImportData/bigg_models_reactions.txt,
# overwriting any existing copy because of -O)
!wget -O ./ImportData/bigg_models_reactions.txt http://bigg.ucsd.edu/static/namespace/bigg_models_reactions.txt

In [None]:
#Import the BiGG reaction namespace table, indexed by reaction ID
fileDir = './ImportData/'
fileName = 'bigg_models_reactions.txt'
biggDF = pd.read_csv(fileDir+fileName, sep='\t')
biggDF = biggDF.rename(columns={'bigg_id':'ReactionID', 'name':'ReactionName'}).set_index('ReactionID')
print('Original nrow:', len(biggDF))
print(' -> Unique rxn ID:', len(biggDF.index.unique()))

#Add the reaction name (left join keeps every reaction of the used model)
namedDF = pd.merge(rxnDF, biggDF['ReactionName'], left_index=True, right_index=True, how='left')
namedDF = namedDF[['Reaction', 'ReactionName', 'GPR', 'Subsystem']]

display(namedDF)
#Reactions for which no name was found in the BiGG table
unnamedDF = namedDF.loc[namedDF['ReactionName'].isnull()]
print(' -> No rxn name:', len(unnamedDF))
display(unnamedDF)

rxnDF = namedDF

#### 1-3-3. Retrieve molecule name with BiGG Models API

In [None]:
#Take all molecules in the reactions
#NOTE: every pattern below is a regular expression, so regex=True is passed explicitly.
#pandas >= 2.0 treats str.replace patterns as literal text by default, which would silently
#leave the quotes and the square-bracket tags in place.
tempS = rxnDF['Reaction'].str.replace(r"^'", '', regex=True)#Strip a leading single quote
tempS = tempS.str.replace(r"'$", '', regex=True)#Strip a trailing single quote
tempDF = tempS.str.split(pat=' ', expand=True)#One space-separated token per column
print('Expanded DF:', tempDF.shape)
t_start = time.time()
tempS = set()
for col_n in tempDF.columns.tolist():
    tempS1 = tempDF[col_n].dropna()#None is considered an NA value in pandas
    tempS1 = tempS1.loc[tempS1!='']#Delete empty
    #Keep only the tokens that contain a square-bracket tag
    tempS1 = tempS1.loc[tempS1.str.contains(r'\[.*\]', regex=True)]
    #Character in square brackets indicates intracellular localization
    tempL = tempS1.str.replace(r'\[.*\]', '', regex=True).tolist()
    tempS = tempS | set(tempL)
t_elapsed = time.time() - t_start
print(' -> Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')
print(' -> Unique molecule:', len(tempS))
print(list(tempS)[:25])

molS = tempS

In [None]:
#Download the latest file
#(IPython shell escape; writes the BiGG metabolite namespace table to ./ImportData/bigg_models_metabolites.txt,
# overwriting any existing copy because of -O)
!wget -O ./ImportData/bigg_models_metabolites.txt http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt

In [None]:
#Import metabolite metadata
fileDir = './ImportData/'
fileName = 'bigg_models_metabolites.txt'
tempDF = pd.read_csv(fileDir+fileName, sep='\t')
tempDF = tempDF.rename(columns={'universal_bigg_id':'MoleculeID', 'name':'MoleculeName'})
tempDF = tempDF.set_index('MoleculeID')
print('Original nrow:', len(tempDF))
print(' -> Unique molecule ID:', len(tempDF.index.unique()))

#Retrieve the molecules within the used model
print('Molecules within the used model', len(molS))
tempS = tempDF['MoleculeName']
#Keep one row per molecule ID. De-duplicating on the ID (index) rather than on the name value
#keeps distinct molecules that happen to share the same (or a missing) name.
tempS = tempS.loc[~tempS.index.duplicated(keep='first')]
tempS = tempS.loc[tempS.index.isin(molS)]

tempS = tempS.sort_index(ascending=True)
display(tempS)

#NOTE: molS changes here from a set of molecule IDs to a Series of names indexed by molecule ID
molS = tempS

#### 1-3-4. Gene mapping table

In [None]:
#Import the gene mapping table of the used model (the header is on the third row of the sheet)
fileDir = '../220606_LC-M001-related-transcriptomics-GEM/ImportData/'
fileName = 'iMM1685_data-Max.xlsx'
sheetName = 'Sheet1'
mapDF = pd.read_excel(fileDir+fileName, sheet_name=sheetName, engine='openpyxl', header=2)
mapDF = mapDF.rename(columns={'Entrez ID':'EntrezID',
                              'Gene symbol':'GeneSymbol',
                              'Ensembl gene ID':'EnsemblID'})
#Keep only the complete ID triplets
mapDF = mapDF[['EntrezID', 'EnsemblID', 'GeneSymbol']].dropna()
mapDF['EntrezID'] = mapDF['EntrezID'].astype('int64')
mapDF = mapDF.set_index('EntrezID').sort_index(ascending=True)

display(mapDF)
for label, valueA in [('Entrez ID', mapDF.index.unique()),
                      ('Ensembl ID', mapDF['EnsemblID'].unique()),
                      ('gene name', mapDF['GeneSymbol'].unique())]:
    print(' -> Unique '+label+':', len(valueA))

geneDF = mapDF

#### 1-3-5. Save as a single .xlsx file

In [None]:
#Prepare a new .xlsx file
#(the first write creates the workbook with the 'Reaction' sheet)
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'reaction-metadata.xlsx'
sheetName = 'Reaction'
rxnDF.to_excel(fileDir+ipynbName+fileName, sheet_name=sheetName, header=True, index=True)

#Append the others to the above .xlsx file
#(mode='a' adds the 'Molecule' and 'Gene' sheets to the existing workbook instead of overwriting it)
with pd.ExcelWriter(fileDir+ipynbName+fileName, mode='a', engine='openpyxl') as writer:
    sheetName = 'Molecule'
    molS.to_excel(writer, sheet_name=sheetName, header=True, index=True)
    sheetName = 'Gene'
    geneDF.to_excel(writer, sheet_name=sheetName, header=True, index=True)

### 1-4. Select the target samples

> This study assesses only the Ctrl, Aca, Rapa, and CR groups, consistent with DIRAC analysis.  

In [None]:
#Select the target samples and the matching flux columns
targetL = ['Ctrl1', 'Aca', 'Rapa', 'CRdiet']
keptSampleDF = sampleDF.loc[sampleDF['Intervention'].isin(targetL)]
keptFluxDF = fluxDF.loc[:, fluxDF.columns.isin(keptSampleDF.index.tolist())]

#Re-define the control category from the remaining samples
keptCategoryL = keptSampleDF.sort_values(by='Category', ascending=True)['Category'].unique()

display(keptSampleDF.describe(include='all'))
display(keptSampleDF['Group'].value_counts().sort_index(ascending=True))
print(' ->', len(keptCategoryL), 'categories\n')
for category in keptCategoryL:
    memberDF = keptSampleDF.loc[keptSampleDF['Category']==category]
    print(category+' category total:', len(memberDF))
    display(memberDF['Intervention'].value_counts())
    print('')

display(keptFluxDF.describe(include='all'))

#Update
sampleDF = keptSampleDF
categoryL = keptCategoryL
fluxDF = keptFluxDF

### 1-5. Select biochemical reactions

> The following reactions are removed in this analysis:  
> - Reactions without subsystem annotation, because they are non-functional (e.g., biomass reaction). Of note, Priyanka previously confirmed they were not errors.  
> - Reactions in the "Exchange/demand reaction" subsystem, because they are pseudoreactions of GEM.  
> - Reactions in the "Miscellaneous" subsystem, because they are difficult to interpret as a functional subsystem.  
> - Reactions in the "Transport, xxx" subsystem, because their functional meaning depends on each transported metabolite and is covered by biochemical reactions of the other subsystems.  

In [None]:
#Remove the non-biochemical reactions (see the list above) from the flux data
tempDF1 = rxnDF
tempDF2 = fluxDF

#Reactions without subsystem annotation
tempDF = tempDF1.loc[tempDF1['Subsystem'].isnull()]
print('Reactions without subsystem annotation')
display(tempDF)
tempL1 = tempDF.index.tolist()

#Exchange/demand reactions
tempDF = tempDF1.loc[tempDF1['Subsystem']=='Exchange/demand reaction']
print('Exchange/demand reactions')
display(tempDF)
tempL2 = tempDF.index.tolist()

#Miscellaneous reactions
tempDF = tempDF1.loc[tempDF1['Subsystem']=='Miscellaneous']
print('Miscellaneous reactions')
display(tempDF)
tempL3 = tempDF.index.tolist()

#Transport reactions (na=False treats the reactions without subsystem annotation as non-matching)
tempDF = tempDF1.loc[tempDF1['Subsystem'].str.contains('Transport, ', na=False)]
print('Transport reactions')
display(tempDF)
tempL4 = tempDF.index.tolist()

#Reactions to be removed
tempL = tempL1 + tempL2 + tempL3 + tempL4
print('nReactions to be removed:', len(tempL))

#Remove the reactions from flux data
print('Flux DF before:', tempDF2.shape)
tempDF = tempDF2.loc[~tempDF2.index.isin(tempL)]

display(tempDF)
display(tempDF.reset_index().describe(include='all'))

#Save
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'average-flux-data_selected.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

#Check: number of reactions per subsystem at each filtering stage
tempS1 = tempDF1['Subsystem'].value_counts()
tempS1.name = 'Overall'
tempS2 = tempDF1['Subsystem'].loc[tempDF2.index].value_counts()
tempS2.name = 'FluxCalculated'
tempS3 = tempDF1['Subsystem'].loc[tempDF.index].value_counts()
tempS3.name = 'Selected'
tempDF3 = pd.concat([tempS1, tempS2, tempS3], axis=1)
display(tempDF3.describe())
display(tempDF3.loc[tempS1>30])
display(tempDF3.loc[tempS1<=30])

#Update
fluxDF = tempDF

## 2. Check data structure of the flux values

### 2-1. Distribution

In [None]:
#Skewness (and other summary metrics) per sample, then summarized across samples
perSampleDF = fluxDF.describe()
perSampleDF.loc['Skewness'] = fluxDF.agg(stats.skew, axis=0)
acrossSampleDF = perSampleDF.T.describe()
display(acrossSampleDF)

In [None]:
#Distribution of the flux values per model, one panel per group
tempL = sampleDF.sort_values(by='Group', ascending=True)['Group'].unique()
#Shared x-axis range so that the panels are directly comparable
global_max = fluxDF.max(axis=1).max()
global_min = fluxDF.min(axis=1).min()
sns.set(style='ticks', font='Arial', context='notebook')#Loop-invariant, hence set once
for group in tempL:
    tempDF = sampleDF.loc[sampleDF['Group']==group]
    tempDF = fluxDF.loc[:, tempDF.index.tolist()]
    plt.figure(figsize=(4, 3))
    for sample in tempDF.columns.tolist():
        #NOTE(review): sns.distplot is deprecated in recent seaborn releases; kept to reproduce the original figure
        sns.distplot(tempDF[sample], label=sample)
    sns.despine()
    plt.xlim(global_min, global_max)
    plt.ylabel('Density')
    #fluxDF holds the average of the FVA maximum and minimum flux, not the maximum flux
    plt.xlabel('Flux value (mean of FVA min and max)')
    plt.title('Group: '+group)
    plt.legend(bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=1, ncol=1)
    plt.show()

> According to Priyanka, the 0 and ±1,000 values are truly meaningful because they were at least resolved. Also, a negative flux indicates that the inverse direction is preferred at the steady state, which is possible only for a reaction that is defined as reversible, such as A + B <=> C + D. In GEM, an "irreversible" reaction is defined differently, such as X + Y -> Z.  

### 2-2. PCA

#### 2-2-1. Standardization

> Clearly, the flux value distribution across analytes is not a normal distribution per model, but the variables are set as analytes in this PCA. Because the value range is roughly consistent b/w samples, only a simple standardization is applied to each analyte value before PCA.  
> –> Instead of StandardScaler (z-score), RobustScaler (median removed and scaled by IQR) is used. (Of note, RobustScaler() returns just the centered (i.e., median-subtracted) values if IQR = 0.)  

In [None]:
#Robust transformation (median-centered and IQR-scaled per reaction across models)
scaler = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(25, 75),
                      copy=True, unit_variance=False)
#The scaler works column-wise, so the table is transposed to models x reactions
fluxDF_scaled = pd.DataFrame(data=scaler.fit_transform(fluxDF.T),
                             index=fluxDF.columns, columns=fluxDF.index)
fluxDF_scaled.index.name = 'SampleID'

display(fluxDF_scaled)
#Summary of the first 15 reactions only
display(fluxDF_scaled.iloc[:, :15].describe())

#### 2-2-2. PCA

In [None]:
#Fit PCA on the scaled flux table (rows: models, columns: reactions)
nPCs = 10
model = PCA(n_components=nPCs, svd_solver='randomized', iterated_power='auto', random_state=123)
model.fit(fluxDF_scaled)

#Explained variance
tempS = pd.Series(data=model.explained_variance_ratio_*100,
                  index=['PC'+str(i+1) for i in range(nPCs)], name='ExplainedVariance')
print('Percentage of variance explained by each component:')
display(tempS)

#Scree plot
tempDF = tempS.reset_index()
tempDF['PC'] = [i+1 for i in range(nPCs)]
sns.set(style='ticks', font='Arial', context='talk')
plt.figure(figsize=(4, 3))
p = sns.lineplot(data=tempDF, x='PC', y='ExplainedVariance', color='k')
sns.despine()
#Axis range and ticks follow nPCs instead of a hard-coded 10
p.set(xlim=(0.5, nPCs+0.5), xticks=np.arange(1, nPCs+1, 1))
plt.ylabel('Explained variance [%]')
plt.xlabel('Principal component number')
plt.show()

#Label: 'PCk (xx.x%)' with the percentage rounded half-up to one decimal place
tempL = []
for i in range(nPCs):
    round_value = Decimal(str(tempS['PC'+str(i+1)])).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
    tempL.append('PC'+str(i+1)+' ('+str(round_value)+'%)')

#Projected space
pca_space = pd.DataFrame(data=model.transform(fluxDF_scaled), index=fluxDF_scaled.index, columns=tempL)
display(pca_space)

#Principal axes in feature space
pca_comp = pd.DataFrame(data=model.components_, index=tempL, columns=fluxDF_scaled.columns)
display(pca_comp)

#### 2-2-3. Projected space

In [None]:
#Pairwise scatter plots of the first nPCs principal components, colored by intervention and shaped by sex
nPCs = 6

#Prepare DF
#(left join keeps every row of sampleDF; the PC scores are matched on the index)
tempDF = sampleDF[['Intervention', 'Sex']]
tempDF = pd.merge(tempDF, pca_space.iloc[:, :nPCs],
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.reset_index()

#Color
#(tempD1: intervention -> color; tempD2: sex -> marker)
tempD1 = {'Ctrl1':'tab:blue', 'Aca':'tab:red', '17aE2':'tab:green', 'Prot':'tab:brown',
          'Rapa':'tab:purple', 'CRdiet':'tab:olive', 'Ctrl2':'tab:blue', 'MRdiet':'tab:orange',
          'GHRWT':'tab:gray', 'GHRKO':'tab:pink', 'SnellWT':'black', 'SnellDW':'tab:cyan'}
tempD2 = {'F':'o', 'M':'^'}

#Visualization
sns.set(style='ticks', font='Arial', context='talk')
p = sns.PairGrid(data=tempDF, hue='Intervention', hue_order=tempD1.keys(), palette=tempD1)
p.map_lower(sns.scatterplot, style=tempDF['Sex'], style_order=tempD2.keys(), markers=tempD2)
#Hide the diagonal and upper-triangle panels (only the lower triangle is drawn above)
for i, j in zip(*np.triu_indices_from(p.axes, 0)):
    p.axes[i, j].set_visible(False)
p.add_legend(bbox_to_anchor=(0.7, 0.7), loc='upper right', frameon=True, title='')
plt.show()

> –> Some points are missing???  
> –> They were just completely overlapped.  

## 3. Compare flux values

> 1. Compare flux values between control vs. each intervention using Mann–Whitney U-test.  
>
> To increase the statistical power, samples are grouped by intervention (i.e., strain, age, and sex are pooled). Note that the dataset was adjusted with them before GEM reconstruction. Although tricky, the P-value adjustment is NOT performed across reactions because this step is regarded as an intermediate process for the enrichment analysis to assess each intervention effect on GEM subsystems. As well, the P-value adjustment across interventions is just for reference purpose.  

### 3-1. Mann–Whitney U-test

#### 3-1-1. Perform all statistical tests

> Note that the scipy API (scipy.stats.mannwhitneyu) is used, because only the one-sided Mann–Whitney U-test seems to be implemented in the current statsmodels API (statsmodels.stats.nonparametric.rank_compare_2indep). Actually, the output objects of the Mann–Whitney U-test are the same b/w the two APIs, which is in contrast to the case of the t-test (degrees of freedom are not reported in the scipy API).  

In [None]:
#Two-sided Mann–Whitney U-test per reaction for each (contrast vs. control) pair
tempL1 = ['Ctrl1', 'Aca', 'Rapa', 'CRdiet']#Target sample groups to be assessed
tempL2 = categoryL#Control type for post-hoc comparisons
tempDF1 = fluxDF
tempDF2 = sampleDF.loc[sampleDF['Intervention'].isin(tempL1)]

#List the (contrast, control) pairs once; they depend only on the sample metadata, not on the reaction
comparisonL = []
for control in tempL2:
    tempA = tempDF2['Intervention'].loc[tempDF2['Category']==control].unique()
    for contrast in tempL1:
        if (contrast in tempA) and (control!=contrast):
            comparisonL.append((contrast, control))

#Statistical tests per reaction
t_start = time.time()
tempL3 = []#For test summary
for rxn in tempDF1.index:
    #Select the target reaction
    tempS = tempDF1.loc[rxn]
    tempS.name = 'Flux'
    #Add metadata while selecting the target samples
    tempDF = pd.merge(tempS, tempDF2, left_index=True, right_index=True, how='inner')

    #Test per control vs. contrast
    tempDF3 = pd.DataFrame(columns=['Ustat', 'Pval'])
    for contrast, control in comparisonL:
        tempS1 = tempDF['Flux'].loc[tempDF['Intervention']==control]
        tempS2 = tempDF['Flux'].loc[tempDF['Intervention']==contrast]
        #Two-sided Mann–Whitney U-test
        ustat, pval = stats.mannwhitneyu(tempS2, tempS1,#U-statistic corresponds to the contrast
                                         use_continuity=True, alternative='two-sided', method='auto')
        tempDF3.loc[contrast+'-vs-'+control] = [ustat, pval]
    ##P-value adjustment across all comparisons per reaction by using Benjamini–Hochberg method (just for reference)
    tempDF3['AdjPval'] = multi.multipletests(tempDF3['Pval'], alpha=0.05, method='fdr_bh',
                                             is_sorted=False, returnsorted=False)[1]
    ##Convert to wide-format (one row per reaction, columns named '<comparison>_<statistic>')
    tempL = []
    for comparison in tempDF3.index:
        tempS = tempDF3.loc[comparison]
        tempS.index = comparison+'_'+tempS.index
        tempS.name = rxn
        tempL.append(tempS)
    tempS = pd.concat(tempL, axis=0)
    tempL3.append(tempS)
t_elapsed = time.time() - t_start
#Report the number of comparisons that were actually performed
print('Elapsed time for', len(comparisonL)*len(tempDF1), 'tests (',
      len(comparisonL), 'comparisons x', len(tempDF1), 'reactions):',
      round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#Generate test summary table
tempDF3 = pd.concat(tempL3, axis=1).T
tempDF3.index.name = tempDF1.index.name
display(tempDF3)

statDF = tempDF3

In [None]:
#General statistics per intervention group, merged with the test summary and the reaction names
tempL1 = ['Ctrl1', 'Aca', 'Rapa', 'CRdiet']#Target sample groups to be summarized
tempDF1 = fluxDF
tempDF2 = sampleDF.loc[sampleDF['Intervention'].isin(tempL1)]

#Calculate general statistics per intervention group
tempL2 = []
for intervention in tempL1:
    #Select the target samples
    tempL = tempDF2.loc[tempDF2['Intervention']==intervention].index.tolist()
    tempDF = tempDF1[tempL]
    #Calculate general statistics
    tempS1 = len(tempL) - tempDF.isnull().sum(axis=1)#Number of non-missing values per reaction
    tempS1.name = intervention+'_N'
    tempS2 = tempDF.mean(axis=1)
    tempS2.name = intervention+'_FluxMean'
    tempS3 = tempDF.median(axis=1)
    tempS3.name = intervention+'_FluxMedian'
    #Cf. pd.DataFrame.mad() is not median absolute deviation but mean absolute deviation
    #stats.median_absolute_deviation was removed in SciPy 1.9; stats.median_abs_deviation divides
    #by its scale, so scale=1/1.4826 reproduces the former default (MAD multiplied by 1.4826)
    tempS4 = tempDF.apply(lambda x: stats.median_abs_deviation(x, scale=1/1.4826), axis=1)
    tempS4.name = intervention+'_FluxMAD'
    #Merge
    tempDF = pd.concat([tempS1, tempS2, tempS3, tempS4], axis=1)
    tempL2.append(tempDF)
tempDF = pd.concat(tempL2, axis=1)
display(tempDF)

#Merge all the tables
print('General statistics table:', tempDF.shape)
print('Test summary table:', statDF.shape)
tempDF = pd.concat([tempDF, statDF], axis=1)
tempS = rxnDF['ReactionName']
tempDF = pd.merge(tempS, tempDF, left_index=True, right_index=True, how='right')

display(tempDF)

#Save
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'flux-comparison_vs-each-control.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

#Update
statDF = tempDF

#### 3-1-2. Potentially changed reactions by each intervention (nominal P-value)

In [None]:
#Extract the nominal P-values and the direction of change per comparison
tempDF = statDF

#Take nominal P-value
tempDF1 = tempDF.loc[:, tempDF.columns.str.contains('-vs-.*_Pval$')]
#regex=True is explicit because '$' is a regex anchor (pandas >= 2.0 defaults to literal matching)
tempDF1.columns = tempDF1.columns.str.replace('_Pval$', '', regex=True)
tempDF1 = pd.merge(tempDF['ReactionName'], tempDF1,
                   left_index=True, right_index=True, how='left')
print('Nominal P-value:')
display(tempDF1)
display(tempDF1.describe())
tempL = tempDF1.loc[:, tempDF1.columns.str.contains('-vs-')].columns.tolist()
for comparison in tempL:
    tempS = tempDF1[comparison]
    tempS = tempS.loc[tempS<0.05]
    print(' - '+comparison+':', len(tempS))

#Take U-statistic difference from the null (for direction)
#(.copy() because the columns are overwritten below; avoids writing into a slice of statDF)
tempDF2 = tempDF.loc[:, tempDF.columns.str.contains('-vs-.*_Ustat$')].copy()
tempDF2.columns = tempDF2.columns.str.replace('_Ustat$', '', regex=True)
for comparison in tempDF2.columns:
    #Take total U of control and contrast (product of the two sample sizes)
    control = re.sub('^.*-vs-', '', comparison)
    contrast = re.sub('-vs-.*', '', comparison)
    tempS1 = tempDF[control+'_N']
    tempS2 = tempDF[contrast+'_N']
    tempS = tempS1 * tempS2
    #Calculate the difference from the null (half of the total U)
    tempDF2[comparison] = tempDF2[comparison] - tempS/2
tempDF2 = pd.merge(tempDF['ReactionName'], tempDF2,
                   left_index=True, right_index=True, how='left')
print('Changed direction (U-statistic difference from the null):')
display(tempDF2)
display(tempDF2.describe())

pvalDF = tempDF1
diffDF = tempDF2

> Check the changed reactions (based on the adjusted P-values across interventions) as reference.  

In [None]:
#Count the changed reactions per comparison based on the adjusted P-values (reference only)
tempDF = statDF

#Take adjusted P-value
tempDF1 = tempDF.loc[:, tempDF.columns.str.contains('-vs-.*_AdjPval$')]
#regex=True is explicit because '$' is a regex anchor (pandas >= 2.0 defaults to literal matching)
tempDF1.columns = tempDF1.columns.str.replace('_AdjPval$', '', regex=True)
tempDF1 = pd.merge(tempDF['ReactionName'], tempDF1,
                   left_index=True, right_index=True, how='left')
print('Adjusted P-value:')
display(tempDF1)
display(tempDF1.describe())
tempL = tempDF1.loc[:, tempDF1.columns.str.contains('-vs-')].columns.tolist()
for comparison in tempL:
    tempS = tempDF1[comparison]
    tempS = tempS.loc[tempS<0.05]
    print(' - '+comparison+':', len(tempS))


### 3-2. Visualization: clustermap

#### 3-2-1. Sample-wide

> For visualization purpose, it would be better to use the centered values based on control groups, because around zero value simply indicates no change from control.  
> –> At the same time, scaling is required for good clustering, but RobustScaler() returns just the centered (i.e., median-subtracted) values if IQR = 0. Hence, not the IQR but the maximum absolute value is used for scaling. Of note, scaling eliminates the information about how largely an intervention group differs from the control group, but it rather reflects the rank-based statistical tests.  

> ***Skip this!***  

#### 3-2-2. Group-wide 1

> Sample-based clustering would be difficult to understand.  
> –> Combine samples per intervention group. Of note, not the median but the mean is used as the summary statistic because the median loses much information in the case of a small sample size.  

In [None]:
#Summarize samples per intervention group (mean flux per reaction)
groupL = ['Ctrl1', 'Aca', 'Rapa', 'CRdiet']
meanDF = pd.DataFrame(index=fluxDF.index)
for group in groupL:
    memberL = sampleDF.loc[sampleDF['Intervention']==group].index.tolist()
    #Calculate the mean
    meanS = fluxDF.loc[:, memberL].mean(axis=1)
    meanS.name = group
    #Merge
    meanDF = pd.merge(meanDF, meanS, left_index=True, right_index=True, how='left')
print('Summary per intervention group:')
display(meanDF.describe())

#Centering per each control category
centeredDF = pd.DataFrame(index=meanDF.index)
for category in categoryL:
    memberL = sampleDF['Intervention'].loc[sampleDF['Category']==category].unique().tolist()
    categoryDF = meanDF.loc[:, memberL]
    #Centering: subtract the column named after the category from every group of that category
    categoryDF = categoryDF.sub(categoryDF[category], axis=0)
    #Merge
    centeredDF = pd.merge(centeredDF, categoryDF, left_index=True, right_index=True, how='left')

#Scaling by the maximum absolute value per reaction
scaler = MaxAbsScaler(copy=True)
scaledA = scaler.fit_transform(centeredDF.T)#axis: column
scaledDF = pd.DataFrame(data=scaledA, index=centeredDF.columns, columns=centeredDF.index).T
scaledDF = scaledDF[['Ctrl1', 'Aca', 'Rapa', 'CRdiet']]#Sort

print('After centering and scaling:')
display(scaledDF)
display(scaledDF.describe())

fluxDF_scaled = scaledDF

> –> Remove control groups because it is meaningless after centering.  

In [None]:
#Clustermap of the centered and scaled group means, annotated with the changed reactions
#Prepare DF to plot
tempDF = fluxDF_scaled.copy()
##Remove control groups
tempDF = tempDF.drop(columns=categoryL)

#Prepare color labels for the changed reactions
regulation = 'Changed'
tempD0 = {'Ctrl1':'Control', 'Aca':'Acarbose',
          'Rapa':'Rapamycin', 'CRdiet':'CR diet'}
tempDF.columns = tempDF.columns.map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive'}
#(.copy() because color columns are added below; avoids writing into a slice of pvalDF)
tempDF1 = pvalDF.loc[tempDF.index.tolist(), pvalDF.columns.str.contains('-vs-')].copy()
for col_n in tempDF1.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Increased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Decreased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    #Color code per reaction: the group color if the reaction passed the filter above, white otherwise
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    changedSet = set(tempS2.index)#Built once per comparison for O(1) membership tests
    isChangedL = [rxn in changedSet for rxn in tempDF1.index]
    tempL = [tempD1[label] if isChanged else 'white' for isChanged in isChangedL]
    count = sum(isChangedL)#Just for checking
    tempDF1[label] = tempL
    print(regulation+' reaction in '+col_n)
    print(' -> nominal P < 0.05:', count)
#Keep only the color-code columns (drop the P-value columns)
tempDF1 = tempDF1.loc[:, tempDF1.columns.isin(tempD1.keys())]
##Remove the group showing no changed reactions
#for col_n in tempDF1.columns.tolist():
#    tempS = tempDF1[col_n]
#    tempS = (tempS=='white')
#    if tempS.sum()==len(tempS):
#        tempDF1 = tempDF1.drop(columns=col_n)

#Clustermap
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF.T, method='ward', metric='euclidean', cmap='RdBu_r',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF1, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.025, 0.2), colors_ratio=(0.01, 0.1),
                    cbar_pos=(1.02, 0.25, 0.3, 0.06), cbar_kws={'orientation': 'horizontal'},
                    figsize=(15, 3), **{'vmin':-1, 'vmax':1})
cm.cax.set_title('Group mean of flux values (vs. Control; scaled)', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
#The positions below are re-applied unchanged (placeholders for manual layout tuning)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('Reaction')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
tempL = []
for group in tempD1.keys():
    if group in tempDF1.columns.tolist():
        tempL.append(mpatches.Patch(color=tempD1[group], label='by '+group))
legend = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=1,
                    title='Changed reactions (vs. Control)', title_fontsize='medium',
                    bbox_to_anchor=(0.5, 1.0), loc='lower center', borderaxespad=3, frameon=False)
plt.gca().add_artist(legend)
##Save
fileDir = './ExportFigures/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'flux-clustermap-across-groups.pdf'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
plt.show()

#Save label order (reactions in the left-to-right order of the clustered heatmap columns)
tempDF = pvalDF.loc[tempDF.index[cm.dendrogram_col.reordered_ind]]
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'flux-clustermap-across-groups_ticks-order.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

#### 3-2-3. Group-wide 2

> Centering by each control may highlight the difference too strongly, because each control group is forced to be around zero.  
> –> Check simple standardization (Z-score) across all groups.  

In [None]:
#Average the flux of every reaction within each intervention group
groupL = ['Ctrl1', 'Aca', 'Rapa', 'CRdiet']
tempDF = pd.DataFrame(index=fluxDF.index)
for group in groupL:
    #Models (columns of fluxDF) annotated with this intervention
    modelL = sampleDF.loc[sampleDF['Intervention'].eq(group)].index.tolist()
    #Row-wise mean across those models, named after the group
    groupMeanS = fluxDF.loc[:, modelL].mean(axis=1).rename(group)
    #Append as a new column (left join on the reaction index)
    tempDF = tempDF.merge(groupMeanS, left_index=True, right_index=True, how='left')
print('Summary per intervention group:')
display(tempDF.describe())

#Z-score transformation
##StandardScaler works column-wise, so the table is transposed to (groups x reactions):
##each reaction is centered and scaled across the group means
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
scaledA = scaler.fit_transform(tempDF.T)
##Transpose back to (reactions x groups) and fix the column order
tempDF1 = pd.DataFrame(data=scaledA.T, index=tempDF.index, columns=tempDF.columns)
tempDF1 = tempDF1.loc[:, groupL]

print('After centering and scaling:')
display(tempDF1)
display(tempDF1.describe())

fluxDF_scaled = tempDF1

In [None]:
#Prepare DF to plot (copy of fluxDF_scaled; columns are renamed to display labels below)
tempDF = fluxDF_scaled.copy()
##Remove control groups
#tempDF = tempDF.drop(columns=categoryL)

#Prepare color labels for the changed reactions
regulation = 'Changed'#'Changed', 'Increased', or 'Decreased'
tempD0 = {'Ctrl1':'Control', 'Aca':'Acarbose',
          'Rapa':'Rapamycin', 'CRdiet':'CR diet'}
tempDF.columns = tempDF.columns.map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive'}
##.copy() because color columns are added below (avoid writing into a slice of pvalDF)
tempDF1 = pvalDF.loc[tempDF.index.tolist(), pvalDF.columns.str.contains('-vs-')].copy()
for col_n in tempDF1.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    #Keep the reactions passing nominal P < 0.05 (and the sign filter, if any)
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Increased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Decreased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    #One color column per comparison: group color if the reaction passed, white otherwise
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    color = tempD1[label]
    ##Single vectorized membership test instead of scanning a rebuilt list per reaction
    tempA = tempDF1.index.isin(tempS2.index)
    tempDF1[label] = [color if hit else 'white' for hit in tempA]
    count = int(tempA.sum())#Just for checking
    print(regulation+' reaction in '+col_n)
    print(' -> nominal P < 0.05:', count)
##Drop the original P-value columns; only the color columns remain
tempDF1 = tempDF1.loc[:, tempDF1.columns.isin(list(tempD1.keys()))]
##Remove the group showing no changed reactions
#for col_n in tempDF1.columns.tolist():
#    tempS = tempDF1[col_n]
#    tempS = (tempS=='white')
#    if tempS.sum()==len(tempS):
#        tempDF1 = tempDF1.drop(columns=col_n)

#Clustermap
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF.T, method='ward', metric='euclidean', cmap='RdBu_r',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF1, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.025, 0.2), colors_ratio=(0.01, 0.1),
                    cbar_pos=(1.02, 0.25, 0.3, 0.06), cbar_kws={'orientation': 'horizontal'},
                    figsize=(15, 3), **{'vmin':-2, 'vmax':2})
cm.cax.set_title('Group mean of flux values ('+r'$Z$'+'-score)', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
##Positions are re-applied unchanged (placeholder for manual layout tuning)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('Reaction')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
tempL = [mpatches.Patch(color=tempD1[group], label='by '+group)
         for group in tempD1.keys() if group in tempDF1.columns.tolist()]
##The legend title follows `regulation` so that it stays consistent with the color columns
legend = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=1,
                    title=regulation+' reactions (vs. Control)', title_fontsize='medium',
                    bbox_to_anchor=(0.5, 1.0), loc='lower center', borderaxespad=3, frameon=False)
plt.gca().add_artist(legend)
##Save
fileDir = './ExportFigures/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'flux-clustermap-across-groups.pdf'
#plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
plt.show()

#Save label order
##Rows of pvalDF in the left-to-right order of the clustered heatmap columns
tempDF = pvalDF.loc[tempDF.index[cm.dendrogram_col.reordered_ind]]
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
fileName = 'flux-clustermap-across-groups_ticks-order.tsv'
#tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

> –> This representation is informative for us, but general readers would probably find it difficult to grasp the message of the figure...  

### 3-3. Visualization: venn diagram

In [None]:
#Prepare label and color
tempD0 = {'Ctrl1':'Control', 'Aca':'Acarbose',
          'Rapa':'Rapamycin', 'CRdiet':'CR diet'}
tempD1 = {'Acarbose':'tab:red', 'Rapamycin':'tab:purple', 'CR diet':'tab:olive'}

#Visualization per direction
for regulation in ['Changed', 'Increased', 'Decreased']:
    #Prepare reaction sets (one set of significant reactions per comparison)
    tempL = statDF.index.tolist()
    tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
    tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
    tempD2 = {}
    count = 0
    for col_n in tempDF1.columns.tolist():
        tempS1 = tempDF1[col_n]
        tempS2 = tempDF2[col_n]
        #Nominal P < 0.05, optionally restricted by the sign of the difference
        if regulation=='Changed':
            tempS2 = tempS2.loc[(tempS1<0.05)]
        elif regulation=='Increased':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
        elif regulation=='Decreased':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
        if len(tempS2)>0:
            count += 1
        label = tempD0[re.sub('-vs-.*', '', col_n)]
        tempD2[label] = set(tempS2.index.tolist())
    if count==0:
        print(regulation+' reactions: no significance in any comparisons')
        continue
    ##Sort to make consistent order in manual legend generation
    tempD = {label: tempD2[label] for label in tempD1.keys()}
    count = sum(len(rxnS)>0 for rxnS in tempD.values())
    
    #Skip the followings if no significant reaction
    if count==0:
        print(regulation+' reactions: no significance in any target comparisons')
        continue
    
    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(4, 4))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.05, 0.975))#Otherwise, weird space...
    ##Add legend annotation (positions are hand-tuned for the three sets in tempD1)
    x_coord = [0.1, 0.9, 0.8]
    y_coord = [0.8, 0.8, 0.25]
    h_align = ['right', 'left', 'left']
    v_align = ['bottom', 'bottom', 'top']
    for key, x, y, ha, va in zip(tempD1.keys(), x_coord, y_coord, h_align, v_align):
        total = f'{len(tempD[key]):,}'
        ##The sets contain reactions, so the unit is 'reactions' (consistent with the title)
        ax.text(x, y, key+'\n('+total+' reactions)',
                fontsize='small', multialignment='center',
                horizontalalignment=ha, verticalalignment=va,
                bbox={'boxstyle':'round', 'facecolor':tempD1[key], 'pad':0.2, 'alpha':0.5})
    title = regulation+' reactions (vs. Control)'
    ax.set_title(title, fontsize='medium')
    ##Save (the 'Changed' diagram is displayed only)
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
        fileName = 'rxn-venn-'+regulation.lower()+'.pdf'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()

In [None]:
#Write the reactions of every venn-diagram region to one .xlsx file per direction
for regulation in ['Increased', 'Decreased']:
    #Nominal P-values and flux differences (comparison columns only)
    rxnL = statDF.index.tolist()
    pDF = pvalDF.loc[rxnL, pvalDF.columns.str.contains('-vs-')]
    dDF = diffDF.loc[rxnL, diffDF.columns.str.contains('-vs-')]
    #Significant reaction set of each comparison (keyed by the comparison name)
    tempD = {}
    for col_n in pDF.columns.tolist():
        pS = pDF[col_n]
        dS = dDF[col_n]
        if regulation=='Changed':
            keepS = (pS<0.05)
        elif regulation=='Increased':
            keepS = (pS<0.05) & (dS>0)
        elif regulation=='Decreased':
            keepS = (pS<0.05) & (dS<0)
        tempD[col_n] = set(dS.loc[keepS].index.tolist())
    
    #Nothing to export when every set is empty
    if not any(len(rxnS)>0 for rxnS in tempD.values()):
        print(regulation+' reactions: no significance in any target comparisons')
        continue
    
    #Create the .xlsx file with a README sheet (set order and set sizes)
    tempDF = pd.DataFrame({'VennOrder':range(1, len(tempD)+1),
                           'Group':list(tempD.keys()),
                           'nReactions':[len(rxnS) for rxnS in tempD.values()]})
    fileDir = './ExportData/'
    ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
    fileName = 'rxn-venn-'+regulation.lower()+'.xlsx'
    filePath = fileDir+ipynbName+fileName
    tempDF.to_excel(filePath, sheet_name='README', header=True, index=False)
    display(tempDF)#Check
    
    t_start = time.time()
    #Whole set of each comparison: sheet name has '1' at its position and 'NA' elsewhere
    for key_i, key in enumerate(tempD):
        flagL = ['NA']*len(tempD)
        flagL[key_i] = '1'
        setName = '('+','.join(flagL)+')'
        tempDF = statDF.loc[statDF.index.isin(tempD[key])]
        ##Append as a new sheet to the file created above
        with pd.ExcelWriter(filePath, mode='a', engine='openpyxl') as writer:
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
        print(' - '+setName+':', len(tempDF))
    
    #Exclusive regions: every '1'/'0' pattern over the three sets
    setL = list(tempD.values())
    stateL = ['1', '0']
    patternL = [[k1, k2, k3] for k1 in stateL for k2 in stateL for k3 in stateL]
    for flagL in patternL:
        ##Reactions shared by all sets flagged '1' (all tested reactions if none is flagged)
        inL = [setL[key_i] for key_i, flag in enumerate(flagL) if flag=='1']
        sharedS = set(statDF.index.tolist()).intersection(*inL)
        ##Reactions found in any set flagged '0'
        outL = [setL[key_i] for key_i, flag in enumerate(flagL) if flag=='0']
        excludedS = set().union(*outL)
        ##Region = shared minus excluded
        tempDF = statDF.loc[statDF.index.isin(sharedS - excludedS)]
        setName = '('+','.join(flagL)+')'
        with pd.ExcelWriter(filePath, mode='a', engine='openpyxl') as writer:
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
        print(' - '+setName+':', len(tempDF))
    
    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

### 3-4. Visualization: boxplot

#### 3-4-1. Reactions increased by Aca and CR

In [None]:
#Prepare the target reaction set
##posL: comparisons that must be significant (nominal P < 0.05) in the `regulation` direction
##negL: comparisons that must be non-significant or significant in the inverse direction
##      ([''] matches no column, i.e., no such constraint)
posL = ['Aca-vs-Ctrl1', 'CRdiet-vs-Ctrl1']
negL = ['']
regulation = 'Increased'
tempL = statDF.index.tolist()
tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
tempS = pd.Series(np.repeat(True, len(tempDF1)), index=tempDF1.index)#Initialize
for col_n in tempDF1.columns.tolist():
    tempS1 = tempDF1[col_n]
    tempS2 = tempDF2[col_n]
    if col_n in posL:
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #Other comparisons: only require a P-value >= 0 (False for NaN)
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
topX = np.min([30, len(tempL)])
topX_plot = np.min([5, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(rxnDF[['ReactionName', 'Subsystem']], tempDF,
                  left_index=True, right_index=True, how='left')
tempL1 = [comparison+'_Pval' for comparison in posL]
tempDF = tempDF.loc[tempL].sort_values(by=tempL1, ascending=True)
print('Top', topX, 'reactions (sort by', posL, '):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per reaction x model)
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
tempD0 = {'Ctrl1':'Control', 'Aca':'Acarbose',
          'Rapa':'Rapamycin', 'CRdiet':'CR diet'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive'}
groupL = list(tempD1.keys())#Plot order on the x-axis

#Prepare adjusted P-value across groups
##.copy() because the columns are renamed below
tempDF0 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_AdjPval$')].copy()
##regex=True is required: '$' is a regex anchor, and pandas >= 2.0 treats the pattern
##as a literal string by default (the suffix would then remain and break the label mapping)
tempDF0.columns = tempDF0.columns.str.replace('_AdjPval$', '', regex=True)

#Visualize each representative
for rank_i, rxn in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by', posL, '):')
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary (mean +/- 1.96*SEM per group)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS
    tempDF2['0.975'] = tempDF2['mean'] + tempS
    tempDF2 = tempDF2.loc[groupL]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = tempDF0.loc[rxn, tempDF0.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean (split 'X-vs-Y' into Contrast X and Baseline Y, then map to display labels)
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure (one stacked level per comparison)
    tempDF2['YposLevel'] = list(range(len(tempDF2)))
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=groupL, palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    sns.pointplot(data=tempDF1, x='Group', y='Flux', order=groupL,
                  markers='x', color='0.25',#To match with the default sns.boxplot saturation=0.75
                  dodge=False, join=False, estimator=np.mean, ci=None)#Add only the mean
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=groupL, palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    ###NOTE(review): ticks are truncated to int, so yinter becomes 0 if the tick spacing is < 1
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*1.5)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels (brackets stacked above the maximum flux value)
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    for group_0, group_1, label, level in zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                              tempDF2['SignifLabel'], tempDF2['YposLevel']):
        #Baseline
        xcoord_0 = groupL.index(group_0)
        #Contrast
        xcoord_1 = groupL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*level
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    fileDir = './ExportFigures/'
    ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 3-4-2. Reactions decreased by Aca and CR

In [None]:
#Prepare the target reaction set
##posL: comparisons that must be significant (nominal P < 0.05) in the `regulation` direction
##negL: comparisons that must be non-significant or significant in the inverse direction
##      ([''] matches no column, i.e., no such constraint)
posL = ['Aca-vs-Ctrl1', 'CRdiet-vs-Ctrl1']
negL = ['']
regulation = 'Decreased'
tempL = statDF.index.tolist()
tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
tempS = pd.Series(np.repeat(True, len(tempDF1)), index=tempDF1.index)#Initialize
for col_n in tempDF1.columns.tolist():
    tempS1 = tempDF1[col_n]
    tempS2 = tempDF2[col_n]
    if col_n in posL:
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #Other comparisons: only require a P-value >= 0 (False for NaN)
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
topX = np.min([30, len(tempL)])
topX_plot = np.min([5, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(rxnDF[['ReactionName', 'Subsystem']], tempDF,
                  left_index=True, right_index=True, how='left')
tempL1 = [comparison+'_Pval' for comparison in posL]
tempDF = tempDF.loc[tempL].sort_values(by=tempL1, ascending=True)
print('Top', topX, 'reactions (sort by', posL, '):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per reaction x model)
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
tempD0 = {'Ctrl1':'Control', 'Aca':'Acarbose',
          'Rapa':'Rapamycin', 'CRdiet':'CR diet'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive'}
groupL = list(tempD1.keys())#Plot order on the x-axis

#Prepare adjusted P-value across groups
##.copy() because the columns are renamed below
tempDF0 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_AdjPval$')].copy()
##regex=True is required: '$' is a regex anchor, and pandas >= 2.0 treats the pattern
##as a literal string by default (the suffix would then remain and break the label mapping)
tempDF0.columns = tempDF0.columns.str.replace('_AdjPval$', '', regex=True)

#Visualize each representative
for rank_i, rxn in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by', posL, '):')
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary (mean +/- 1.96*SEM per group)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS
    tempDF2['0.975'] = tempDF2['mean'] + tempS
    tempDF2 = tempDF2.loc[groupL]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = tempDF0.loc[rxn, tempDF0.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean (split 'X-vs-Y' into Contrast X and Baseline Y, then map to display labels)
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure (one stacked level per comparison)
    tempDF2['YposLevel'] = list(range(len(tempDF2)))
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=groupL, palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    sns.pointplot(data=tempDF1, x='Group', y='Flux', order=groupL,
                  markers='x', color='0.25',#To match with the default sns.boxplot saturation=0.75
                  dodge=False, join=False, estimator=np.mean, ci=None)#Add only the mean
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=groupL, palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    ###NOTE(review): ticks are truncated to int, so yinter becomes 0 if the tick spacing is < 1
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*1.5)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels (brackets stacked above the maximum flux value)
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    for group_0, group_1, label, level in zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                              tempDF2['SignifLabel'], tempDF2['YposLevel']):
        #Baseline
        xcoord_0 = groupL.index(group_0)
        #Contrast
        xcoord_1 = groupL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*level
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    fileDir = './ExportFigures/'
    ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 3-4-3. Reactions of special interest

> Reactions included in Priyanka's diagram (related to glycolysis, the TCA cycle, fatty acid metabolism, etc.) are checked.  

In [None]:
#Load the list of reactions drawn in the diagram
fileDir = './ImportData/'
fileName = 'Reactions-in-diagram.csv'
tempDF = pd.read_csv(fileDir+fileName)
display(tempDF)
print('-> Unique ID:', tempDF['ReactionID'].unique().size)

#Check the overlap with the reaction metadata (rxnDF) and the flux table (fluxDF)
##Diagram reactions present in rxnDF
tempDF3 = rxnDF.loc[rxnDF.index.isin(tempDF['ReactionID'])]
print('-> Included in the generic model:', len(tempDF3))
##Diagram reactions absent from rxnDF
display(tempDF.loc[~tempDF['ReactionID'].isin(rxnDF.index)])
##Subsystem counts: overall, in the diagram, and in the diagram with flux values
overallS = rxnDF['Subsystem'].value_counts().rename('Overall')
inDiagramS = tempDF3['Subsystem'].value_counts().rename('InDiagram')
tempDF3 = tempDF3.loc[tempDF3.index.isin(fluxDF.index)]
print('-> Flux-calculated and selected:', len(tempDF3))
fluxCalcS = tempDF3['Subsystem'].value_counts().rename('FluxCalculated-InDiagram')
##Keep only the subsystems that appear in the diagram
tempDF = pd.concat([overallS, inDiagramS, fluxCalcS], axis=1).dropna(subset=['InDiagram'])
display(tempDF)

#Reactions to plot in the following cell
plotL = tempDF3.index.tolist()

In [None]:
#Select the changed reactions
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(rxnDF[['ReactionName', 'Subsystem']], tempDF,
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[plotL]
print(len(tempDF), 'target reactions:')
tempL = statDF.loc[:, statDF.columns.str.contains('Pval$')].columns.tolist()
tempS1 = set()
for col_n in tempL:
    tempS = tempDF[col_n]
    tempS = tempS.loc[tempS<0.05]
    print(' - '+col_n+':', len(tempS))
    tempS1 = tempS1 | set(tempS.index.tolist())
tempDF = tempDF.loc[tempS1]
print(' -> Unique reactions:', len(tempDF))
tempL = statDF.loc[:, statDF.columns.str.contains('_Pval$')].columns.tolist()
tempDF = tempDF.sort_values(by=tempL, ascending=True)
display(tempDF)
plotL = tempDF.index.tolist()#Update by the sorted order

#Prepare DF for plot
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
tempD0 = {'Ctrl1':'Control', 'Aca':'Acarbose',
          'Rapa':'Rapamycin', 'CRdiet':'CR diet'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive'}

#Prepare adjusted P-value across groups
tempDF0 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_AdjPval$')]
tempDF0.columns = tempDF0.columns.str.replace('_AdjPval$', '')

#Visualize each representative
##For each target reaction (plotL): display its annotation and per-group flux summary,
##draw a box/strip plot with the group mean, add the adjusted P-value brackets, and export a PDF
##Loop-invariant settings
sns.set(style='ticks', font='Arial', context='talk')
groupL = list(tempD1.keys())#Group order along the x-axis
fileDir = './ExportFigures/'
ipynbName = '230502_LC-M001-related-TrOmics-GEM-ver3-15_FluxAnalysis_'
for rank_i, rxn in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by nominal P-value):')
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)

    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]

    #Check Flux summary
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    ##Normal-approximation 95% interval of the mean: mean +/- 1.96*std/sqrt(count)
    halfWidth = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - halfWidth
    tempDF2['0.975'] = tempDF2['mean'] + halfWidth
    tempDF2 = tempDF2.loc[groupL]#Sort
    display(tempDF2)

    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = tempDF0.loc[rxn, tempDF0.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean: '<Contrast>-vs-<Baseline>' -> two columns, mapped to the display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label (asterisks below 0.05; otherwise the value rounded half-up to 2 digits)
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure (one stacked level per contrast;
    ##derived from the number of contrasts instead of a fixed [0, 1, 2])
    tempDF2['YposLevel'] = list(range(len(tempDF2)))
    display(tempDF2)

    #Visualization
    ##NOTE(review): 'join' and 'ci' of sns.pointplot are reportedly deprecated in newer seaborn
    ##releases -- confirm against the installed version before upgrading
    plt.figure(figsize=(2, 4))
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=groupL, palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    sns.pointplot(data=tempDF1, x='Group', y='Flux', order=groupL,
                  markers='x', color='0.25',#To match with the default sns.boxplot saturation=0.75
                  dodge=False, join=False, estimator=np.mean, ci=None)#Add only the mean
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=groupL, palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    ###Integer tick interval; NOTE(review): this becomes 0 when the automatic ticks are
    ###less than 1 apart, which would collapse the annotation offsets below
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*1.5)), yticks=tempL)#Extra headroom for the annotations
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels
    ###Start the brackets from the maximum of the plotted groups only
    ###(rows whose Group is unmapped are not drawn and must not shift the brackets)
    aline_ymin = tempDF1.loc[tempDF1['Group'].isin(groupL), 'Flux'].max()
    aline_ymargin = yinter/2
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    for group_0, group_1, ylevel, label in zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                               tempDF2['YposLevel'], tempDF2['SignifLabel']):
        #Baseline and contrast: the x-coordinate is the position in the group order
        xcoord_0 = groupL.index(group_0)
        xcoord_1 = groupL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*ylevel
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    fileName = 'flux-boxplot('+rxn+').pdf'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

# — End of notebook —