# GEM Reconstruction with LC M001-related Transcriptomics — Flux Comparison

***by Kengo Watanabe***  

Priyanka Baloni reconstructed mouse genome-scale metabolic models (GEMs; Khodaee, S. et al. Sci. Rep. 2020) with the Longevity Consortium (LC) M001-related transcriptomics dataset (Tyshkovskiy, A. et al. Cell Metab. 2019), and calculated flux values using flux variability analysis (FVA).  
–> In this notebook, the calculated maximum flux values are compared between the context-specific GEMs (i.e., sample groups).  
> To maintain the consistency with the DIRAC analyses, statistical tests are performed in a different notebook with R kernel.  

Input:  
* Flux data: merged_file_Maxflux_reactions_Intervention.csv  
* Sample–model metadata: RNAseq_samples_annotation.csv  
* Reaction metadata (iMM1865): iMM1685_data.xlsx  
* Gene mapping metadata: iMM1685_data-Max.xlsx  
* Statistical test summary: 220610_LC-M001-related-transcriptomics-GEM-ver2-4_StatisticalTest_flux-comparison.xlsx (Supplementary Data 3)  

Output:  
* Cleaned flux data, which is used in the statistical analysis (R sub-notebook)  
* Cleaned sample–model metadata, which is used in R sub-notebook  
* Cleaned reaction metadata, which is incorporated into Supplementary Data 3 in R sub-notebook  
* Figure 4a–d  
* Supplementary Figure 2c–f  

Original notebook (memo for my future tracing):  
* dalek:[JupyterLab HOME]/220606_LC-M001-related-transcriptomics-GEM/220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis.ipynb  

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#For Arial font
#!conda install -c conda-forge -y mscorefonts
##-> The below was also needed in matplotlib 3.4.2
#import shutil
#import matplotlib
#shutil.rmtree(matplotlib.get_cachedir())
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import time

from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from decimal import Decimal, ROUND_HALF_UP
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
import re
import matplotlib.patches as mpatches
#!pip install venn
from venn import venn

!conda list

## 1. Clean the original tables

### 1-1. Maximum flux value (raw) data

In [None]:
#Load the merged maximum-flux table and index it by reaction ID
fileDir = './ImportData/'
fileName = 'merged_file_Maxflux_reactions_Intervention.csv'
rawFluxDF = (pd.read_csv(fileDir+fileName)
             .rename(columns={'V1':'ReactionID'})
             .set_index('ReactionID'))

#Rebuild every column label as 'Model_' + zero-padded model number
#(the number is the second '_'-separated token of the original label)
labelPartDF = rawFluxDF.columns.to_series().str.split(pat='_', expand=True)
labelPartDF = labelPartDF.rename(columns={0:'Prefix', 1:'Model_i'})
rawFluxDF.columns = 'Model_'+labelPartDF['Model_i'].str.zfill(2)
rawFluxDF.columns.set_names('', inplace=True)

#Reorder the columns as Model_01, Model_02, ... (one label per existing column)
orderedLabelL = ['Model_'+str(model_i).zfill(2)
                 for model_i in range(1, len(rawFluxDF.columns)+1)]
rawFluxDF = rawFluxDF[orderedLabelL]

display(rawFluxDF)

fluxDF = rawFluxDF

In [None]:
#Export the cleaned maximum-flux table as a tab-separated file (index kept)
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'max-flux-raw-data.tsv'
outPath = fileDir+ipynbName+fileName
fluxDF.to_csv(outPath, sep='\t', index=True)

### 1-2. Sample–model metadata

> Prepare a control-based category, which makes it easy to build the comparison labels.  

In [None]:
#Load the sample-model annotation table
fileDir = './ImportData/'
fileName = 'RNAseq_samples_annotation.csv'
metaDF = pd.read_csv(fileDir+fileName)
display(metaDF.describe(include='all'))

#Index the rows by 'Model_' + zero-padded model number
modelPartDF = metaDF['Model'].str.split(pat='_', expand=True)
modelPartDF = modelPartDF.rename(columns={0:'Prefix', 1:'Model_i'})
metaDF['ModelID'] = 'Model_'+modelPartDF['Model_i'].str.zfill(2)
metaDF = metaDF.set_index('ModelID')

#Keep only the condition columns and replace their values with short codes
metaDF = metaDF.rename(columns={'ID':'SampleID'})
metaDF = metaDF[['SampleID', 'Strain', 'Age', 'Sex', 'Intervention']]
strainCodeD = {'UMHet3':'U', '(C57BL6/JxBALB/cByJ)/F2':'C', '(PW/JxC3H/HeJ)/F2':'P'}
interventionCodeD = {'No intervention':'Con1', 'Acarbose':'Acar',
                     '17-alpha-estradiol':'17aE', 'Protandim':'Prot',
                     'Rapamycin':'Rapa', 'CR':'CalR',
                     'MR_control':'Con2', 'MR':'MetR',
                     'GHRKO_control':'GHRw', 'GHRKO':'GHRk',
                     'Snell_control':'SneW', 'Snell':'SneD'}
metaDF['Strain'] = metaDF['Strain'].map(strainCodeD)
metaDF['Age'] = metaDF['Age'].astype(str).str.zfill(2)+'M'
metaDF['Intervention'] = metaDF['Intervention'].map(interventionCodeD)

#Group label = Strain-Age-Sex-Intervention;
#Control label = same prefix with the intervention replaced by its matching control
conditionPrefixS = metaDF['Strain']+'-'+metaDF['Age']+'-'+metaDF['Sex']+'-'
controlOfD = {'Con1':'Con1', 'Acar':'Con1', '17aE':'Con1', 'Prot':'Con1', 'Rapa':'Con1', 'CalR':'Con1',
              'Con2':'Con2', 'MetR':'Con2', 'GHRw':'GHRw', 'GHRk':'GHRw',
              'SneW':'SneW', 'SneD':'SneW'}
metaDF['Group'] = conditionPrefixS+metaDF['Intervention']
metaDF['Control'] = conditionPrefixS+metaDF['Intervention'].map(controlOfD)

#Category = the control code, i.e., the 4th '-'-separated field of the Control label
metaDF['Category'] = metaDF['Control'].str.split(pat='-', expand=True).iloc[:, 3]

#Report the composition of each category
display(metaDF)
categoryA = metaDF.sort_values(by='Category', ascending=True)['Category'].unique()
print(' ->', len(categoryA), 'categories\n')
for category in categoryA:
    memberDF = metaDF.loc[metaDF['Category']==category]
    print(category+' category total:', len(memberDF))
    for condition in ['Intervention']:
        display(memberDF[condition].value_counts())
    print('')

sampleDF = metaDF
categoryL = categoryA

In [None]:
#Export the cleaned sample-model metadata as a tab-separated file (index kept)
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'sample-metadata.tsv'
outPath = fileDir+ipynbName+fileName
sampleDF.to_csv(outPath, sep='\t', index=True)

### 1-3. Reaction metadata

#### 1-3-1. iMM1865

In [None]:
#Load the reaction table of the used model and index it by reaction ID
#(the original 'Reaction' column holds the ID; 'Unnamed: 3' becomes the 'Reaction' column)
fileDir = './ImportData/'
fileName = 'iMM1685_data.xlsx'
sheetName = 'Sheet1'
modelRxnDF = (pd.read_excel(fileDir+fileName, sheet_name=sheetName, engine='openpyxl')
              .rename(columns={'Reaction':'ReactionID', 'Unnamed: 3':'Reaction'})
              .set_index('ReactionID'))

display(modelRxnDF)
nUniqueRxn = len(modelRxnDF.index.unique())
print(' -> Unique rxn ID:', nUniqueRxn)

rxnDF = modelRxnDF

In [None]:
#Compare reaction counts per subsystem: whole model vs. reactions having flux values
overallS = rxnDF['Subsystem'].value_counts().rename('Overall')
calculatedS = rxnDF['Subsystem'].loc[fluxDF.index].value_counts().rename('FluxCalculated')
countDF = pd.merge(overallS, calculatedS, left_index=True, right_index=True, how='left')
display(countDF.describe())
#Show only the subsystems with more than 100 reactions in the whole model
display(countDF.loc[overallS>100])

#### 1-3-2. Retrieve reaction name with BiGG Models API

In [None]:
#Download the latest BiGG reaction namespace table via a shell call
#(wget -O writes to ./ImportData/bigg_models_reactions.txt, replacing any existing copy)
!wget -O ./ImportData/bigg_models_reactions.txt http://bigg.ucsd.edu/static/namespace/bigg_models_reactions.txt

In [None]:
#Load the BiGG reaction table and index it by reaction ID
fileDir = './ImportData/'
fileName = 'bigg_models_reactions.txt'
biggRxnDF = (pd.read_csv(fileDir+fileName, sep='\t')
             .rename(columns={'bigg_id':'ReactionID', 'name':'ReactionName'})
             .set_index('ReactionID'))
print('Original nrow:', len(biggRxnDF))
print(' -> Unique rxn ID:', len(biggRxnDF.index.unique()))

#Attach the BiGG reaction name to the model reactions (left join on reaction ID)
namedRxnDF = pd.merge(rxnDF, biggRxnDF['ReactionName'],
                      left_index=True, right_index=True, how='left')
namedRxnDF = namedRxnDF[['Reaction', 'ReactionName', 'GPR', 'Subsystem']]

#Report the reactions for which no name was found
display(namedRxnDF)
unnamedRxnDF = namedRxnDF.loc[namedRxnDF['ReactionName'].isnull()]
print(' -> No rxn name:', len(unnamedRxnDF))
display(unnamedRxnDF)

rxnDF = namedRxnDF

#### 1-3-3. Retrieve molecule name with BiGG Models API

In [None]:
#Take all molecules in the reactions
#NOTE: every pattern below is a regular expression, so regex=True is passed explicitly;
#      pandas >= 2.0 treats str.replace patterns as literal text by default, which would
#      leave the quotes and the '[compartment]' suffixes in place.
##Strip the single quote wrapping each reaction formula
tempS = rxnDF['Reaction'].str.replace(r"^'", '', regex=True)
tempS = tempS.str.replace(r"'$", '', regex=True)
##Split each formula into space-separated tokens (one token per column)
tempDF = tempS.str.split(pat=' ', expand=True)
print('Expanded DF:', tempDF.shape)
t_start = time.time()
tempS = set()
for col_n in tempDF.columns.tolist():
    tempS1 = tempDF[col_n].dropna()#None is considered an NA value in pandas
    tempS1 = tempS1.loc[tempS1!='']#Delete empty
    #Keep only the tokens carrying a '[...]' tag (i.e., molecules, not operators/coefficients)
    tempS1 = tempS1.loc[tempS1.str.contains(r'\[.*\]')]
    #Character in square brackets indicates intracellular localization -> remove it
    tempS |= set(tempS1.str.replace(r'\[.*\]', '', regex=True).tolist())
t_elapsed = time.time() - t_start
print(' -> Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')
print(' -> Unique molecule:', len(tempS))
print(list(tempS)[:25])

#Set of compartment-free molecule IDs found in the model reactions
molS = tempS

In [None]:
#Download the latest BiGG metabolite namespace table via a shell call
#(wget -O writes to ./ImportData/bigg_models_metabolites.txt, replacing any existing copy)
!wget -O ./ImportData/bigg_models_metabolites.txt http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt

In [None]:
#Import metabolite metadata
fileDir = './ImportData/'
fileName = 'bigg_models_metabolites.txt'
tempDF = pd.read_csv(fileDir+fileName, sep='\t')
tempDF = tempDF.rename(columns={'universal_bigg_id':'MoleculeID', 'name':'MoleculeName'})
tempDF = tempDF.set_index('MoleculeID')
print('Original nrow:', len(tempDF))
print(' -> Unique molecule ID:', len(tempDF.index.unique()))

#Retrieve the molecules within the used model
#NOTE: molS is expected to be the set of molecule IDs here; it is rebound to the name
#      Series at the end of this cell, so rebuild the ID set before re-running this cell.
print('Molecules within the used model', len(molS))
tempS = tempDF['MoleculeName']
##Keep one row per molecule ID. De-duplicate on the ID (index), not on the name:
##Series.drop_duplicates() compares values, so it would discard every later molecule
##that merely shares its name with a different ID.
tempS = tempS.loc[~tempS.index.duplicated(keep='first')]
tempS = tempS.loc[tempS.index.isin(molS)]

tempS = tempS.sort_index(ascending=True)
display(tempS)

#ID -> name table of the molecules used in the model
molS = tempS

#### 1-3-4. Gene mapping table

In [None]:
#Load the gene mapping table (column names are read from the third row of the sheet)
fileDir = './ImportData/'
fileName = 'iMM1685_data-Max.xlsx'
sheetName = 'Sheet1'
geneMapDF = pd.read_excel(fileDir+fileName, sheet_name=sheetName, engine='openpyxl', header=2)

#Keep the three identifier columns, drop incomplete rows, and index by integer Entrez ID
geneMapDF = (geneMapDF
             .rename(columns={'Entrez ID':'EntrezID', 'Gene symbol':'GeneSymbol',
                              'Ensembl gene ID':'EnsemblID'})
             [['EntrezID', 'EnsemblID', 'GeneSymbol']]
             .dropna()
             .astype({'EntrezID':'int64'})
             .set_index('EntrezID')
             .sort_index(ascending=True))

display(geneMapDF)
print(' -> Unique Entrez ID:', len(geneMapDF.index.unique()))
print(' -> Unique Ensembl ID:', len(geneMapDF['EnsemblID'].unique()))
print(' -> Unique gene name:', len(geneMapDF['GeneSymbol'].unique()))

geneDF = geneMapDF

#### 1-3-5. Save as a single .xlsx file

In [None]:
#Create the .xlsx file with the reaction table as its first sheet
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'reaction-metadata.xlsx'
sheetName = 'Reaction'
outPath = fileDir+ipynbName+fileName
rxnDF.to_excel(outPath, sheet_name=sheetName, header=True, index=True)

#Append the molecule and gene tables to the same file as additional sheets
with pd.ExcelWriter(outPath, mode='a', engine='openpyxl') as writer:
    for sheetName, table in [('Molecule', molS), ('Gene', geneDF)]:
        table.to_excel(writer, sheet_name=sheetName, header=True, index=True)

## 2. Check data structure of the flux values

> Note: this lengthy section was eliminated from the original notebook, because the outputs were not used in the manuscript.  

## 3. Compare flux values

> 1. Test the rank of maximum flux values across interventions for each reaction using Kruskal–Wallis H-test.  
> 2. Then, perform post-hoc comparisons between control vs. each intervention using Dunn's test.  
>
> To increase the statistical power, samples are grouped by intervention (i.e., strain, age, and sex are pooled) in the Kruskal–Wallis H-test. Note that the Kruskal–Wallis H-test can shrink the variance by utilizing all samples (per reaction), and its statistical power is therefore better than that of repeated Mann–Whitney U-tests (a.k.a. Wilcoxon rank-sum tests) when the sample size is small. Although tricky, the P-value adjustment in (1) is performed across all reactions under the assumption that reactions are independent, which is more conservative and less likely to raise referees' eyebrows than using a nominal P-value cutoff. Also, the reactions assigned invariable flux values across all groups are eliminated from the tests in advance, which reduces the number of hypotheses. Because the post-hoc comparisons (2) address the effect of each intervention within a specific reaction, the P-values are adjusted across interventions only within the reaction (not across reactions). 

# — †1. Go to the top of the R sub-notebook —  

### 3-1. Kruskal–Wallis H-test (Flux ~ Intervention), followed by Dunn's test (Intervention)

#### 3-1-1/2/3/4. Import the summary tables

In [None]:
#Load the statistical test summary produced by the R sub-notebook, indexed by reaction ID
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_StatisticalTest_'
fileName = 'flux-comparison.xlsx'
sheetName = 'Main_vs-each-control'
summaryDF = pd.read_excel(fileDir+ipynbName+fileName, sheet_name=sheetName,
                          engine='openpyxl').set_index('ReactionID')
display(summaryDF)

statDF = summaryDF

#### 3-1-5. Changed reactions (Kruskal–Wallis H-test)

In [None]:
#Model variables = prefixes of the columns whose name contains '_Hstat'
hstatColL = statDF.columns[statDF.columns.str.contains('_Hstat')].tolist()
variableL = [col_n.replace('_Hstat', '') for col_n in hstatColL]

#For each variable, list the reactions with adjusted P < 0.05 (most significant first)
for variable in variableL:
    adjPvalCol = variable+'_AdjPval'
    hitDF = statDF.loc[statDF[adjPvalCol]<0.05].sort_values(by=adjPvalCol, ascending=True)
    #Show the name, all flux-median columns, and the columns belonging to this variable
    medianColL = hitDF.columns[hitDF.columns.str.contains('_FluxMedian')].tolist()
    variableColL = hitDF.columns[hitDF.columns.str.contains('^'+variable+'_')].tolist()
    hitDF = hitDF[['ReactionName']+medianColL+variableColL]
    print(variable+' (adjusted P < 0.05):', len(hitDF))
    display(hitDF)
    print('')

#### 3-1-6. Changed reactions by each intervention (Dunn's test)

In [None]:
#Clean DF for adjusted P-value and z-statistic
#NOTE: the suffix patterns end with the regex anchor '$', so regex=True is passed explicitly;
#      pandas >= 2.0 treats str.replace patterns as literal text by default, which would
#      leave the suffixes in place and make the two tables' column names disagree.
variable = 'Intervention'
##Take adjusted P-value (post-hoc '<contrast>-vs-<baseline>_AdjPval' columns)
tempDF1 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_AdjPval$')].copy()#copy: columns are renamed below
tempDF1.columns = tempDF1.columns.str.replace('_AdjPval$', '', regex=True)
tempDF1 = pd.merge(statDF[['ReactionName', variable+'_AdjPval']], tempDF1,
                   left_index=True, right_index=True, how='left')
tempDF1 = tempDF1.sort_values(by=variable+'_AdjPval', ascending=True)
print('Adjusted P-value:')
display(tempDF1)
##Take z-statistic (post-hoc '<contrast>-vs-<baseline>_zStat' columns)
tempDF2 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_zStat$')].copy()#copy: columns are renamed below
tempDF2.columns = tempDF2.columns.str.replace('_zStat$', '', regex=True)
tempDF2 = pd.merge(statDF[['ReactionName', variable+'_AdjPval']], tempDF2,
                   left_index=True, right_index=True, how='left')
tempDF2 = tempDF2.sort_values(by=variable+'_AdjPval', ascending=True)
print('Changed direction (z-statistic):')
display(tempDF2)

#Both tables share the same comparison column names ('<contrast>-vs-<baseline>')
pvalDF = tempDF1
diffDF = tempDF2

> –> When z-statistic is positive, the flux median of contrast is larger than that of baseline.  

In [None]:
#Restrict to the reactions whose overall adjusted P-value is below 0.05
variable = 'Intervention'
keptDF = pvalDF.loc[pvalDF[variable+'_AdjPval']<0.05]
print(variable+' (adjusted P < 0.05):', len(keptDF))

#Per comparison, count the kept reactions whose post-hoc adjusted P-value is below 0.05
comparisonL = [col_n for col_n in keptDF.columns if '-vs-' in col_n]
for comparison in comparisonL:
    nChanged = int((keptDF[comparison]<0.05).sum())
    print(' - '+comparison+':', nChanged)

> Check the changed reactions (nominal P-value < 0.05) as reference.  

In [None]:
#Restrict to the reactions whose overall nominal P-value is below 0.05
variable = 'Intervention'
nominalRxnL = statDF.index[statDF[variable+'_Pval']<0.05].tolist()
keptDF = pvalDF.loc[nominalRxnL]
print(variable+' (nominal P < 0.05):', len(keptDF))

#Per comparison, count the kept reactions whose post-hoc adjusted P-value is below 0.05
comparisonL = [col_n for col_n in keptDF.columns if '-vs-' in col_n]
for comparison in comparisonL:
    nChanged = int((keptDF[comparison]<0.05).sum())
    print(' - '+comparison+':', nChanged)

### 3-2. Visualization: clustermap

#### 3-2-1. Sample-wide

> For visualization purposes, it is better to use values centered on each control group, because a value around zero then simply indicates no change from the control.  
> –> At the same time, scaling is required for good clustering, but RobustScaler() returns just the centered (i.e., median-subtracted) values if IQR = 0. Hence, the maximum absolute value, not the IQR, is used for scaling. Of note, scaling discards the information about how much an intervention group differs from its control group, but this is consistent with the rank-based statistical tests.  
> –> In addition, only the changed reactions are used for good clustering.  

In [None]:
#Models belonging to a control group (their Group label equals their Control label)
controlModelL = sampleDF.loc[sampleDF['Group']==sampleDF['Control']].index.tolist()

#Within each category, subtract the per-reaction median of its control models
centeredDF = pd.DataFrame(index=fluxDF.index)
for category in categoryL:
    memberL = sampleDF.loc[sampleDF['Category']==category].index.tolist()
    memberDF = fluxDF.loc[:, memberL]
    controlMedianS = memberDF.loc[:, memberDF.columns.isin(controlModelL)].median(axis=1)
    centeredDF = pd.merge(centeredDF, memberDF.sub(controlMedianS, axis=0),
                          left_index=True, right_index=True, how='left')

#Scale each reaction by its maximum absolute value across models
#(the scaler works per column, hence the transposition before and after)
scaler = MaxAbsScaler(copy=True)
scaledA = scaler.fit_transform(centeredDF.T)
scaledDF = pd.DataFrame(data=scaledA, index=centeredDF.columns, columns=centeredDF.index).T

display(scaledDF)
display(scaledDF.iloc[:, :15].describe())

fluxDF_scaled = scaledDF

In [None]:
#Prepare DF to plot only the changed reactions
#(rows of fluxDF_scaled whose overall nominal P-value is below 0.05)
variable = 'Intervention'
tempL = statDF.loc[statDF[variable+'_Pval']<0.05].index.tolist()
tempDF = fluxDF_scaled.loc[tempL]
print(variable+' (nominal P < 0.05):', len(tempDF))

#Prepare color labels for the changed reactions
regulation = 'Changed'#'Changed', 'Increased' (z-statistic > 0), or 'Decreased' (z-statistic < 0)
##Intervention code -> display label
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
##Display label -> base color (used when the overall adjusted P-value is below 0.05)
tempD1 = {'Control 1':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Protandim':'tab:brown',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange',
          'GHR WT':'tab:gray', 'GHR KO':'tab:pink',
          'Snell WT':'black', 'Snell dwarf':'tab:cyan'}
##Display label -> lighter color (used when only the overall nominal P-value is below 0.05);
##each base color maps to an odd index of the 'tab20' colormap, anything else to 'dimgray'
lightIndexD = {'tab:blue':1, 'tab:orange':3, 'tab:green':5, 'tab:red':7, 'tab:purple':9,
               'tab:brown':11, 'tab:pink':13, 'tab:gray':15, 'tab:olive':17, 'tab:cyan':19}
tab20 = plt.get_cmap('tab20')
tempD2 = {label:(tab20(lightIndexD[color]) if color in lightIndexD else 'dimgray')
          for label, color in tempD1.items()}
##Reactions whose overall adjusted P-value is below 0.05 (set: O(1) membership test)
adjustedRxnS = set(statDF.loc[statDF[variable+'_AdjPval']<0.05].index)
##Copy because color columns are added to this frame below
tempDF1 = pvalDF.loc[tempDF.index.tolist(), pvalDF.columns.str.contains('-vs-')].copy()
for col_n in tempDF1.columns.tolist():
    tempS1 = pvalDF[col_n]#post-hoc adjusted P-value
    tempS2 = diffDF[col_n]#z-statistic
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Increased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Decreased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    changedRxnS = set(tempS2.index)#built once per comparison, not once per reaction
    #Replace dummy value (P-value) with color code
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    tempL = []
    count1 = 0#just for checking: overall adjusted P < 0.05
    count2 = 0#just for checking: all selected reactions (includes those counted in count1)
    for rxn in tempDF1.index.tolist():
        if rxn in changedRxnS:
            if rxn in adjustedRxnS:#Adjusted P-value
                tempL.append(tempD1[label])
                count1 += 1#Just for checking
            else:#Nominal P-value
                tempL.append(tempD2[label])
            count2 += 1#Just for checking
        else:
            tempL.append('white')
    tempDF1[label] = tempL
    print(regulation+' reaction in '+col_n)
    print(' -> adjusted P < 0.05:', count1)
    print(' -> nominal P < 0.05:', count2)
##Keep only the color columns (named by display label)
tempDF1 = tempDF1.loc[:, tempDF1.columns.isin(tempD1.keys())]
##Remove the group showing no changed reactions
for col_n in tempDF1.columns.tolist():
    tempS = tempDF1[col_n]
    tempS = (tempS=='white')
    if tempS.sum()==len(tempS):
        tempDF1 = tempDF1.drop(columns=col_n)

#Prepare color labels for the models
tempS = sampleDF['Intervention'].map(tempD0).map(tempD1)

#Clustermap
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF, method='ward', metric='euclidean', cmap='RdBu_r',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=tempDF1, col_colors=tempS, xticklabels=False, yticklabels=False,
                    dendrogram_ratio=(0.1, 0.1), colors_ratio=(0.03, 0.03),
                    cbar_pos=(0.05, -0.075, 0.3, 0.025), cbar_kws={'orientation': 'horizontal'},
                    figsize=(12, 12), **{'vmin':-1, 'vmax':1})
cm.cax.set_title('Scaled flux value', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
##Positions are re-applied unchanged (kept as hooks for manual layout tweaks)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('Sample')
cm.ax_heatmap.set_ylabel('Reaction')
##row/column color bar legend (axis is same with cm.cax!)
tempL = []
for group in tempD1.keys():
    tempL.append(mpatches.Patch(color=tempD1[group], label=group))
legend1 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=2,
                     title='Intervention', title_fontsize='medium',
                     bbox_to_anchor=(1, -2), loc='lower left', borderaxespad=3.5, frameon=False)
plt.gca().add_artist(legend1)#keep this legend when the next plt.legend() call replaces it
tempL = []
for group in tempD1.keys():
    if group in tempDF1.columns.tolist():
        tempL.append(mpatches.Patch(color=tempD1[group],
                                    label='by '+group+' (adjusted '+r'$P$'+' < 0.05)'))
for group in tempD2.keys():
    if group in tempDF1.columns.tolist():
        tempL.append(mpatches.Patch(color=tempD2[group], label='(nominal '+r'$P$'+' < 0.05)'))
legend2 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=2,
                     title='Changed reactions (vs. each control)', title_fontsize='medium',
                     bbox_to_anchor=(1, 2.5), loc='upper left', borderaxespad=3.5, frameon=False)
plt.gca().add_artist(legend2)
##Save
fileDir = './ExportFigures/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'flux-clustermap-across-samples.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

#Save label order (reactions in the row order of the clustered heatmap)
tempDF = pvalDF.loc[tempDF.index[cm.dendrogram_row.reordered_ind]]
tempDF = tempDF.reset_index()
tempDF.index.name = 'Ycoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'flux-clustermap-across-samples_ticks-order.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

#### 3-2-2. Group-wide 1

> Sample-based clustering would be difficult to interpret.  
> –> Combine samples per intervention group. Of note, the mean rather than the median is used as the summary statistic, because the median loses much information when the sample size is small.  

In [None]:
#Per-reaction mean flux of each intervention group
interventionL = ['Con1', 'Acar', '17aE', 'Prot', 'Rapa', 'CalR',
                 'Con2', 'MetR', 'GHRw', 'GHRk', 'SneW', 'SneD']
groupMeanDF = pd.DataFrame(index=fluxDF.index)
for intervention in interventionL:
    modelL = sampleDF.loc[sampleDF['Intervention']==intervention].index.tolist()
    meanS = fluxDF.loc[:, modelL].mean(axis=1).rename(intervention)
    groupMeanDF = pd.merge(groupMeanDF, meanS, left_index=True, right_index=True, how='left')
print('Summary per intervention group:')
display(groupMeanDF.describe())

#Within each category, subtract the group mean of its control
#(the category name is the intervention code of the control group)
centeredDF = pd.DataFrame(index=groupMeanDF.index)
for category in categoryL:
    memberL = sampleDF['Intervention'].loc[sampleDF['Category']==category].unique().tolist()
    memberDF = groupMeanDF.loc[:, memberL]
    centeredDF = pd.merge(centeredDF, memberDF.sub(memberDF[category], axis=0),
                          left_index=True, right_index=True, how='left')

#Scale each reaction by its maximum absolute value across groups
#(the scaler works per column, hence the transposition before and after)
scaler = MaxAbsScaler(copy=True)
scaledA = scaler.fit_transform(centeredDF.T)
scaledDF = pd.DataFrame(data=scaledA, index=centeredDF.columns, columns=centeredDF.index).T
scaledDF = scaledDF[interventionL]#Sort

print('After centering and scaling:')
display(scaledDF)
display(scaledDF.iloc[:, :15].describe())

fluxDF_scaled = scaledDF

In [None]:
#Prepare DF to plot only the changed reactions
#(rows of fluxDF_scaled whose overall nominal P-value is below 0.05)
variable = 'Intervention'
tempL = statDF.loc[statDF[variable+'_Pval']<0.05].index.tolist()
tempDF = fluxDF_scaled.loc[tempL]
print(variable+' (nominal P < 0.05):', len(tempDF))

#Prepare color labels for the changed reactions
regulation = 'Changed'#'Changed', 'Increased' (z-statistic > 0), or 'Decreased' (z-statistic < 0)
##Intervention code -> display label
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF.columns = tempDF.columns.map(tempD0)#the display labels become the heatmap tick labels
##Display label -> base color (used when the overall adjusted P-value is below 0.05)
tempD1 = {'Control 1':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Protandim':'tab:brown',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange',
          'GHR WT':'tab:gray', 'GHR KO':'tab:pink',
          'Snell WT':'black', 'Snell dwarf':'tab:cyan'}
##Display label -> lighter color (used when only the overall nominal P-value is below 0.05);
##each base color maps to an odd index of the 'tab20' colormap, anything else to 'dimgray'
lightIndexD = {'tab:blue':1, 'tab:orange':3, 'tab:green':5, 'tab:red':7, 'tab:purple':9,
               'tab:brown':11, 'tab:pink':13, 'tab:gray':15, 'tab:olive':17, 'tab:cyan':19}
tab20 = plt.get_cmap('tab20')
tempD2 = {label:(tab20(lightIndexD[color]) if color in lightIndexD else 'dimgray')
          for label, color in tempD1.items()}
##Reactions whose overall adjusted P-value is below 0.05 (set: O(1) membership test)
adjustedRxnS = set(statDF.loc[statDF[variable+'_AdjPval']<0.05].index)
##Copy because color columns are added to this frame below
tempDF1 = pvalDF.loc[tempDF.index.tolist(), pvalDF.columns.str.contains('-vs-')].copy()
for col_n in tempDF1.columns.tolist():
    tempS1 = pvalDF[col_n]#post-hoc adjusted P-value
    tempS2 = diffDF[col_n]#z-statistic
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Increased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Decreased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    changedRxnS = set(tempS2.index)#built once per comparison, not once per reaction
    #Replace dummy value (P-value) with color code
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    tempL = []
    count1 = 0#just for checking: overall adjusted P < 0.05
    count2 = 0#just for checking: all selected reactions (includes those counted in count1)
    for rxn in tempDF1.index.tolist():
        if rxn in changedRxnS:
            if rxn in adjustedRxnS:#Adjusted P-value
                tempL.append(tempD1[label])
                count1 += 1#Just for checking
            else:#Nominal P-value
                tempL.append(tempD2[label])
            count2 += 1#Just for checking
        else:
            tempL.append('white')
    tempDF1[label] = tempL
    print(regulation+' reaction in '+col_n)
    print(' -> adjusted P < 0.05:', count1)
    print(' -> nominal P < 0.05:', count2)
##Keep only the color columns (named by display label)
tempDF1 = tempDF1.loc[:, tempDF1.columns.isin(tempD1.keys())]
##Remove the group showing no changed reactions
for col_n in tempDF1.columns.tolist():
    tempS = tempDF1[col_n]
    tempS = (tempS=='white')
    if tempS.sum()==len(tempS):
        tempDF1 = tempDF1.drop(columns=col_n)

#Clustermap (transposed: rows are intervention groups, columns are reactions)
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF.T, method='ward', metric='euclidean', cmap='RdBu_r',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF1, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.02, 0.1), colors_ratio=(0.03, 0.05),
                    cbar_pos=(0.08, -0.15, 0.3, 0.05), cbar_kws={'orientation': 'horizontal'},
                    figsize=(15, 5), **{'vmin':-1, 'vmax':1})
cm.cax.set_title('Group mean of flux values\n(vs. each control; scaled)', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
##Positions are re-applied unchanged (kept as hooks for manual layout tweaks)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('Reaction')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
tempL = []
for group in tempD1.keys():
    if group in tempDF1.columns.tolist():
        tempL.append(mpatches.Patch(color=tempD1[group],
                                    label='by '+group+' (adjusted '+r'$P$'+' < 0.05)'))
for group in tempD2.keys():
    if group in tempDF1.columns.tolist():
        tempL.append(mpatches.Patch(color=tempD2[group], label='(nominal '+r'$P$'+' < 0.05)'))
legend2 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=2,
                     title='Changed reactions (vs. each control)', title_fontsize='medium',
                     bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=7, frameon=False)
plt.gca().add_artist(legend2)
##Save (disabled: this figure is not exported)
#fileDir = './ExportFigures/'
#ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
#fileName = 'flux-clustermap-across-interventions(vs-each-control).tif'
#plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
#                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

#Save label order (reactions in the column order of the clustered heatmap)
tempDF = pvalDF.loc[tempDF.index[cm.dendrogram_col.reordered_ind]]
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
#fileDir = './ExportData/'
#ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
#fileName = 'flux-clustermap-across-interventions(vs-each-control)_ticks-order.tsv'
#tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

> –> Remove the control groups because they are meaningless after centering (each control is centered on itself).  

In [None]:
#Clustermap of the scaled group-mean flux values (vs. each control), restricted to the changed reactions
#Prepare DF to plot only the changed reactions
##Rows: reactions with nominal P < 0.05 for the main effect of `variable`
variable = 'Intervention'
tempL = statDF.loc[statDF[variable+'_Pval']<0.05].index.tolist()
tempDF = fluxDF_scaled.loc[tempL]
print(variable+' (nominal P < 0.05):', len(tempDF))
##Remove control groups
##(categoryL is defined outside this cell; presumably it lists the control-group columns -- confirm)
tempDF = tempDF.drop(columns=categoryL)

#Prepare color labels for the changed reactions
regulation = 'Changed'#Handled values: 'Changed', 'Increased', 'Decreased'
##Group code -> display label
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF.columns = tempDF.columns.map(tempD0)
##Display label -> color used when the main effect passes the adjusted P-value threshold
tempD1 = {'Control 1':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Protandim':'tab:brown',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange',
          'GHR WT':'tab:gray', 'GHR KO':'tab:pink',
          'Snell WT':'black', 'Snell dwarf':'tab:cyan'}
##Display label -> color used for nominal-only significance:
##the tab20 entry at the odd index paired with the base color ('dimgray' if the base has no pair)
tab20 = plt.get_cmap('tab20')
tab20IdxD = {'tab:blue':1, 'tab:orange':3, 'tab:green':5, 'tab:red':7, 'tab:purple':9,
             'tab:brown':11, 'tab:pink':13, 'tab:gray':15, 'tab:olive':17, 'tab:cyan':19}
tempD2 = {}
for label, color in tempD1.items():
    tempD2[label] = tab20(tab20IdxD[color]) if color in tab20IdxD else 'dimgray'
tempL1 = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
tempL2 = statDF.loc[statDF[variable+'_Pval']<0.05].index.tolist()#Not used below; kept as in the original cell
adjRxnS = set(tempL1)#Set for O(1) membership tests in the loop below
tempDF1 = pvalDF.loc[tempDF.index.tolist(), pvalDF.columns.str.contains('-vs-')]
for col_n in tempDF1.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Increased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Decreased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    hitRxnS = set(tempS2.index)#Reactions passing the filter in this contrast (built once per contrast)
    #Replace dummy value (P-value) with color code
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    tempL = []
    count1 = 0#Just for checking: hits that also pass the adjusted main-effect threshold
    count2 = 0#Just for checking: all hits (including those counted in count1)
    for rxn in tempDF1.index.tolist():
        if rxn not in hitRxnS:
            tempL.append('white')
            continue
        count2 += 1
        if rxn in adjRxnS:#Adjusted P-value
            tempL.append(tempD1[label])
            count1 += 1
        else:#Nominal P-value
            tempL.append(tempD2[label])
    tempDF1[label] = tempL
    print(regulation+' reaction in '+col_n)
    print(' -> adjusted P < 0.05:', count1)
    print(' -> nominal P < 0.05:', count2)
##Keep only the color columns (drop the original P-value columns)
tempDF1 = tempDF1.loc[:, tempDF1.columns.isin(tempD1.keys())]
##Remove the group showing no changed reactions
for col_n in tempDF1.columns.tolist():
    if (tempDF1[col_n]=='white').all():
        tempDF1 = tempDF1.drop(columns=col_n)

#Clustermap
##Transposed so that groups are rows and reactions are columns; tempDF1 annotates the reaction columns
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF.T, method='ward', metric='euclidean', cmap='RdBu_r',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF1, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.02, 0.1), colors_ratio=(0.03, 0.06),
                    cbar_pos=(0.08, -0.175, 0.3, 0.06), cbar_kws={'orientation': 'horizontal'},
                    figsize=(15, 4), **{'vmin':-1, 'vmax':1})
cm.cax.set_title('Group mean of flux values\n(vs. each control; scaled)', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
##Re-apply the current positions of the heatmap and dendrogram axes (values unchanged)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('Reaction')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
##Handles: all "adjusted" patches first, then all "nominal" patches, only for the groups still shown
shownL = tempDF1.columns.tolist()
tempL = [mpatches.Patch(color=tempD1[group], label='by '+group+' (adjusted '+r'$P$'+' < 0.05)')
         for group in tempD1 if group in shownL]
tempL += [mpatches.Patch(color=tempD2[group], label='(nominal '+r'$P$'+' < 0.05)')
          for group in tempD2 if group in shownL]
legend2 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=2,
                     title='Changed reactions (vs. each control)', title_fontsize='medium',
                     bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=7, frameon=False)
plt.gca().add_artist(legend2)
##Save
fileDir = './ExportFigures/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'flux-clustermap-across-interventions(vs-each-control).tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

#Save label order
##Rows of pvalDF in the left-to-right order of the clustered reaction columns
tempDF = pvalDF.loc[tempDF.index[cm.dendrogram_col.reordered_ind]]
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'flux-clustermap-across-interventions(vs-each-control)_ticks-order.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

#### 3-2-3. Group-wide 2

> Centering by each control may highlight the difference too strongly, because each control group is forced to be around zero.  
> –> Check simple standardization (Z-score) across all groups.  

In [None]:
#Summarize samples per intervention group
##Group order, used both for building the summary and for the final column sort
tempL = ['Con1', 'Acar', '17aE', 'Prot', 'Rapa', 'CalR',
         'Con2', 'MetR', 'GHRw', 'GHRk', 'SneW', 'SneD']
tempDF = pd.DataFrame(index=fluxDF.index)
for intervention in tempL:
    ##Samples (columns of fluxDF) annotated with this intervention
    modelL = sampleDF.index[sampleDF['Intervention']==intervention].tolist()
    ##Per-reaction mean across those samples, added as one column
    tempS = fluxDF.loc[:, modelL].mean(axis=1)
    tempS.name = intervention
    tempDF[intervention] = tempS
print('Summary per intervention group:')
display(tempDF.describe())

#Z-score transformation
##The scaler standardizes per column, hence the transpose: each reaction is scaled across the groups
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
tempA = scaler.fit_transform(tempDF.T)
tempDF1 = pd.DataFrame(data=tempA, index=tempDF.columns, columns=tempDF.index).T
tempDF1 = tempDF1[tempL]#Sort

print('After centering and scaling:')
display(tempDF1)
display(tempDF1.iloc[:, :15].describe())

##Reactions x groups table used by the following clustermap cell
fluxDF_scaled = tempDF1

In [None]:
#Clustermap of the scaled group-mean flux values across all groups, restricted to the changed reactions
#Prepare DF to plot only the changed reactions
##Rows: reactions with nominal P < 0.05 for the main effect of `variable`
variable = 'Intervention'
tempL = statDF.loc[statDF[variable+'_Pval']<0.05].index.tolist()
tempDF = fluxDF_scaled.loc[tempL]
print(variable+' (nominal P < 0.05):', len(tempDF))

#Prepare color labels for the changed reactions
regulation = 'Changed'#Handled values: 'Changed', 'Increased', 'Decreased'
##Group code -> display label
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF.columns = tempDF.columns.map(tempD0)
##Display label -> color used when the main effect passes the adjusted P-value threshold
tempD1 = {'Control 1':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Protandim':'tab:brown',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange',
          'GHR WT':'tab:gray', 'GHR KO':'tab:pink',
          'Snell WT':'black', 'Snell dwarf':'tab:cyan'}
##Display label -> color used for nominal-only significance:
##the tab20 entry at the odd index paired with the base color ('dimgray' if the base has no pair)
tab20 = plt.get_cmap('tab20')
tab20IdxD = {'tab:blue':1, 'tab:orange':3, 'tab:green':5, 'tab:red':7, 'tab:purple':9,
             'tab:brown':11, 'tab:pink':13, 'tab:gray':15, 'tab:olive':17, 'tab:cyan':19}
tempD2 = {}
for label, color in tempD1.items():
    tempD2[label] = tab20(tab20IdxD[color]) if color in tab20IdxD else 'dimgray'
tempL1 = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
tempL2 = statDF.loc[statDF[variable+'_Pval']<0.05].index.tolist()#Not used below; kept as in the original cell
adjRxnS = set(tempL1)#Set for O(1) membership tests in the loop below
tempDF1 = pvalDF.loc[tempDF.index.tolist(), pvalDF.columns.str.contains('-vs-')]
for col_n in tempDF1.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Increased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Decreased':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    hitRxnS = set(tempS2.index)#Reactions passing the filter in this contrast (built once per contrast)
    #Replace dummy value (P-value) with color code
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    tempL = []
    count1 = 0#Just for checking: hits that also pass the adjusted main-effect threshold
    count2 = 0#Just for checking: all hits (including those counted in count1)
    for rxn in tempDF1.index.tolist():
        if rxn not in hitRxnS:
            tempL.append('white')
            continue
        count2 += 1
        if rxn in adjRxnS:#Adjusted P-value
            tempL.append(tempD1[label])
            count1 += 1
        else:#Nominal P-value
            tempL.append(tempD2[label])
    tempDF1[label] = tempL
    print(regulation+' reaction in '+col_n)
    print(' -> adjusted P < 0.05:', count1)
    print(' -> nominal P < 0.05:', count2)
##Keep only the color columns (drop the original P-value columns)
tempDF1 = tempDF1.loc[:, tempDF1.columns.isin(tempD1.keys())]
##Remove the group showing no changed reactions
for col_n in tempDF1.columns.tolist():
    if (tempDF1[col_n]=='white').all():
        tempDF1 = tempDF1.drop(columns=col_n)

#Clustermap
##Transposed so that groups are rows and reactions are columns; tempDF1 annotates the reaction columns
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF.T, method='ward', metric='euclidean', cmap='RdBu_r',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF1, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.02, 0.1), colors_ratio=(0.03, 0.05),
                    cbar_pos=(0.08, -0.15, 0.3, 0.05), cbar_kws={'orientation': 'horizontal'},
                    figsize=(15, 5), **{'vmin':-3, 'vmax':3})
cm.cax.set_title('Group mean of flux values\n('+r'$Z$'+'-score)', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
##Re-apply the current positions of the heatmap and dendrogram axes (values unchanged)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('Reaction')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
##Handles: all "adjusted" patches first, then all "nominal" patches, only for the groups still shown
shownL = tempDF1.columns.tolist()
tempL = [mpatches.Patch(color=tempD1[group], label='by '+group+' (adjusted '+r'$P$'+' < 0.05)')
         for group in tempD1 if group in shownL]
tempL += [mpatches.Patch(color=tempD2[group], label='(nominal '+r'$P$'+' < 0.05)')
          for group in tempD2 if group in shownL]
legend2 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=2,
                     title='Changed reactions (vs. each control)', title_fontsize='medium',
                     bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=7, frameon=False)
plt.gca().add_artist(legend2)
##Save
fileDir = './ExportFigures/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'flux-clustermap-across-interventions.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

#Save label order
##Rows of pvalDF in the left-to-right order of the clustered reaction columns
tempDF = pvalDF.loc[tempDF.index[cm.dendrogram_col.reordered_ind]]
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
fileName = 'flux-clustermap-across-interventions_ticks-order.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

> –> This representation is informative for us, but general readers would probably find it difficult to grasp the message of the figure...  

### 3-3. Visualization: venn diagram

In [None]:
#Prepare label and color
##Group code -> display label
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
##The five groups drawn in the diagram (display label -> color); the key order fixes the set order
tempD1 = {'Acarbose':'tab:red', 'Rapamycin':'tab:purple', 'MR diet':'tab:orange',
          'GHR KO':'tab:pink', 'Snell dwarf':'tab:cyan'}
##Text-box placement per set, in the same order as tempD1: (x, y, horizontal align, vertical align)
annotL = [(0.25, 0.8, 'right', 'bottom'),
          (0.75, 0.8, 'left', 'bottom'),
          (0.9, 0.35, 'left', 'top'),
          (0.5, 0.0, 'center', 'top'),
          (0.1, 0.35, 'right', 'top')]

#Visualization per direction
for regulation in ['Changed', 'Increased', 'Decreased']:
    #Prepare reaction sets
    ##Restricted to reactions whose main effect passes the adjusted P-value threshold
    variable = 'Intervention'
    tempL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
    tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
    tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
    tempD2 = {}
    sizeL = []
    for col_n in tempDF1.columns.tolist():
        tempS1 = tempDF1[col_n]
        tempS2 = tempDF2[col_n]
        ##P < 0.05 in this contrast, optionally restricted by the sign of the difference
        maskS = (tempS1<0.05)
        if regulation=='Increased':
            maskS = maskS & (tempS2>0)
        elif regulation=='Decreased':
            maskS = maskS & (tempS2<0)
        tempS2 = tempS2.loc[maskS]
        sizeL.append(len(tempS2))
        label = tempD0[re.sub('-vs-.*', '', col_n)]
        tempD2[label] = set(tempS2.index.tolist())
    if not any(sizeL):
        print(regulation+' reactions: no significance in any comparisons')
        continue
    ##Sort to make consistent order in manual legend generation
    tempD = {label:tempD2[label] for label in tempD1}
    
    #Skip the followings if no significant reaction
    if not any(len(rxnS)>0 for rxnS in tempD.values()):
        print(regulation+' reactions: no significance in any target comparisons')
        continue
    
    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(4, 4))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.0, 1.0))#Otherwise, weird space...
    ##Add legend annotation
    for (key, color), (x_pos, y_pos, h_pos, v_pos) in zip(tempD1.items(), annotL):
        total = f'{len(tempD[key]):,}'
        ax.text(x_pos, y_pos, key+'\n('+total+' reactions)',
                fontsize='small', multialignment='center',
                horizontalalignment=h_pos, verticalalignment=v_pos,
                bbox={'boxstyle':'round', 'facecolor':color, 'pad':0.2, 'alpha':0.5})
    title = regulation+' reactions (vs. each control)'
    ax.set_title(title, fontsize='medium')
    ##Save (the 'Changed' diagram is only displayed, not exported)
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
        fileName = 'rxn-venn-'+regulation.lower()+'.tif'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                          pil_kwargs={'compression':'tiff_lzw'})
    plt.show()

In [None]:
#Export reaction list in each subset in the venn diagram
##One .xlsx per direction: a README sheet, one sheet per whole contrast set, one sheet per exclusive subset
import itertools

variable = 'Intervention'
##Contrasts shown in the venn diagram; their order defines the positions in every sheet name
contrastL = ['Acar-vs-Con1', 'Rapa-vs-Con1', 'MetR-vs-Con2', 'GHRk-vs-GHRw', 'SneD-vs-SneW']
##Reactions whose main effect passes the adjusted P-value threshold (universe of all subsets)
adjRxnL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
adjRxnS = set(adjRxnL)
tempDF1 = pvalDF.loc[adjRxnL, contrastL]
tempDF2 = diffDF.loc[adjRxnL, contrastL]

for regulation in ['Increased', 'Decreased']:
    #Prepare reaction sets
    tempD = {}
    for col_n in contrastL:
        tempS1 = tempDF1[col_n]
        tempS2 = tempDF2[col_n]
        if regulation=='Changed':
            tempS2 = tempS2.loc[(tempS1<0.05)]
        elif regulation=='Increased':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
        elif regulation=='Decreased':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
        tempD[col_n] = set(tempS2.index.tolist())
    
    #Skip the followings if no significant reaction
    if not any(tempD.values()):
        print(regulation+' reactions: no significance in any target comparisons')
        continue
    
    #Prepare the README table: contrast order (1-based) and set size
    setL = list(tempD.values())
    readmeDF = pd.DataFrame({'VennOrder':range(1, len(tempD)+1),
                             'Group':list(tempD.keys()),
                             'nReactions':[len(rxnS) for rxnS in setL]})
    fileDir = './ExportData/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'rxn-venn-'+regulation.lower()+'.xlsx'
    
    #Write every sheet through ONE writer
    ##(re-opening the workbook in append mode per sheet reloads and rewrites the whole file each time)
    with pd.ExcelWriter(fileDir+ipynbName+fileName, mode='w', engine='openpyxl') as writer:
        readmeDF.to_excel(writer, sheet_name='README', header=True, index=False)
        display(readmeDF)#Check
        
        t_start = time.time()
        #Extract overall set
        ##Sheet name: '1' at the position of the contrast, 'NA' elsewhere, e.g. '(1,NA,NA,NA,NA)'
        for key_i, rxnS in enumerate(setL):
            tempDF = statDF.loc[statDF.index.isin(rxnS)]
            tempL1 = ['NA']*len(setL)
            tempL1[key_i] = '1'
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
        
        #Extract subset
        ##Every '1'/'0' pattern over the contrasts (same order as nested loops with the first contrast outermost)
        ##Sheet content: reactions in ALL contrasts flagged '1' and in NONE of the contrasts flagged '0'
        for tempL1 in itertools.product(['1', '0'], repeat=len(setL)):
            tempS1 = set(adjRxnS)#Positive reaction set (initialized with the universe)
            tempS2 = set()#Negative reaction set
            for binary, rxnS in zip(tempL1, setL):
                if binary=='1':
                    tempS1 = tempS1 & rxnS
                else:
                    tempS2 = tempS2 | rxnS
            tempS = tempS1 - tempS2
            tempDF = statDF.loc[statDF.index.isin(tempS)]
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
    
    ##Measured after the writer is closed so that saving the file is included
    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

### 3-4. Visualization: boxplot

#### 3-4-1. Reactions increased by MR, GHRKO, SnellDW

In [None]:
#Boxplots of sample flux values for the top reactions that satisfy the contrast pattern below
#Prepare the target reaction set
variable = 'Intervention'#Set explicitly so that this cell does not rely on a value left over from an earlier cell
posL = ['MetR-vs-Con2', 'GHRk-vs-GHRw', 'SneD-vs-SneW']#Contrasts that must be significant in the `regulation` direction
negL = ['']#Contrasts that must NOT be significant in that direction (no column matches the empty name)
regulation = 'Increased'
tempL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
tempS = pd.Series(np.repeat(True, len(tempDF1)), index=tempDF1.index)#Initialize
for col_n in tempDF1.columns.tolist():
    tempS1 = tempDF1[col_n]
    tempS2 = tempDF2[col_n]
    if col_n in posL:
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #No constraint from the other contrasts (True wherever the comparison with 0.0 holds)
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
##Ranked by the adjusted P-value of the main effect; top 30 are listed, top 5 are plotted
topX = min(30, len(tempL))
topX_plot = min(5, len(tempL))
tempDF = statDF.loc[:, statDF.columns.str.contains('_AdjPval$')]
tempDF = pd.merge(statDF[['ReactionName', variable+'_Pval']], tempDF,
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'reactions (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (reaction, model), annotated with the model's intervention
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
##Group code -> display label
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
##Display label -> color; the key order is the left-to-right order on the x-axis
tempD1 = {'Control 1':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Protandim':'tab:brown',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange',
          'GHR WT':'tab:gray', 'GHR KO':'tab:pink',
          'Snell WT':'black', 'Snell dwarf':'tab:cyan'}
groupL = list(tempD1.keys())
xposD = {group:xpos for xpos, group in enumerate(groupL)}#Display label -> x position

#Visualize each representative
for rank_i, rxn in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary
    ##Per-group count/mean/std plus the bounds mean -/+ 1.96*std/sqrt(count)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    halfS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - halfS
    tempDF2['0.975'] = tempDF2['mean'] + halfS
    tempDF2 = tempDF2.loc[groupL]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[rxn, pvalDF.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean: split each 'X-vs-Y' column name into Contrast (X) and Baseline (Y), as display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label (stars below 0.05; otherwise the value rounded half-up to 2 decimals)
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ##(hard-coded for eight contrasts in the column order of pvalDF -- confirm if the contrasts change)
    tempDF2['YposLevel'] = [0, 1, 2, 3, 4, 2, 3, 4]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(4, 4))
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=groupL, palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=groupL, palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line (after the 6th, 8th and 10th group on the x-axis)
    for xcoord in [5.5, 7.5, 9.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])#Tick interval, used as the unit for all annotation offsets
    tempL = [int(y) for y in tempL if ymin < y < ymax]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*2.5)), yticks=tempL)#Extra headroom for the significance labels
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    for group_0, group_1, level, label in zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                              tempDF2['YposLevel'], tempDF2['SignifLabel']):
        xcoord_0 = xposD[group_0]#Baseline
        xcoord_1 = xposD[group_1]#Contrast
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*level
        #Add annotation lines (bracket between the two groups)
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text (stars: larger and centered; P-value text: smaller and above the bracket)
        if label in ['***', '**', '*']:
            text_offset, v_align, f_size = yinter/4, 'center', 'medium'
        else:
            text_offset, v_align, f_size = yinter/5, 'bottom', 'x-small'
        p.annotate(label, xy=(xcoord, ycoord+text_offset),
                   horizontalalignment='center', verticalalignment=v_align,
                   fontsize=f_size, color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

#### 3-4-2. Reactions decreased by MR, GHRKO, SnellDW

In [None]:
#Prepare the target reaction set
posL = ['MetR-vs-Con2', 'GHRk-vs-GHRw', 'SneD-vs-SneW']
negL = ['']
regulation = 'Decreased'
tempL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
tempS = pd.Series(np.repeat(True, len(tempDF1)), index=tempDF1.index)#Initialize
for col_n in tempDF1.columns.tolist():
    tempS1 = tempDF1[col_n]
    tempS2 = tempDF2[col_n]
    if col_n in posL:
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
topX = np.min([30, len(tempL)])
topX_plot = np.min([5, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('_AdjPval$')]
tempDF = pd.merge(statDF[['ReactionName', variable+'_Pval']], tempDF,
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'reactions (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
tempD1 = {'Control 1':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Protandim':'tab:brown',
          'Rapamycin':'tab:purple', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange',
          'GHR WT':'tab:gray', 'GHR KO':'tab:pink',
          'Snell WT':'black', 'Snell dwarf':'tab:cyan'}

#Visualize each representative
#For each reaction in plotL (in rank order): display its rxnDF row and a per-group flux
#summary, then draw and save a box/strip plot annotated with the pvalDF values of every
#'-vs-' comparison. Uses plotL, variable, rxnDF, pvalDF, tempDF (long-format flux with
#'ReactionID', 'Flux' and 'Group'), tempD0 (group code -> label) and tempD1
#(label -> color; its key order is the x-axis order), all prepared beforehand.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    rxn = plotL[rank_i]
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary
    ##Per-group count, mean and std, plus mean -/+ 1.96*std/sqrt(count)
    ##(stored in the '0.025' and '0.975' columns)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[rxn, pvalDF.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'Contrast-vs-Baseline' column name and map both codes to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###'***' / '**' / '*' for P < 0.001 / 0.01 / 0.05; otherwise 'P = x.xx' rounded half-up
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in this cell; presumably
    ###they come from the decimal module via an earlier cell -- confirm
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ###Hard-coded for exactly 8 comparisons, in the column order of pvalDF;
    ###a different number of '-vs-' columns makes this assignment fail
    tempDF2['YposLevel'] = [0, 1, 2, 3, 4, 2, 3, 4]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(4, 4))
    ##Box plot (outliers hidden) overlaid with the individual points as a jittered strip plot
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=list(tempD1.keys()), palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line
    ###Dotted vertical separators at x = 5.5, 7.5 and 9.5 (between the 6th/7th, 8th/9th
    ###and 10th/11th groups of the tempD1 order)
    for xcoord in [5.5, 7.5, 9.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ###yinter is the spacing of the first two ticks after truncating each to an integer;
    ###the upper limit is raised by 2.5 intervals to make room for the annotations
    ###NOTE(review): yinter becomes 0 if both ticks truncate to the same integer
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*2.5)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels
    ###Brackets start from the maximum flux of this reaction and are stacked upward
    ###by YposLevel in steps of half a tick interval
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    for row_i in range(len(tempDF2)):
        #Baseline
        ##x position = index of the group label in the tempD1 key order
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*tempDF2['YposLevel'].iloc[row_i]
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket: short vertical stubs at both groups joined by a horizontal line
        aline_offset = yinter/10
        aline_length = yinter/10 + aline_offset
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks are centered on the bracket; 'P = ...' text is smaller and sits above it
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    ###One LZW-compressed TIFF (300 dpi) per reaction; the reaction ID is part of the file name
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

#### 3-4-3. Reactions increased by Aca, GHRKO

In [None]:
#Prepare the target reaction set
#Among reactions with a significant main effect (adjusted P < 0.05), keep those whose
#post-hoc comparisons are: significant in the `regulation` direction for every posL
#column; non-significant or significant in the inverse direction for every negL column;
#and simply have a P-value >= 0 for all remaining columns
posL = ['Acar-vs-Con1', 'GHRk-vs-GHRw']
negL = ['']
regulation = 'Increased'
isMainSignif = statDF[variable+'_AdjPval']<0.05
tempL = statDF.loc[isMainSignif].index.tolist()
tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
tempS = pd.Series(np.ones(len(tempDF1), dtype=bool), index=tempDF1.index)#Initialize
for col_n, pvalS in tempDF1.items():
    zS = tempDF2[col_n]
    isSignif = (pvalS<0.05)
    #Direction masks relative to the requested regulation
    if regulation=='Increased':
        isSameDir, isInverseDir = (zS>0), (zS<0)
    elif regulation=='Decreased':
        isSameDir, isInverseDir = (zS<0), (zS>0)
    #Per-column criterion
    if col_n in posL:
        keepS = isSignif & isSameDir
    elif col_n in negL:
        keepS = (pvalS>=0.05) | (isSignif & isInverseDir)
    else:
        keepS = (pvalS>=0.0)
    #Update True
    tempS = tempS & keepS
tempL = tempS.loc[tempS].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
#Rank the selected reactions (tempL) by the main-effect adjusted P-value of `variable`;
#show up to 30 of them and keep up to 5 for plotting
topX = np.minimum(30, len(tempL))
topX_plot = np.minimum(5, len(tempL))
isAdjPval = statDF.columns.str.contains('_AdjPval$')
tempDF = statDF[['ReactionName', variable+'_Pval']].merge(
    statDF.loc[:, isAdjPval], left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL]
tempDF = tempDF.sort_values(by=variable+'_AdjPval')
print('Top', topX, 'reactions (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index[:topX_plot].tolist()

#Prepare DF for plot
#Long format: one row per (reaction, model) flux value, annotated with the intervention
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = fluxDF.reset_index().melt(id_vars='ReactionID', var_name='ModelID', value_name='Flux')
tempDF = tempDF.merge(tempDF1, on='ModelID', how='left')

#Prepare label and color
#(intervention code, display label, color) per group, in the order used for the dictionaries
groupSpecL = [('Con1', 'Control 1', 'tab:blue'),
              ('Acar', 'Acarbose', 'tab:red'),
              ('17aE', r'17$\alpha$-Estradiol', 'tab:green'),
              ('Prot', 'Protandim', 'tab:brown'),
              ('Rapa', 'Rapamycin', 'tab:purple'),
              ('CalR', 'CR diet', 'tab:olive'),
              ('Con2', 'Control 2', 'tab:blue'),
              ('MetR', 'MR diet', 'tab:orange'),
              ('GHRw', 'GHR WT', 'tab:gray'),
              ('GHRk', 'GHR KO', 'tab:pink'),
              ('SneW', 'Snell WT', 'black'),
              ('SneD', 'Snell dwarf', 'tab:cyan')]
tempD0 = {code: label for code, label, _ in groupSpecL}
tempD1 = {label: color for _, label, color in groupSpecL}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)

#Visualize each representative
#For each reaction in plotL (in rank order): display its rxnDF row and a per-group flux
#summary, then draw and save a box/strip plot annotated with the pvalDF values of every
#'-vs-' comparison. Uses plotL, variable, rxnDF, pvalDF, tempDF (long-format flux with
#'ReactionID', 'Flux' and 'Group'), tempD0 (group code -> label) and tempD1
#(label -> color; its key order is the x-axis order), all prepared beforehand.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    rxn = plotL[rank_i]
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary
    ##Per-group count, mean and std, plus mean -/+ 1.96*std/sqrt(count)
    ##(stored in the '0.025' and '0.975' columns)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[rxn, pvalDF.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'Contrast-vs-Baseline' column name and map both codes to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###'***' / '**' / '*' for P < 0.001 / 0.01 / 0.05; otherwise 'P = x.xx' rounded half-up
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in this cell; presumably
    ###they come from the decimal module via an earlier cell -- confirm
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ###Hard-coded for exactly 8 comparisons, in the column order of pvalDF;
    ###a different number of '-vs-' columns makes this assignment fail
    tempDF2['YposLevel'] = [0, 1, 2, 3, 4, 2, 3, 4]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(4, 4))
    ##Box plot (outliers hidden) overlaid with the individual points as a jittered strip plot
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=list(tempD1.keys()), palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line
    ###Dotted vertical separators at x = 5.5, 7.5 and 9.5 (between the 6th/7th, 8th/9th
    ###and 10th/11th groups of the tempD1 order)
    for xcoord in [5.5, 7.5, 9.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ###yinter is the spacing of the first two ticks after truncating each to an integer;
    ###the upper limit is raised by 2.5 intervals to make room for the annotations
    ###NOTE(review): yinter becomes 0 if both ticks truncate to the same integer
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*2.5)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels
    ###Brackets start from the maximum flux of this reaction and are stacked upward
    ###by YposLevel in steps of half a tick interval
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    for row_i in range(len(tempDF2)):
        #Baseline
        ##x position = index of the group label in the tempD1 key order
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*tempDF2['YposLevel'].iloc[row_i]
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket: short vertical stubs at both groups joined by a horizontal line
        aline_offset = yinter/10
        aline_length = yinter/10 + aline_offset
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks are centered on the bracket; 'P = ...' text is smaller and sits above it
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    ###One LZW-compressed TIFF (300 dpi) per reaction; the reaction ID is part of the file name
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

#### 3-4-4. Reactions decreased by Aca, GHRKO

In [None]:
#Prepare the target reaction set
#Among reactions with a significant main effect (adjusted P < 0.05), keep those whose
#post-hoc comparisons are: significant in the `regulation` direction for every posL
#column; non-significant or significant in the inverse direction for every negL column;
#and simply have a P-value >= 0 for all remaining columns
posL = ['Acar-vs-Con1', 'GHRk-vs-GHRw']
negL = ['']
regulation = 'Decreased'
isMainSignif = statDF[variable+'_AdjPval']<0.05
tempL = statDF.loc[isMainSignif].index.tolist()
tempDF1 = pvalDF.loc[tempL, pvalDF.columns.str.contains('-vs-')]
tempDF2 = diffDF.loc[tempL, diffDF.columns.str.contains('-vs-')]
tempS = pd.Series(np.ones(len(tempDF1), dtype=bool), index=tempDF1.index)#Initialize
for col_n, pvalS in tempDF1.items():
    zS = tempDF2[col_n]
    isSignif = (pvalS<0.05)
    #Direction masks relative to the requested regulation
    if regulation=='Increased':
        isSameDir, isInverseDir = (zS>0), (zS<0)
    elif regulation=='Decreased':
        isSameDir, isInverseDir = (zS<0), (zS>0)
    #Per-column criterion
    if col_n in posL:
        keepS = isSignif & isSameDir
    elif col_n in negL:
        keepS = (pvalS>=0.05) | (isSignif & isInverseDir)
    else:
        keepS = (pvalS>=0.0)
    #Update True
    tempS = tempS & keepS
tempL = tempS.loc[tempS].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
#Rank the selected reactions (tempL) by the main-effect adjusted P-value of `variable`;
#show up to 30 of them and keep up to 5 for plotting
topX = np.minimum(30, len(tempL))
topX_plot = np.minimum(5, len(tempL))
isAdjPval = statDF.columns.str.contains('_AdjPval$')
tempDF = statDF[['ReactionName', variable+'_Pval']].merge(
    statDF.loc[:, isAdjPval], left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL]
tempDF = tempDF.sort_values(by=variable+'_AdjPval')
print('Top', topX, 'reactions (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index[:topX_plot].tolist()

#Prepare DF for plot
#Long format: one row per (reaction, model) flux value, annotated with the intervention
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = fluxDF.reset_index().melt(id_vars='ReactionID', var_name='ModelID', value_name='Flux')
tempDF = tempDF.merge(tempDF1, on='ModelID', how='left')

#Prepare label and color
#(intervention code, display label, color) per group, in the order used for the dictionaries
groupSpecL = [('Con1', 'Control 1', 'tab:blue'),
              ('Acar', 'Acarbose', 'tab:red'),
              ('17aE', r'17$\alpha$-Estradiol', 'tab:green'),
              ('Prot', 'Protandim', 'tab:brown'),
              ('Rapa', 'Rapamycin', 'tab:purple'),
              ('CalR', 'CR diet', 'tab:olive'),
              ('Con2', 'Control 2', 'tab:blue'),
              ('MetR', 'MR diet', 'tab:orange'),
              ('GHRw', 'GHR WT', 'tab:gray'),
              ('GHRk', 'GHR KO', 'tab:pink'),
              ('SneW', 'Snell WT', 'black'),
              ('SneD', 'Snell dwarf', 'tab:cyan')]
tempD0 = {code: label for code, label, _ in groupSpecL}
tempD1 = {label: color for _, label, color in groupSpecL}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)

#Visualize each representative
#For each reaction in plotL (in rank order): display its rxnDF row and a per-group flux
#summary, then draw and save a box/strip plot annotated with the pvalDF values of every
#'-vs-' comparison. Uses plotL, variable, rxnDF, pvalDF, tempDF (long-format flux with
#'ReactionID', 'Flux' and 'Group'), tempD0 (group code -> label) and tempD1
#(label -> color; its key order is the x-axis order), all prepared beforehand.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    rxn = plotL[rank_i]
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary
    ##Per-group count, mean and std, plus mean -/+ 1.96*std/sqrt(count)
    ##(stored in the '0.025' and '0.975' columns)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[rxn, pvalDF.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'Contrast-vs-Baseline' column name and map both codes to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###'***' / '**' / '*' for P < 0.001 / 0.01 / 0.05; otherwise 'P = x.xx' rounded half-up
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in this cell; presumably
    ###they come from the decimal module via an earlier cell -- confirm
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ###Hard-coded for exactly 8 comparisons, in the column order of pvalDF;
    ###a different number of '-vs-' columns makes this assignment fail
    tempDF2['YposLevel'] = [0, 1, 2, 3, 4, 2, 3, 4]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(4, 4))
    ##Box plot (outliers hidden) overlaid with the individual points as a jittered strip plot
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=list(tempD1.keys()), palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line
    ###Dotted vertical separators at x = 5.5, 7.5 and 9.5 (between the 6th/7th, 8th/9th
    ###and 10th/11th groups of the tempD1 order)
    for xcoord in [5.5, 7.5, 9.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ###yinter is the spacing of the first two ticks after truncating each to an integer;
    ###the upper limit is raised by 2.5 intervals to make room for the annotations
    ###NOTE(review): yinter becomes 0 if both ticks truncate to the same integer
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*2.5)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels
    ###Brackets start from the maximum flux of this reaction and are stacked upward
    ###by YposLevel in steps of half a tick interval
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    for row_i in range(len(tempDF2)):
        #Baseline
        ##x position = index of the group label in the tempD1 key order
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*tempDF2['YposLevel'].iloc[row_i]
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket: short vertical stubs at both groups joined by a horizontal line
        aline_offset = yinter/10
        aline_length = yinter/10 + aline_offset
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks are centered on the bracket; 'P = ...' text is smaller and sits above it
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    ###One LZW-compressed TIFF (300 dpi) per reaction; the reaction ID is part of the file name
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

#### 3-4-5. Reactions of special interest

> 25 reactions included in Priyanka's diagram, related to fatty acid metabolism etc. Of note, ACS will not be used in any representation because she could not map ACS to the diagram.  

In [None]:
#Reactions of special interest (de-duplicated and sorted alphabetically)
plotL = ['ACITL', 'ACS', 'ACYP', 'ALDD21', 'CBPPer',
         'CRNtx', 'DPGase', 'DPGM', 'FACOAL140', 'FACOAL180',
         'FACOAL184', 'FACOAL191', 'FAOXC80', 'FAOXC140', 'FAOXC160',
         'FBP', 'HEX1', 'MGSA', 'MGSA2', 'PGK',
         'PGM', 'PRPPS', 'STRDNCCPT2', 'SUCOAS1m', 'TKT1',
         'TKT2']
plotL = sorted(set(plotL))
#Report and drop the reactions that are absent from the flux table
#(filter into new lists: removing items from plotL while iterating over it skips the
# element following each removal, so consecutive missing reactions would be kept)
missingL = [rxn for rxn in plotL if rxn not in fluxDF.index]
for rxn in missingL:
    print(rxn+' was NOT included?')
plotL = [rxn for rxn in plotL if rxn in fluxDF.index]
#Summary table: reaction name, nominal P-value of the main effect and all adjusted
#P-values, sorted by the main-effect adjusted P-value of `variable`
tempDF = statDF.loc[:, statDF.columns.str.contains('_AdjPval$')]
tempDF = pd.merge(statDF[['ReactionName', variable+'_Pval']], tempDF,
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[plotL].sort_values(by=variable+'_AdjPval', ascending=True)
print(len(plotL), 'target reactions:')
print(' -> '+variable+' (adjusted P < 0.05):', len(tempDF.loc[tempDF[variable+'_AdjPval']<0.05]))
print(' -> '+variable+' (nominal P < 0.05):', len(tempDF.loc[tempDF[variable+'_Pval']<0.05]))
display(tempDF)
plotL = tempDF.index.tolist()#Update by the sorted order

#Prepare DF for plot
#Long format: one row per (reaction, model) flux value, annotated with the intervention
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = fluxDF.reset_index().melt(id_vars='ReactionID', var_name='ModelID', value_name='Flux')
tempDF = tempDF.merge(tempDF1, on='ModelID', how='left')

#Prepare label and color
#(intervention code, display label, color) per group, in the order used for the dictionaries
groupSpecL = [('Con1', 'Control 1', 'tab:blue'),
              ('Acar', 'Acarbose', 'tab:red'),
              ('17aE', r'17$\alpha$-Estradiol', 'tab:green'),
              ('Prot', 'Protandim', 'tab:brown'),
              ('Rapa', 'Rapamycin', 'tab:purple'),
              ('CalR', 'CR diet', 'tab:olive'),
              ('Con2', 'Control 2', 'tab:blue'),
              ('MetR', 'MR diet', 'tab:orange'),
              ('GHRw', 'GHR WT', 'tab:gray'),
              ('GHRk', 'GHR KO', 'tab:pink'),
              ('SneW', 'Snell WT', 'black'),
              ('SneD', 'Snell dwarf', 'tab:cyan')]
tempD0 = {code: label for code, label, _ in groupSpecL}
tempD1 = {label: color for _, label, color in groupSpecL}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)

#Visualize each representative
#For each reaction in plotL (in rank order): display its rxnDF row and a per-group flux
#summary, then draw and save a box/strip plot annotated with the pvalDF values of every
#'-vs-' comparison. Uses plotL, variable, rxnDF, pvalDF, tempDF (long-format flux with
#'ReactionID', 'Flux' and 'Group'), tempD0 (group code -> label) and tempD1
#(label -> color; its key order is the x-axis order), all prepared beforehand.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    rxn = plotL[rank_i]
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary
    ##Per-group count, mean and std, plus mean -/+ 1.96*std/sqrt(count)
    ##(stored in the '0.025' and '0.975' columns)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[rxn, pvalDF.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'Contrast-vs-Baseline' column name and map both codes to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###'***' / '**' / '*' for P < 0.001 / 0.01 / 0.05; otherwise 'P = x.xx' rounded half-up
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in this cell; presumably
    ###they come from the decimal module via an earlier cell -- confirm
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ###Hard-coded for exactly 8 comparisons, in the column order of pvalDF;
    ###a different number of '-vs-' columns makes this assignment fail
    tempDF2['YposLevel'] = [0, 1, 2, 3, 4, 2, 3, 4]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(4, 4))
    ##Box plot (outliers hidden) overlaid with the individual points as a jittered strip plot
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=list(tempD1.keys()), palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line
    ###Dotted vertical separators at x = 5.5, 7.5 and 9.5 (between the 6th/7th, 8th/9th
    ###and 10th/11th groups of the tempD1 order)
    for xcoord in [5.5, 7.5, 9.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ###yinter is the spacing of the first two ticks after truncating each to an integer;
    ###the upper limit is raised by 2.5 intervals to make room for the annotations
    ###NOTE(review): yinter becomes 0 if both ticks truncate to the same integer
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*2.5)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels
    ###Brackets start from the maximum flux of this reaction and are stacked upward
    ###by YposLevel in steps of half a tick interval
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    for row_i in range(len(tempDF2)):
        #Baseline
        ##x position = index of the group label in the tempD1 key order
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*tempDF2['YposLevel'].iloc[row_i]
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket: short vertical stubs at both groups joined by a horizontal line
        aline_offset = yinter/10
        aline_length = yinter/10 + aline_offset
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks are centered on the bracket; 'P = ...' text is smaller and sits above it
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    ###One LZW-compressed TIFF (300 dpi) per reaction; the reaction ID is part of the file name
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'flux-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

## 4. Compare flux values between MR and CR

> The model is the same as in the above Dunn's test, but a different set of post-hoc comparisons is retrieved to compare MR vs. CR.  

# — †2. Go to †1 of the R sub-notebook —  

### 4-1. Another post-hoc Dunn's test

#### 4-1-1/2. Import the summary tables

In [None]:
#Import the summary tables
#Read the 'Posthoc2_MR-vs-CR' sheet of the statistical test workbook, indexed by ReactionID
fileDir = './ExportData/'
ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_StatisticalTest_'
fileName = 'flux-comparison.xlsx'
sheetName = 'Posthoc2_MR-vs-CR'
statDF2 = (pd.read_excel(''.join([fileDir, ipynbName, fileName]),
                         sheet_name=sheetName, engine='openpyxl')
             .set_index('ReactionID'))
tempDF = statDF2
display(statDF2)

#### 4-1-3. Changed reactions by each intervention (Dunn's test)

In [None]:
#Clean DF for adjusted P-value and z-statistic
#Split the post-hoc summary (statDF2) into one table of adjusted P-values (pvalDF2) and
#one of z-statistics (diffDF2). In each, the comparison columns are renamed to the bare
#'Contrast-vs-Baseline' label, the reaction name and main-effect adjusted P-value from
#statDF are prepended, and rows are sorted by that main-effect adjusted P-value.
variable = 'Intervention'
##Take adjusted P-value
tempDF1 = statDF2.loc[:, statDF2.columns.str.contains('-vs-.*_AdjPval$')]
###regex=True is required: the '$'-anchored suffix is a regular expression, and
###str.replace treats the pattern as a literal string by default since pandas 2.0
tempDF1.columns = tempDF1.columns.str.replace('_AdjPval$', '', regex=True)
tempDF1 = pd.merge(statDF[['ReactionName', variable+'_AdjPval']], tempDF1,
                   left_index=True, right_index=True, how='left')
tempDF1 = tempDF1.sort_values(by=variable+'_AdjPval', ascending=True)
print('Adjusted P-value:')
display(tempDF1)
##Take z-statistic
tempDF2 = statDF2.loc[:, statDF2.columns.str.contains('-vs-.*_zStat$')]
###Same explicit regex=True as above for the '_zStat' suffix
tempDF2.columns = tempDF2.columns.str.replace('_zStat$', '', regex=True)
tempDF2 = pd.merge(statDF[['ReactionName', variable+'_AdjPval']], tempDF2,
                   left_index=True, right_index=True, how='left')
tempDF2 = tempDF2.sort_values(by=variable+'_AdjPval', ascending=True)
print('Changed direction (z-statistic):')
display(tempDF2)

pvalDF2 = tempDF1
diffDF2 = tempDF2

> –> When z-statistic is positive, the flux median of contrast is larger than that of baseline.  

In [None]:
#Extract only the changed reactions
#Count the reactions with a significant main effect (adjusted P < 0.05) and, among
#them, the reactions with a value < 0.05 in each '-vs-' comparison column of pvalDF2
variable = 'Intervention'
isMainSignif = pvalDF2[variable+'_AdjPval']<0.05
tempDF = pvalDF2.loc[isMainSignif]
print(variable+' (adjusted P < 0.05):', len(tempDF))

tempL = [col_n for col_n in tempDF.columns if '-vs-' in col_n]
for comparison in tempL:
    tempS = tempDF.loc[tempDF[comparison]<0.05, comparison]
    print(' - '+comparison+':', len(tempS))

> Check the changed reactions (nominal P-value < 0.05) as reference.  

In [None]:
#Extract only the changed reactions
#Count the reactions with a nominal main-effect P < 0.05 (from statDF) and, among
#them, the reactions with a value < 0.05 in each '-vs-' comparison column of pvalDF2
variable = 'Intervention'
isNominalSignif = statDF[variable+'_Pval']<0.05
tempL = statDF.loc[isNominalSignif].index.tolist()
tempDF = pvalDF2.loc[tempL]
print(variable+' (nominal P < 0.05):', len(tempDF))

tempL = [col_n for col_n in tempDF.columns if '-vs-' in col_n]
for comparison in tempL:
    tempS = tempDF.loc[tempDF[comparison]<0.05, comparison]
    print(' - '+comparison+':', len(tempS))

### 4-2. Visualization: venn diagram

In [None]:
#Display name of each sample group, and color of each target comparison
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempD1 = {'CR diet\nvs.\nControl 1':'tab:olive',
          'MR diet\nvs.\nControl 2':'tab:orange',
          'MR diet\nvs.\nCR diet':'turquoise'}
##Anchor (x, y) and alignment (horizontal, vertical) of the manual legend box, in the order of tempD1
legendL = [(0.05, 0.65, 'right', 'bottom'),
           (0.95, 0.65, 'left', 'bottom'),
           (0.85, 0.4, 'left', 'top')]

#One venn diagram per direction of change
for regulation in ['Changed', 'Increased', 'Decreased']:
    #Reaction set of each comparison, restricted to the reactions with a significant main effect
    variable = 'Intervention'
    signifL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
    pvalDF = pvalDF2.loc[signifL, pvalDF2.columns.str.contains('-vs-')]
    zstatDF = diffDF2.loc[signifL, diffDF2.columns.str.contains('-vs-')]
    tempD2 = {}
    for col_n in pvalDF.columns.tolist():
        zstatS = zstatDF[col_n]
        keepS = (pvalDF[col_n]<0.05)
        ##The sign of z-statistic gives the direction; 'Changed' keeps both directions
        if regulation=='Increased':
            keepS = keepS&(zstatS>0)
        elif regulation=='Decreased':
            keepS = keepS&(zstatS<0)
        contrast, baseline = col_n.split(sep='-vs-')
        label = tempD0[contrast]+'\nvs.\n'+tempD0[baseline]
        tempD2[label] = set(zstatS.loc[keepS].index.tolist())
    if not any(tempD2.values()):
        print(regulation+' reactions: no significance in any comparisons')
        continue
    ##Keep only the target comparisons, in the same order as the manual legend
    tempD = {label: tempD2[label] for label in tempD1}
    
    #Nothing to draw if all the target sets are empty
    if not any(tempD.values()):
        print(regulation+' reactions: no significance in any target comparisons')
        continue
    
    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(4, 4))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.05, 0.975))#Trim the otherwise odd blank space
    ##Manual legend: a colored box per comparison with the total size of its set
    for (label, color), (x_pos, y_pos, h_align, v_align) in zip(tempD1.items(), legendL):
        n_total = f'{len(tempD[label]):,}'
        ax.text(x_pos, y_pos, label+'\n('+n_total+' reactions)',
                fontsize='small', multialignment='center',
                horizontalalignment=h_align, verticalalignment=v_align,
                bbox={'boxstyle':'round', 'facecolor':color, 'pad':0.2, 'alpha':0.5})
    ax.set_title(regulation+' reactions', fontsize='medium')
    ##Save (the 'Changed' diagram is only displayed)
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
        fileName = 'MR-vs-CR-venn-'+regulation.lower()+'.tif'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                          pil_kwargs={'compression':'tiff_lzw'})
    plt.show()

In [None]:
#Export reaction list in each subset in the venn diagram
##-> One workbook per direction, containing:
##    - 'README': the order and the size of each comparison set
##    - '(1,NA,NA)'-like sheets: the whole set of one comparison (NA = not considered)
##    - '(1,0,1)'-like sheets: the reactions in every set marked 1 and in no set marked 0
from itertools import product

for regulation in ['Increased', 'Decreased']:
    #Prepare reaction sets (within the reactions having a significant main effect)
    variable = 'Intervention'
    signifL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
    compL = pvalDF2.columns[pvalDF2.columns.str.contains('-vs-')].tolist()
    tempDF1 = pvalDF2.loc[signifL, compL]
    tempDF2 = diffDF2.loc[signifL, compL]
    tempD = {}
    for col_n in compL:
        tempS1 = tempDF1[col_n]
        tempS2 = tempDF2[col_n]
        ##The sign of z-statistic gives the direction
        if regulation=='Increased':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
        elif regulation=='Decreased':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
        tempD[col_n] = set(tempS2.index.tolist())
    
    #Skip the followings if no significant reaction
    if not any(tempD.values()):
        print(regulation+' reactions: no significance in any target comparisons')
        continue
    
    #Prepare the README sheet
    readmeDF = pd.DataFrame({'VennOrder':range(1, len(tempD)+1),
                             'Group':list(tempD.keys()),
                             'nReactions':[len(rxn_set) for rxn_set in tempD.values()]})
    display(readmeDF)#Check
    
    fileDir = './ExportData/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'MR-vs-CR-venn-'+regulation.lower()+'.xlsx'
    t_start = time.time()
    #All the sheets are written through a single writer;
    #re-opening the workbook in append mode for every sheet reloads and rewrites the whole file each time
    with pd.ExcelWriter(fileDir+ipynbName+fileName, mode='w', engine='openpyxl') as writer:
        readmeDF.to_excel(writer, sheet_name='README', header=True, index=False)
        
        #Extract overall set
        for key_i, key in enumerate(tempD.keys()):
            tempDF = statDF2.loc[statDF2.index.isin(tempD[key])]
            ##Prepare sheet name
            tempL1 = ['NA' for i in range(len(tempD))]
            tempL1[key_i] = '1'
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
        
        #Extract subset
        ##-> Every 1/0 pattern over the comparison sets, from all-1 to all-0
        ##   (all-0 = significant main effect but in none of the sets)
        setL = list(tempD.values())
        for pattern in product(['1', '0'], repeat=len(setL)):
            ##Reactions shared by all the sets marked 1
            posL = [rxn_set for rxn_set, binary in zip(setL, pattern) if binary=='1']
            tempS1 = set(signifL).intersection(*posL)
            ##Reactions in any of the sets marked 0
            negL = [rxn_set for rxn_set, binary in zip(setL, pattern) if binary=='0']
            tempS2 = set().union(*negL)
            ##Extract subset
            tempS = tempS1 - tempS2
            tempDF = statDF2.loc[statDF2.index.isin(tempS)]
            ##Prepare sheet name
            setName = '('+','.join(pattern)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
    
    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

### 4-3. Visualization: boxplot

#### 4-3-1. Reactions increased by MR but not CR

In [None]:
#Prepare the target reaction set
##-> Imported and defined within this cell so that the cell does not depend on the state left by other cells
from decimal import Decimal, ROUND_HALF_UP
variable = 'Intervention'
posL = ['MetR-vs-Con2', 'MetR-vs-CalR']
negL = ['CalR-vs-Con1']
regulation = 'Increased'
tempL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
tempDF1 = pvalDF2.loc[tempL, pvalDF2.columns.str.contains('-vs-')]
tempDF2 = diffDF2.loc[tempL, diffDF2.columns.str.contains('-vs-')]
tempS = pd.Series(np.repeat(True, len(tempDF1)), index=tempDF1.index)#Initialize
for col_n in tempDF1.columns.tolist():
    tempS1 = tempDF1[col_n]
    tempS2 = tempDF2[col_n]
    if col_n in posL:
        #Required: significant in the target direction
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        #Required: either not significant...
        tempS3 = (tempS1>=0.05)
        #...or significant for inverse regulation
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #No requirement for the other comparisons (True unless the value is missing)
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
topX = min(30, len(tempL))
topX_plot = min(5, len(tempL))
tempDF = statDF2.loc[:, statDF2.columns.str.contains('_AdjPval$')]
tempDF = pd.merge(statDF[['ReactionName', variable+'_Pval', variable+'_AdjPval']], tempDF,
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'reactions (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per reaction per model)
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
tempD1 = {'Control 1':'tab:blue', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange'}
groupL = list(tempD1.keys())#Order on the x-axis

#Select only the target groups
tempDF = tempDF.loc[tempDF['Group'].isin(groupL)]

#Visualize each representative
for rank_i, rxn in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary (mean +/- 1.96 x standard error per group)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS
    tempDF2['0.975'] = tempDF2['mean'] + tempS
    tempDF2 = tempDF2.loc[groupL]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF2.loc[rxn, pvalDF2.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label (asterisks if < 0.05; otherwise the value rounded half up to 2 decimals)
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ##-> Hard-coded for three comparisons; the third one is drawn one level above the others
    tempDF2['YposLevel'] = [0, 0, 1]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=groupL, palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=groupL, palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line
    for xcoord in [1.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ###-> NOTE(review): ticks are truncated to integers, so this presumably assumes
    ###   a tick interval of at least 1 (yinter becomes 0 otherwise) - confirm for small-flux reactions
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*0.8)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels (a bracket between the two groups, and the label above it)
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    for group_0, group_1, ypos_level, label in zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                                   tempDF2['YposLevel'], tempDF2['SignifLabel']):
        #Baseline and contrast positions on the x-axis
        xcoord_0 = groupL.index(group_0)
        xcoord_1 = groupL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*ypos_level
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'MR-vs-CR-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

#### 4-3-2. Reactions decreased by MR but not CR

In [None]:
#Prepare the target reaction set
##-> Imported and defined within this cell so that the cell does not depend on the state left by other cells
from decimal import Decimal, ROUND_HALF_UP
variable = 'Intervention'
posL = ['MetR-vs-Con2', 'MetR-vs-CalR']
negL = ['CalR-vs-Con1']
regulation = 'Decreased'
tempL = statDF.loc[statDF[variable+'_AdjPval']<0.05].index.tolist()
tempDF1 = pvalDF2.loc[tempL, pvalDF2.columns.str.contains('-vs-')]
tempDF2 = diffDF2.loc[tempL, diffDF2.columns.str.contains('-vs-')]
tempS = pd.Series(np.repeat(True, len(tempDF1)), index=tempDF1.index)#Initialize
for col_n in tempDF1.columns.tolist():
    tempS1 = tempDF1[col_n]
    tempS2 = tempDF2[col_n]
    if col_n in posL:
        #Required: significant in the target direction
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        #Required: either not significant...
        tempS3 = (tempS1>=0.05)
        #...or significant for inverse regulation
        if regulation=='Increased':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Decreased':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #No requirement for the other comparisons (True unless the value is missing)
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' reactions with significance in', posL, 'but not in', negL)

#Select representatives
topX = min(30, len(tempL))
topX_plot = min(5, len(tempL))
tempDF = statDF2.loc[:, statDF2.columns.str.contains('_AdjPval$')]
tempDF = pd.merge(statDF[['ReactionName', variable+'_Pval', variable+'_AdjPval']], tempDF,
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'reactions (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per reaction per model)
tempDF = fluxDF.reset_index().melt(var_name='ModelID', value_name='Flux', id_vars='ReactionID')
tempDF1 = sampleDF.reset_index()[['ModelID', 'Intervention']]
tempDF = pd.merge(tempDF, tempDF1, on='ModelID', how='left')

#Prepare label and color
tempD0 = {'Con1':'Control 1', 'Acar':'Acarbose',
          '17aE':'17'+r'$\alpha$'+'-Estradiol', 'Prot':'Protandim',
          'Rapa':'Rapamycin', 'CalR':'CR diet',
          'Con2':'Control 2', 'MetR':'MR diet',
          'GHRw':'GHR WT', 'GHRk':'GHR KO',
          'SneW':'Snell WT', 'SneD':'Snell dwarf'}
tempDF['Group'] = tempDF['Intervention'].map(tempD0)
tempD1 = {'Control 1':'tab:blue', 'CR diet':'tab:olive',
          'Control 2':'tab:blue', 'MR diet':'tab:orange'}
groupL = list(tempD1.keys())#Order on the x-axis

#Select only the target groups
tempDF = tempDF.loc[tempDF['Group'].isin(groupL)]

#Visualize each representative
for rank_i, rxn in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    #Check reaction summary
    tempDF1 = pd.DataFrame(rxnDF.loc[rxn]).T
    display(tempDF1)
    
    #Select Flux
    tempDF1 = tempDF.loc[tempDF['ReactionID']==rxn]
    
    #Check Flux summary (mean +/- 1.96 x standard error per group)
    tempDF2 = tempDF1.groupby(['Group'])['Flux'].agg(['count', 'mean', 'std'])
    tempS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS
    tempDF2['0.975'] = tempDF2['mean'] + tempS
    tempDF2 = tempDF2.loc[groupL]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF2.loc[rxn, pvalDF2.columns.str.contains('-vs-')]
    tempS.name = 'AdjPval'
    ##Clean
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label (asterisks if < 0.05; otherwise the value rounded half up to 2 decimals)
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    ##Add the y-position level in figure
    ##-> Hard-coded for three comparisons; the third one is drawn one level above the others
    tempDF2['YposLevel'] = [0, 0, 1]
    display(tempDF2)
    
    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.boxplot(data=tempDF1, x='Group', y='Flux', order=groupL, palette=tempD1,
                dodge=False, showfliers=False, showcaps=True, notch=False)
    p = sns.stripplot(data=tempDF1, x='Group', y='Flux',
                      order=groupL, palette=tempD1, dodge=False, jitter=0.3,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Add border line
    for xcoord in [1.5]:
        p.axvline(x=xcoord, **{'linestyle':'dotted', 'color':'black', 'zorder':0})
    ##Set axis
    sns.despine()
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ###Reset the axis range for the labels
    ###-> NOTE(review): ticks are truncated to integers, so this presumably assumes
    ###   a tick interval of at least 1 (yinter becomes 0 otherwise) - confirm for small-flux reactions
    ymin, ymax = p.get_ylim()
    tempL = p.get_yticks().tolist()
    yinter = int(tempL[1]) - int(tempL[0])
    tempL = [int(y) for y in tempL if (y>ymin)&(y<ymax)]#seaborn seems to prepare wider range!?
    p.set(ylim=(ymin, ymax+(yinter*0.8)), yticks=tempL)
    p.set_yticklabels(['{:,}'.format(int(y)) for y in tempL])
    ##Add significance labels (a bracket between the two groups, and the label above it)
    aline_ymin = tempDF1['Flux'].max()
    aline_ymargin = yinter/2
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    for group_0, group_1, ypos_level, label in zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                                   tempDF2['YposLevel'], tempDF2['SignifLabel']):
        #Baseline and contrast positions on the x-axis
        xcoord_0 = groupL.index(group_0)
        xcoord_1 = groupL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*ypos_level
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        if label in ['***', '**', '*']:
            text_offset = yinter/4
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='center',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample flux value [a.u.]')
    p.set_title(rxn, {'fontsize':'medium'})
    ##Save
    fileDir = './ExportFigures/'
    ipynbName = '220610_LC-M001-related-transcriptomics-GEM-ver2-4_FluxAnalysis_'
    fileName = 'MR-vs-CR-boxplot('+rxn+').tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

# — End of notebook —