# DIRAC Analysis of LC M001 Liver Proteomics — DIRAC with GOBP Modules

***by Kengo Watanabe***  

This Jupyter Notebook (with Python 3 kernel) performed the differential rank conservation (DIRAC; Eddy, J.A. et al. PLoS Comput. Biol. 2010) analysis on the preprocessed Longevity Consortium (LC) M001 proteomics dataset (adjusted with sex and age; analytes detected in all samples; sample-based robust Z-score followed by analyte-based robust Z-score) using the retrieved a priori module set (Gene Ontology (Biological Process) derived by EMBL-EBI QuickGO API; ≥4 analytes and ≥50% coverage).  

Input files:  
- Preprocessed analyte data: 230213_LC-M001-proteomics-DIRAC-ver7-2_Preprocessing_normalized-data.tsv  
- Module–analyte metadata: 220427_LCproteomics-M001-DIRAC-ver6-2_Preprocessing_module-metadata_QuickGO-GOBP-min-n4-cov50.tsv  
- Analyte metadata: 220427_LCproteomics-M001-DIRAC-ver6-2_Preprocessing_analyte-metadata_UniProt.tsv  
- Sample–mouse metadata: 230213_LC-M001-proteomics-DIRAC-ver7-2_Preprocessing_sample-metadata.tsv  

Output figures and tables:  
- Figure 2  
- Supplementary Figure 1  
- Supplementary Data 1  

Original notebook (memo for my future tracing):  
- dalek:\[JupyterLab HOME\]/230206_LC-M001-proteomics-DIRAC-ver7/230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP.ipynb  

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#For Arial font
#!conda install -c conda-forge -y mscorefonts
##-> The below was also needed in matplotlib 3.4.2
#import shutil
#import matplotlib
#shutil.rmtree(matplotlib.get_cachedir())
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import time
#For exporting .pdf file with editable text
import matplotlib
matplotlib.rcParams['pdf.fonttype']=42
matplotlib.rcParams['ps.fonttype']=42

from itertools import combinations
import math
from multiprocessing import Pool
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from statsmodels.stats import weightstats
from statsmodels.stats import multitest as multi
from decimal import Decimal, ROUND_HALF_UP
import re
import matplotlib.patches as mpatches
#!pip install venn
from venn import venn
from textwrap import wrap

!conda list

# packages in environment at /opt/conda/envs/arivale-py3:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                       1_gnu    conda-forge
analytics                 0.1                      pypi_0    pypi
argon2-cffi               21.1.0           py39h3811e60_0    conda-forge
arivale-data-interface    0.1.0                    pypi_0    pypi
async_generator           1.10                       py_0    conda-forge
atk-1.0                   2.36.0               h3371d22_4    conda-forge
attrs                     21.2.0             pyhd8ed1ab_0    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                        py_2    conda-forge
backports.functools_lru_cache 1.6.4              pyhd8ed1ab_0    conda-forge
biopython                 1.79             py39h3811e60_0    conda-forge
bleach 

## 0. DIRAC code

> The original code for DIRAC was written in MATLAB. Hence, it is re-written in Python 3 here.  
> <– I don't care about computational cost here; rather, the code adheres to the story in the original paper.  

In [None]:
def network_ranking(DF, networkS):
    # This function calculates the pairwise ordering of network genes (i.e., network ranking).
    ## Ref. Eddy, J. A. et al. PLoS Comput. Biol. 2010 (Figure 1 at a glance)
    # Requirements:
    ## import pandas as pd
    ## from itertools import combinations
    # Input:
    ## DF: pd.DataFrame containing expression values (X_gn) with gene (g; 1-G) indices and sample (n; 1-N) columns
    ## networkS: pd.Series containing genes (g; 1-G) with network (m; 1-M) indices (i.e., 1-on-1 long-format)
    # Output:
    ## pd.DataFrame containing binary values of the network ranking comparison for each sample
    ## -> row: comparison_id (indicated by network (m) and ordering (g_i < g_j) columns),
    ##         sorted by the 'network : g_i < g_j' label; the index is the row number
    ## -> column: sample (n), sorted by name (with exception of the NetworkID and Ordering columns)
    # Raises:
    ## KeyError if a network gene is absent from the DF indices
    ## ValueError if a network gene matches multiple DF indices, or if a gene pair is duplicated within a network
    # Note:
    ## A network with a single gene has no gene pair; hence, it contributes no row.
    ## If X_i < X_j is true, 1 is assigned; otherwise (including ties), 0 is assigned.
    ## If an item in sample n is 'NetworkID' or 'Ordering', this code would produce error or unexpected output.
    
    sampleL = DF.columns.tolist()
    #Enumerate pairs of genes (g_i, g_j) in each network m (independent of sample n, so done only once)
    networkL, giL, gjL = [], [], []
    for m in networkS.index.unique():
        #List-based selection always returns pd.Series, even if the network m has a single gene
        geneL_m = networkS.loc[[m]].tolist()
        for g_i, g_j in combinations(geneL_m, 2):
            networkL.append(m)
            giL.append(g_i)
            gjL.append(g_j)
    if len(networkL)==0:#No gene pair at all
        return pd.DataFrame(columns=['NetworkID', 'Ordering']+sorted(sampleL))
    
    #Prepare labels of the comparisons
    orderingL = [str(g_i)+' < '+str(g_j) for g_i, g_j in zip(giL, gjL)]
    comparisonS = pd.Series([str(m)+' : '+ordering for m, ordering in zip(networkL, orderingL)])
    if comparisonS.duplicated().any():
        raise ValueError('Duplicated gene pair within a network: '
                         +comparisonS.loc[comparisonS.duplicated()].iloc[0])
    
    #Compare the expression values (X_gn) between pairwise genes (g_i vs. g_j) for all samples at once
    X_i = DF.loc[giL].to_numpy()#Row: comparison, column: sample
    X_j = DF.loc[gjL].to_numpy()
    if (X_i.shape[0]!=len(giL)) or (X_j.shape[0]!=len(gjL)):
        raise ValueError('Genes in networkS must correspond to unique indices of DF.')
    rankDF = pd.DataFrame((X_i < X_j).astype('int64'), columns=sampleL)
    
    #Clean dataframe
    rankDF = rankDF.sort_index(axis=1)#Sort samples by name
    rankDF.insert(0, 'NetworkID', networkL)
    rankDF.insert(1, 'Ordering', orderingL)
    rankDF = rankDF.loc[comparisonS.sort_values().index]#Sort comparisons by label
    rankDF = rankDF.reset_index(drop=True)#Index becomes row number here
    return rankDF

def rank_template(rankDF, phenotypeS):
    # This function generates the rank template (T) presenting the expected network ranking in a phenotype.
    ## Ref. Eddy, J. A. et al. PLoS Comput. Biol. 2010 (Figure 1 at a glance)
    # Requirements:
    ## import pandas as pd
    # Input:
    ## rankDF: pd.DataFrame obtained from the above network_ranking() function
    ## phenotypeS: pd.Series containing phenotypes (k; 1-K) with sample (n; 1-N) indices (i.e., 1-on-1 long-format)
    # Output:
    ## pd.DataFrame containing the expected binary values of network ranking comparison for each phenotype (T_mk)
    ## -> row: comparison_id (indicated by network (m) and ordering (g_i < g_j) columns)
    ## -> column: phenotype (k) (with exception of the NetworkID and Ordering columns)
    # Note:
    ## True rate = 0.5 is assigned to 0 in this code.
    ## The input rankDF is not modified.
    ## If an item in phenotype k is 'NetworkID' or 'Ordering', this code would produce error or unexpected output.
    
    #Calculate the expected binary values of network ranking comparison for each phenotype (T_mk)
    ##Explicit copy: new columns are added to the template, not to a selection of rankDF
    templateDF = rankDF[['NetworkID', 'Ordering']].copy()
    for k in phenotypeS.unique().tolist():
        sampleL_k = phenotypeS.loc[phenotypeS==k].index.tolist()
        trueRateS = rankDF[sampleL_k].mean(axis=1)#True (=1) rate
        templateDF[k] = (trueRateS>0.5).astype('int64')#If true rate > 0.5, 1; otherwise (<= 0.5), 0
    return templateDF

def rank_matching_score(rankDF, templateDF):
    # This function calculates the rank matching score (R) of a sample.
    ## Ref. Eddy, J. A. et al. PLoS Comput. Biol. 2010 (Figure 1 at a glance)
    # Requirements:
    ## import pandas as pd
    # Input:
    ## rankDF: pd.DataFrame obtained from the above network_ranking() function
    ## templateDF: pd.DataFrame obtained from the above rank_template() function
    # Output:
    ## pd.DataFrame containing the rates of gene pairs matching to a rank template in a sample (R_mkn)
    ## -> row: rank template (T_mk) (indicated by network (m) and template phenotype (k) columns)
    ## -> column: sample (n) (with exception of the NetworkID and Template columns)
    # Note:
    ## True rate = 0.5 was assigned to 0 in the above rank_template() function.
    ## -> 'Match (1)' and 'Mismatch (0)' are evenly assigned to samples in the phenotype.
    ##    (i.e., 'Match' for (X_i < X_j) = 0 and 'Mismatch' for (X_i < X_j) = 1 in the tie case)
    ## If items in network m and phenotype k contain ' : ', this code would produce error or unexpected output.
    ## If there is no sample, no template, or no comparison, an empty dataframe is returned.
    
    #Calculate the rates of gene pairs matching to a rank template in a sample (R_mkn)
    sampleL = rankDF.drop(columns=['NetworkID', 'Ordering']).columns.tolist()
    phenotypeL = templateDF.drop(columns=['NetworkID', 'Ordering']).columns.tolist()
    scoreDFL = []#Collected per sample and template; concatenated only once below
    for n in sampleL:
        for k_template in phenotypeL:
            matchS = (rankDF[n]==templateDF[k_template]).astype('int64')#If matching, 1; otherwise, 0
            #Only the Match column is averaged (the Ordering strings must not enter the mean)
            tempDF = pd.DataFrame({'NetworkID':rankDF['NetworkID'], 'Match':matchS})
            scoreDF_k = tempDF.groupby(by='NetworkID', sort=False)['Match'].mean().reset_index()
            scoreDF_k['Template'] = k_template
            scoreDF_k['Sample'] = n
            scoreDFL.append(scoreDF_k)
    if (len(scoreDFL)==0) or all(len(scoreDF_k)==0 for scoreDF_k in scoreDFL):
        return pd.DataFrame(columns=['NetworkID', 'Template']+sorted(sampleL))
    scoreDF = pd.concat(scoreDFL, axis=0, ignore_index=True)
    ##Prepare dummy index and clean dataframe
    scoreDF['RMSmkID'] = scoreDF['NetworkID'].str.cat(scoreDF['Template'], sep=' : ')
    scoreDF = scoreDF.pivot(index='RMSmkID', columns='Sample', values='Match')#Sorted by index during this
    scoreDF = scoreDF.reset_index()#Index becomes row number here
    tempDF = scoreDF['RMSmkID'].str.split(pat=' : ', expand=True)
    tempDF = tempDF.rename(columns={0:'NetworkID', 1:'Template'})
    scoreDF = pd.concat([tempDF, scoreDF], axis=1)#Dropping columns name 'Sample' during this
    scoreDF = scoreDF.drop(columns='RMSmkID')
    return scoreDF

def rank_conservation_index(scoreDF, phenotypeS):
    # This function calculates the rank conservation index (muR) of a phenotype.
    ## Ref. Eddy, J. A. et al. PLoS Comput. Biol. 2010 (Figure 1 at a glance)
    # Requirements:
    ## import pandas as pd
    # Input:
    ## scoreDF: pd.DataFrame obtained from the above rank_matching_score() function
    ## phenotypeS: pd.Series containing phenotypes (k; 1-K) with sample (n; 1-N) indices (i.e., 1-on-1 long-format)
    # Output:
    ## pd.DataFrame containing the mean values of rank matching scores (RMSs) in a phenotype (muR_mkk)
    ## -> row: rank template (T_mk) (indicated by network (m) and template phenotype (k) columns)
    ## -> column: phenotype (k) (with exception of the NetworkID and Template columns)
    # Note:
    ## Rank conservation index (RCI) is used with a broader stance here, but following a strict interpretation,
    ## the term RCI is used for the mean of RMSs only when template phenotype is same with the sample phenotype.
    ## The input scoreDF is not modified.
    ## If an item in phenotype k is 'NetworkID' or 'Template', this code would produce error or unexpected output.
    
    #Calculate the mean values of RMSs in a phenotype (muR_mkk)
    ##Explicit copy: new columns are added to the output, not to a selection of scoreDF
    conservationDF = scoreDF[['NetworkID', 'Template']].copy()
    for k in phenotypeS.unique().tolist():
        sampleL_k = phenotypeS.loc[phenotypeS==k].index.tolist()
        conservationDF[k] = scoreDF[sampleL_k].mean(axis=1)
    return conservationDF

## 1. Prepare dataset and metadata

### 1-1. Analyte data

In [None]:
#Load the preprocessed analyte table and index it by UniProtID
fileDir = './ExportData/'
ipynbName = '230213_LC-M001-proteomics-DIRAC-ver7-2_Preprocessing_'
fileName = 'normalized-data.tsv'
analyteDF = pd.read_csv(''.join([fileDir, ipynbName, fileName]), sep='\t')
analyteDF = analyteDF.set_index('UniProtID')
tempDF = analyteDF#Same object under the scratch name
display(analyteDF)

### 1-2. Module–analyte metadata

In [None]:
#Load the module-analyte table and index it by ModuleID
fileDir = '../210126_LCproteomics-M001-DIRAC-ver6/ExportData/'
ipynbName = '220427_LCproteomics-M001-DIRAC-ver6-2_Preprocessing_'
fileName = 'module-metadata_QuickGO-GOBP-min-n4-cov50.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t')
tempDF = tempDF.set_index('ModuleID')
for text, values in [(' - Unique analytes with module:', tempDF['UniProtID']),
                     (' - Unique modules with analytes:', tempDF.index)]:
    print(text, len(values.unique()))

#moduleS: UniProtID values with ModuleID indices (long-format)
moduleS = tempDF['UniProtID']
display(moduleS)

#Module metadata: keep the module-level columns and drop duplicated rows
tempDF = tempDF[['ModuleName', 'nAnalytes', 'nBackgrounds', 'Coverage']]
moduleDF = tempDF.reset_index()
moduleDF = moduleDF.drop_duplicates(keep='first')
moduleDF = moduleDF.set_index('ModuleID')
display(moduleDF)
display(moduleDF.describe(include='all'))

> –> Add the mapped analytes to module metadata.  

In [None]:
#Import analyte metadata
fileDir = '../210126_LCproteomics-M001-DIRAC-ver6/ExportData/'
ipynbName = '220427_LCproteomics-M001-DIRAC-ver6-2_Preprocessing_'
fileName = 'analyte-metadata_UniProt.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t')
##Left-merge keeps every module-analyte pair of moduleS, in the same order
tempDF = pd.merge(moduleS.reset_index(), tempDF, on='UniProtID', how='left').set_index('ModuleID')
display(tempDF)
print(' - Unique analytes:', len(tempDF['UniProtID'].unique()))
print(' - Unique labels:', len(tempDF['GeneLabel'].unique()))

#Concatenate labels
##One grouped pass per column, instead of re-scanning the whole table for every module
##(str.cat() skips missing values; the row order within each module is kept)
t_start = time.time()
for source, target in [('UniProtID', 'MappedAnalyteIDs'), ('GeneLabel', 'MappedAnalyteGeneLabels')]:
    tempS = tempDF.groupby(level='ModuleID', sort=False)[source].agg(lambda s: s.str.cat(sep=';'))
    #Module without any mapped row gets an empty label
    moduleDF[target] = tempS.reindex(moduleDF.index, fill_value='').to_numpy()
t_elapsed = time.time() - t_start
print('Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#Clean
moduleDF['ModuleType'] = 'Gene Ontology (Biological Process)'
moduleDF['Source'] = 'EMBL-EBI QuickGO API'
moduleDF = moduleDF[['ModuleName', 'ModuleType', 'MappedAnalyteIDs', 'MappedAnalyteGeneLabels',
                     'nAnalytes', 'nBackgrounds', 'Coverage', 'Source']]
moduleDF = moduleDF.sort_index(ascending=True)

#Save
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'module-metadata.tsv'
moduleDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

display(moduleDF)

### 1-3. Sample–mouse metadata

In [None]:
#Load the sample-mouse metadata, keeping only the samples present in the analyte table
fileDir = './ExportData/'
ipynbName = '230213_LC-M001-proteomics-DIRAC-ver7-2_Preprocessing_'
fileName = 'sample-metadata.tsv'
sampleDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t')
isMeasured = sampleDF['SampleID'].isin(analyteDF.columns)
sampleDF = sampleDF.loc[isMeasured].set_index('SampleID')

#Phenotype used for DIRAC is the intervention itself (sex-pooled)
sampleDF['Phenotype'] = sampleDF['Intervention']
tempDF = sampleDF#Same object under the scratch name
display(sampleDF)
display(sampleDF['Phenotype'].value_counts())

## 2. Perform DIRAC with sex-pooled rank consensus

> When applied to the overall dataset, the DIRAC code often stops running due to the memory limit (e.g., transcriptomics).  
> –> Divide moduleS into subsets while considering the number of comparisons, and compute DIRAC in parallel.  

In [None]:
#Divide moduleS
##Modules are taken in ascending order of nAnalytes and pooled into a sublist until the cumulative
##number of analytes exceeds the cutoff; a module exceeding the cutoff by itself forms its own sublist
cutoff = 250#Threshold for the cumulative number of analytes per sublist
tempDF = moduleDF.sort_values(by='nAnalytes', ascending=True)
nwsubL = []
tempL = []#Initialize
count = 0#Initialize
for module, nanalytes in tempDF['nAnalytes'].items():
    if nanalytes>cutoff:
        nwsubL.append([module])
    else:
        tempL.append(module)
        count += nanalytes
        if count>cutoff:
            nwsubL.append(tempL)
            tempL = []#Initialize
            count = 0#Initialize
if len(tempL)>0:#The last one but still count<=cutoff
    nwsubL.append(tempL)
nSub = len(nwsubL)
print('nSublists: ', nSub)
print('nModules per sublist:', [len(sublist) for sublist in nwsubL])

#Check nAnalytes distribution
##Map each module to its subset label, then build the Series at once
tempD = {}
for list_i, sublist in enumerate(nwsubL):
    label = 'Subset '+str(list_i+1).zfill(3)
    for module in moduleS.loc[sublist].index.unique().tolist():
        tempD[module] = label
tempS = pd.Series(tempD, name='Subset', dtype=object)
tempS.index.set_names('ModuleID', inplace=True)
tempDF = pd.merge(moduleDF, tempS, left_index=True, right_index=True, how='left')
tempDF = tempDF.sort_values(by='nAnalytes', ascending=False)
display(tempDF)
##Named aggregations give the same column labels ('sum', 'max', 'median') as the corresponding callables
tempDF = tempDF.groupby('Subset').agg({'nAnalytes':[len, 'sum', 'max', 'median']})
tempDF = tempDF.sort_values(by=('nAnalytes', 'sum'), ascending=False)
print('Subset summary:')
display(tempDF.describe())
print(' -> Check subset which will need high computational cost:')
display(tempDF.loc[tempDF[('nAnalytes', 'max')]>cutoff])

In [None]:
#Number of worker processes given to the multiprocessing Pool below
nprocessors = 4
#Output directory and file-name prefix for the tables saved by parallel_dirac()
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'

#Wrap as a single function
def parallel_dirac(list_i):
    # This wrapper runs the four DIRAC steps on the list_i-th module sublist (0-based position in nwsubL),
    # saves the four resulting tables as .tsv files, and prints the table shapes and elapsed times.
    # It reads the notebook-level objects moduleS, nwsubL, analyteDF, sampleDF, fileDir, and ipynbName.
    subsetLabel = str(list_i+1).zfill(3)#1-based and zero-padded; used in file names and in the summary
    networkS = moduleS.loc[nwsubL[list_i]]
    
    def run_timed(step, *args):
        #Return the output of a step together with its elapsed time in seconds
        t_begin = time.time()
        output = step(*args)
        return output, time.time() - t_begin
    
    #Calculate the pairwise ordering of network genes (i.e., network ranking)
    rankingDF, t_ranking = run_timed(network_ranking, analyteDF, networkS)
    #Generate the rank template (T) presenting the expected network ranking in a phenotype
    templateDF, t_template = run_timed(rank_template, rankingDF, sampleDF['Phenotype'])
    #Calculate the rank matching score (R) of a sample
    rmsDF, t_rms = run_timed(rank_matching_score, rankingDF, templateDF)
    #Calculate the rank conservation index (muR) of a phenotype
    rciDF, t_rci = run_timed(rank_conservation_index, rmsDF, sampleDF['Phenotype'])
    
    #(file-name stem, summary header, table, elapsed time) per step
    resultL = [('NetworkRanking-', ' - Network ranking dataframe:', rankingDF, t_ranking),
               ('RankTemplate-BS-', ' - Rank template dataframe:', templateDF, t_template),
               ('RankMatchingScore-BS-', ' - Rank matching score dataframe:', rmsDF, t_rms),
               ('RankConservationIndex-BS-', ' - Rank conservation index dataframe:', rciDF, t_rci)]
    
    #Save
    for stem, _, resultDF, _ in resultL:
        fileName = stem+subsetLabel+'.tsv'
        resultDF.to_csv(fileDir+ipynbName+fileName, index=False, sep='\t')
    
    #Check results
    print('Subset '+subsetLabel+':',
          len(networkS.index.unique()), 'modules,', len(networkS.unique()), 'analytes')
    for _, header, resultDF, t_step in resultL:
        print(header)
        print('   - DF shape:', resultDF.shape)
        print('   - Elapsed time:', round(t_step//60), 'min', round(t_step%60, 1), 'sec')
    print('')

#Parallel computing
if __name__=='__main__':
    t_start = time.time()
    #The context manager releases the worker processes on exit; map() blocks until every
    #sublist has been processed, so no task is pending at that point
    with Pool(nprocessors) as p:
        p.map(parallel_dirac, range(nSub))
        t_finish = time.time()

In [None]:
#Record the run settings and timing as reference
print(nSub, 'sublists with', nprocessors, 'processors')
for text, timestamp in [(' - Start:', t_start), (' - Finish:', t_finish)]:
    print(text, time.ctime(timestamp))
##Wall-clock time first, then the same multiplied by the number of processors
for text, t_elapsed in [(' - Elapsed time:', (t_finish - t_start)),
                        (' - Total (approximate) elapsed time:', (t_finish - t_start) * nprocessors)]:
    print(text, round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

In [None]:
#Combine the per-subset tables of each metric
tempD1 = {}
for metric, stem in [('RMS', 'RankMatchingScore-BS-'), ('RCI', 'RankConservationIndex-BS-')]:
    tempL = [pd.DataFrame()]#Seeded with an empty dataframe, then one table per subset
    for list_i in range(nSub):
        fileName = stem+str(list_i+1).zfill(3)+'.tsv'
        tempL.append(pd.read_csv(fileDir+ipynbName+fileName, sep='\t'))
    tempD1[metric] = pd.concat(tempL, axis=0, ignore_index=True)

#Clean
tempD2 = {metric:tempDF.rename(columns={'NetworkID':'ModuleID'}) for metric, tempDF in tempD1.items()}
for metric, tempDF in tempD2.items():
    print(metric+' dataframe:', tempDF.shape)
    print(' - Unique modules:', len(tempDF['ModuleID'].unique()))
    print(' - Unique templates:', len(tempDF['Template'].unique()))
    display(tempDF)
    print('')
##Update
rmsDF, rciDF = tempD2['RMS'], tempD2['RCI']

#Save
for tempDF, fileName in [(rmsDF, 'RankMatchingScore-BS-combined.tsv'),
                         (rciDF, 'RankConservationIndex-BS-combined.tsv')]:
    tempDF.to_csv(fileDir+ipynbName+fileName, index=False, sep='\t')

## 3. Rank conservation index: general pattern

### 3-1. Extract RCI (the mean of RMSs under the own phenotype consensus)

In [None]:
#For each phenotype, keep the RCI computed against the template of the same phenotype
phenotypeL = rciDF.drop(columns=['ModuleID', 'Template']).columns.tolist()
tempDF = rciDF.set_index('ModuleID')
rciDF_kk = pd.DataFrame(index=pd.Index(rciDF['ModuleID'].unique(), name='ModuleID'))
for k in phenotypeL:
    isOwnTemplate = tempDF['Template']==k
    tempS = tempDF.loc[isOwnTemplate, k]
    rciDF_kk = rciDF_kk.join(tempS, how='left')
##Sort
tempL = ['Ctrl', 'Aca', '17aE2', 'Rapa']
rciDF_kk = rciDF_kk.loc[:, tempL]
display(rciDF_kk)
display(rciDF_kk.describe())

### 3-2. Mann–Whitney U-test

> Note that the scipy API (scipy.stats.mannwhitneyu) is used, because only the one-sided test seems to be implemented in the current statsmodels API (statsmodels.stats.nonparametric.rank_compare_2indep). Actually, the output objects are the same between the two APIs, in contrast to the case of the t-test (the degrees of freedom are not reported by the scipy API).  

In [None]:
tempDF = rciDF_kk
tempL = ['Ctrl', 'Aca', '17aE2', 'Rapa']
control = 'Ctrl'

#Statistical tests
tempDF1 = pd.DataFrame(columns=['Ustat', 'Pval'])
for contrast in tempL:
    if control!=contrast:
        tempS1 = tempDF[control]
        tempS2 = tempDF[contrast]
        #Two-sided Mann–Whitney U-test
        ustat, pval = stats.mannwhitneyu(tempS2, tempS1,#U-statistic corresponds to the contrast
                                         use_continuity=True, alternative='two-sided', method='auto')
        tempDF1.loc[contrast+'-vs-'+control] = [ustat, pval]
##P-value adjustment by using Benjamini–Hochberg method
tempDF1['AdjPval'] = multi.multipletests(tempDF1['Pval'], alpha=0.05, method='fdr_bh',
                                         is_sorted=False, returnsorted=False)[1]
tempDF1.index.rename('ComparisonLabel', inplace=True)
display(tempDF1)

#Calculate general statistics
tempDF2 = pd.DataFrame(columns=['N', 'RCImedian', 'RCImad'])
for group in tempL:
    tempS = tempDF[group]
    size = len(tempS)
    median = tempS.median()
    #Median absolute deviation multiplied by 1.4826 (the default scaling of the removed
    #scipy.stats.median_absolute_deviation; median_abs_deviation divides by its scale argument)
    #Cf. pd.Series.mad() is not median absolute deviation but mean absolute deviation
    mad = stats.median_abs_deviation(tempS, scale=1.0/1.4826)
    tempDF2.loc[group] = [size, median, mad]
tempDF2.index.rename('GroupLabel', inplace=True)
display(tempDF2)

#Clean
##Reformat while renaming column names
##(each table becomes a single row indexed by the dummy module 'All', with '<label>_<variable>' columns)
tempD1 = {'Comparison':tempDF1, 'Group':tempDF2}
tempD2 = {}
for target in tempD1.keys():
    tempDF3 = tempD1[target]
    tempL1 = tempDF3.index.tolist()#For sorting later
    tempL2 = tempDF3.columns.tolist()#For sorting later
    tempDF3 = tempDF3.reset_index().melt(var_name='Variable', value_name='Value', id_vars=target+'Label')
    tempDF3['Variable'] = tempDF3[target+'Label']+'_'+tempDF3['Variable']
    tempDF3['ModuleID'] = 'All'#Dummy
    tempDF3 = tempDF3.pivot(index='ModuleID', columns='Variable', values='Value')
    tempDF3.columns.name = None#Erase 'Variable'
    tempL3 = [label+'_'+variable for label in tempL1 for variable in tempL2]
    tempDF3 = tempDF3[tempL3]#Sort
    tempD2[target] = tempDF3
##Merge
tempDF3 = pd.merge(tempD2['Group'], tempD2['Comparison'], left_index=True, right_index=True, how='inner')
##Convert data type
for col_n in tempDF3.columns.tolist():
    if re.search('_N$', col_n):
        tempDF3[col_n] = tempDF3[col_n].astype(int)
#Add dummy module name
tempL1 = tempDF3.columns.tolist()#For sorting later
tempDF3['ModuleName'] = 'General pattern'
tempL2 = [col_n for sublist in [['ModuleName'], tempL1] for col_n in sublist]
tempDF3 = tempDF3[tempL2]
display(tempDF3)

#Save
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'inter-group-comparison_RCIdistribution.tsv'
tempDF3.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

statDF = tempDF3

### 3-3. Visualization: boxplot

In [None]:
#Prepare DF and color
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF = rciDF_kk.rename(columns=tempD0)
tempDF = tempDF.reset_index().melt(var_name='Group', value_name='RCI', id_vars='ModuleID')
tempD = {'Control':'tab:blue', 'Acarbose':'tab:red',
         '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Prepare significance labels
##Retrieve statistical significance
module = 'All'
tempS = statDF.loc[module, statDF.columns.str.contains('AdjPval')]
tempS.index = tempS.index.str.replace('_AdjPval', '')
tempS.name = 'AdjPval'
##Clean
tempDF1 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
tempDF1 = tempDF1.rename(columns={0:'Contrast', 1:'Baseline'})
tempDF1 = pd.merge(tempDF1, tempS, left_index=True, right_index=True, how='left')
tempDF1['Contrast'] = tempDF1['Contrast'].map(tempD0)
tempDF1['Baseline'] = tempDF1['Baseline'].map(tempD0)
##Convert p-value to label
tempL = []
for pval in tempDF1['AdjPval'].tolist():
    if pval<0.001:
        tempL.append('***')
    elif pval<0.01:
        tempL.append('**')
    elif pval<0.05:
        tempL.append('*')
    else:
        #Not significant: show the adjusted P-value rounded half up to two decimals
        pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
        tempL.append(r'$P$ = '+str(pval_text))
tempDF1['SignifLabel'] = tempL
display(tempDF1)

#Visualization
ymax = 1.0
ymin = 0.5
yinter = 0.1
ymargin_t = 0.06
ymargin_b = 0.01
aline_ymin = 0.9
aline_ymargin = 0.05
sns.set(style='ticks', font='Arial', context='talk')
plt.figure(figsize=(2, 4))
p = sns.boxplot(data=tempDF, y='RCI', x='Group', order=list(tempD.keys()), palette=tempD, dodge=False,
                showfliers=True, flierprops={'marker':'o', 'markerfacecolor':'gray', 'alpha':0.4},
                showcaps=True, notch=True)
p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
##Add significance labels
##NOTE(review): the indexing below assumes that each box contributes 6 Line2D objects in the order
##written in the next comment; confirm this against the installed seaborn/matplotlib versions
lines = p.axes.get_lines()#Line2D: [[Q1, Q1-1.5IQR], [Q3, Q3+1.5IQR], [Q1, Q1], [Q3, Q3], [Med, Med], [flier]]
lines_unit = 5 + int(True)#showfliers=True
groupL = list(tempD.keys())#Same order as the boxes
for row_i in range(len(tempDF1)):
    #Baseline
    group_0 = tempDF1['Baseline'].iloc[row_i]
    index_0 = groupL.index(group_0)
    whisker_0 = lines[index_0*lines_unit + 1]
    xcoord_0 = whisker_0.get_xdata(orig=False)[1]#Q3+1.5IQR; public accessor of the processed x data
    #Contrast
    group_1 = tempDF1['Contrast'].iloc[row_i]
    index_1 = groupL.index(group_1)
    whisker_1 = lines[index_1*lines_unit + 1]
    xcoord_1 = whisker_1.get_xdata(orig=False)[1]#Q3+1.5IQR; public accessor of the processed x data
    #Standard point of marker
    ##The height is fixed per comparison (stacked upward), not derived from the whiskers
    xcoord = (xcoord_0+xcoord_1)/2
    ycoord = aline_ymin + aline_ymargin*row_i
    label = tempDF1['SignifLabel'].iloc[row_i]
    #Add annotation lines
    aline_offset = yinter/10
    aline_length = yinter/10 + aline_offset
    plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
             [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
             lw=1.5, c='k')
    #Add annotation text
    if label in ['***', '**', '*']:
        text_offset = yinter/25
        p.annotate(label, xy=(xcoord, ycoord+text_offset),
                   horizontalalignment='center', verticalalignment='bottom',
                   fontsize='medium', color='k')
    else:
        text_offset = yinter/5
        p.annotate(label, xy=(xcoord, ycoord+text_offset),
                   horizontalalignment='center', verticalalignment='bottom',
                   fontsize='x-small', color='k')
sns.despine()
plt.xlabel('')
plt.ylabel('Module RCI')
plt.xticks(rotation=70, horizontalalignment='right', verticalalignment='center', rotation_mode='anchor')
##Save
fileDir = './ExportFigures/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'RCI-boxplot.pdf'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
plt.show()

## 4. Rank conservation index: inter-group module comparison

> Test specific hypothesis: control RCI == intervention RCI (i.e., inter-group module comparison).  
> 1. Testing the main effect of intervention on rank matching scores (RMSs) for each module using an ANOVA model  
> 2. Then, performing post-hoc comparisons of RMSs between control vs. each intervention using Welch's t-tests  
>  
> Since RCI is the mean of RMSs under the own phenotype consensus, not RCI but RMS is practically used to obtain the variance around the group consensus. Consistent with the use of the sex-pooled rank consensus, sex is NOT included in the ANOVA model. Note that the dataset was already adjusted with sex and age in this version 7. Although tricky, the P-value adjustment in (1) is performed across modules under a conservative assumption that modules are completely independent. Because the post-hoc comparisons (2) are to address the effect of each intervention within a specific module, the P-values are adjusted across interventions only within the module (not across modules).  

### 4-1. Extract RMS under the own phenotype consensus

In [None]:
#Build rmsDF_kk: for every module, keep each sample's RMS from the row whose
#template phenotype equals the sample's own phenotype
phenotypeL = rciDF.drop(columns=['ModuleID', 'Template']).columns.tolist()
moduleI = pd.Index(rmsDF['ModuleID'].unique(), name='ModuleID')
rmsDF_kk = pd.DataFrame(index=moduleI)
rmsByModuleDF = rmsDF.set_index('ModuleID')
for phenotype in phenotypeL:
    #Samples belonging to this phenotype
    sampleL = sampleDF.loc[sampleDF['Phenotype']==phenotype].index.tolist()
    #Rows computed against this phenotype's consensus, restricted to its own samples
    isOwnTemplate = rmsByModuleDF['Template']==phenotype
    ownDF = rmsByModuleDF.loc[isOwnTemplate, sampleL]
    #Append the sample columns while keeping the module order of rmsDF_kk
    rmsDF_kk = rmsDF_kk.merge(ownDF, left_index=True, right_index=True, how='left')

display(rmsDF_kk)
display(rmsDF_kk.describe())

### 4-2. ANOVA test (RMS ~ Intervention), followed by Welch's t-tests (Intervention)

#### 4-2-1. Perform all statistical tests

In [None]:
#Statistical tests per module on the own-consensus RMS values (rmsDF_kk):
# - ANOVA table from an OLS fit of the formula below (main effect of the intervention)
# - post-hoc two-sided Welch's t-tests of each non-control group vs. the control
#Outputs: statDF1 (ANOVA table) and statDF2 (post-hoc table), both with one row per module.
#NOTE(review): anova_lm, weightstats and multi are not imported in the first cell of this notebook;
#presumably they come from statsmodels via an earlier cell -- confirm.
tempL1 = ['Ctrl', 'Aca', '17aE2', 'Rapa']#Target sample groups to be assessed
control = 'Ctrl'#For post-hoc comparisons
tempDF1 = rmsDF_kk
tempDF2 = sampleDF.loc[sampleDF['Phenotype'].isin(tempL1)]
tempI = moduleDF.index
formula = 'RMS ~ C(Intervention)'
tempL2 = ['C(Intervention)']#For variables of interest in ANOVA

#Statistical tests per module
t_start = time.time()
tempL3 = []#For ANOVA table
tempL4 = []#For post-hoc test table
for module in tempI.tolist():
    #Select the target module
    tempS = tempDF1.loc[module]
    tempS.name = 'RMS'
    #Add metadata while selecting the target sample samples
    #(inner join on the sample index: only samples of the target groups remain)
    tempDF = pd.merge(tempS, tempDF2, left_index=True, right_index=True, how='inner')
    
    #ANOVA
    model = smf.ols(formula, data=tempDF).fit()
    anovaDF = anova_lm(model, typ=2)#ANOVA type doesn't matter in this case
    ##Take the results per variable
    tempDF3 = pd.DataFrame(columns=['DoF', 'Fstat', 'Pval'])
    for variable in tempL2:
        dof1 = int(anovaDF.at[variable, 'df'])#Between-groups
        dof2 = int(anovaDF.at['Residual', 'df'])#Within-groups
        dof = (dof1, dof2)
        fstat = anovaDF.at[variable, 'F']
        pval = anovaDF.at[variable, 'PR(>F)']
        tempDF3.loc[variable] = [dof, fstat, pval]
    tempDF3['AdjPval'] = 1.0#Add dummy column for now
    ##Convert to wide-format
    ##(one Series per module: N followed by '<variable>_<statistic>' entries)
    tempS = pd.Series(len(tempDF), index=['N'], name=module)
    tempL = [tempS]
    for variable in tempDF3.index.tolist():
        tempS = tempDF3.loc[variable]
        tempS.index = variable+'_'+tempS.index
        tempS.name = module
        tempL.append(tempS)
    tempS = pd.concat(tempL, axis=0)
    tempL3.append(tempS)
    
    #Post-hoc tests per control vs. contrast
    tempDF4 = pd.DataFrame(columns=['DoF', 'tStat', 'Pval'])
    for contrast in tempL1:
        if control!=contrast:
            tempS1 = tempDF['RMS'].loc[tempDF['Phenotype']==control]
            tempS2 = tempDF['RMS'].loc[tempDF['Phenotype']==contrast]
            #Two-sided Welch's t-test
            tstat, pval, dof = weightstats.ttest_ind(tempS2, tempS1,#t-statistic reflects direction from the baseline
                                                     alternative='two-sided', usevar='unequal')
            tempDF4.loc[contrast+'-vs-'+control] = [dof, tstat, pval]
    ##P-value adjustment across all comparisons per module by using Benjamini–Hochberg method
    tempDF4['AdjPval'] = multi.multipletests(tempDF4['Pval'], alpha=0.05, method='fdr_bh',
                                             is_sorted=False, returnsorted=False)[1]
    ##Convert to wide-format
    ##(entries are named '<contrast>-vs-<control>_<statistic>')
    tempL = []
    for comparison in tempDF4.index.tolist():
        tempS = tempDF4.loc[comparison]
        tempS.index = comparison+'_'+tempS.index
        tempS.name = module
        tempL.append(tempS)
    tempS = pd.concat(tempL, axis=0)
    tempL4.append(tempS)
t_elapsed = time.time() - t_start
print('Elapsed time for', len(tempI), 'ANOVA and',
      (len(tempL1)-1)*len(tempI), 'post-hoc tests (',
      len(tempL1)-1, 'comparisons x', len(tempI), 'modules):',
      round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#Generate ANOVA table
tempDF3 = pd.concat(tempL3, axis=1).T
tempDF3.index.name = tempI.name
##P-value adjustment across all tests by using Benjamini–Hochberg method
##(i.e., across modules, in contrast to the per-module adjustment of the post-hoc tests)
for variable in tempL2:
    #Overwrite the dummy values
    tempDF3[variable+'_AdjPval'] = multi.multipletests(tempDF3[variable+'_Pval'], alpha=0.05, method='fdr_bh',
                                                       is_sorted=False, returnsorted=False)[1]
##Convert back dtypes (due to the forced change during wide-format)
for col_n in tempDF3.columns.tolist():
    if 'N'==col_n:
        tempDF3[col_n] = tempDF3[col_n].astype(int)
    elif 'DoF' in col_n:
        tempDF3[col_n] = tempDF3[col_n].astype(str)
    else:
        tempDF3[col_n] = tempDF3[col_n].astype(float)
##Rename columns (because only one variable in this case)
##(everything up to the last underscore is replaced by the 'ANOVA_' prefix)
tempDF3.columns = 'ANOVA_'+tempDF3.columns.str.replace('^.*_', '', regex=True)
display(tempDF3)

#Generate post-hoc test table
tempDF4 = pd.concat(tempL4, axis=1).T
tempDF4.index.name = tempI.name
display(tempDF4)

#Keep both tables under stable names for the following cells
statDF1 = tempDF3
statDF2 = tempDF4

In [None]:
#Summarize RMS per intervention group (N, mean, SEM), then combine the summary with the
#ANOVA and post-hoc tables into one module-level table (statDF) and export it
groupL = ['Ctrl', 'Aca', '17aE2', 'Rapa']#Target sample groups to be summarized
targetSampleDF = sampleDF.loc[sampleDF['Phenotype'].isin(groupL)]

#Calculate general statistics per intervention group
summaryL = []
for phenotype in groupL:
    #Select the target samples
    sampleL = targetSampleDF.index[targetSampleDF['Phenotype']==phenotype].tolist()
    groupDF = rmsDF_kk[sampleL]
    #N counts the non-missing RMS values; SEM uses the sample standard deviation (ddof=1)
    summaryL.append(pd.DataFrame({phenotype+'_N':groupDF.notnull().sum(axis=1),
                                  phenotype+'_RMSmean':groupDF.mean(axis=1),
                                  phenotype+'_RMSsem':groupDF.sem(axis=1, ddof=1)}))
summaryDF = pd.concat(summaryL, axis=1)
display(summaryDF)

#Merge all the tables
print('General statistics table:', summaryDF.shape)
print('ANOVA table:', statDF1.shape)
print('Post-hoc test table:', statDF2.shape)
mergedDF = pd.concat([moduleDF['ModuleName'], summaryDF, statDF1, statDF2], axis=1)

#Sort (smallest nominal ANOVA P-value first)
mergedDF = mergedDF.sort_values(by='ANOVA_Pval', ascending=True)
display(mergedDF)

#Save
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'inter-group-comparison_RCI.tsv'
mergedDF.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

statDF = mergedDF

#### 4-2-2. Changed modules (ANOVA)

In [None]:
#Variables in the model whose main effect is reported
#-> Only the intervention was modelled, so there is a single entry.
variableL = ['ANOVA']

#List the modules with a significant main effect (adjusted P < 0.05)
for variable in variableL:
    adjCol = variable+'_AdjPval'
    signifDF = statDF.loc[statDF[adjCol]<0.05].sort_values(by=adjCol, ascending=True)
    #Keep the module name, the group means and the statistics of this variable
    meanColL = signifDF.columns[signifDF.columns.str.contains('_RMSmean')].tolist()
    statColL = signifDF.columns[signifDF.columns.str.contains('^'+variable+'_')].tolist()
    signifDF = signifDF[['ModuleName', *meanColL, *statColL]]
    print(variable+' (adjusted P < 0.05):', len(signifDF))
    display(signifDF)

#### 4-2-3. Changed modules by each intervention (Welch's t-test)

In [None]:
#Post-hoc results for the modules with a significant ANOVA main effect (adjusted P < 0.05).
#Outputs: pvalDF (post-hoc adjusted P-values) and diffDF (post-hoc t-statistics); both carry
#ModuleName and the ANOVA adjusted P-value, and use bare '<contrast>-vs-<control>' column names.

#Extract only the changed modules
variable = 'ANOVA'
tempDF = statDF.loc[statDF[variable+'_AdjPval']<0.05]
tempDF = tempDF.sort_values(by=variable+'_AdjPval', ascending=True)
print(variable+' (adjusted P < 0.05):', len(tempDF))

#Take adjusted P-value
tempDF1 = tempDF.loc[:, tempDF.columns.str.contains(r'-vs-.*_AdjPval$')]
#regex=True is required: '$' is an anchor here, and pandas >= 2.0 treats the pattern
#literally by default, which would leave the suffix in place
tempDF1.columns = tempDF1.columns.str.replace(r'_AdjPval$', '', regex=True)
tempDF1 = pd.merge(tempDF[['ModuleName', variable+'_AdjPval']], tempDF1,
                   left_index=True, right_index=True, how='right')
print('Adjusted P-value:')
display(tempDF1)
display(tempDF1.describe())

#Take t-statistic (for direction)
tempDF2 = tempDF.loc[:, tempDF.columns.str.contains(r'-vs-.*_tStat$')]
tempDF2.columns = tempDF2.columns.str.replace(r'_tStat$', '', regex=True)
tempDF2 = pd.merge(tempDF[['ModuleName', variable+'_AdjPval']], tempDF2,
                   left_index=True, right_index=True, how='right')
print('Changed direction (t-statistic):')
display(tempDF2)
display(tempDF2.describe())

pvalDF = tempDF1
diffDF = tempDF2

> Check the changed modules (nominal P-value < 0.05) as reference.  

In [None]:
#Reference set: post-hoc results for the modules with a nominally significant ANOVA main effect
#(nominal P < 0.05). Outputs: pvalDF_ref (post-hoc adjusted P-values) and diffDF_ref (t-statistics),
#with the same layout as the tables built from the adjusted ANOVA P-value.

#Extract only the changed modules
variable = 'ANOVA'
tempDF = statDF.loc[statDF[variable+'_Pval']<0.05]
tempDF = tempDF.sort_values(by=variable+'_Pval', ascending=True)
print(variable+' (nominal P < 0.05):', len(tempDF))

#Take adjusted P-value
tempDF1 = tempDF.loc[:, tempDF.columns.str.contains(r'-vs-.*_AdjPval$')]
#regex=True is required: '$' is an anchor here, and pandas >= 2.0 treats the pattern
#literally by default, which would leave the suffix in place
tempDF1.columns = tempDF1.columns.str.replace(r'_AdjPval$', '', regex=True)
tempDF1 = pd.merge(tempDF[['ModuleName', variable+'_AdjPval']], tempDF1,
                   left_index=True, right_index=True, how='right')
print('Adjusted P-value:')
display(tempDF1)
display(tempDF1.describe())

#Take t-statistic (for direction)
tempDF2 = tempDF.loc[:, tempDF.columns.str.contains(r'-vs-.*_tStat$')]
tempDF2.columns = tempDF2.columns.str.replace(r'_tStat$', '', regex=True)
tempDF2 = pd.merge(tempDF[['ModuleName', variable+'_AdjPval']], tempDF2,
                   left_index=True, right_index=True, how='right')
#The t-statistic table is intentionally not displayed for this reference set
#print('Changed direction (t-statistic):')
#display(tempDF2)
#display(tempDF2.describe())

pvalDF_ref = tempDF1
diffDF_ref = tempDF2

### 4-3. Visualization: clustermap

In [None]:
#Draft clustermap of module RCI (groups x modules) with column color bars that mark
#the tightened modules per intervention. Two color levels are used per intervention:
#tempD1 for modules also present in pvalDF, tempD2 for modules only present in pvalDF_ref.
#Figure and table exports are commented out in this draft cell.
#NOTE(review): re and mpatches are not imported in the first cell of this notebook;
#presumably they are imported in an earlier cell -- confirm.
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF1 = rciDF_kk.rename(columns=tempD0)
regulation = 'Tightened'
tempD1 = {'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}
tempD2 = {'Acarbose':plt.get_cmap('tab20')(7),
          '17'+r'$\alpha$'+'-Estradiol':plt.get_cmap('tab20')(5),
          'Rapamycin':plt.get_cmap('tab20')(9)}

#Prepare color labels for tightened module set
#(pvalDF_ref/diffDF_ref: post-hoc results of the modules with nominal ANOVA P < 0.05)
tempDF2 = pvalDF_ref.loc[:, pvalDF_ref.columns.str.contains('-vs-')]#Nominal P-value
for col_n in tempDF2.columns.tolist():
    tempS1 = pvalDF_ref[col_n]
    tempS2 = diffDF_ref[col_n]
    #Keep the modules with post-hoc adjusted P < 0.05 (and the requested sign of the t-statistic)
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    #Column name '<contrast>-vs-<control>' -> display label of the contrast group
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    tempL = []
    count = 0#just for checking
    for module in tempDF2.index.tolist():
        if module in tempS2.index.tolist():
            if module in pvalDF.index.tolist():#Adjusted P-value
                tempL.append(tempD1[label])
                count += 1#Just for checking
            else:#Nominal P-value
                tempL.append(tempD2[label])
        else:
            tempL.append('white')
    tempDF2[label] = tempL
    print(regulation+' module in '+col_n)
    print(' -> adjusted P < 0.05:', count)
    print(' -> nominal P < 0.05:', len(tempS2))
#Keep only the color columns, in the order of tempD1
tempDF2 = tempDF2[list(tempD1.keys())]

#Clustermap
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF1.T, method='ward', metric='euclidean', cmap='afmhot',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF2, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.025, 0.2), colors_ratio=(0.01, 0.1),
                    cbar_pos=(0.05, -0.15, 0.3, 0.075), cbar_kws={'orientation': 'horizontal'},
                    figsize=(12, 3), **{'vmin':0.5, 'vmax':1})
cm.cax.set_title('Module RCI', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
#The axes positions are read and set back with the same values
#(presumably kept as a hook for manual layout adjustment)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('GOBP module')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
tempL = []
for group in tempD1.keys():
    tempL.append(mpatches.Patch(color=tempD1[group],
                                label='by '+group+' (adjusted '+r'$P$'+' < 0.05)'))
for group in tempD2.keys():
    tempL.append(mpatches.Patch(color=tempD2[group], label='(nominal '+r'$P$'+' < 0.05)'))
legend1 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2, ncol=2,
                     title='Tightened module (vs. Control)', title_fontsize='medium',
                     bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=3.5, frameon=False)
plt.gca().add_artist(legend1)
##Save
fileDir = './ExportFigures/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'RCI-clustermap.pdf'
#plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
plt.show()

#Save label order
#(modules in the clustered column order, with their RCI values and post-hoc adjusted P-values)
tempDF = moduleDF.loc[rciDF_kk.index[cm.dendrogram_col.reordered_ind]]
tempDF1 = rciDF_kk.copy()
tempDF = pd.merge(tempDF['ModuleName'], tempDF1, left_index=True, right_index=True, how='left')
tempDF1 = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
tempDF = pd.merge(tempDF, tempDF1, left_index=True, right_index=True, how='left')
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'RCI-clustermap-xticks.tsv'
#tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

> –> Simplify.  

In [None]:
#Simplified clustermap of module RCI (groups x modules): the column color bars mark only
#the tightened modules taken from pvalDF/diffDF, one color per intervention (tempD1).
#This cell exports the figure (.pdf) and the clustered module order (.tsv).
#NOTE(review): re and mpatches are not imported in the first cell of this notebook;
#presumably they are imported in an earlier cell -- confirm.
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF1 = rciDF_kk.rename(columns=tempD0)
regulation = 'Tightened'
tempD1 = {'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Prepare color labels for tightened module set
tempDF2 = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
for col_n in tempDF2.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    #Keep the modules with post-hoc adjusted P < 0.05 (and the requested sign of the t-statistic)
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    #Column name '<contrast>-vs-<control>' -> display label of the contrast group
    label = tempD0[re.sub('-vs-.*', '', col_n)]
    tempL = []
    count = 0#just for checking
    for module in tempDF2.index.tolist():
        if module in tempS2.index.tolist():
            tempL.append(tempD1[label])
            count += 1#Just for checking
        else:
            tempL.append('white')
    tempDF2[label] = tempL
    print(regulation+' module in '+col_n)
    print(' -> adjusted P < 0.05:', count)
#Keep only the color columns, in the order of tempD1
tempDF2 = tempDF2[list(tempD1.keys())]

#Clustermap
sns.set(style='ticks', font='Arial', context='talk')
cm = sns.clustermap(tempDF1.T, method='ward', metric='euclidean', cmap='afmhot',
                    row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None,
                    row_colors=None, col_colors=tempDF2, xticklabels=False, yticklabels=True,
                    dendrogram_ratio=(0.025, 0.2), colors_ratio=(0.01, 0.1),
                    cbar_pos=(0.175, -0.15, 0.3, 0.075), cbar_kws={'orientation': 'horizontal'},
                    figsize=(12, 3), **{'vmin':0.5, 'vmax':1})
cm.cax.set_title('Module RCI', size='medium',
                 verticalalignment='bottom', horizontalalignment='center')
cm.cax.tick_params(labelsize='small')
bottom, top = cm.ax_heatmap.get_ylim()
#cm.ax_heatmap.set_ylim(bottom + 0.5, top - 0.5)##To avoid half cut of first and last rows
#The axes positions are read and set back with the same values
#(presumably kept as a hook for manual layout adjustment)
hm = cm.ax_heatmap.get_position()
rd = cm.ax_row_dendrogram.get_position()
cd = cm.ax_col_dendrogram.get_position()
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width, hm.height])
cm.ax_row_dendrogram.set_position([rd.x0, rd.y0, rd.width, rd.height])
cm.ax_col_dendrogram.set_position([cd.x0, cd.y0, cd.width, cd.height])
cm.ax_heatmap.set_xlabel('GOBP module')
cm.ax_heatmap.set_ylabel('')
##row/column color bar legend (axis is same with cm.cax!)
tempL = []
for group in tempD1.keys():
    tempL.append(mpatches.Patch(color=tempD1[group], label='by '+group))
legend1 = plt.legend(handles=tempL, fontsize='small', labelspacing=0.2,
                     title='Tightened module (vs. Control)', title_fontsize='medium',
                     bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=3.5, frameon=False)
plt.gca().add_artist(legend1)
##Save
fileDir = './ExportFigures/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'RCI-clustermap.pdf'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
plt.show()

#Save label order
#(modules in the clustered column order, with their RCI values and post-hoc adjusted P-values)
tempDF = moduleDF.loc[rciDF_kk.index[cm.dendrogram_col.reordered_ind]]
tempDF1 = rciDF_kk.copy()
tempDF = pd.merge(tempDF['ModuleName'], tempDF1, left_index=True, right_index=True, how='left')
tempDF1 = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
tempDF = pd.merge(tempDF, tempDF1, left_index=True, right_index=True, how='left')
tempDF = tempDF.reset_index()
tempDF.index.name = 'Xcoord'
display(tempDF)
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
fileName = 'RCI-clustermap-xticks.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, index=True, sep='\t')

### 4-4. Visualization: venn diagram

In [None]:
#Venn diagrams of the modules changed/tightened/loosened by each intervention (vs. control),
#based on pvalDF (post-hoc adjusted P-values) and diffDF (t-statistics).
#Only the 'Tightened' and 'Loosened' diagrams are saved as .pdf.
#NOTE(review): venn and re are not imported in the first cell of this notebook;
#presumably they are imported in an earlier cell -- confirm.
#Prepare label and color
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempD1 = {'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualization per direction
for regulation in ['Changed', 'Tightened', 'Loosened']:
    #Prepare module sets
    tempD2 = {}
    tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    for col_n in tempDF.columns.tolist():
        tempS1 = pvalDF[col_n]
        tempS2 = diffDF[col_n]
        #Adjusted P < 0.05, optionally restricted by the sign of the t-statistic
        if regulation=='Changed':
            tempS2 = tempS2.loc[(tempS1<0.05)]
        elif regulation=='Tightened':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
        elif regulation=='Loosened':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
        #Column name '<contrast>-vs-<control>' -> display label of the contrast group
        label = tempD0[re.sub('-vs-.*$', '', col_n)]
        tempD2[label] = set(tempS2.index.tolist())
    ##Sort to make consistent order in manual legend generation
    tempD = {}
    for label in tempD1.keys():
        tempD[label] = tempD2[label]
    
    #Not significant in all contrasts
    #(rows of pvalDF that belong to none of the module sets)
    tempDF = pvalDF.copy()
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(regulation+' modules (vs. Control):')
    print(' -> Not significant in all contrasts:', count)
    
    #Skip the followings if no significant module
    if count==len(pvalDF):
        continue
    
    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(4, 4))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.05, 0.975))#Otherwise, weird space...
    ##Add legend annotation
    ##(manually chosen text positions/alignments, one entry per set in the order of tempD1)
    x_coord = [0.1, 0.9, 0.8]
    y_coord = [0.8, 0.8, 0.25]
    h_align = ['right', 'left', 'left']
    v_align = ['bottom', 'bottom', 'top']
    for i in range(len(tempD1)):
        key = list(tempD1.keys())[i]
        total = f'{len(tempD[key]):,}'
        ax.text(x_coord[i], y_coord[i], key+'\n('+total+' modules)',
                fontsize='small', multialignment='center',
                horizontalalignment=h_align[i], verticalalignment=v_align[i],
                bbox={'boxstyle':'round', 'facecolor':tempD1[key], 'pad':0.2, 'alpha':0.5})
    title = regulation+' modules (vs. Control)'
    ax.set_title(title, fontsize='medium')
    ##Save
    ##(the 'Changed' diagram is shown but not exported)
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
        fileName = 'RCI-venn-'+regulation.lower()+'.pdf'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()

In [None]:
#Export module list in each subset in the venn diagram
#-> One .xlsx workbook per direction containing:
#   - 'README': the comparison at each position of the sheet names (VennOrder) and its set size
#   - '(1,NA,NA)'-style sheets: the overall module set of one comparison
#   - '(1,0,1)'-style sheets: modules in every comparison marked 1 and in no comparison marked 0
#The workbook is written through a single ExcelWriter, instead of being reopened in append
#mode (i.e., loaded and saved again) for every sheet.
from itertools import product#Local import: only combinations is imported at the top of the notebook

for regulation in ['Tightened', 'Loosened']:
    #Prepare module sets (the key order defines the positions in the sheet names)
    tempD = {}
    tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    for col_n in tempDF.columns.tolist():
        tempS1 = pvalDF[col_n]
        tempS2 = diffDF[col_n]
        if regulation=='Changed':
            tempS2 = tempS2.loc[(tempS1<0.05)]
        elif regulation=='Tightened':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
        elif regulation=='Loosened':
            tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
        tempD[col_n] = set(tempS2.index.tolist())
    keyL = list(tempD.keys())
    moduleSetL = list(tempD.values())
    
    #Not significant in all contrasts
    tempDF = pvalDF.copy()
    for moduleS in moduleSetL:
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(regulation+' modules (vs. Control):')
    print(' -> Not significant in all contrasts:', count)
    
    #Skip the followings if no significant module
    if count==len(pvalDF):
        continue
    
    #Prepare the README table
    tempDF = pd.DataFrame({'Group':keyL, 'nModules':[len(moduleS) for moduleS in moduleSetL]})
    tempDF = tempDF.reset_index().rename(columns={'index':'VennOrder'})
    tempDF['VennOrder'] = tempDF['VennOrder'] + 1
    fileDir = './ExportData/'
    ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    fileName = 'RCI-venn-'+regulation.lower()+'.xlsx'
    
    #All sheets go through one writer; the file is saved when the with-block exits
    with pd.ExcelWriter(fileDir+ipynbName+fileName, engine='openpyxl') as writer:
        tempDF.to_excel(writer, sheet_name='README', header=True, index=False)
        display(tempDF)#Check
        
        t_start = time.time()
        #Extract overall set
        for key_i, key in enumerate(keyL):
            tempDF = statDF.loc[statDF.index.isin(tempD[key])]
            ##Prepare sheet name ('1' at the position of this comparison, 'NA' elsewhere)
            tempL1 = ['NA' for i in range(len(keyL))]
            tempL1[key_i] = '1'
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
        
        #Extract subset
        #(every in/out pattern over the comparisons, from all '1' to all '0';
        # the all-'0' pattern holds the modules of pvalDF that belong to no set)
        for binaryL in product(['1', '0'], repeat=len(keyL)):
            #Positive module set: intersection of the sets marked '1' (all modules if none)
            tempL3 = [moduleS for moduleS, binary in zip(moduleSetL, binaryL) if binary=='1']
            tempS1 = set(pvalDF.index.tolist()).intersection(*tempL3)
            #Negative module set: union of the sets marked '0'
            tempL4 = [moduleS for moduleS, binary in zip(moduleSetL, binaryL) if binary=='0']
            tempS2 = set().union(*tempL4)
            #Extract subset
            tempS = tempS1 - tempS2
            tempDF = statDF.loc[statDF.index.isin(tempS)]
            ##Prepare sheet name
            setName = '('+','.join(binaryL)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
    
    #Measured after the with-block so that saving the workbook is included
    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

### 4-5. Visualization: pointplot

> More modules are checked later (in a separate notebook), and so representative modules are checked for now.  

#### 4-5-1. Modules tightened by all interventions

In [None]:
#Point/strip plots of sample RMS (own-group consensus) for representative modules that are
#tightened by all interventions: significant (adjusted P < 0.05) with a positive t-statistic
#in every comparison of posL.
#NOTE(review): Decimal and ROUND_HALF_UP are not imported in the first cell of this notebook;
#presumably they are imported in an earlier cell -- confirm.

#Prepare the target module set
variable = 'ANOVA'#Main-effect test used for ranking; set here instead of relying on a value left by an earlier cell
posL = ['Aca-vs-Ctrl', '17aE2-vs-Ctrl', 'Rapa-vs-Ctrl']
negL = ['']
regulation = 'Tightened'
tempS = pd.Series(np.repeat(True, len(pvalDF)), index=pvalDF.index)#Initialize
tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
for col_n in tempDF.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    if col_n in posL:
        #Required: significant in the requested direction
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        #Excluded: accept only non-significance or significance in the opposite direction
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #Comparison in neither list: no constraint beyond having a P-value
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' modules with significance in', posL, 'but not in', negL)

#Select representatives
topX = np.min([30, len(tempL)])
topX_plot = np.min([10, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per module x sample)
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualize each representative
for rank_i, module in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary (mean +/- 1.96 x SEM as the normal-approximation 95% interval)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempS1 = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS1
    tempDF2['0.975'] = tempDF2['mean'] + tempS1
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean ('<contrast>-vs-<baseline>' -> two columns with display labels)
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label (asterisks if significant; otherwise the value rounded half-up to 2 digits)
    tempL = []
    for pval in tempDF2['AdjPval']:
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.4#Head room above ymax for the significance brackets
    ymargin_b = 0.05
    aline_ymin = 1.0#Base height of the first bracket
    aline_ymargin = 0.125#Vertical step between consecutive brackets
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels (one bracket per comparison, stacked upwards)
    groupL = list(tempD1.keys())
    for row_i, (group_0, group_1, label) in enumerate(zip(tempDF2['Baseline'], tempDF2['Contrast'],
                                                         tempDF2['SignifLabel'])):
        #Baseline / contrast positions on the categorical x-axis
        xcoord_0 = groupL.index(group_0)
        xcoord_1 = groupL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        #Add annotation lines
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation (title with a filled band behind it, drawn above the axes)
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-2. Modules tightened specifically by Aca

In [None]:
#Cell overview: select the modules that are significantly 'Tightened' in the posL contrast but
#not in the negL contrasts, list the top-ranked ones, and plot the per-sample RMS of each
#representative module.
#NOTE(review): pvalDF, diffDF, statDF, moduleDF, rmsDF_kk, sampleDF and variable come from
#earlier cells that are not visible in this section.
#Prepare the target module set
posL = ['Aca-vs-Ctrl']
negL = ['17aE2-vs-Ctrl', 'Rapa-vs-Ctrl']
regulation = 'Tightened'
tempS = pd.Series(np.repeat(True, len(pvalDF)), index=pvalDF.index)#Initialize
tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
##Build one boolean condition per contrast column and combine them with AND
for col_n in tempDF.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]#Its sign gives the direction (>0 is treated as tightened, <0 as loosened)
    if col_n in posL:
        #Target contrast: must be significant in the requested direction
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        #Excluded contrast: accepted if non-significant...
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        #(...or if significant in the direction opposite to the requested one)
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #Any other contrast: no constraint beyond a non-negative (i.e., non-missing) P-value
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()#Module IDs satisfying all the conditions
print(len(tempL), regulation.lower()+' modules with significance in', posL, 'but not in', negL)

#Select representatives
##topX rows are displayed as a table; the first topX_plot modules are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([10, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]#Columns whose name ends with 'Pval'
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, SampleID) pair with its RMS value, plus the sample phenotype
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: phenotype code -> display label; tempD1: display label -> color
##(the key order of tempD1 is also used as the x-axis order and for the bracket positions)
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualize each representative
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per-group count, mean, and SD with a normal-approximation 95% interval (mean +/- 1.96*SD/sqrt(n))
    ##NOTE(review): this table is computed separately from the interval drawn by sns.pointplot below (ci=95)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'X-vs-Y' column name into Contrast (X) and Baseline (Y), then map both to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###<0.001: '***', <0.01: '**', <0.05: '*', otherwise 'P = x.xx' (rounded half up to 2 decimals)
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in the first cell of the notebook;
    ###presumably they are imported in an earlier cell -- confirm
    tempL = []#NOTE: overwrites the module list above, which is not used anymore at this point
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##Layout constants (y-axis data units)
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2#Tick interval
    ymargin_t = 0.4#Extra room above ymax, where the significance brackets are drawn
    ymargin_b = 0.05
    aline_ymin = 1.0#Base height of the first bracket
    aline_ymargin = 0.125#Vertical step between stacked brackets
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    ##Group mean with 95% CI (pointplot), overlaid with the individual samples (stripplot)
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    ###ymax + yinter/10 makes np.arange include ymax itself as the last tick
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels
    ###One bracket per contrast row, stacked upward by aline_ymargin per row
    for row_i in range(len(tempDF2)):
        #Baseline
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)#Position in the plotting order = x coordinate
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket shape: short vertical tick at each group joined by a horizontal line
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks and 'P = ...' text use different vertical offsets and font sizes
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    yoff = 0.01#NOTE(review): assigned but not used in this cell
    ###Filled rectangle in axes coordinates, starting at the top edge of the axes (y=1) and drawn
    ###unclipped; apparently serves as a background band for the axes title
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    ###The module ID is used as the figure-level title
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-3. Modules tightened specifically by 17aE2

In [None]:
#Cell overview: select the modules that are significantly 'Tightened' in the posL contrast but
#not in the negL contrasts, list the top-ranked ones, and plot the per-sample RMS of each
#representative module.
#NOTE(review): pvalDF, diffDF, statDF, moduleDF, rmsDF_kk, sampleDF and variable come from
#earlier cells that are not visible in this section.
#Prepare the target module set
posL = ['17aE2-vs-Ctrl']
negL = ['Aca-vs-Ctrl', 'Rapa-vs-Ctrl']
regulation = 'Tightened'
tempS = pd.Series(np.repeat(True, len(pvalDF)), index=pvalDF.index)#Initialize
tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
##Build one boolean condition per contrast column and combine them with AND
for col_n in tempDF.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]#Its sign gives the direction (>0 is treated as tightened, <0 as loosened)
    if col_n in posL:
        #Target contrast: must be significant in the requested direction
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        #Excluded contrast: accepted if non-significant...
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        #(...or if significant in the direction opposite to the requested one)
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #Any other contrast: no constraint beyond a non-negative (i.e., non-missing) P-value
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()#Module IDs satisfying all the conditions
print(len(tempL), regulation.lower()+' modules with significance in', posL, 'but not in', negL)

#Select representatives
##topX rows are displayed as a table; the first topX_plot modules are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([10, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]#Columns whose name ends with 'Pval'
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, SampleID) pair with its RMS value, plus the sample phenotype
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: phenotype code -> display label; tempD1: display label -> color
##(the key order of tempD1 is also used as the x-axis order and for the bracket positions)
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualize each representative
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per-group count, mean, and SD with a normal-approximation 95% interval (mean +/- 1.96*SD/sqrt(n))
    ##NOTE(review): this table is computed separately from the interval drawn by sns.pointplot below (ci=95)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'X-vs-Y' column name into Contrast (X) and Baseline (Y), then map both to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###<0.001: '***', <0.01: '**', <0.05: '*', otherwise 'P = x.xx' (rounded half up to 2 decimals)
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in the first cell of the notebook;
    ###presumably they are imported in an earlier cell -- confirm
    tempL = []#NOTE: overwrites the module list above, which is not used anymore at this point
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##Layout constants (y-axis data units)
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2#Tick interval
    ymargin_t = 0.4#Extra room above ymax, where the significance brackets are drawn
    ymargin_b = 0.05
    aline_ymin = 1.0#Base height of the first bracket
    aline_ymargin = 0.125#Vertical step between stacked brackets
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    ##Group mean with 95% CI (pointplot), overlaid with the individual samples (stripplot)
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    ###ymax + yinter/10 makes np.arange include ymax itself as the last tick
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels
    ###One bracket per contrast row, stacked upward by aline_ymargin per row
    for row_i in range(len(tempDF2)):
        #Baseline
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)#Position in the plotting order = x coordinate
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket shape: short vertical tick at each group joined by a horizontal line
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks and 'P = ...' text use different vertical offsets and font sizes
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    yoff = 0.01#NOTE(review): assigned but not used in this cell
    ###Filled rectangle in axes coordinates, starting at the top edge of the axes (y=1) and drawn
    ###unclipped; apparently serves as a background band for the axes title
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    ###The module ID is used as the figure-level title
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-4. Modules tightened specifically by Rapa

In [None]:
#Cell overview: select the modules that are significantly 'Tightened' in the posL contrast but
#not in the negL contrasts, list the top-ranked ones, and plot the per-sample RMS of each
#representative module.
#NOTE(review): pvalDF, diffDF, statDF, moduleDF, rmsDF_kk, sampleDF and variable come from
#earlier cells that are not visible in this section.
#Prepare the target module set
posL = ['Rapa-vs-Ctrl']
negL = ['Aca-vs-Ctrl', '17aE2-vs-Ctrl']
regulation = 'Tightened'
tempS = pd.Series(np.repeat(True, len(pvalDF)), index=pvalDF.index)#Initialize
tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
##Build one boolean condition per contrast column and combine them with AND
for col_n in tempDF.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]#Its sign gives the direction (>0 is treated as tightened, <0 as loosened)
    if col_n in posL:
        #Target contrast: must be significant in the requested direction
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        #Excluded contrast: accepted if non-significant...
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        #(...or if significant in the direction opposite to the requested one)
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #Any other contrast: no constraint beyond a non-negative (i.e., non-missing) P-value
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()#Module IDs satisfying all the conditions
print(len(tempL), regulation.lower()+' modules with significance in', posL, 'but not in', negL)

#Select representatives
##topX rows are displayed as a table; the first topX_plot modules are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([10, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]#Columns whose name ends with 'Pval'
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, SampleID) pair with its RMS value, plus the sample phenotype
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: phenotype code -> display label; tempD1: display label -> color
##(the key order of tempD1 is also used as the x-axis order and for the bracket positions)
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualize each representative
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per-group count, mean, and SD with a normal-approximation 95% interval (mean +/- 1.96*SD/sqrt(n))
    ##NOTE(review): this table is computed separately from the interval drawn by sns.pointplot below (ci=95)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'X-vs-Y' column name into Contrast (X) and Baseline (Y), then map both to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###<0.001: '***', <0.01: '**', <0.05: '*', otherwise 'P = x.xx' (rounded half up to 2 decimals)
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in the first cell of the notebook;
    ###presumably they are imported in an earlier cell -- confirm
    tempL = []#NOTE: overwrites the module list above, which is not used anymore at this point
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##Layout constants (y-axis data units)
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2#Tick interval
    ymargin_t = 0.4#Extra room above ymax, where the significance brackets are drawn
    ymargin_b = 0.05
    aline_ymin = 1.0#Base height of the first bracket
    aline_ymargin = 0.125#Vertical step between stacked brackets
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    ##Group mean with 95% CI (pointplot), overlaid with the individual samples (stripplot)
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    ###ymax + yinter/10 makes np.arange include ymax itself as the last tick
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels
    ###One bracket per contrast row, stacked upward by aline_ymargin per row
    for row_i in range(len(tempDF2)):
        #Baseline
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)#Position in the plotting order = x coordinate
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket shape: short vertical tick at each group joined by a horizontal line
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks and 'P = ...' text use different vertical offsets and font sizes
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    yoff = 0.01#NOTE(review): assigned but not used in this cell
    ###Filled rectangle in axes coordinates, starting at the top edge of the axes (y=1) and drawn
    ###unclipped; apparently serves as a background band for the axes title
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    ###The module ID is used as the figure-level title
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-5. Top changed modules by Aca

In [None]:
#Cell overview: take the modules with adjusted P-value <0.05 in the target contrast, list the
#top-ranked ones (sorted by the significance in that contrast), and plot the per-sample RMS of
#each representative module.
#NOTE(review): pvalDF, statDF, moduleDF, rmsDF_kk and sampleDF come from earlier cells that are
#not visible in this section.
#Prepare the target module set
target = 'Aca-vs-Ctrl'
tempL = pvalDF.loc[pvalDF[target]<0.05].index.tolist()
print(len(tempL), 'changed modules with significance in '+target)

#Select representatives
##topX rows are displayed as a table; the first topX_plot modules are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([3, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]#Columns whose name ends with 'Pval'
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=target+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by significance in '+target+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, SampleID) pair with its RMS value, plus the sample phenotype
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: phenotype code -> display label; tempD1: display label -> color
##(the key order of tempD1 is also used as the x-axis order and for the bracket positions)
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualize each representative
for rank_i in range(len(plotL)):
    #The rank refers to the ordering of plotL, i.e., the significance in the target contrast
    #(the message previously reported the main effect of `variable`, which is not the sort key here)
    print(' - Rank '+str(rank_i+1)+' (sort by significance in '+target+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per-group count, mean, and SD with a normal-approximation 95% interval (mean +/- 1.96*SD/sqrt(n))
    ##NOTE(review): this table is computed separately from the interval drawn by sns.pointplot below (ci=95)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'X-vs-Y' column name into Contrast (X) and Baseline (Y), then map both to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###<0.001: '***', <0.01: '**', <0.05: '*', otherwise 'P = x.xx' (rounded half up to 2 decimals)
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in the first cell of the notebook;
    ###presumably they are imported in an earlier cell -- confirm
    tempL = []#NOTE: overwrites the module list above, which is not used anymore at this point
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##Layout constants (y-axis data units)
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2#Tick interval
    ymargin_t = 0.4#Extra room above ymax, where the significance brackets are drawn
    ymargin_b = 0.05
    aline_ymin = 1.0#Base height of the first bracket
    aline_ymargin = 0.125#Vertical step between stacked brackets
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    ##Group mean with 95% CI (pointplot), overlaid with the individual samples (stripplot)
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    ###ymax + yinter/10 makes np.arange include ymax itself as the last tick
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels
    ###One bracket per contrast row, stacked upward by aline_ymargin per row
    for row_i in range(len(tempDF2)):
        #Baseline
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)#Position in the plotting order = x coordinate
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket shape: short vertical tick at each group joined by a horizontal line
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks and 'P = ...' text use different vertical offsets and font sizes
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    yoff = 0.01#NOTE(review): assigned but not used in this cell
    ###Filled rectangle in axes coordinates, starting at the top edge of the axes (y=1) and drawn
    ###unclipped; apparently serves as a background band for the axes title
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    ###The module ID is used as the figure-level title
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-6. Top changed modules by 17aE2

In [None]:
#Cell overview: take the modules with adjusted P-value <0.05 in the target contrast, list the
#top-ranked ones (sorted by the significance in that contrast), and plot the per-sample RMS of
#each representative module.
#NOTE(review): pvalDF, statDF, moduleDF, rmsDF_kk and sampleDF come from earlier cells that are
#not visible in this section.
#Prepare the target module set
target = '17aE2-vs-Ctrl'
tempL = pvalDF.loc[pvalDF[target]<0.05].index.tolist()
print(len(tempL), 'changed modules with significance in '+target)

#Select representatives
##topX rows are displayed as a table; the first topX_plot modules are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([3, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]#Columns whose name ends with 'Pval'
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=target+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by significance in '+target+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, SampleID) pair with its RMS value, plus the sample phenotype
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: phenotype code -> display label; tempD1: display label -> color
##(the key order of tempD1 is also used as the x-axis order and for the bracket positions)
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}

#Visualize each representative
for rank_i in range(len(plotL)):
    #The rank refers to the ordering of plotL, i.e., the significance in the target contrast
    #(the message previously reported the main effect of `variable`, which is not the sort key here)
    print(' - Rank '+str(rank_i+1)+' (sort by significance in '+target+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per-group count, mean, and SD with a normal-approximation 95% interval (mean +/- 1.96*SD/sqrt(n))
    ##NOTE(review): this table is computed separately from the interval drawn by sns.pointplot below (ci=95)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    tempDF2 = tempDF2.loc[list(tempD1.keys())]#Sort
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean
    ###Split each 'X-vs-Y' column name into Contrast (X) and Baseline (Y), then map both to display labels
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###<0.001: '***', <0.01: '**', <0.05: '*', otherwise 'P = x.xx' (rounded half up to 2 decimals)
    ###NOTE(review): Decimal and ROUND_HALF_UP are not imported in the first cell of the notebook;
    ###presumably they are imported in an earlier cell -- confirm
    tempL = []#NOTE: overwrites the module list above, which is not used anymore at this point
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##Layout constants (y-axis data units)
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2#Tick interval
    ymargin_t = 0.4#Extra room above ymax, where the significance brackets are drawn
    ymargin_b = 0.05
    aline_ymin = 1.0#Base height of the first bracket
    aline_ymargin = 0.125#Vertical step between stacked brackets
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    ##Group mean with 95% CI (pointplot), overlaid with the individual samples (stripplot)
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    ###ymax + yinter/10 makes np.arange include ymax itself as the last tick
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels
    ###One bracket per contrast row, stacked upward by aline_ymargin per row
    for row_i in range(len(tempDF2)):
        #Baseline
        group_0 = tempDF2['Baseline'].iloc[row_i]
        index_0 = list(tempD1.keys()).index(group_0)#Position in the plotting order = x coordinate
        xcoord_0 = index_0
        #Contrast
        group_1 = tempDF2['Contrast'].iloc[row_i]
        index_1 = list(tempD1.keys()).index(group_1)
        xcoord_1 = index_1
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        label = tempDF2['SignifLabel'].iloc[row_i]
        #Add annotation lines
        ##Bracket shape: short vertical tick at each group joined by a horizontal line
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text
        ##Asterisks and 'P = ...' text use different vertical offsets and font sizes
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    yoff = 0.01#NOTE(review): assigned but not used in this cell
    ###Filled rectangle in axes coordinates, starting at the top edge of the axes (y=1) and drawn
    ###unclipped; apparently serves as a background band for the axes title
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    ###The module ID is used as the figure-level title
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-7. Top changed modules by Rapa

In [None]:
#Representative RMS plots (own-group consensus) for the modules with the smallest adjusted P-value in the target comparison
#Prepare the target module set
target = 'Rapa-vs-Ctrl'
tempL = pvalDF.loc[pvalDF[target]<0.05].index.tolist()
print(len(tempL), 'changed modules with significance in '+target)

#Select representatives (both caps shrink when fewer modules are significant)
topX = min(30, len(tempL))
topX_plot = min(3, len(tempL))
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=target+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by significance in '+target+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per module x sample)
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}
orderL = list(tempD1.keys())#Group order on the x-axis; also used to position the brackets

#Visualize each representative
for rank_i, module in enumerate(plotL):
    #The modules are ranked by the adjusted P-value of the target comparison (see the sorting above)
    print(' - Rank '+str(rank_i+1)+' (sort by significance in '+target+'):')
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)

    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]

    #Check RMS summary (mean +/- 1.96 x SEM per group)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS
    tempDF2['0.975'] = tempDF2['mean'] + tempS
    tempDF2 = tempDF2.loc[orderL]#Sort
    display(tempDF2)

    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS.name = 'AdjPval'
    ##Clean (split 'X-vs-Y' into contrast X and baseline Y, then map to the display names)
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    tempL = []
    for pval in tempDF2['AdjPval'].tolist():
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            #Non-significant: show the value rounded half-up to two decimals
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)

    #Visualization
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.4
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=orderL, palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=orderL, palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels (one bracket per comparison, stacked upward from aline_ymin)
    aline_offset = yinter/5
    aline_length = yinter/5 + aline_offset/2
    for row_i, (group_1, group_0, label) in enumerate(zip(tempDF2['Contrast'].tolist(),
                                                          tempDF2['Baseline'].tolist(),
                                                          tempDF2['SignifLabel'].tolist())):
        #Baseline and contrast positions on the categorical x-axis
        xcoord_0 = orderL.index(group_0)
        xcoord_1 = orderL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text (asterisks and P-value text use different offsets and font sizes)
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation (title strip drawn just above the axes)
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 4-5-8. Top RCI modules in Ctrl

In [None]:
#Representative RMS plots (own-group consensus) for the modules with the highest RCI in the target group
target = 'Ctrl'

#Select representatives
topX = 30
topX_plot = 3
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = pd.merge(tempDF, rciDF_kk, left_index=True, right_index=True, how='left')
tempDF = tempDF.sort_values(by=target, ascending=False)
print('Top', topX, 'modules (sort by RCI of '+target+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot (long format: one row per module x sample)
tempDF = rmsDF_kk.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars='ModuleID')
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}
orderL = list(tempD1.keys())#Group order on the x-axis; also used to position the brackets

#Re-prepare p-values because these modules can be filtered out in pvalDF
##regex=True is explicit: the '$' anchor must be treated as a regular expression,
##otherwise the suffix is kept and the later '-vs-' split yields wrong group names
tempDF3 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_AdjPval$')]
tempDF3.columns = tempDF3.columns.str.replace('_AdjPval$', '', regex=True)

#Visualize each representative
for rank_i, module in enumerate(plotL):
    print(' - Rank '+str(rank_i+1)+' (sort by RCI of '+target+'):')
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)

    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]

    #Check RMS summary (mean +/- 1.96 x SEM per group)
    tempDF2 = tempDF1.groupby(['Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempS = 1.96*tempDF2['std']/np.sqrt(tempDF2['count'])
    tempDF2['0.025'] = tempDF2['mean'] - tempS
    tempDF2['0.975'] = tempDF2['mean'] + tempS
    tempDF2 = tempDF2.loc[orderL]#Sort
    display(tempDF2)

    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = tempDF3.loc[module].rename('AdjPval')
    ##Clean (split 'X-vs-Y' into contrast X and baseline Y, then map to the display names)
    tempDF2 = tempS.index.to_series().str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    tempL = []
    for pval in tempDF2['AdjPval'].tolist():
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            #Non-significant: show the value rounded half-up to two decimals
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)

    #Visualization
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.4
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(2, 4))
    sns.pointplot(data=tempDF1, x='Group', y='RMS', order=orderL, palette=tempD1,
                  markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95)
    p = sns.stripplot(data=tempDF1, x='Group', y='RMS',
                      order=orderL, palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5})
    ##Set axis
    sns.despine()
    p.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
    plt.setp(p.get_xticklabels(), rotation=70, horizontalalignment='right',
             verticalalignment='center', rotation_mode='anchor')
    ##Add significance labels (one bracket per comparison, stacked upward from aline_ymin)
    aline_offset = yinter/5
    aline_length = yinter/5 + aline_offset/2
    for row_i, (group_1, group_0, label) in enumerate(zip(tempDF2['Contrast'].tolist(),
                                                          tempDF2['Baseline'].tolist(),
                                                          tempDF2['SignifLabel'].tolist())):
        #Baseline and contrast positions on the categorical x-axis
        xcoord_0 = orderL.index(group_0)
        xcoord_1 = orderL.index(group_1)
        #Standard point of marker
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = aline_ymin + aline_ymargin*row_i
        #Add annotation lines
        plt.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                 [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                 lw=1.5, c='k')
        #Add annotation text (asterisks and P-value text use different offsets and font sizes)
        if label in ['***', '**', '*']:
            text_offset = yinter/21
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='medium', color='k')
        else:
            text_offset = yinter/3.5
            p.annotate(label, xy=(xcoord, ycoord+text_offset),
                       horizontalalignment='center', verticalalignment='bottom',
                       fontsize='x-small', color='k')
    ##Add annotation (title strip drawn just above the axes)
    p.set_title('Consensus:\nown group', {'fontsize':'small'})
    xoff = 0.015
    rect = plt.Rectangle((xoff, 1), 1-xoff, 0.155,#Manual adjustment
                         transform=p.axes.transAxes, facecolor=plt.get_cmap('tab20')(15),
                         clip_on=False, linewidth=0, zorder=0.5)
    p.add_patch(rect)
    ##Set axis label and title
    plt.setp(p, xlabel='', ylabel='Sample RMS\n(Mean = Module RCI)')
    plt.suptitle(module, size='small',
                 verticalalignment='bottom', horizontalalignment='center', wrap=True, y=1.0)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'RCI-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

## 5. Rank matching score under a fixed consensus: inter-group module comparison

> Test specific hypotheses: control RMS mean == intervention RMS mean per a fixed rank consensus for each module (i.e., inter-group module comparison).  
>
> 3. performing Welch's t-tests per rank consensus for each module.  
>  
> Because the main interest is whether the changed patterns are similar or not in a changed module (i.e., this is a downstream analysis of RCI), ANOVA and the p-value adjustment across modules are unnecessary. Although tricky (i.e., the hypotheses across rank consensus are not independent), the p-values in (3) are conservatively adjusted across all comparisons per module (interventions x rank consensus groups). In addition, the group providing the rank consensus is excluded from the tests because its RMS mean is the RCI itself (i.e., its expected mean and variance are clearly different) and because this reduces the number of hypotheses.  

### 5-1. Welch's t-tests

#### 5-1-1. Perform all statistical tests

In [None]:
#Welch's t-tests of RMS (control vs. each intervention) under each fixed rank consensus, per module,
#followed by Benjamini-Hochberg adjustment across all tests of the same module
tempL1 = ['Aca', '17aE2', 'Rapa']#Target rank consensus groups
tempL2 = ['Ctrl', 'Aca', '17aE2', 'Rapa']#Target sample groups to be assessed
control = 'Ctrl'
tempDF1 = rmsDF.loc[rmsDF['Template'].isin(tempL1)].set_index(['Template', 'ModuleID'])
tempDF2 = sampleDF.loc[sampleDF['Phenotype'].isin(tempL2)]
tempI = moduleDF.index

#Statistical tests per rank consensus
tempD = {}
for template in tempL1:
    #Statistical tests per module
    t_start = time.time()
    tempL3 = []#For summary table
    for module in tempI.tolist():
        #Select the target RMSs
        tempS = tempDF1.loc[(template, module)]
        tempS.name = 'RMS'
        #Add metadata while selecting the target samples
        tempDF = pd.merge(tempS, tempDF2, left_index=True, right_index=True, how='inner')

        #Tests per control vs. contrast, except for the template group
        tempDF4 = pd.DataFrame(columns=['DoF', 'tStat', 'Pval'])
        for contrast in tempL2:
            if contrast==control:
                continue
            if contrast==template:
                #The consensus group itself is not tested; keep a NaN placeholder row
                tstat, pval, dof = [np.nan, np.nan, np.nan]
            else:
                tempS1 = tempDF['RMS'].loc[tempDF['Phenotype']==control]
                tempS2 = tempDF['RMS'].loc[tempDF['Phenotype']==contrast]
                #Two-sided Welch's t-test
                tstat, pval, dof = weightstats.ttest_ind(tempS2, tempS1,#t-statistic reflects direction from the baseline
                                                         alternative='two-sided', usevar='unequal')
            tempDF4.loc[contrast+'-vs-'+control] = [dof, tstat, pval]
        tempDF4['AdjPval'] = 1.0#Add dummy column for now
        ##Convert to wide-format (one row per module; columns named '<comparison>_<statistic>')
        tempL = []
        for comparison in tempDF4.index.tolist():
            tempS = tempDF4.loc[comparison]
            tempS.index = comparison+'_'+tempS.index
            tempS.name = module
            tempL.append(tempS)
        tempS = pd.concat(tempL, axis=0)
        tempL3.append(tempS)
    t_elapsed = time.time() - t_start
    print(template)
    #Two groups are not tested per template: the control itself and the template group
    print('Elapsed time for', (len(tempL2)-2)*len(tempI), 'tests (',
          len(tempL2)-2, 'comparisons x', len(tempI), 'modules):',
          round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

    #Generate summary table
    tempDF4 = pd.concat(tempL3, axis=1).T
    tempDF4.index.name = tempI.name
    tempD[template] = tempDF4

#Clean all summary tables
tempDF4 = pd.DataFrame()
for template in tempD.keys():
    tempDF = tempD[template]
    tempDF['Template'] = template
    tempDF = tempDF.reset_index().set_index(['Template', 'ModuleID'])
    tempDF4 = pd.concat([tempDF4, tempDF], axis=0)
##P-value adjustment across all tests per module (comparisons x templates) by using Benjamini–Hochberg method
###Long-format table of the raw p-values, built once because it does not depend on the module
###(only the '_Pval' columns are melted, and the loop below writes only the '_AdjPval' columns)
tempL = tempDF4.loc[:, tempDF4.columns.str.contains('_Pval$', regex=True)].columns.tolist()
tempDF5 = tempDF4.reset_index().melt(var_name='Comparison', value_name='Pval', value_vars=tempL,
                                     id_vars=['Template', 'ModuleID'])
tempDF5 = tempDF5.dropna(axis=0)#NaN due to the eliminated test for the consensus group
tempL = tempDF4.loc[:, tempDF4.columns.str.contains('_AdjPval$', regex=True)].columns.tolist()
for module in tempI.tolist():
    tempDF = tempDF5.loc[tempDF5['ModuleID']==module].copy()
    tempDF['AdjPval'] = multi.multipletests(tempDF['Pval'], alpha=0.05, method='fdr_bh',
                                            is_sorted=False, returnsorted=False)[1]
    tempDF = tempDF.pivot(index=['Template', 'ModuleID'], columns='Comparison', values='AdjPval')
    tempDF.columns = tempDF.columns.str.replace('_Pval', '_AdjPval')
    #Replace the dummy values with the adjusted p-values
    ##The untested (template, comparison) pairs are absent from the long table, so they become NaN after the pivot
    for col_n in tempL:
        for template in tempL1:
            tempDF4.loc[(template, module), col_n] = tempDF.loc[(template, module), col_n]
display(tempDF4)

statDF2 = tempDF4

In [None]:
#Summarize RMS (N, mean, SEM) per sample group under each fixed rank consensus,
#merge the summary with the test table (statDF2), and export one .tsv per rank consensus
tempL1 = ['Aca', '17aE2', 'Rapa']#Target rank consensus groups
tempL2 = ['Ctrl', 'Aca', '17aE2', 'Rapa']#Target sample groups to be summarized
tempDF1 = rmsDF.loc[rmsDF['Template'].isin(tempL1)].set_index(['Template', 'ModuleID'])
tempDF2 = sampleDF.loc[sampleDF['Phenotype'].isin(tempL2)]
tempDF3 = statDF#Inter-group RCI comparisons, for sorting purpose

#Calculate general statistics per rank consensus
tempD = {}
for template in tempL1:
    #Calculate general statistics per intervention group
    tempL3 = []
    for phenotype in tempL2:
        #Select the target RMSs
        tempDF = tempDF1.loc[template]
        #Select the target samples
        tempL = tempDF2.loc[tempDF2['Phenotype']==phenotype].index.tolist()
        tempDF = tempDF[tempL]
        #Calculate general statistics
        ##N = number of non-missing RMS values per module
        tempS1 = len(tempL) - tempDF.isnull().sum(axis=1)
        tempS1.name = phenotype+'_N'
        tempS2 = tempDF.mean(axis=1)
        tempS2.name = phenotype+'_RMSmean'
        tempS3 = tempDF.sem(axis=1, ddof=1)
        tempS3.name = phenotype+'_RMSsem'
        #Merge
        tempDF = pd.concat([tempS1, tempS2, tempS3], axis=1)
        tempL3.append(tempDF)
    #Side-by-side table: three columns (N, mean, SEM) per sample group
    tempDF = pd.concat(tempL3, axis=1)
    tempD[template] = tempDF
##Clean all general statistics tables
tempDF4 = pd.DataFrame()
for template in tempD.keys():
    tempDF = tempD[template]
    tempDF['Template'] = template
    #Stack the per-template tables under a (Template, ModuleID) index
    tempDF = tempDF.reset_index().set_index(['Template', 'ModuleID'])
    tempDF4 = pd.concat([tempDF4, tempDF], axis=0)
display(tempDF4)

#Merge all the tables
print('General statistics table:', tempDF4.shape)
print('Test table:', statDF2.shape)
tempDF = pd.merge(moduleDF['ModuleName'], tempDF4.reset_index(), on='ModuleID', how='right')
tempDF = pd.concat([tempDF.set_index(['Template', 'ModuleID']), statDF2], axis=1)

#Sort based on P-value in ANOVA of the inter-group RCI comparisons
##Rows are reordered to follow the row order of statDF within each template
##NOTE(review): presumably statDF is already sorted by the ANOVA P-value at this point -- confirm
tempL = [(template, module) for template in tempL1 for module in tempDF3.index.tolist()]
tempDF = tempDF.loc[tempL]
display(tempDF)

#Save per rank consensus
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
for template in tempL1:
    fileName = 'inter-group-comparison_'+template+'-fixed-RMSmean.tsv'
    tempDF4 = tempDF.loc[template]
    ##dropna(axis=1) removes every column containing at least one NaN, not only the all-NaN ones
    tempDF4 = tempDF4.dropna(axis=1)#NaN due to the eliminated test for the consensus group
    tempDF4.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)
    print('Saved .tsv table for '+template)
    display(tempDF4)

#Keep the merged (Template, ModuleID)-indexed table under a stable name
statDF_fixed = tempDF

#### 5-1-2. Changed modules

> Focus only on the modules that changed based on RCI.  
> –> Utilize and update the above pvalDF and diffDF (4-2-3 section).  

In [None]:
#Append the fixed-consensus test results (adjusted P-value and t-statistic) to pvalDF and diffDF
#NOTE: pvalDF and diffDF are overwritten at the end of this cell, so re-running it merges the same
#columns again (pandas then suffixes the duplicated names); run it once per session

#Spread statDF by template to flatten multi-index
tempDF = moduleDF['ModuleName']#pd.Series() for now
for template in ['Aca', '17aE2', 'Rapa']:
    tempDF1 = statDF_fixed.loc[template].drop(columns=['ModuleName'])
    tempDF1 = tempDF1.dropna(axis=1)#NaN due to the eliminated test for the consensus group
    tempDF1.columns = template+'-fixed_'+tempDF1.columns
    tempDF = pd.merge(tempDF, tempDF1, left_index=True, right_index=True, how='left')

#Extract only the changed modules
tempDF = tempDF.loc[pvalDF.index.tolist()]
print('Changed modules (adjusted P < 0.05):', len(tempDF))

#Take adjusted P-value and add to the previous DF
##regex=True is explicit: the '$' anchor must be treated as a regular expression so that the suffix
##is really stripped and the column names of pvalDF and diffDF stay identical
tempDF1 = tempDF.loc[:, tempDF.columns.str.contains('-vs-.*_AdjPval$')]
tempDF1.columns = tempDF1.columns.str.replace('_AdjPval$', '', regex=True)
tempDF1 = pd.merge(pvalDF, tempDF1, left_index=True, right_index=True, how='left')
print('Adjusted P-value:')
display(tempDF1)
display(tempDF1.describe())

#Take t-statistic (for direction) and add to the previous DF
tempDF2 = tempDF.loc[:, tempDF.columns.str.contains('-vs-.*_tStat$')]
tempDF2.columns = tempDF2.columns.str.replace('_tStat$', '', regex=True)
tempDF2 = pd.merge(diffDF, tempDF2, left_index=True, right_index=True, how='left')
print('Changed direction (t-statistic):')
display(tempDF2)
display(tempDF2.describe())

pvalDF = tempDF1#Update
diffDF = tempDF2#Update

### 5-2. Visualization: venn diagram

#### 5-2-1. RMS under Aca consensus

In [None]:
#Venn diagrams of the modules changed by the template intervention (vs. control), split by whether the
#other interventions changed the RMS under the template's rank consensus similarly or dissimilarly
template = 'Aca'

#Prepare label and color
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempD1 = {'Dissimilarly changed\nby 17'+r'$\alpha$'+'-Estradiol':plt.get_cmap('tab20')(5),
          'Similarly changed\nby 17'+r'$\alpha$'+'-Estradiol':'tab:green',
          'Similarly changed\nby Rapamycin':'tab:purple',
          'Dissimilarly changed\nby Rapamycin':plt.get_cmap('tab20')(9)}
target = template+'-vs-Ctrl'

#Visualization per direction
for regulation in ['Tightened', 'Loosened']:
    #Prepare the changed module set
    tempS1 = pvalDF[target]
    tempS2 = diffDF[target]
    if regulation=='Changed':#Not in the loop list by default; kept as an optional setting
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    tempL = tempS2.index.tolist()
    print(regulation+' modules (vs. Control):', len(tempL))

    #Skip the followings if no changed module
    if len(tempL)==0:
        continue

    #Prepare similar/dissimilar module set
    tempD2 = {}
    tempL1 = pvalDF.loc[:, pvalDF.columns.str.contains(template+'-fixed')].columns.tolist()
    for col_n in tempL1:
        tempS1 = pvalDF.loc[tempL, col_n]
        tempS2 = diffDF.loc[tempL, col_n]
        comparison = re.sub('^.*-fixed_', '', col_n)
        group = tempD0[re.sub('-vs-.*$', '', comparison)]
        for similarity in ['Similar', 'Dissimilar']:
            #Each subset goes to its own variable (tempS3): overwriting tempS2 with the 'Similar'
            #subset would leave only positive t-statistics, so 'Dissimilar' would always be empty
            if similarity=='Similar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
            elif similarity=='Dissimilar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
            label = similarity+'ly changed\nby '+group
            tempD2[label] = set(tempS3.index.tolist())
    ##Sort to make consistent order in manual legend generation
    tempD = {}
    for label in tempD1.keys():
        tempD[label] = tempD2[label]

    #Not significant in all comparisons
    tempDF = pvalDF.loc[tempL]
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(' -> Not similarly/dissimilarly changed in the other interventions:', count)

    #Skip the followings if no similarly/dissimilarly changed module
    if count==len(tempL):
        continue

    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(3, 3))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.1, 1.05))#Otherwise, weird space...
    #Replace text for the n/a regions
    ##NOTE(review): the hard-coded indices presumably are the regions where the same intervention
    ##would be both similar and dissimilar -- they depend on the text order drawn by venn(); confirm
    for text_i, text in enumerate(ax.texts):
        if text_i in [2, 6, 10, 11, 12, 13, 14]:
            text.set_text('n/a')
    #Add legend annotation (one box per set, placed at the four corners in the order of tempD1)
    x_coord = [0.1, 0.1, 0.9, 0.9]
    y_coord = [0.25, 0.7, 0.7, 0.25]
    h_align = ['right', 'right', 'left', 'left']
    v_align = ['top', 'bottom', 'bottom', 'top']
    for i, key in enumerate(tempD1.keys()):
        total = f'{len(tempD[key]):,}'
        ax.text(x_coord[i], y_coord[i], key+'\n('+total+' modules)',
                fontsize='small', multialignment='center',
                horizontalalignment=h_align[i], verticalalignment=v_align[i],
                bbox={'boxstyle':'round', 'facecolor':tempD1[key], 'pad':0.2, 'alpha':0.5})
    title = 'Similarity to '+tempD0[template]+' consensus'
    ax.set_title(title, fontsize='medium')
    ##Save
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
        fileName = template+'-fixed-RMSmean-venn-'+regulation.lower()+'.pdf'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()

In [None]:
#Export the module lists behind each venn-diagram subset to one .xlsx workbook per direction
#(uses the `template` defined in the preceding cell)
target = template+'-vs-Ctrl'

#Export module list in each subset in the venn diagram
for regulation in ['Tightened', 'Loosened']:
    #Prepare the changed module set
    tempS1 = pvalDF[target]
    tempS2 = diffDF[target]
    if regulation=='Changed':#Not in the loop list by default; kept as an optional setting
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    tempL = tempS2.index.tolist()
    print(regulation+' modules (vs. Control):', len(tempL))

    #Skip the followings if no changed module
    if len(tempL)==0:
        continue

    #Prepare similar/dissimilar module set
    tempD = {}
    tempL1 = pvalDF.loc[:, pvalDF.columns.str.contains(template+'-fixed')].columns.tolist()
    for col_n in tempL1:
        tempS1 = pvalDF.loc[tempL, col_n]
        tempS2 = diffDF.loc[tempL, col_n]
        for similarity in ['Similar', 'Dissimilar']:
            #Each subset goes to its own variable (tempS3): overwriting tempS2 with the 'Similar'
            #subset would leave only positive t-statistics, so 'Dissimilar' would always be empty
            if similarity=='Similar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
            elif similarity=='Dissimilar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
            label = col_n+'_'+similarity+'lyChanged'
            tempD[label] = set(tempS3.index.tolist())

    #Not significant in all contrasts
    tempDF = pvalDF.loc[tempL]
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(' -> Not similarly/dissimilarly changed in the other interventions:', count)

    #Skip the followings if no similarly/dissimilarly changed module
    if count==len(tempL):
        continue

    #Prepare a new .xlsx file (dummy README)
    ##VennOrder is the 1-based position of each set, matching the sheet-name tuples below
    tempL1 = [len(tempD[key]) for key in tempD.keys()]
    tempDF = pd.DataFrame({'Group':tempD.keys(), 'nModules':tempL1})
    tempDF = tempDF.reset_index().rename(columns={'index':'VennOrder'})
    tempDF['VennOrder'] = tempDF['VennOrder'] + 1
    fileDir = './ExportData/'
    ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    fileName = template+'-fixed-RMSmean-venn-'+regulation.lower()+'.xlsx'
    tempDF.to_excel(fileDir+ipynbName+fileName, sheet_name='README', header=True, index=False)
    display(tempDF)#Check

    #Prepare summary table for the template
    tempDF1 = statDF_fixed.loc[template]
    tempDF1 = tempDF1.dropna(axis=1)#NaN due to the eliminated test for the consensus group

    t_start = time.time()
    tempL4 = list(tempD.values())#Module sets in the venn order
    #Append all the sheets through a single writer (the workbook is opened and saved only once)
    with pd.ExcelWriter(fileDir+ipynbName+fileName, mode='a', engine='openpyxl') as writer:
        #Extract overall set
        for key_i, tempS in enumerate(tempL4):
            tempDF = tempDF1.loc[tempDF1.index.isin(tempS)]
            #Save summary table by appended to the above .xlsx file
            ##Prepare sheet name ('1' at the position of the set, 'NA' elsewhere)
            tempL1 = ['NA' for i in range(len(tempD))]
            tempL1[key_i] = '1'
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))

        #Extract subset (every '1'/'0' membership pattern across the four sets)
        tempL1 = ['1', '0']
        tempL2 = [[k1, k2, k3, k4] for k1 in tempL1 for k2 in tempL1 for k3 in tempL1 for k4 in tempL1]
        #tempL2.remove(['0', '0', '0', '0'])
        for tempL1 in tempL2:
            #Positive module set (intersection of the sets flagged '1')
            ##NOTE(review): the universe is every module in pvalDF, so the '(0,0,0,0)' sheet is not
            ##restricted to the regulation-specific modules (tempL) -- confirm whether this is intended
            tempS1 = set(pvalDF.index.tolist())#Initialize
            for tempS, binary in zip(tempL4, tempL1):
                if binary=='1':
                    tempS1 = tempS1 & tempS
            #Negative module set (union of the sets flagged '0')
            tempS2 = set()#Initialize
            for tempS, binary in zip(tempL4, tempL1):
                if binary=='0':
                    tempS2 = tempS2 | tempS
            #Extract subset
            tempS = tempS1 - tempS2
            tempDF = tempDF1.loc[tempDF1.index.isin(tempS)]
            #Save summary table by appended to the above .xlsx file
            ##Prepare sheet name
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))

    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#### 5-2-2. RMS under 17aE2 consensus

In [None]:
template = '17aE2'

#Venn diagrams of the modules changed by the template intervention (vs. Control), grouped by
#whether the other interventions changed them similarly or dissimilarly under the template consensus
#Prepare label and color
##tempD0: group code -> display name
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
##tempD1: venn set label -> color (the key order fixes the set order passed to venn)
tempD1 = {'Dissimilarly changed\nby Rapamycin':plt.get_cmap('tab20')(9),
          'Similarly changed\nby Rapamycin':'tab:purple',
          'Similarly changed\nby Acarbose':'tab:red',
          'Dissimilarly changed\nby Acarbose':plt.get_cmap('tab20')(7)}
target = template+'-vs-Ctrl'

#Visualization per direction
for regulation in ['Tightened', 'Loosened']:
    #Prepare the changed module set
    ##pvalDF[target]<0.05 combined with the sign of diffDF[target] (>0: Tightened, <0: Loosened)
    tempS1 = pvalDF[target]
    tempS2 = diffDF[target]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    tempL = tempS2.index.tolist()
    print(regulation+' modules (vs. Control):', len(tempL))
    
    #Skip the rest if there is no changed module
    if len(tempL)==0:
        continue
    
    #Prepare similar/dissimilar module set
    tempD2 = {}
    tempL1 = pvalDF.loc[:, pvalDF.columns.str.contains(template+'-fixed')].columns.tolist()
    for col_n in tempL1:
        tempS1 = pvalDF.loc[tempL, col_n]
        tempS2 = diffDF.loc[tempL, col_n]
        comparison = re.sub('^.*-fixed_', '', col_n)
        for similarity in ['Similar', 'Dissimilar']:
            ##Filter into a separate variable: overwriting tempS2 in the 'Similar' pass would leave
            ##only positive differences, and the 'Dissimilar' pass (<0) would then always be empty
            if similarity=='Similar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
            elif similarity=='Dissimilar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
            label = similarity+'ly changed\nby '+tempD0[re.sub('-vs-.*$', '', comparison)]
            tempD2[label] = set(tempS3.index.tolist())
    ##Sort to make consistent order in manual legend generation
    tempD = {}
    for label in tempD1.keys():
        tempD[label] = tempD2[label]
    
    #Not significant in all comparisons
    tempDF = pvalDF.loc[tempL]
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(' -> Not similarly/dissimilarly changed in the other interventions:', count)
    
    #Skip the rest if there is no similarly/dissimilarly changed module
    if count==len(tempL):
        continue
    
    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(3, 3))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.1, 1.05))#Otherwise, weird space...
    #Replace text for the n/a regions
    ##NOTE(review): these text indices presumably correspond to the regions where the 'Similarly' and
    ##'Dissimilarly' sets of the same intervention overlap (empty by construction) -- confirm against venn
    for text_i, text in enumerate(ax.texts):
        if text_i in [2, 6, 10, 11, 12, 13, 14]:
            text.set_text('n/a')
    #Add legend annotation
    ##One text box per set, placed manually around the diagram in the key order of tempD1
    x_coord = [0.1, 0.1, 0.9, 0.9]
    y_coord = [0.25, 0.7, 0.7, 0.25]
    h_align = ['right', 'right', 'left', 'left']
    v_align = ['top', 'bottom', 'bottom', 'top']
    for i, key in enumerate(tempD1):
        total = f'{len(tempD[key]):,}'
        ax.text(x_coord[i], y_coord[i], key+'\n('+total+' modules)',
                fontsize='small', multialignment='center',
                horizontalalignment=h_align[i], verticalalignment=v_align[i],
                bbox={'boxstyle':'round', 'facecolor':tempD1[key], 'pad':0.2, 'alpha':0.5})
    title = 'Similarity to '+tempD0[template]+' consensus'
    ax.set_title(title, fontsize='medium')
    ##Save
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
        fileName = template+'-fixed-RMSmean-venn-'+regulation.lower()+'.pdf'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()

In [None]:
target = template+'-vs-Ctrl'

#Export module list in each subset in the venn diagram
##One .xlsx file per direction: a README sheet (set sizes) followed by one sheet per set/subset
for regulation in ['Tightened', 'Loosened']:
    #Prepare the changed module set
    ##pvalDF[target]<0.05 combined with the sign of diffDF[target] (>0: Tightened, <0: Loosened)
    tempS1 = pvalDF[target]
    tempS2 = diffDF[target]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    tempL = tempS2.index.tolist()
    print(regulation+' modules (vs. Control):', len(tempL))
    
    #Skip the rest if there is no changed module
    if len(tempL)==0:
        continue
    
    #Prepare similar/dissimilar module set
    tempD = {}
    tempL1 = pvalDF.loc[:, pvalDF.columns.str.contains(template+'-fixed')].columns.tolist()
    for col_n in tempL1:
        tempS1 = pvalDF.loc[tempL, col_n]
        tempS2 = diffDF.loc[tempL, col_n]
        for similarity in ['Similar', 'Dissimilar']:
            ##Filter into a separate variable: overwriting tempS2 in the 'Similar' pass would leave
            ##only positive differences, and the 'Dissimilar' pass (<0) would then always be empty
            if similarity=='Similar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
            elif similarity=='Dissimilar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
            label = col_n+'_'+similarity+'lyChanged'
            tempD[label] = set(tempS3.index.tolist())
    
    #Not significant in all contrasts
    tempDF = pvalDF.loc[tempL]
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(' -> Not similarly/dissimilarly changed in the other interventions:', count)
    
    #Skip the rest if there is no similarly/dissimilarly changed module
    if count==len(tempL):
        continue
    
    #Prepare a new .xlsx file (dummy README)
    ##VennOrder is the 1-based position of each set; it matches the positions in the sheet names below
    tempL1 = [len(moduleS) for moduleS in tempD.values()]
    tempDF = pd.DataFrame({'Group':list(tempD.keys()), 'nModules':tempL1})
    tempDF = tempDF.reset_index().rename(columns={'index':'VennOrder'})
    tempDF['VennOrder'] = tempDF['VennOrder'] + 1
    fileDir = './ExportData/'
    ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    fileName = template+'-fixed-RMSmean-venn-'+regulation.lower()+'.xlsx'
    tempDF.to_excel(fileDir+ipynbName+fileName, sheet_name='README', header=True, index=False)
    display(tempDF)#Check
    
    #Prepare summary table for the template
    tempDF1 = statDF_fixed.loc[template]
    tempDF1 = tempDF1.dropna(axis=1)#NaN due to the eliminated test for the consensus group
    
    t_start = time.time()
    #Save summary tables by appending to the above .xlsx file
    ##Open the workbook once for all sheets instead of re-opening (and re-saving) it per sheet
    with pd.ExcelWriter(fileDir+ipynbName+fileName, mode='a', engine='openpyxl') as writer:
        #Extract overall set
        for key_i, key in enumerate(tempD):
            tempS = tempD[key]
            tempDF = tempDF1.loc[tempDF1.index.isin(tempS)]
            ##Prepare sheet name: '1' at the position of this set, 'NA' elsewhere
            tempL1 = ['NA' for i in range(len(tempD))]
            tempL1[key_i] = '1'
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
        
        #Extract subset
        ##All 2^4 membership patterns ('1': in the set, '0': not in the set); assumes exactly 4 sets in tempD
        tempL1 = ['1', '0']
        tempL2 = [[k1, k2, k3, k4] for k1 in tempL1 for k2 in tempL1 for k3 in tempL1 for k4 in tempL1]
        #tempL2.remove(['0', '0', '0', '0'])
        ##NOTE(review): the positive set is initialized from all modules in pvalDF, so '(0,0,0,0)' is not
        ##restricted to the changed modules in tempL -- confirm this is intended
        for tempL1 in tempL2:
            #Positive module set
            tempL3 = [list(tempD.values())[key_i] for key_i, binary in enumerate(tempL1) if binary=='1']
            tempS1 = set(pvalDF.index.tolist())#Initialize
            for tempS in tempL3:
                tempS1 = tempS1 & tempS
            #Negative module set
            tempL3 = [list(tempD.values())[key_i] for key_i, binary in enumerate(tempL1) if binary=='0']
            tempS2 = set()#Initialize
            for tempS in tempL3:
                tempS2 = tempS2 | tempS
            #Extract subset
            tempS = tempS1 - tempS2
            tempDF = tempDF1.loc[tempDF1.index.isin(tempS)]
            ##Prepare sheet name
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
    
    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#### 5-2-3. RMS under Rapa consensus

In [None]:
template = 'Rapa'

#Venn diagrams of the modules changed by the template intervention (vs. Control), grouped by
#whether the other interventions changed them similarly or dissimilarly under the template consensus
#Prepare label and color
##tempD0: group code -> display name
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
##tempD1: venn set label -> color (the key order fixes the set order passed to venn)
tempD1 = {'Dissimilarly changed\nby Acarbose':plt.get_cmap('tab20')(7),
          'Similarly changed\nby Acarbose':'tab:red',
          'Similarly changed\nby 17'+r'$\alpha$'+'-Estradiol':'tab:green',
          'Dissimilarly changed\nby 17'+r'$\alpha$'+'-Estradiol':plt.get_cmap('tab20')(5)}
target = template+'-vs-Ctrl'

#Visualization per direction
for regulation in ['Tightened', 'Loosened']:
    #Prepare the changed module set
    ##pvalDF[target]<0.05 combined with the sign of diffDF[target] (>0: Tightened, <0: Loosened)
    tempS1 = pvalDF[target]
    tempS2 = diffDF[target]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    tempL = tempS2.index.tolist()
    print(regulation+' modules (vs. Control):', len(tempL))
    
    #Skip the rest if there is no changed module
    if len(tempL)==0:
        continue
    
    #Prepare similar/dissimilar module set
    tempD2 = {}
    tempL1 = pvalDF.loc[:, pvalDF.columns.str.contains(template+'-fixed')].columns.tolist()
    for col_n in tempL1:
        tempS1 = pvalDF.loc[tempL, col_n]
        tempS2 = diffDF.loc[tempL, col_n]
        comparison = re.sub('^.*-fixed_', '', col_n)
        for similarity in ['Similar', 'Dissimilar']:
            ##Filter into a separate variable: overwriting tempS2 in the 'Similar' pass would leave
            ##only positive differences, and the 'Dissimilar' pass (<0) would then always be empty
            if similarity=='Similar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
            elif similarity=='Dissimilar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
            label = similarity+'ly changed\nby '+tempD0[re.sub('-vs-.*$', '', comparison)]
            tempD2[label] = set(tempS3.index.tolist())
    ##Sort to make consistent order in manual legend generation
    tempD = {}
    for label in tempD1.keys():
        tempD[label] = tempD2[label]
    
    #Not significant in all comparisons
    tempDF = pvalDF.loc[tempL]
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(' -> Not similarly/dissimilarly changed in the other interventions:', count)
    
    #Skip the rest if there is no similarly/dissimilarly changed module
    if count==len(tempL):
        continue
    
    #Venn diagram
    sns.set(style='ticks', font='Arial', context='talk')
    fig, ax = plt.subplots(figsize=(3, 3))
    venn(tempD, fmt='{size:,}', cmap=list(tempD1.values()), legend_loc=None, ax=ax)
    plt.setp(ax, ylim=(0.1, 1.05))#Otherwise, weird space...
    #Replace text for the n/a regions
    ##NOTE(review): these text indices presumably correspond to the regions where the 'Similarly' and
    ##'Dissimilarly' sets of the same intervention overlap (empty by construction) -- confirm against venn
    for text_i, text in enumerate(ax.texts):
        if text_i in [2, 6, 10, 11, 12, 13, 14]:
            text.set_text('n/a')
    #Add legend annotation
    ##One text box per set, placed manually around the diagram in the key order of tempD1
    x_coord = [0.1, 0.1, 0.9, 0.9]
    y_coord = [0.25, 0.7, 0.7, 0.25]
    h_align = ['right', 'right', 'left', 'left']
    v_align = ['top', 'bottom', 'bottom', 'top']
    for i, key in enumerate(tempD1):
        total = f'{len(tempD[key]):,}'
        ax.text(x_coord[i], y_coord[i], key+'\n('+total+' modules)',
                fontsize='small', multialignment='center',
                horizontalalignment=h_align[i], verticalalignment=v_align[i],
                bbox={'boxstyle':'round', 'facecolor':tempD1[key], 'pad':0.2, 'alpha':0.5})
    title = 'Similarity to '+tempD0[template]+' consensus'
    ax.set_title(title, fontsize='medium')
    ##Save
    if regulation!='Changed':
        fileDir = './ExportFigures/'
        ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
        fileName = template+'-fixed-RMSmean-venn-'+regulation.lower()+'.pdf'
        plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()

In [None]:
target = template+'-vs-Ctrl'

#Export module list in each subset in the venn diagram
##One .xlsx file per direction: a README sheet (set sizes) followed by one sheet per set/subset
for regulation in ['Tightened', 'Loosened']:
    #Prepare the changed module set
    ##pvalDF[target]<0.05 combined with the sign of diffDF[target] (>0: Tightened, <0: Loosened)
    tempS1 = pvalDF[target]
    tempS2 = diffDF[target]
    if regulation=='Changed':
        tempS2 = tempS2.loc[(tempS1<0.05)]
    elif regulation=='Tightened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
    elif regulation=='Loosened':
        tempS2 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
    tempL = tempS2.index.tolist()
    print(regulation+' modules (vs. Control):', len(tempL))
    
    #Skip the rest if there is no changed module
    if len(tempL)==0:
        continue
    
    #Prepare similar/dissimilar module set
    tempD = {}
    tempL1 = pvalDF.loc[:, pvalDF.columns.str.contains(template+'-fixed')].columns.tolist()
    for col_n in tempL1:
        tempS1 = pvalDF.loc[tempL, col_n]
        tempS2 = diffDF.loc[tempL, col_n]
        for similarity in ['Similar', 'Dissimilar']:
            ##Filter into a separate variable: overwriting tempS2 in the 'Similar' pass would leave
            ##only positive differences, and the 'Dissimilar' pass (<0) would then always be empty
            if similarity=='Similar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2>0)]
            elif similarity=='Dissimilar':
                tempS3 = tempS2.loc[(tempS1<0.05)&(tempS2<0)]
            label = col_n+'_'+similarity+'lyChanged'
            tempD[label] = set(tempS3.index.tolist())
    
    #Not significant in all contrasts
    tempDF = pvalDF.loc[tempL]
    for moduleS in tempD.values():
        tempDF = tempDF.loc[~tempDF.index.isin(moduleS)]
    count = len(tempDF)
    print(' -> Not similarly/dissimilarly changed in the other interventions:', count)
    
    #Skip the rest if there is no similarly/dissimilarly changed module
    if count==len(tempL):
        continue
    
    #Prepare a new .xlsx file (dummy README)
    ##VennOrder is the 1-based position of each set; it matches the positions in the sheet names below
    tempL1 = [len(moduleS) for moduleS in tempD.values()]
    tempDF = pd.DataFrame({'Group':list(tempD.keys()), 'nModules':tempL1})
    tempDF = tempDF.reset_index().rename(columns={'index':'VennOrder'})
    tempDF['VennOrder'] = tempDF['VennOrder'] + 1
    fileDir = './ExportData/'
    ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    fileName = template+'-fixed-RMSmean-venn-'+regulation.lower()+'.xlsx'
    tempDF.to_excel(fileDir+ipynbName+fileName, sheet_name='README', header=True, index=False)
    display(tempDF)#Check
    
    #Prepare summary table for the template
    tempDF1 = statDF_fixed.loc[template]
    tempDF1 = tempDF1.dropna(axis=1)#NaN due to the eliminated test for the consensus group
    
    t_start = time.time()
    #Save summary tables by appending to the above .xlsx file
    ##Open the workbook once for all sheets instead of re-opening (and re-saving) it per sheet
    with pd.ExcelWriter(fileDir+ipynbName+fileName, mode='a', engine='openpyxl') as writer:
        #Extract overall set
        for key_i, key in enumerate(tempD):
            tempS = tempD[key]
            tempDF = tempDF1.loc[tempDF1.index.isin(tempS)]
            ##Prepare sheet name: '1' at the position of this set, 'NA' elsewhere
            tempL1 = ['NA' for i in range(len(tempD))]
            tempL1[key_i] = '1'
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
        
        #Extract subset
        ##All 2^4 membership patterns ('1': in the set, '0': not in the set); assumes exactly 4 sets in tempD
        tempL1 = ['1', '0']
        tempL2 = [[k1, k2, k3, k4] for k1 in tempL1 for k2 in tempL1 for k3 in tempL1 for k4 in tempL1]
        #tempL2.remove(['0', '0', '0', '0'])
        ##NOTE(review): the positive set is initialized from all modules in pvalDF, so '(0,0,0,0)' is not
        ##restricted to the changed modules in tempL -- confirm this is intended
        for tempL1 in tempL2:
            #Positive module set
            tempL3 = [list(tempD.values())[key_i] for key_i, binary in enumerate(tempL1) if binary=='1']
            tempS1 = set(pvalDF.index.tolist())#Initialize
            for tempS in tempL3:
                tempS1 = tempS1 & tempS
            #Negative module set
            tempL3 = [list(tempD.values())[key_i] for key_i, binary in enumerate(tempL1) if binary=='0']
            tempS2 = set()#Initialize
            for tempS in tempL3:
                tempS2 = tempS2 | tempS
            #Extract subset
            tempS = tempS1 - tempS2
            tempDF = tempDF1.loc[tempDF1.index.isin(tempS)]
            ##Prepare sheet name
            setName = '('+','.join(tempL1)+')'
            tempDF.to_excel(writer, sheet_name=setName, header=True, index=True)
            print(' - '+setName+':', len(tempDF))
    
    t_elapsed = time.time() - t_start
    print(' - Elapsed time:', round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

### 5-3. Visualization: pointplot

> More modules are examined later (in a separate notebook), so only representative modules are checked here.  

#### 5-3-1. Modules tightened by all interventions

In [None]:
#Pointplots of sample RMS (one panel per consensus template) for representative modules that are
#significant in the same direction for every contrast in posL
#NOTE(review): `variable`, `re`, `wrap`, `Decimal` and `ROUND_HALF_UP` are not defined in this cell;
#presumably they are defined/imported in an earlier cell -- confirm
#Prepare the target module set
posL = ['Aca-vs-Ctrl', '17aE2-vs-Ctrl', 'Rapa-vs-Ctrl']
negL = ['']#Placeholder: no '-vs-' column equals '', so the negL branch below is never taken
regulation = 'Tightened'
tempS = pd.Series(np.repeat(True, len(pvalDF)), index=pvalDF.index)#Initialize
tempDF = pvalDF.loc[:, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
tempDF = tempDF.loc[:, ~tempDF.columns.str.contains('-fixed')]
##Build one boolean condition per contrast column and intersect them all
for col_n in tempDF.columns.tolist():
    tempS1 = pvalDF[col_n]
    tempS2 = diffDF[col_n]
    if col_n in posL:
        #Significant with the sign of the difference matching the regulation
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
    elif col_n in negL:
        tempS3 = (tempS1>=0.05)
        #Significance for inverse regulation
        if regulation=='Tightened':
            tempS1 = (tempS1<0.05) & (tempS2<0)
        elif regulation=='Loosened':
            tempS1 = (tempS1<0.05) & (tempS2>0)
        tempS1 = tempS3 | tempS1
    else:
        #No condition on the other contrasts (True for any P-value; a NaN P-value gives False)
        tempS1 = (tempS1>=0.0)
    #Update True
    tempS = tempS & tempS1
tempL = tempS.loc[tempS.tolist()].index.tolist()
print(len(tempL), regulation.lower()+' modules with significance in', posL, 'but not in', negL)

#Select representatives
##topX modules are displayed as a table; the first topX_plot of them are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([10, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=variable+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by the main effect of '+variable+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, Template, SampleID) with its RMS, plus the sample's Phenotype
tempDF = rmsDF.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars=['ModuleID', 'Template'])
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: group code -> display name; tempD1: display name -> color for points;
##tempD2: display name -> lighter 'tab20' color used for the panel title background
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Template'] = tempDF['Template'].map(tempD0)
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}
tempD2 = {}
for label in tempD1.keys():
    if tempD1[label]=='tab:blue':
        tempD2[label] = plt.get_cmap('tab20')(1)
    elif tempD1[label]=='tab:orange':
        tempD2[label] = plt.get_cmap('tab20')(3)
    elif tempD1[label]=='tab:green':
        tempD2[label] = plt.get_cmap('tab20')(5)
    elif tempD1[label]=='tab:red':
        tempD2[label] = plt.get_cmap('tab20')(7)
    elif tempD1[label]=='tab:purple':
        tempD2[label] = plt.get_cmap('tab20')(9)
    elif tempD1[label]=='tab:olive':
        tempD2[label] = plt.get_cmap('tab20')(17)
    else:
        tempD2[label] = 'Error?'

#Visualize each representative
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by the main effect of '+variable+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per (Template, Group): count, mean, std and a normal-approximation interval (mean +/- 1.96*std/sqrt(count))
    tempDF2 = tempDF1.groupby(['Template', 'Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    ##Multiindex sort
    tempDF2 = tempDF2.reset_index()
    tempDF2['Template'] = pd.Categorical(tempDF2['Template'], categories=list(tempD2.keys()))
    tempDF2['Group'] = pd.Categorical(tempDF2['Group'], categories=list(tempD1.keys()))
    tempDF2 = tempDF2.sort_values(by=['Template', 'Group']).set_index(['Template', 'Group'])
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS = tempS.loc[tempS.index.str.contains('-fixed')]
    tempS.name = 'AdjPval'
    ##Clean
    ###Split '<Template>-fixed_<Contrast>-vs-<Baseline>' column names into three columns
    tempDF2 = tempS.index.to_series().str.split(pat='-fixed_', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Template', 1:'Comparison'})
    tempS1 = tempDF2['Template']
    tempDF2 = tempDF2['Comparison'].str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempS1, tempDF2, left_index=True, right_index=True, how='left')
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Template'] = tempDF2['Template'].map(tempD0)
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###Asterisks below 0.05; otherwise the P-value rounded half-up to two decimals
    ###NOTE: this reuses the name tempL (the module list above); plotL was already taken from it
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##y-axis range/ticks, top/bottom margins, and the base height/step of the significance brackets
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.275
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8.8, 5.6), sharex=True, sharey=True,
                             gridspec_kw={'width_ratios':[2.2, 2.2, 2.2, 2.2]})
    ##One panel per consensus template, in the key order of tempD2
    ##NOTE: the loop variable below reassigns the notebook-level `template`
    for ax_i, ax in enumerate(axes.flat):
        template = list(tempD2.keys())[ax_i]
        tempDF3 = tempDF1.loc[tempDF1['Template']==template]
        #Group mean with 95% CI (pointplot) overlaid with the individual samples (stripplot)
        #NOTE(review): `join` and `ci` are deprecated in newer seaborn releases -- confirm the seaborn version in use
        sns.pointplot(data=tempDF3, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                      markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95, ax=ax)
        sns.stripplot(data=tempDF3, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5}, ax=ax)
        #Add RCI line
        ##Mean RMS of the group that is the same as the consensus template of this panel
        rci = tempDF3['RMS'].loc[tempDF3['Group']==template].mean()
        ax.axhline(y=rci, **{'linestyle':'--', 'color':tempD1[template], 'zorder':0})
        #Set axis
        sns.despine()
        ax.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
        plt.setp(ax.get_xticklabels(), rotation=70, horizontalalignment='right',
                 verticalalignment='center', rotation_mode='anchor')
        if ax_i==0:
            plt.setp(ax, xlabel='', ylabel='Sample RMS')
        else:
            plt.setp(ax.get_yticklabels(), visible=False)
            plt.setp(ax, xlabel='', ylabel='')
        #Add significance labels
        ##One bracket per comparison of this template, stacked upward by aline_ymargin per row
        tempDF3 = tempDF2.loc[tempDF2['Template']==template]
        for row_i in range(len(tempDF3)):
            #Baseline
            group_0 = tempDF3['Baseline'].iloc[row_i]
            index_0 = list(tempD1.keys()).index(group_0)
            xcoord_0 = index_0
            #Contrast
            group_1 = tempDF3['Contrast'].iloc[row_i]
            index_1 = list(tempD1.keys()).index(group_1)
            xcoord_1 = index_1
            #Standard point of marker
            xcoord = (xcoord_0+xcoord_1)/2
            ycoord = aline_ymin + aline_ymargin*row_i
            label = tempDF3['SignifLabel'].iloc[row_i]
            #Add annotation lines
            aline_offset = yinter/5
            aline_length = yinter/5 + aline_offset/2
            ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                    [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                    lw=1.5, c='k')
            #Add annotation text
            ##Asterisk labels and 'P = ...' labels use different font sizes and vertical offsets
            if label in ['***', '**', '*']:
                text_offset = yinter/21
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='medium', color='k')
            else:
                text_offset = yinter/3.5
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='x-small', color='k')
        #Add annotation
        ##Panel title with a colored rectangle drawn just above the axes as its background
        ax.set_title('Consensus:\n'+template, {'fontsize':'small'})
        xoff = 0.015
        yoff = 0.01#NOTE(review): yoff is not used below
        rect = plt.Rectangle((xoff, 1), 1-xoff, 0.17,#Manual adjustment
                             transform=ax.transAxes, facecolor=tempD2[template],
                             clip_on=False, linewidth=0, zorder=0.5)
        ax.add_patch(rect)
    fig.tight_layout()
    #Set title
    ##Module name with its first character capitalized, followed by the module ID
    modulename = moduleDF.loc[module, 'ModuleName']
    initial = modulename[0].capitalize()
    title = re.sub('^.', initial, modulename)+' ('+module+')'
    title = '\n'.join(wrap(title, 100))#Because the below wrap=True didn't work
    fig.suptitle(title, size='small',
                 verticalalignment='baseline', horizontalalignment='center', wrap=True, y=0.925)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'fixed-RMSmean-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 5-3-2. Top changed modules by Aca

In [None]:
#Pointplots of sample RMS (one panel per consensus template) for the modules with the smallest
#adjusted P-values in the target contrast
#NOTE(review): `re`, `wrap`, `Decimal` and `ROUND_HALF_UP` are not defined in this cell;
#presumably they are imported in an earlier cell -- confirm
#Prepare the target module set
##Significant in the target contrast, regardless of the sign of the difference
target = 'Aca-vs-Ctrl'
tempL = pvalDF.loc[pvalDF[target]<0.05].index.tolist()
print(len(tempL), 'changed modules with significance in '+target)

#Select representatives
##topX modules are displayed as a table; the first topX_plot of them are plotted
topX = np.min([30, len(tempL)])
topX_plot = np.min([3, len(tempL)])
tempDF = statDF.loc[:, statDF.columns.str.contains('Pval$')]
tempDF = pd.merge(statDF['ModuleName'], tempDF, left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempL].sort_values(by=target+'_AdjPval', ascending=True)
print('Top', topX, 'modules (sort by significance in '+target+'):')
display(tempDF.iloc[:topX])
plotL = tempDF.index.tolist()[:topX_plot]

#Prepare DF for plot
##Long format: one row per (ModuleID, Template, SampleID) with its RMS, plus the sample's Phenotype
tempDF = rmsDF.reset_index().melt(var_name='SampleID', value_name='RMS', id_vars=['ModuleID', 'Template'])
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.merge(tempDF, tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: group code -> display name; tempD1: display name -> color for points;
##tempD2: display name -> lighter 'tab20' color used for the panel title background
tempD0 = {'Ctrl':'Control', 'Aca':'Acarbose',
          '17aE2':'17'+r'$\alpha$'+'-Estradiol', 'Rapa':'Rapamycin'}
tempDF['Template'] = tempDF['Template'].map(tempD0)
tempDF['Group'] = tempDF['Phenotype'].map(tempD0)
tempD1 = {'Control':'tab:blue', 'Acarbose':'tab:red',
          '17'+r'$\alpha$'+'-Estradiol':'tab:green', 'Rapamycin':'tab:purple'}
tempD2 = {}
for label in tempD1.keys():
    if tempD1[label]=='tab:blue':
        tempD2[label] = plt.get_cmap('tab20')(1)
    elif tempD1[label]=='tab:orange':
        tempD2[label] = plt.get_cmap('tab20')(3)
    elif tempD1[label]=='tab:green':
        tempD2[label] = plt.get_cmap('tab20')(5)
    elif tempD1[label]=='tab:red':
        tempD2[label] = plt.get_cmap('tab20')(7)
    elif tempD1[label]=='tab:purple':
        tempD2[label] = plt.get_cmap('tab20')(9)
    elif tempD1[label]=='tab:olive':
        tempD2[label] = plt.get_cmap('tab20')(17)
    else:
        tempD2[label] = 'Error?'

#Visualize each representative
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by significance in '+target+'):')
    module = plotL[rank_i]
    #Check module summary
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Per (Template, Group): count, mean, std and a normal-approximation interval (mean +/- 1.96*std/sqrt(count))
    tempDF2 = tempDF1.groupby(['Template', 'Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    ##Multiindex sort
    tempDF2 = tempDF2.reset_index()
    tempDF2['Template'] = pd.Categorical(tempDF2['Template'], categories=list(tempD2.keys()))
    tempDF2['Group'] = pd.Categorical(tempDF2['Group'], categories=list(tempD1.keys()))
    tempDF2 = tempDF2.sort_values(by=['Template', 'Group']).set_index(['Template', 'Group'])
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS = tempS.loc[tempS.index.str.contains('-fixed')]
    tempS.name = 'AdjPval'
    ##Clean
    ###Split '<Template>-fixed_<Contrast>-vs-<Baseline>' column names into three columns
    tempDF2 = tempS.index.to_series().str.split(pat='-fixed_', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Template', 1:'Comparison'})
    tempS1 = tempDF2['Template']
    tempDF2 = tempDF2['Comparison'].str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempS1, tempDF2, left_index=True, right_index=True, how='left')
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    tempDF2['Template'] = tempDF2['Template'].map(tempD0)
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ###Asterisks below 0.05; otherwise the P-value rounded half-up to two decimals
    ###NOTE: this reuses the name tempL (the module list above); plotL was already taken from it
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##y-axis range/ticks, top/bottom margins, and the base height/step of the significance brackets
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.275
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8.8, 5.6), sharex=True, sharey=True,
                             gridspec_kw={'width_ratios':[2.2, 2.2, 2.2, 2.2]})
    ##One panel per consensus template, in the key order of tempD2
    ##NOTE: the loop variable below reassigns the notebook-level `template`
    for ax_i, ax in enumerate(axes.flat):
        template = list(tempD2.keys())[ax_i]
        tempDF3 = tempDF1.loc[tempDF1['Template']==template]
        #Group mean with 95% CI (pointplot) overlaid with the individual samples (stripplot)
        #NOTE(review): `join` and `ci` are deprecated in newer seaborn releases -- confirm the seaborn version in use
        sns.pointplot(data=tempDF3, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                      markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95, ax=ax)
        sns.stripplot(data=tempDF3, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5}, ax=ax)
        #Add RCI line
        ##Mean RMS of the group that is the same as the consensus template of this panel
        rci = tempDF3['RMS'].loc[tempDF3['Group']==template].mean()
        ax.axhline(y=rci, **{'linestyle':'--', 'color':tempD1[template], 'zorder':0})
        #Set axis
        sns.despine()
        ax.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
        plt.setp(ax.get_xticklabels(), rotation=70, horizontalalignment='right',
                 verticalalignment='center', rotation_mode='anchor')
        if ax_i==0:
            plt.setp(ax, xlabel='', ylabel='Sample RMS')
        else:
            plt.setp(ax.get_yticklabels(), visible=False)
            plt.setp(ax, xlabel='', ylabel='')
        #Add significance labels
        ##One bracket per comparison of this template, stacked upward by aline_ymargin per row
        tempDF3 = tempDF2.loc[tempDF2['Template']==template]
        for row_i in range(len(tempDF3)):
            #Baseline
            group_0 = tempDF3['Baseline'].iloc[row_i]
            index_0 = list(tempD1.keys()).index(group_0)
            xcoord_0 = index_0
            #Contrast
            group_1 = tempDF3['Contrast'].iloc[row_i]
            index_1 = list(tempD1.keys()).index(group_1)
            xcoord_1 = index_1
            #Standard point of marker
            xcoord = (xcoord_0+xcoord_1)/2
            ycoord = aline_ymin + aline_ymargin*row_i
            label = tempDF3['SignifLabel'].iloc[row_i]
            #Add annotation lines
            aline_offset = yinter/5
            aline_length = yinter/5 + aline_offset/2
            ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                    [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                    lw=1.5, c='k')
            #Add annotation text
            ##Asterisk labels and 'P = ...' labels use different font sizes and vertical offsets
            if label in ['***', '**', '*']:
                text_offset = yinter/21
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='medium', color='k')
            else:
                text_offset = yinter/3.5
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='x-small', color='k')
        #Add annotation
        ##Panel title with a colored rectangle drawn just above the axes as its background
        ax.set_title('Consensus:\n'+template, {'fontsize':'small'})
        xoff = 0.015
        yoff = 0.01#NOTE(review): yoff is not used below
        rect = plt.Rectangle((xoff, 1), 1-xoff, 0.17,#Manual adjustment
                             transform=ax.transAxes, facecolor=tempD2[template],
                             clip_on=False, linewidth=0, zorder=0.5)
        ax.add_patch(rect)
    fig.tight_layout()
    #Set title
    ##Module name with its first character capitalized, followed by the module ID
    modulename = moduleDF.loc[module, 'ModuleName']
    initial = modulename[0].capitalize()
    title = re.sub('^.', initial, modulename)+' ('+module+')'
    title = '\n'.join(wrap(title, 100))#Because the below wrap=True didn't work
    fig.suptitle(title, size='small',
                 verticalalignment='baseline', horizontalalignment='center', wrap=True, y=0.925)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'fixed-RMSmean-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 5-3-3. Top changed modules by 17aE2

In [None]:
#Prepare the target module set
##Modules whose pvalDF value for the target comparison is below 0.05
target = '17aE2-vs-Ctrl'
tempL = pvalDF.index[pvalDF[target].lt(0.05).to_numpy()].tolist()
print(len(tempL), f'changed modules with significance in {target}')

#Select representatives
##Show at most 30 and plot at most 3 modules, capped by the number of significant modules
topX = np.minimum(30, len(tempL))
topX_plot = np.minimum(3, len(tempL))
##Module name next to every statDF column whose name ends with 'Pval'
tempDF = statDF[['ModuleName']].merge(statDF.filter(regex='Pval$'),
                                      left_index=True, right_index=True, how='left')
##Keep the significant modules, smallest adjusted P-value of the target comparison first
tempDF = tempDF.loc[tempL]
tempDF = tempDF.sort_values(by=f'{target}_AdjPval', ascending=True)
print('Top', topX, f'modules (sort by significance in {target}):')
display(tempDF.head(topX))
plotL = tempDF.index[:topX_plot].tolist()

#Prepare DF for plot
##Long format: one row per ModuleID x Template x SampleID with its RMS, plus the sample's Phenotype
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.melt(rmsDF.reset_index(), id_vars=['ModuleID', 'Template'],
                 var_name='SampleID', value_name='RMS')
tempDF = tempDF.merge(tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: short code -> display label (applied to both the template and the phenotype columns)
tempD0 = {
    'Ctrl':'Control',
    'Aca':'Acarbose',
    '17aE2':r'17$\alpha$-Estradiol',
    'Rapa':'Rapamycin',
}
tempDF = tempDF.assign(Template=tempDF['Template'].map(tempD0),
                       Group=tempDF['Phenotype'].map(tempD0))
##tempD1: display label -> main color (the key order is the plotting order)
tempD1 = dict(zip(tempD0.values(), ['tab:blue', 'tab:red', 'tab:green', 'tab:purple']))
##tempD2: display label -> secondary color, looked up in the 'tab20' colormap by index
tab20_index = {'tab:blue':1, 'tab:orange':3, 'tab:green':5,
               'tab:red':7, 'tab:purple':9, 'tab:olive':17}
tempD2 = {}
for label in tempD1:
    if tempD1[label] in tab20_index:
        tempD2[label] = plt.get_cmap('tab20')(tab20_index[tempD1[label]])
    else:
        tempD2[label] = 'Error?'

#Visualize each representative
#For each module in plotL: display its metadata, tabulate RMS per (Template, Group), convert the
#adjusted P-values of the '-fixed' comparisons into labels, and draw one figure with 1x4 panels
#(one per consensus template) showing group mean RMS with 95% CI, raw sample points, and brackets.
#NOTE(review): re, wrap, Decimal and ROUND_HALF_UP are not among the imports at the top of the
#notebook; presumably they are imported in an earlier cell - confirm.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by significance in '+target+'):')
    module = plotL[rank_i]
    #Check module summary
    ##Row of moduleDF for this module, transposed so that it is displayed as a one-row table
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    ##tempDF1 is rebound here to the long-format RMS rows of this module
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Displayed for inspection only; tempDF2 is rebound in the significance-label step below
    tempDF2 = tempDF1.groupby(['Template', 'Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        ##Normal-approximation 95% interval of the mean: mean -/+ 1.96*std/sqrt(count)
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    ##Multiindex sort
    ##Categoricals make the rows follow the key order of tempD2/tempD1 instead of alphabetical order
    tempDF2 = tempDF2.reset_index()
    tempDF2['Template'] = pd.Categorical(tempDF2['Template'], categories=list(tempD2.keys()))
    tempDF2['Group'] = pd.Categorical(tempDF2['Group'], categories=list(tempD1.keys()))
    tempDF2 = tempDF2.sort_values(by=['Template', 'Group']).set_index(['Template', 'Group'])
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    ##Keep the entries of this module whose column name contains both '-vs-' and '-fixed'
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS = tempS.loc[tempS.index.str.contains('-fixed')]
    tempS.name = 'AdjPval'
    ##Clean
    ##Entry names are parsed as '<Template>-fixed_<Contrast>-vs-<Baseline>'
    tempDF2 = tempS.index.to_series().str.split(pat='-fixed_', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Template', 1:'Comparison'})
    tempS1 = tempDF2['Template']
    tempDF2 = tempDF2['Comparison'].str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempS1, tempDF2, left_index=True, right_index=True, how='left')
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    ##Map the short codes to the display labels used in tempDF and as the keys of tempD1/tempD2
    tempDF2['Template'] = tempDF2['Template'].map(tempD0)
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ##'***' for <0.001, '**' for <0.01, '*' for <0.05; otherwise 'P = x.xx' rounded half up to two decimals
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##y-axis: ticks from ymin to ymax every yinter; ymargin_b/ymargin_t extend the limits below/above
    ##Brackets: the first one is anchored at y = aline_ymin, each further one is aline_ymargin higher
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.275
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8.8, 5.6), sharex=True, sharey=True,
                             gridspec_kw={'width_ratios':[2.2, 2.2, 2.2, 2.2]})
    for ax_i, ax in enumerate(axes.flat):
        ##Panels follow the key order of tempD2; each panel shows the RMS computed with one template
        template = list(tempD2.keys())[ax_i]
        tempDF3 = tempDF1.loc[tempDF1['Template']==template]
        ##Group mean with 95% CI (pointplot) overlaid with the individual samples (stripplot)
        ##NOTE(review): ci= and join= are deprecated in recent seaborn releases;
        ##presumably an older seaborn version is used here - confirm
        sns.pointplot(data=tempDF3, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                      markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95, ax=ax)
        sns.stripplot(data=tempDF3, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5}, ax=ax)
        #Add RCI line
        ##Dashed horizontal line at the mean RMS of the samples whose own group is the panel's template
        rci = tempDF3['RMS'].loc[tempDF3['Group']==template].mean()
        ax.axhline(y=rci, **{'linestyle':'--', 'color':tempD1[template], 'zorder':0})
        #Set axis
        sns.despine()
        ax.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
        plt.setp(ax.get_xticklabels(), rotation=70, horizontalalignment='right',
                 verticalalignment='center', rotation_mode='anchor')
        ##Only the first panel keeps the y-axis label and tick labels
        if ax_i==0:
            plt.setp(ax, xlabel='', ylabel='Sample RMS')
        else:
            plt.setp(ax.get_yticklabels(), visible=False)
            plt.setp(ax, xlabel='', ylabel='')
        #Add significance labels
        ##tempDF3 is rebound here to the comparisons computed with this panel's template
        tempDF3 = tempDF2.loc[tempDF2['Template']==template]
        for row_i in range(len(tempDF3)):
            #Baseline
            ##x coordinate = position of the group in the key order of tempD1 (the order= of the plots)
            group_0 = tempDF3['Baseline'].iloc[row_i]
            index_0 = list(tempD1.keys()).index(group_0)
            xcoord_0 = index_0
            #Contrast
            group_1 = tempDF3['Contrast'].iloc[row_i]
            index_1 = list(tempD1.keys()).index(group_1)
            xcoord_1 = index_1
            #Standard point of marker
            ##Label is centered between the two groups; successive comparisons are stacked upwards
            xcoord = (xcoord_0+xcoord_1)/2
            ycoord = aline_ymin + aline_ymargin*row_i
            label = tempDF3['SignifLabel'].iloc[row_i]
            #Add annotation lines
            ##Bracket: a short vertical segment at each group joined by a horizontal bar
            aline_offset = yinter/5
            aline_length = yinter/5 + aline_offset/2
            ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                    [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                    lw=1.5, c='k')
            #Add annotation text
            ##Asterisk labels and 'P = ...' labels use different vertical offsets and font sizes
            if label in ['***', '**', '*']:
                text_offset = yinter/21
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='medium', color='k')
            else:
                text_offset = yinter/3.5
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='x-small', color='k')
        #Add annotation
        ax.set_title('Consensus:\n'+template, {'fontsize':'small'})
        xoff = 0.015
        ##NOTE(review): yoff is assigned but not used in this cell
        yoff = 0.01
        ##Colored band behind the panel title, in axes coordinates (y=1 is the top edge of the axes);
        ##clip_on=False lets it be drawn outside the axes area
        rect = plt.Rectangle((xoff, 1), 1-xoff, 0.17,#Manual adjustment
                             transform=ax.transAxes, facecolor=tempD2[template],
                             clip_on=False, linewidth=0, zorder=0.5)
        ax.add_patch(rect)
    fig.tight_layout()
    #Set title
    ##Module name with its first character capitalized, followed by the module ID in parentheses
    modulename = moduleDF.loc[module, 'ModuleName']
    initial = modulename[0].capitalize()
    title = re.sub('^.', initial, modulename)+' ('+module+')'
    title = '\n'.join(wrap(title, 100))#Because the below wrap=True didn't work
    fig.suptitle(title, size='small',
                 verticalalignment='baseline', horizontalalignment='center', wrap=True, y=0.925)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'fixed-RMSmean-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 5-3-4. Top changed modules by Rapa

In [None]:
#Prepare the target module set
##Modules whose pvalDF value for the target comparison is below 0.05
target = 'Rapa-vs-Ctrl'
tempL = pvalDF.index[pvalDF[target].lt(0.05).to_numpy()].tolist()
print(len(tempL), f'changed modules with significance in {target}')

#Select representatives
##Show at most 30 and plot at most 3 modules, capped by the number of significant modules
topX = np.minimum(30, len(tempL))
topX_plot = np.minimum(3, len(tempL))
##Module name next to every statDF column whose name ends with 'Pval'
tempDF = statDF[['ModuleName']].merge(statDF.filter(regex='Pval$'),
                                      left_index=True, right_index=True, how='left')
##Keep the significant modules, smallest adjusted P-value of the target comparison first
tempDF = tempDF.loc[tempL]
tempDF = tempDF.sort_values(by=f'{target}_AdjPval', ascending=True)
print('Top', topX, f'modules (sort by significance in {target}):')
display(tempDF.head(topX))
plotL = tempDF.index[:topX_plot].tolist()

#Prepare DF for plot
##Long format: one row per ModuleID x Template x SampleID with its RMS, plus the sample's Phenotype
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.melt(rmsDF.reset_index(), id_vars=['ModuleID', 'Template'],
                 var_name='SampleID', value_name='RMS')
tempDF = tempDF.merge(tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: short code -> display label (applied to both the template and the phenotype columns)
tempD0 = {
    'Ctrl':'Control',
    'Aca':'Acarbose',
    '17aE2':r'17$\alpha$-Estradiol',
    'Rapa':'Rapamycin',
}
tempDF = tempDF.assign(Template=tempDF['Template'].map(tempD0),
                       Group=tempDF['Phenotype'].map(tempD0))
##tempD1: display label -> main color (the key order is the plotting order)
tempD1 = dict(zip(tempD0.values(), ['tab:blue', 'tab:red', 'tab:green', 'tab:purple']))
##tempD2: display label -> secondary color, looked up in the 'tab20' colormap by index
tab20_index = {'tab:blue':1, 'tab:orange':3, 'tab:green':5,
               'tab:red':7, 'tab:purple':9, 'tab:olive':17}
tempD2 = {}
for label in tempD1:
    if tempD1[label] in tab20_index:
        tempD2[label] = plt.get_cmap('tab20')(tab20_index[tempD1[label]])
    else:
        tempD2[label] = 'Error?'

#Visualize each representative
#For each module in plotL: display its metadata, tabulate RMS per (Template, Group), convert the
#adjusted P-values of the '-fixed' comparisons into labels, and draw one figure with 1x4 panels
#(one per consensus template) showing group mean RMS with 95% CI, raw sample points, and brackets.
#NOTE(review): re, wrap, Decimal and ROUND_HALF_UP are not among the imports at the top of the
#notebook; presumably they are imported in an earlier cell - confirm.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by significance in '+target+'):')
    module = plotL[rank_i]
    #Check module summary
    ##Row of moduleDF for this module, transposed so that it is displayed as a one-row table
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    ##tempDF1 is rebound here to the long-format RMS rows of this module
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Displayed for inspection only; tempDF2 is rebound in the significance-label step below
    tempDF2 = tempDF1.groupby(['Template', 'Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        ##Normal-approximation 95% interval of the mean: mean -/+ 1.96*std/sqrt(count)
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    ##Multiindex sort
    ##Categoricals make the rows follow the key order of tempD2/tempD1 instead of alphabetical order
    tempDF2 = tempDF2.reset_index()
    tempDF2['Template'] = pd.Categorical(tempDF2['Template'], categories=list(tempD2.keys()))
    tempDF2['Group'] = pd.Categorical(tempDF2['Group'], categories=list(tempD1.keys()))
    tempDF2 = tempDF2.sort_values(by=['Template', 'Group']).set_index(['Template', 'Group'])
    display(tempDF2)
    
    #Prepare significance labels
    ##Retrieve statistical significance
    ##Keep the entries of this module whose column name contains both '-vs-' and '-fixed'
    tempS = pvalDF.loc[module, pvalDF.columns.str.contains('-vs-')]#Adjusted P-value
    tempS = tempS.loc[tempS.index.str.contains('-fixed')]
    tempS.name = 'AdjPval'
    ##Clean
    ##Entry names are parsed as '<Template>-fixed_<Contrast>-vs-<Baseline>'
    tempDF2 = tempS.index.to_series().str.split(pat='-fixed_', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Template', 1:'Comparison'})
    tempS1 = tempDF2['Template']
    tempDF2 = tempDF2['Comparison'].str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempS1, tempDF2, left_index=True, right_index=True, how='left')
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    ##Map the short codes to the display labels used in tempDF and as the keys of tempD1/tempD2
    tempDF2['Template'] = tempDF2['Template'].map(tempD0)
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ##'***' for <0.001, '**' for <0.01, '*' for <0.05; otherwise 'P = x.xx' rounded half up to two decimals
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##y-axis: ticks from ymin to ymax every yinter; ymargin_b/ymargin_t extend the limits below/above
    ##Brackets: the first one is anchored at y = aline_ymin, each further one is aline_ymargin higher
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.275
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8.8, 5.6), sharex=True, sharey=True,
                             gridspec_kw={'width_ratios':[2.2, 2.2, 2.2, 2.2]})
    for ax_i, ax in enumerate(axes.flat):
        ##Panels follow the key order of tempD2; each panel shows the RMS computed with one template
        template = list(tempD2.keys())[ax_i]
        tempDF3 = tempDF1.loc[tempDF1['Template']==template]
        ##Group mean with 95% CI (pointplot) overlaid with the individual samples (stripplot)
        ##NOTE(review): ci= and join= are deprecated in recent seaborn releases;
        ##presumably an older seaborn version is used here - confirm
        sns.pointplot(data=tempDF3, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                      markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95, ax=ax)
        sns.stripplot(data=tempDF3, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5}, ax=ax)
        #Add RCI line
        ##Dashed horizontal line at the mean RMS of the samples whose own group is the panel's template
        rci = tempDF3['RMS'].loc[tempDF3['Group']==template].mean()
        ax.axhline(y=rci, **{'linestyle':'--', 'color':tempD1[template], 'zorder':0})
        #Set axis
        sns.despine()
        ax.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
        plt.setp(ax.get_xticklabels(), rotation=70, horizontalalignment='right',
                 verticalalignment='center', rotation_mode='anchor')
        ##Only the first panel keeps the y-axis label and tick labels
        if ax_i==0:
            plt.setp(ax, xlabel='', ylabel='Sample RMS')
        else:
            plt.setp(ax.get_yticklabels(), visible=False)
            plt.setp(ax, xlabel='', ylabel='')
        #Add significance labels
        ##tempDF3 is rebound here to the comparisons computed with this panel's template
        tempDF3 = tempDF2.loc[tempDF2['Template']==template]
        for row_i in range(len(tempDF3)):
            #Baseline
            ##x coordinate = position of the group in the key order of tempD1 (the order= of the plots)
            group_0 = tempDF3['Baseline'].iloc[row_i]
            index_0 = list(tempD1.keys()).index(group_0)
            xcoord_0 = index_0
            #Contrast
            group_1 = tempDF3['Contrast'].iloc[row_i]
            index_1 = list(tempD1.keys()).index(group_1)
            xcoord_1 = index_1
            #Standard point of marker
            ##Label is centered between the two groups; successive comparisons are stacked upwards
            xcoord = (xcoord_0+xcoord_1)/2
            ycoord = aline_ymin + aline_ymargin*row_i
            label = tempDF3['SignifLabel'].iloc[row_i]
            #Add annotation lines
            ##Bracket: a short vertical segment at each group joined by a horizontal bar
            aline_offset = yinter/5
            aline_length = yinter/5 + aline_offset/2
            ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                    [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                    lw=1.5, c='k')
            #Add annotation text
            ##Asterisk labels and 'P = ...' labels use different vertical offsets and font sizes
            if label in ['***', '**', '*']:
                text_offset = yinter/21
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='medium', color='k')
            else:
                text_offset = yinter/3.5
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='x-small', color='k')
        #Add annotation
        ax.set_title('Consensus:\n'+template, {'fontsize':'small'})
        xoff = 0.015
        ##NOTE(review): yoff is assigned but not used in this cell
        yoff = 0.01
        ##Colored band behind the panel title, in axes coordinates (y=1 is the top edge of the axes);
        ##clip_on=False lets it be drawn outside the axes area
        rect = plt.Rectangle((xoff, 1), 1-xoff, 0.17,#Manual adjustment
                             transform=ax.transAxes, facecolor=tempD2[template],
                             clip_on=False, linewidth=0, zorder=0.5)
        ax.add_patch(rect)
    fig.tight_layout()
    #Set title
    ##Module name with its first character capitalized, followed by the module ID in parentheses
    modulename = moduleDF.loc[module, 'ModuleName']
    initial = modulename[0].capitalize()
    title = re.sub('^.', initial, modulename)+' ('+module+')'
    title = '\n'.join(wrap(title, 100))#Because the below wrap=True didn't work
    fig.suptitle(title, size='small',
                 verticalalignment='baseline', horizontalalignment='center', wrap=True, y=0.925)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'fixed-RMSmean-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

#### 5-3-5. Top RCI modules in Ctrl

In [None]:
target = 'Ctrl'

#Select representatives
##Fixed numbers here: 30 modules are displayed and 3 are plotted
topX = 30
topX_plot = 3
##Module name, every statDF column whose name ends with 'Pval', and the rciDF_kk columns, joined on the index
tempDF = (statDF[['ModuleName']]
          .merge(statDF.filter(regex='Pval$'), left_index=True, right_index=True, how='left')
          .merge(rciDF_kk, left_index=True, right_index=True, how='left'))
##Largest value of the target column first
tempDF = tempDF.sort_values(by=target, ascending=False)
print('Top', topX, f'modules (sort by RCI of {target}):')
display(tempDF.head(topX))
plotL = tempDF.index[:topX_plot].tolist()

#Prepare DF for plot
##Long format: one row per ModuleID x Template x SampleID with its RMS, plus the sample's Phenotype
tempDF1 = sampleDF.reset_index()[['SampleID', 'Phenotype']]
tempDF = pd.melt(rmsDF.reset_index(), id_vars=['ModuleID', 'Template'],
                 var_name='SampleID', value_name='RMS')
tempDF = tempDF.merge(tempDF1, on='SampleID', how='left')

#Prepare label and color
##tempD0: short code -> display label (applied to both the template and the phenotype columns)
tempD0 = {
    'Ctrl':'Control',
    'Aca':'Acarbose',
    '17aE2':r'17$\alpha$-Estradiol',
    'Rapa':'Rapamycin',
}
tempDF = tempDF.assign(Template=tempDF['Template'].map(tempD0),
                       Group=tempDF['Phenotype'].map(tempD0))
##tempD1: display label -> main color (the key order is the plotting order)
tempD1 = dict(zip(tempD0.values(), ['tab:blue', 'tab:red', 'tab:green', 'tab:purple']))
##tempD2: display label -> secondary color, looked up in the 'tab20' colormap by index
tab20_index = {'tab:blue':1, 'tab:orange':3, 'tab:green':5,
               'tab:red':7, 'tab:purple':9, 'tab:olive':17}
tempD2 = {}
for label in tempD1:
    if tempD1[label] in tab20_index:
        tempD2[label] = plt.get_cmap('tab20')(tab20_index[tempD1[label]])
    else:
        tempD2[label] = 'Error?'

#Re-prepare p-values because these modules can be filtered out in pvalDF
##tempDF4 collects, per module (index), the '*_AdjPval' columns of every '-vs-' comparison.
##Columns taken from statDF are renamed to '<Contrast>-vs-<Baseline>'; columns read from each
##fixed-consensus summary table are renamed to '<Template>-fixed_<Contrast>-vs-<Baseline>',
##which is the naming that the plotting loop below splits on ('-fixed_' and '-vs-').
##regex=True is passed explicitly because '_AdjPval$' is a regular expression (end-of-string anchor):
##pandas >= 2.0 treats the pattern as a literal string by default, which would leave the suffix
##in place and break the parsing of the baseline group downstream.
tempDF4 = statDF.loc[:, statDF.columns.str.contains('-vs-.*_AdjPval$')]
tempDF4.columns = tempDF4.columns.str.replace('_AdjPval$', '', regex=True)
##Location and common prefix of the exported summary tables (same for all templates)
fileDir = './ExportData/'
ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
for template in ['Aca', '17aE2', 'Rapa']:
    #Reproduce the fixed consensus version of summary table
    fileName = 'inter-group-comparison_'+template+'-fixed-RMSmean.tsv'
    tempDF1 = pd.read_csv(fileDir+ipynbName+fileName, sep='\t')
    tempDF1 = tempDF1.set_index('ModuleID')
    #Take adjusted P-value
    tempDF1 = tempDF1.loc[:, tempDF1.columns.str.contains('-vs-.*_AdjPval$')]
    tempDF1.columns = tempDF1.columns.str.replace('_AdjPval$', '', regex=True)
    tempDF1.columns = template+'-fixed_'+tempDF1.columns
    #Update
    ##Left merge on the module index: modules absent from a summary table get NaN in its columns
    tempDF4 = pd.merge(tempDF4, tempDF1, left_index=True, right_index=True, how='left')

#Visualize each representative
#For each module in plotL: display its metadata, tabulate RMS per (Template, Group), convert the
#adjusted P-values of the '-fixed' comparisons into labels, and draw one figure with 1x4 panels
#(one per consensus template) showing group mean RMS with 95% CI, raw sample points, and brackets.
#NOTE(review): re, wrap, Decimal and ROUND_HALF_UP are not among the imports at the top of the
#notebook; presumably they are imported in an earlier cell - confirm.
for rank_i in range(len(plotL)):
    print(' - Rank '+str(rank_i+1)+' (sort by RCI of '+target+'):')
    module = plotL[rank_i]
    #Check module summary
    ##Row of moduleDF for this module, transposed so that it is displayed as a one-row table
    tempDF1 = pd.DataFrame(moduleDF.loc[module]).T
    display(tempDF1)
    
    #Select RMS
    ##tempDF1 is rebound here to the long-format RMS rows of this module
    tempDF1 = tempDF.loc[tempDF['ModuleID']==module]
    
    #Check RMS summary
    ##Displayed for inspection only; tempDF2 is rebound in the significance-label step below
    tempDF2 = tempDF1.groupby(['Template', 'Group'])['RMS'].agg(['count', 'mean', 'std'])
    tempL1 = []
    tempL2 = []
    for row_n in tempDF2.index.tolist():
        count, mean, std = tempDF2.loc[row_n]
        ##Normal-approximation 95% interval of the mean: mean -/+ 1.96*std/sqrt(count)
        tempL1.append(mean - 1.96*std/np.sqrt(count))
        tempL2.append(mean + 1.96*std/np.sqrt(count))
    tempDF2['0.025'] = tempL1
    tempDF2['0.975'] = tempL2
    ##Multiindex sort
    ##Categoricals make the rows follow the key order of tempD2/tempD1 instead of alphabetical order
    tempDF2 = tempDF2.reset_index()
    tempDF2['Template'] = pd.Categorical(tempDF2['Template'], categories=list(tempD2.keys()))
    tempDF2['Group'] = pd.Categorical(tempDF2['Group'], categories=list(tempD1.keys()))
    tempDF2 = tempDF2.sort_values(by=['Template', 'Group']).set_index(['Template', 'Group'])
    display(tempDF2)
    
    #Prepare significance labels
    ##Re-prepare p-values because these modules can be filtered out in pvalDF
    ##-> Moved to outside of for-loop
    ##Retrieve statistical significance
    ##Row of tempDF4 for this module, restricted to the entries whose name contains '-fixed'
    tempS = tempDF4.loc[module]
    tempS = tempS.loc[tempS.index.str.contains('-fixed')]
    tempS.name = 'AdjPval'
    ##Clean
    ##Entry names are parsed as '<Template>-fixed_<Contrast>-vs-<Baseline>'
    tempDF2 = tempS.index.to_series().str.split(pat='-fixed_', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Template', 1:'Comparison'})
    tempS1 = tempDF2['Template']
    tempDF2 = tempDF2['Comparison'].str.split(pat='-vs-', expand=True)
    tempDF2 = tempDF2.rename(columns={0:'Contrast', 1:'Baseline'})
    tempDF2 = pd.merge(tempS1, tempDF2, left_index=True, right_index=True, how='left')
    tempDF2 = pd.merge(tempDF2, tempS, left_index=True, right_index=True, how='left')
    ##Map the short codes to the display labels used in tempDF and as the keys of tempD1/tempD2
    tempDF2['Template'] = tempDF2['Template'].map(tempD0)
    tempDF2['Contrast'] = tempDF2['Contrast'].map(tempD0)
    tempDF2['Baseline'] = tempDF2['Baseline'].map(tempD0)
    ##Convert p-value to label
    ##'***' for <0.001, '**' for <0.01, '*' for <0.05; otherwise 'P = x.xx' rounded half up to two decimals
    tempL = []
    for row_i in range(len(tempDF2)):
        pval = tempDF2['AdjPval'].iloc[row_i]
        if pval<0.001:
            tempL.append('***')
        elif pval<0.01:
            tempL.append('**')
        elif pval<0.05:
            tempL.append('*')
        else:
            pval_text = Decimal(str(pval)).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
            tempL.append(r'$P$ = '+str(pval_text))
    tempDF2['SignifLabel'] = tempL
    display(tempDF2)
    
    #Visualization
    ##y-axis: ticks from ymin to ymax every yinter; ymargin_b/ymargin_t extend the limits below/above
    ##Brackets: the first one is anchored at y = aline_ymin, each further one is aline_ymargin higher
    ymax = 1.0
    ymin = 0.0
    yinter = 0.2
    ymargin_t = 0.275
    ymargin_b = 0.05
    aline_ymin = 1.0
    aline_ymargin = 0.125
    sns.set(style='ticks', font='Arial', context='talk')
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8.8, 5.6), sharex=True, sharey=True,
                             gridspec_kw={'width_ratios':[2.2, 2.2, 2.2, 2.2]})
    for ax_i, ax in enumerate(axes.flat):
        ##Panels follow the key order of tempD2; each panel shows the RMS computed with one template
        template = list(tempD2.keys())[ax_i]
        tempDF3 = tempDF1.loc[tempDF1['Template']==template]
        ##Group mean with 95% CI (pointplot) overlaid with the individual samples (stripplot)
        ##NOTE(review): ci= and join= are deprecated in recent seaborn releases;
        ##presumably an older seaborn version is used here - confirm
        sns.pointplot(data=tempDF3, x='Group', y='RMS', order=list(tempD1.keys()), palette=tempD1,
                      markers='o', dodge=False, join=False, capsize=0.6, estimator=np.mean, ci=95, ax=ax)
        sns.stripplot(data=tempDF3, x='Group', y='RMS',
                      order=list(tempD1.keys()), palette=tempD1, dodge=False, jitter=0.15,
                      size=5, edgecolor='black', linewidth=1, **{'marker':'o', 'alpha':0.5}, ax=ax)
        #Add RCI line
        ##Dashed horizontal line at the mean RMS of the samples whose own group is the panel's template
        rci = tempDF3['RMS'].loc[tempDF3['Group']==template].mean()
        ax.axhline(y=rci, **{'linestyle':'--', 'color':tempD1[template], 'zorder':0})
        #Set axis
        sns.despine()
        ax.set(ylim=(ymin-ymargin_b, ymax+ymargin_t), yticks=np.arange(ymin, ymax + yinter/10, yinter))
        plt.setp(ax.get_xticklabels(), rotation=70, horizontalalignment='right',
                 verticalalignment='center', rotation_mode='anchor')
        ##Only the first panel keeps the y-axis label and tick labels
        if ax_i==0:
            plt.setp(ax, xlabel='', ylabel='Sample RMS')
        else:
            plt.setp(ax.get_yticklabels(), visible=False)
            plt.setp(ax, xlabel='', ylabel='')
        #Add significance labels
        ##tempDF3 is rebound here to the comparisons computed with this panel's template
        tempDF3 = tempDF2.loc[tempDF2['Template']==template]
        for row_i in range(len(tempDF3)):
            #Baseline
            ##x coordinate = position of the group in the key order of tempD1 (the order= of the plots)
            group_0 = tempDF3['Baseline'].iloc[row_i]
            index_0 = list(tempD1.keys()).index(group_0)
            xcoord_0 = index_0
            #Contrast
            group_1 = tempDF3['Contrast'].iloc[row_i]
            index_1 = list(tempD1.keys()).index(group_1)
            xcoord_1 = index_1
            #Standard point of marker
            ##Label is centered between the two groups; successive comparisons are stacked upwards
            xcoord = (xcoord_0+xcoord_1)/2
            ycoord = aline_ymin + aline_ymargin*row_i
            label = tempDF3['SignifLabel'].iloc[row_i]
            #Add annotation lines
            ##Bracket: a short vertical segment at each group joined by a horizontal bar
            aline_offset = yinter/5
            aline_length = yinter/5 + aline_offset/2
            ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                    [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                    lw=1.5, c='k')
            #Add annotation text
            ##Asterisk labels and 'P = ...' labels use different vertical offsets and font sizes
            if label in ['***', '**', '*']:
                text_offset = yinter/21
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='medium', color='k')
            else:
                text_offset = yinter/3.5
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize='x-small', color='k')
        #Add annotation
        ax.set_title('Consensus:\n'+template, {'fontsize':'small'})
        xoff = 0.015
        ##NOTE(review): yoff is assigned but not used in this cell
        yoff = 0.01
        ##Colored band behind the panel title, in axes coordinates (y=1 is the top edge of the axes);
        ##clip_on=False lets it be drawn outside the axes area
        rect = plt.Rectangle((xoff, 1), 1-xoff, 0.17,#Manual adjustment
                             transform=ax.transAxes, facecolor=tempD2[template],
                             clip_on=False, linewidth=0, zorder=0.5)
        ax.add_patch(rect)
    fig.tight_layout()
    #Set title
    ##Module name with its first character capitalized, followed by the module ID in parentheses
    modulename = moduleDF.loc[module, 'ModuleName']
    initial = modulename[0].capitalize()
    title = re.sub('^.', initial, modulename)+' ('+module+')'
    title = '\n'.join(wrap(title, 100))#Because the below wrap=True didn't work
    fig.suptitle(title, size='small',
                 verticalalignment='baseline', horizontalalignment='center', wrap=True, y=0.925)
    ##Save
    #fileDir = './ExportFigures/'
    #ipynbName = '230214_LC-M001-proteomics-DIRAC-ver7-2_DIRAC-GOBP_'
    #fileName = 'fixed-RMSmean-pointplot-'+module.replace('GO:', 'GO')+'.pdf'
    #plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04, transparent=True)
    plt.show()
    print('')

# — End of notebook —