In [None]:
import glob
import os
import itertools
import time
import warnings

import pandas as pd
import pandarallel
import numpy as np
import tqdm

import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.stats.multitest

# Silence every Python warning and every NumPy floating-point error report for
# the rest of the notebook.
# NOTE(review): this is a blanket suppression; it also hides deprecation and
# divide-by-zero/invalid-value diagnostics from pandas/NumPy in later cells.
warnings.filterwarnings("ignore")
np.seterr(all="ignore")

# Start the pandarallel worker pool. The requested 14 workers are capped at the
# number of logical CPUs so the notebook does not oversubscribe smaller machines;
# on hosts with 14 or more CPUs the behaviour is unchanged.
N_WORKERS = min(14, os.cpu_count() or 1)
pandarallel.pandarallel.initialize(nb_workers=N_WORKERS)

In [None]:
# Load the pairwise result table that every later cell filters and pivots.
# The path previously read 'data./novelty_result.csv' (misplaced dot), which only
# resolves on platforms that strip trailing dots from directory names.
# NOTE(review): assumes the CSV lives in a directory named "data" next to the
# notebook — confirm.
data = pd.read_csv('./data/novelty_result.csv')
# data = data.loc[data.loc[:, 'FDR'] == True, :].copy()

# Strip the literal 'B_' marker from both label columns. The vectorised string
# accessor replaces the former row-wise apply: it yields the same strings, is
# much faster, and passes missing values through instead of raising.
for gene_col in ('gene_1', 'gene_2'):
    data[gene_col] = data[gene_col].str.replace('B_', '', regex=False)

# Labels used, in this order, as both the rows and the columns of every
# matrix built later in the notebook.
clinicals = [
    'PRT16_U', 'GLU16_U', 'BLOOD16_U', 'HBA1C', 'GLU0',
    'BUN', 'ALBUMIN', 'CREATINE', 'AST', 'T_BIL', 'ALT',
    'TCHL', 'R_GTP', 'HDL', 'LDL', 'TG',
    'WBC_B', 'RBC_B', 'HB', 'HCT', 'PLAT',
]

# Base cohort groups (presumably chronic conditions and cancer types,
# judging by the abbreviations — confirm against the data dictionary).
cohorts_2 = ['HTN', 'DM', 'LIP', 'THY', 'OBESITY']
cohorts_4 = [
    'BRCA', 'HCCCA', 'GCA', 'LCA', 'GALLCA',
    'PROCA', 'PACA', 'THYCA', 'COLCA', 'UTCA',
]

# "<cohort>_comp" labels: one per entry of cohorts_2, in the same order.
cohorts_3 = [f'{name}_comp' for name in cohorts_2]

# Every cohort: NORMAL first, then cohorts_2, then cohorts_4.
cohorts_1 = ['NORMAL'] + cohorts_2 + cohorts_4

#### Heatmap drawing

Cohort heatmap

In [None]:
# One heatmap per cohort in cohorts_2, restricted to the weighted rows flagged
# with_complication_only. Each heatmap shows |EIS| on the clinicals x clinicals
# grid; pairs that are absent from the data are drawn as 0.
for cohort in cohorts_2:
    fig, ax = plt.subplots(figsize=(6, 5), facecolor='w')

    is_selected = (
        (data['cohort'] == cohort)
        & (data['is_weighted'] == True)
        & (data['with_complication_only'] == True)
    )
    edges = data.loc[is_selected, data.columns[3:]]

    # Rows are gene_2 and columns are gene_1. This is the orientation the
    # previous code actually plotted: it transposed once, and its second
    # transpose() discarded the result, so that dead statement is dropped here.
    # NOTE(review): if gene_1 was meant to be on the rows, remove .transpose().
    adjacency = (
        edges.pivot_table(index='gene_1', columns='gene_2', values='EIS')
        .transpose()
        # reindex adds every missing clinical as a row/column and fixes the order
        .reindex(index=clinicals, columns=clinicals)
        .fillna(0)
        # guarantees a numeric matrix even when the cohort has no rows at all
        .astype(float)
        .abs()
    )
    # Blank axis names so the heatmap carries no axis labels.
    adjacency.index.name = ''
    adjacency.columns.name = ''

    # Hide the diagonal and the upper triangle; only the lower triangle is drawn.
    mask = np.triu(np.ones(adjacency.shape, dtype=bool))
    sns.heatmap(adjacency, mask=mask, ax=ax, square=True, cmap='crest', linewidth=0.5)
    ax.set_title(f"{cohort} only with complication")

Cohorts Correlation

In [None]:
# network similarity calculation
def _cohort_adjacency(label):
    """Return the |EIS| matrix of one cohort on the clinicals x clinicals grid.

    Parameters
    ----------
    label : str
        A cohort name. A label containing '_comp' selects the rows of the
        cohort named by the label without '_comp' that have
        with_complication_only == True; any other label selects the rows of
        that cohort with with_complication_only == False. Only rows with
        is_weighted == True are used in both cases.

    Returns
    -------
    numpy.ndarray
        Float array of shape (len(clinicals), len(clinicals)); pairs that are
        absent from the data are 0.
    """
    complication_only = '_comp' in label
    is_selected = (
        (data['cohort'] == label.replace('_comp', ''))
        & (data['is_weighted'] == True)
        & (data['with_complication_only'] == complication_only)
    )
    adjacency = (
        data.loc[is_selected, data.columns[3:]]
        .pivot_table(index='gene_1', columns='gene_2', values='EIS')
        .transpose()
        # reindex adds every missing clinical as a row/column and fixes the order
        .reindex(index=clinicals, columns=clinicals)
        .fillna(0)
        .abs()
    )
    return adjacency.to_numpy(dtype=float)


# Build each cohort's matrix once. The pairwise loop below only subtracts
# cached arrays instead of re-filtering and re-pivoting `data` for every pair.
cohort_labels = cohorts_1 + cohorts_3
adjacency_matrices = [_cohort_adjacency(label) for label in cohort_labels]

# Frobenius norm of the difference between every pair of cohort matrices.
sim_matrix = pd.DataFrame(
    [[np.linalg.norm(first - second) for second in adjacency_matrices]
     for first in adjacency_matrices],
    index=cohort_labels,
    columns=cohort_labels,
)

# Drop rows, then columns, whose distance profiles are exact duplicates.
# NOTE(review): this silently removes any cohort whose matrix is identical to
# an earlier cohort's (for example two cohorts with no selected rows) —
# confirm that is intended.
sim_matrix = sim_matrix.drop_duplicates().transpose().drop_duplicates()

fig, ax = plt.subplots(figsize=(7, 7), facecolor='w')
# Hide the diagonal and the upper triangle; only the lower triangle is drawn.
mask = np.triu(np.ones(sim_matrix.shape, dtype=bool))
sns.heatmap(sim_matrix.astype('float'), ax=ax, mask=mask, square=True, cmap='crest', linewidth=0.5)
ax.set_title('Euclidean distance of Networks')