In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable, get_cmap
from matplotlib.colors import ListedColormap

from matplotlib.lines import Line2D

from matplotlib.ticker import FuncFormatter
import textwrap 

In [2]:
# pathway hits for ACM 
df_acm = pd.read_excel("TB Go AllLists.xlsx")
# pathway hits for scRNA-seq markers 
df_sc = pd.read_excel("GO_AllLists scRNA-seq PTB.xlsx")

In [3]:
pathway_ids = sorted(df_acm.GO.unique())
pathway_descr = [df_acm.loc[df_acm.GO==x,"Description"].iloc[0] for x in pathway_ids]

In [4]:
# vectorize gene ratio hits 
gene_ratio_list = []
for pid in pathway_ids:
    gene_ratios_df = df_acm.loc[df_acm.GO == pid,["GeneRatio","GeneList"]].copy()
    gene_ratios_vec = [0,0,0]
    if "PTB A" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[0] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="PTB A","GeneRatio"].values[0]
    if "PTB C" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[1] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="PTB C","GeneRatio"].values[0]
    if "PTB M" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[2] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="PTB M","GeneRatio"].values[0]
    gene_ratio_list += [gene_ratios_vec]

df_acm_vec = pd.DataFrame(gene_ratio_list,index=pathway_descr,columns=["Alveoli","Core","Mantle"])

In [5]:
# vectorize pvals 
gene_q_list = []
for pid in pathway_ids:
    gene_q_df = df_acm.loc[df_acm.GO == pid,["Log(q-value)","GeneList"]].copy()
    gene_q_vec = [0,0,0]
    if "PTB A" in gene_q_df["GeneList"].values:
        gene_q_vec[0] = gene_q_df.loc[gene_q_df["GeneList"]=="PTB A","Log(q-value)"].values[0]
    if "PTB C" in gene_q_df["GeneList"].values:
        gene_q_vec[1] = gene_q_df.loc[gene_q_df["GeneList"]=="PTB C","Log(q-value)"].values[0]
    if "PTB M" in gene_q_df["GeneList"].values:
        gene_q_vec[2] = gene_q_df.loc[gene_q_df["GeneList"]=="PTB M","Log(q-value)"].values[0]
    gene_q_list += [gene_q_vec]

df_acm_pvec = pd.DataFrame(gene_q_list,index=pathway_descr,columns=["Alveoli","Core","Mantle"])

In [6]:
# we want to wrap text for pathway names 
def wrap_ticks(val, pos):
    s = str(val)
    return "\n".join(textwrap.wrap(s, 20))

In [7]:
data = df_acm_vec
pv = -df_acm_pvec

# we want to grab the dendrogram 
g = sns.clustermap(data, cbar=False, figsize=(8,12),col_cluster=False, dendrogram_ratio=(0.15, 0.3),method="ward")
g.ax_heatmap.clear()

# positioning 
x_labels = g.data2d.columns
y_labels = g.data2d.index
n_rows, n_cols = data.shape

gap = 0.01
# get current positions
pos_hm = g.ax_heatmap.get_position()
pos_rd = g.ax_row_dendrogram.get_position()

g.ax_heatmap.set_position([
    pos_rd.x1 + gap,
    pos_hm.y0,
    pos_hm.width * 0.4,
    pos_hm.height
])

g.ax_row_dendrogram.set_position([
    pos_rd.x0 + gap-0.005,  
    pos_rd.y0,
    pos_rd.width,
    pos_rd.height
])

# hsv is ending 30% is blue-purple-red
hsv = get_cmap("hsv")
top30 = [hsv(i) for i in np.linspace(0.7, 1.0, 256)]
cmap = ListedColormap(top30, name="blue_purple_red")

norm = Normalize(vmin=0, vmax=21)
sm = ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])

# dotplots with size as gene ratio and color as pval 
for i, y in enumerate(y_labels):
    for j, x in enumerate(x_labels):
        val = pv.loc[y, x]
        gr_val = data.loc[y,x]
        color = cmap(norm(val))
        g.ax_heatmap.scatter(
            j, i,
            s=gr_val * 1E3, 
            color=color,
            edgecolors='none',
            linewidths=0.3,
            alpha=0.9
        )

# ticklabels 
g.ax_heatmap.set_xticklabels(x_labels, rotation=90,fontsize=20)
g.ax_heatmap.set_yticklabels(list(map(lambda x: "\n".join(textwrap.wrap(x.capitalize(), 30)), y_labels)),fontsize=13)
g.ax_heatmap.set_xticks(np.arange(n_cols))
g.ax_heatmap.set_yticks(np.arange(n_rows))

g.ax_heatmap.set_xticks(np.arange(-0.5, n_cols+0.5, 1), minor=True)
g.ax_heatmap.set_yticks(np.arange(0.5, n_rows-0.5, 1), minor=True)
g.ax_heatmap.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)

g.ax_heatmap.set_ylim(n_rows - 0.5, -0.5)

g.ax_heatmap.tick_params(
    which='both',    
    top=False,
    bottom=False,
    left=False,
    right=False,
    length=0
)

for side in ['top','right','bottom','left']:
    spine = g.ax_heatmap.spines[side]
    spine.set_visible(True)
    spine.set_linewidth(2)
    spine.set_edgecolor('black')

plt.axis("off")

# cbar adjustments 
cbar = g.fig.colorbar(sm, cax = g.fig.add_axes([-0.125, 0.3, 0.05, 0.1]),
                      ax=g.ax_heatmap, orientation='vertical')

ticks = [2, 6, 10, 20]
cbar.set_ticks(ticks)
cbar.set_ticklabels([
    r"$10^{-2}$",
    r"$10^{-6}$",
    r"$10^{-10}$",
    r"$10^{-20}$",
])
cbar.ax.tick_params(labelsize=16)

cbar.ax.set_title(
    "q-value", 
    fontsize=20,
    pad=20  
)

for spine in cbar.ax.spines.values():
    spine.set_visible(False)

# gene ratio legend adjustments 
ratio_vals = [0.05, 0.10, 0.20] 
markers = []
labels  = []
for rv in ratio_vals:
    ms = np.sqrt(rv * 1e3)
    markers.append(
        Line2D(
            [], [], linestyle='None', marker='o',
            markersize=ms,
            markerfacecolor='gray',
            markeredgecolor='none',
            alpha=0.9
        )
    )
    labels.append(f'{rv:.0%}')

g.ax_heatmap.legend(
    markers,
    labels,
    title='Gene ratio',
    loc='upper left',
    bbox_to_anchor=(-1.8, 0.85), 
    frameon=False,         
    title_fontsize=20,   
    fontsize=16            
)

plt.savefig("plots/pathways_acm.png", bbox_inches="tight", dpi=400)
plt.close()
plt.clf()

  hsv = get_cmap("hsv")
  g.ax_heatmap.set_xticklabels(x_labels, rotation=90,fontsize=20)
  g.ax_heatmap.set_yticklabels(list(map(lambda x: "\n".join(textwrap.wrap(x.capitalize(), 30)), y_labels)),fontsize=13)


<Figure size 640x480 with 0 Axes>

In [8]:
# ignore KEGG hits (duplicate names)
df_sc = df_sc.loc[(df_sc.Category.values != "KEGG Pathway")]

# handpick terms 
unique_terms = [
    "antimicrobial humoral immune response mediated by antimicrobial peptide",
    "cellular response to molecule of bacterial origin",
    "cholesterol efflux",
    "Cholesterol metabolism",
    "cytokine-mediated signaling pathway",
    "defense response to bacterium",
    "fatty acid catabolic process",
    "fatty acid metabolic process",
    "Interferon Signaling",
    "lipid storage",
    "phagocytosis",
    "positive regulation of T cell activation",
    "T cell activation",
    "positive regulation of angiogenesis",
    "Oxidative phosphorylation",
    "Metabolism of lipids",
    "Degradation pathway of sphingolipids including diseases"
]

df_sc = df_sc[df_sc["Description"].isin(unique_terms)]

# handpick macrophages 
celltypes = ["IFN-activated macro","SPP1+ macro","TREM2 lipid associated macro","alveolar macro"]

df_sc = df_sc[df_sc["GeneList"].isin(celltypes)]

In [9]:
pathway_ids = sorted(df_sc.GO.unique())
pathway_descr = [df_sc.loc[df_sc.GO==x,"Description"].iloc[0] for x in pathway_ids]

In [10]:
# gene ratio vectorization 
gene_ratio_list = []
for pid in pathway_ids:
    gene_ratios_df = df_sc.loc[df_sc.GO == pid,["GeneRatio","GeneList"]].copy()
    gene_ratios_vec = [0,0,0,0]
    if "IFN-activated macro" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[0] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="IFN-activated macro","GeneRatio"].values[0]
    if "SPP1+ macro" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[1] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="SPP1+ macro","GeneRatio"].values[0]
    if "TREM2 lipid associated macro" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[2] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="TREM2 lipid associated macro","GeneRatio"].values[0]
    if "alveolar macro" in gene_ratios_df["GeneList"].values:
        gene_ratios_vec[3] = gene_ratios_df.loc[gene_ratios_df["GeneList"]=="alveolar macro","GeneRatio"].values[0]
    gene_ratio_list += [gene_ratios_vec]

df_sc_vec = pd.DataFrame(gene_ratio_list,index=pathway_descr,columns=["IFN-activated MΦ",
                                                                       "SPP1+ MΦ",
                                                                       "TREM2+ MΦ",
                                                                       "Alveolar MΦ"])

In [11]:
# pval vectorization 
gene_q_list = []
for pid in pathway_ids:
    gene_q_df = df_sc.loc[df_sc.GO == pid,["Log(q-value)","GeneList"]].copy()
    gene_q_vec = [0,0,0,0]
    if "IFN-activated macro" in gene_q_df["GeneList"].values:
        gene_q_vec[0] = gene_q_df.loc[gene_q_df["GeneList"]=="IFN-activated macro","Log(q-value)"].values[0]
    if "SPP1+ macro" in gene_q_df["GeneList"].values:
        gene_q_vec[1] = gene_q_df.loc[gene_q_df["GeneList"]=="SPP1+ macro","Log(q-value)"].values[0]
    if "TREM2 lipid associated macro" in gene_q_df["GeneList"].values:
        gene_q_vec[2] = gene_q_df.loc[gene_q_df["GeneList"]=="TREM2 lipid associated macro","Log(q-value)"].values[0]
    if "alveolar macro" in gene_q_df["GeneList"].values:
        gene_q_vec[3] = gene_q_df.loc[gene_q_df["GeneList"]=="alveolar macro","Log(q-value)"].values[0]
    gene_q_list += [gene_q_vec]

df_sc_pvec = pd.DataFrame(gene_q_list,index=pathway_descr,columns=["IFN-activated MΦ",
                                                                       "SPP1+ MΦ",
                                                                       "TREM2+ MΦ",
                                                                       "Alveolar MΦ"])

In [12]:
data = df_sc_vec
pv = -df_sc_pvec

# keep just dendrogram
g = sns.clustermap(data, cbar=False, figsize=(8,12),col_cluster=False, dendrogram_ratio=(0.15, 0.3), method="ward")
g.ax_heatmap.clear()

# position 
x_labels = g.data2d.columns
y_labels = g.data2d.index
n_rows, n_cols = data.shape

gap = 0.01
pos_hm = g.ax_heatmap.get_position()
pos_rd = g.ax_row_dendrogram.get_position()

g.ax_heatmap.set_position([
    pos_rd.x1 + gap,
    pos_hm.y0,
    pos_hm.width * 0.9,
    pos_hm.height
])

g.ax_row_dendrogram.set_position([
    pos_rd.x0 + gap-0.005,  
    pos_rd.y0,
    pos_rd.width,
    pos_rd.height
])

# pval colorscale 
hsv = get_cmap("hsv")
top30 = [hsv(i) for i in np.linspace(0.7, 1.0, 256)]
cmap = ListedColormap(top30, name="blue_purple_red")
norm = Normalize(vmin=0, vmax=21)
sm = ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])

# dotplot: color (pval), size (gene ratio)
for i, y in enumerate(y_labels):
    for j, x in enumerate(x_labels):
        val = pv.loc[y, x]
        gr_val = data.loc[y,x]
        color = cmap(norm(val))
        g.ax_heatmap.scatter(
            j, i,
            s=gr_val * 1E3,   
            color=color,
            edgecolors='none',
            linewidths=0.3,
            alpha=0.9
        )

# ticklabels
g.ax_heatmap.set_xticklabels(x_labels, rotation=90,fontsize=20)
g.ax_heatmap.set_yticklabels(list(map(lambda x: "\n".join(textwrap.wrap(x.capitalize(), 30)), y_labels)),fontsize=13)
g.ax_heatmap.set_xticks(np.arange(n_cols))
g.ax_heatmap.set_yticks(np.arange(n_rows))

g.ax_heatmap.set_xticks(np.arange(-0.5, n_cols+0.5, 1), minor=True)
g.ax_heatmap.set_yticks(np.arange(0.5, n_rows-0.5, 1), minor=True)
g.ax_heatmap.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)

g.ax_heatmap.set_ylim(n_rows - 0.5, -0.5)

g.ax_heatmap.tick_params(
    which='both',    
    top=False,
    bottom=False,
    left=False,
    right=False,
    length=0
)

for side in ['top','right','bottom','left']:
    spine = g.ax_heatmap.spines[side]
    spine.set_visible(True)
    spine.set_linewidth(2)
    spine.set_edgecolor('black')

plt.axis("off")

# cbar customization 
cbar = g.fig.colorbar(sm, cax = g.fig.add_axes([-0.125, 0.3, 0.05, 0.1]),
                      ax=g.ax_heatmap, orientation='vertical')

ticks = [2, 6, 10, 20]
cbar.set_ticks(ticks)

cbar.set_ticklabels([
    r"$10^{-2}$",
    r"$10^{-6}$",
    r"$10^{-10}$",
    r"$10^{-20}$",
])
cbar.ax.tick_params(labelsize=16)

cbar.ax.set_title(
    "q-value", 
    fontsize=20,
    pad=20  
)

for spine in cbar.ax.spines.values():
    spine.set_visible(False)

# gene ratios 
ratio_vals = [0.05, 0.10, 0.20]  # e.g. 5%, 10%, 20%
markers = []
labels  = []
for rv in ratio_vals:
    ms = np.sqrt(rv * 1e3)
    markers.append(
        Line2D(
            [], [], linestyle='None', marker='o',
            markersize=ms,
            markerfacecolor='gray',
            markeredgecolor='none',
            alpha=0.9
        )
    )
    labels.append(f'{rv:.0%}')

g.ax_heatmap.legend(
    markers,
    labels,
    title='Gene ratio',
    loc='upper left',
    bbox_to_anchor=(-1.8, 0.85),
    frameon=False,        
    title_fontsize=20,    
    fontsize=16        
)

plt.savefig("plots/pathways_sc.png", bbox_inches="tight", dpi=400)
plt.close()
plt.clf()

  hsv = get_cmap("hsv")
  g.ax_heatmap.set_xticklabels(x_labels, rotation=90,fontsize=20)
  g.ax_heatmap.set_yticklabels(list(map(lambda x: "\n".join(textwrap.wrap(x.capitalize(), 30)), y_labels)),fontsize=13)


<Figure size 640x480 with 0 Axes>