In [1]:
import os
import glob
import pickle
import pandas as pd
import numpy as np
from dask.diagnostics import ProgressBar
from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2
from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell
import seaborn as sns
import scanpy as sc
from pyscenic.rss import regulon_specificity_scores

In [None]:
# Folders holding the analysis outputs and the downloaded resources.
DATA_FOLDER = "/home/jovyan/scripts/renal_covid_19/steroid_pipeline/regulon_analysis/"
RESOURCES_FOLDER = "/lustre/scratch117/cellgen/team298/win/regulon_own_data/"

# Feather database, from https://resources.aertslab.org/cistarget/
DATABASES_GLOB = os.path.join(
    RESOURCES_FOLDER,
    "hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.feather",
)
# Motif annotation table, from https://resources.aertslab.org/cistarget/
MOTIF_ANNOTATIONS_FNAME = os.path.join(
    RESOURCES_FOLDER,
    "motifs-v9-nr.hgnc-m0.001-o0.0.tbl",
)
# TF list, from https://github.com/aertslab/pySCENIC/blob/master/resources/lambert2018.txt
MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, "lambert2018.txt")

# Files written to / read from DATA_FOLDER for the CD14 wave-2 steroid run.
REGULONS_FNAME = os.path.join(DATA_FOLDER, "CD14_wave2_steroid_2021.p")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "CD14_wave2_steroid_2021.csv")


# Regulon specificity scores (RSS)

In [2]:
# Reload the saved auc_mtx. The path is built from DATA_FOLDER (config cell)
# instead of repeating the absolute folder, so the location is set in one place.
auc_mtx = pd.read_csv(os.path.join(DATA_FOLDER, 'CD14_wave2_steroid_2021_auc_mtx.csv'))

In [3]:
# Use the 'Cell' column as the row index.
# Guarded and rebinding (no inplace=True) so the cell can be re-run safely:
# a second in-place set_index('Cell') would raise KeyError because the
# column has already been moved into the index.
if auc_mtx.index.name != 'Cell':
    auc_mtx = auc_mtx.set_index('Cell')
auc_mtx.head()

Unnamed: 0_level_0,ADNP(+),AHR(+),AHRR(+),ARID3A(+),ARNT(+),ARNT2(+),ARNTL2(+),ATF1(+),ATF2(+),ATF3(+),...,ZNF543(+),ZNF547(+),ZNF554(+),ZNF585B(+),ZNF597(+),ZNF69(+),ZNF786(+),ZNF853(+),ZNF879(+),ZSCAN10(+)
Cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
UK-CIC10690366+UK-CIC10690558_AACTTTCCAAGTCATC-1,0.224699,0.196717,0.0,0.021823,0.070526,0.060885,0.05659,0.140596,0.059012,0.06281,...,0.0,0.0,0.0,0.0,0.04317,0.0,0.108929,0.218491,0.0,0.0
UK-CIC10690366+UK-CIC10690558_TATTACCAGAGTAATC-1,0.215969,0.262624,0.0,0.085342,0.121985,0.00132,0.089021,0.155788,0.035741,0.092892,...,0.188938,0.0,0.079939,0.0,0.056417,0.0,0.028173,0.0,0.0,0.069237
UK-CIC10690367+UK-CIC10690559_TTGGCAAGTATATGGA-1,0.19307,0.247264,0.0,0.018258,0.091297,0.008105,0.128998,0.101283,0.077642,0.153467,...,0.0,0.0,0.032829,0.053117,0.034293,0.0,0.024901,0.0,0.0,0.0
UK-CIC10690325+UK-CIC10690517_GCGCGATCACCCAGTG-1,0.0,0.167521,0.0,0.231044,0.080503,0.016434,0.043809,0.041385,0.107354,0.082618,...,0.0,0.0,0.0,0.0,0.012845,0.0,0.031986,0.0,0.047017,0.0
UK-CIC10690319+UK-CIC10690511_GCTTGAAAGATCCCAT-1,0.0,0.273218,0.0,0.010737,0.095786,0.028837,0.128989,0.165506,0.036856,0.126623,...,0.127844,0.0,0.059699,0.0,0.066366,0.0,0.05646,0.0,0.0,0.0


In [4]:
# Load the AnnData object; the path is built from DATA_FOLDER (config cell)
# rather than repeating the absolute folder.
adata = sc.read(os.path.join(DATA_FOLDER, 'adata_CD14_wave2_steroid_2021_auc_mtx.h5ad'))

In [5]:
# Show the distinct values of the 'case_control' annotation.
adata.obs['case_control'].unique()

['POSITIVE', 'RECOVERY']
Categories (2, object): ['POSITIVE', 'RECOVERY']

In [7]:
# Show the distinct values of the 'annot4' annotation.
adata.obs['annot4'].unique()

['CD14mono', 'CD14mono_anti_inflammatory', 'CD14mono_IFN', 'CD14mono_activated']
Categories (4, object): ['CD14mono', 'CD14mono_IFN', 'CD14mono_activated', 'CD14mono_anti_inflammatory']

In [None]:
# Regulon specificity scores, grouped by the 'steroid_timeline' annotation.
# NOTE(review): presumably the rows of auc_mtx must be in the same cell order
# as adata.obs for the labels to line up — confirm before trusting the scores.
rss_cellType = regulon_specificity_scores(
    auc_mtx,
    adata.obs['steroid_timeline'],
)
rss_cellType

In [None]:
#rss_cellType.to_csv('/lustre/scratch117/cellgen/team298/win/for_lisa/regulons_before_after_steroid.csv')

In [None]:
# Same check as the earlier 'annot4' cell: list the distinct 'annot4' values.
adata.obs['annot4'].unique()

In [None]:
import matplotlib.pyplot as plt
from adjustText import adjust_text
from pyscenic.plotting import plot_rss
# RSS panel plot: one panel per 'steroid_timeline' category, saved as a PDF.
plt.rcParams.update({'font.size': 18})
cats = sorted(set(adata.obs['steroid_timeline']))

fig = plt.figure(figsize=(8, 8))
for num, c in enumerate(cats, start=1):
    x = rss_cellType.T[c]
    # 5% of the score range, added as head-room above and below the points.
    pad = (x.max() - x.min()) * 0.05

    # One column per category. The grid was previously hard-coded to two
    # columns, which fails as soon as there are more than two categories.
    ax = fig.add_subplot(1, len(cats), num)
    plot_rss(rss_cellType, c, top_n=5, max_n=None, ax=ax)
    ax.set_ylim(x.min() - pad, x.max() + pad)
    for t in ax.texts:
        t.set_fontsize(12)
    # Per-panel axis labels are blanked; shared labels are drawn on the figure below.
    ax.set_ylabel('')
    ax.set_xlabel('')
    adjust_text(ax.texts, autoalign='xy', ha='right', va='bottom', arrowprops=dict(arrowstyle='-',color='lightgrey'), precision=0.001 )

fig.text(0.5, 0.0, 'Regulon', ha='center', va='center', size='x-large')
fig.text(0.00, 0.5, 'Regulon specificity score (RSS)', ha='center', va='center', rotation='vertical', size='x-large')
plt.tight_layout()
# NOTE(review): these rcParams are set after the axes above were created —
# presumably they are meant for figures drawn later; confirm.
plt.rcParams.update({
    'figure.autolayout': True,
        'figure.titlesize': 'large' ,
        'axes.labelsize': 'large',
        'axes.titlesize':'large',
        'xtick.labelsize':'large',
        'ytick.labelsize':'large'
        })

#plt.show()
plt.savefig('regulon_before_after.pdf',bbox_inches="tight",dpi=300)

# Customised RSS plot: the after-steroid ranking, with the top before-steroid regulons also labelled

In [None]:
# Transposed view of the RSS table (rows and columns swapped).
T_rss_cellType = rss_cellType.transpose()
T_rss_cellType.head()

In [None]:
# Rank the rows by their 'before_steroid' score, highest first.
before_top = T_rss_cellType.sort_values(by='before_steroid', ascending=False)
before_top.head(10)

In [None]:
# Labels of the ten highest-ranked 'before_steroid' rows.
before_top.index[:10]

In [None]:
# Rank the rows by their 'after_steroid' score, highest first.
after_top = T_rss_cellType.sort_values(by='after_steroid', ascending=False)
after_top.head(10)

In [None]:
# Number the rows 0..n-1 in their current (sorted) order.
after_top = after_top.assign(row_num=np.arange(len(after_top)))
after_top

In [None]:
# Keep the current index labels as a 'row_names' column before the index is
# replaced by row_num. Guarded so the cell can be re-run: once the index has
# been replaced, re-assigning would overwrite the names with the integer
# row numbers and break the later lookup by name.
if 'row_names' not in after_top.columns:
    after_top['row_names'] = after_top.index

In [None]:
# Make row_num the index. Guarded and rebinding (no inplace=True) so the cell
# can be re-run: a second in-place set_index('row_num') raises KeyError
# because the column has already been moved into the index.
if after_top.index.name != 'row_num':
    after_top = after_top.set_index('row_num')


In [None]:
# For each of the first five labels in before_top, take the after_top index
# value (row_num) of the row whose 'row_names' entry matches that label.
ind = [
    after_top.index[after_top['row_names'] == regulon][0]
    for regulon in before_top.index[:5]
]
    

In [None]:
# Display the looked-up row numbers.
ind

In [None]:
# The first five labels of before_top, as a plain list.
additional_plot = before_top.index[:5].tolist()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from math import ceil, floor
def plot_rss1(rss, cell_type, ind, list_to_plot, top_n=5, max_n=None, ax=None):
    """Plot the ranked scores of one group and label two sets of regulons.

    The scores in ``rss`` for ``cell_type`` are sorted in descending order and
    drawn as points against their rank. The ``top_n`` highest-ranked regulons
    are labelled in red; the regulons named in ``list_to_plot`` are labelled
    in green.

    Parameters
    ----------
    rss : pandas.DataFrame
        Score table; ``rss.T[cell_type]`` must yield one score per regulon.
    cell_type : str
        Row label of ``rss`` to plot; also used as the axes title.
    ind : sequence of int or None
        X positions at which the ``list_to_plot`` regulons are marked, in the
        same order as ``list_to_plot``. Pass ``None`` to use each regulon's
        rank in the plotted (sorted) data.
    list_to_plot : list
        Labels of the additional regulons to highlight. Each must be present
        in the plotted data.
    top_n : int, default 5
        Number of highest-ranked regulons to label.
    max_n : int or None, default None
        Number of ranked regulons to plot; ``None`` uses ``rss.shape[1]``.
    ax : matplotlib Axes or None, default None
        Axes to draw on; a new 4x4-inch figure is created when ``None``.

    Raises
    ------
    IndexError
        If ``ind`` holds fewer positions than ``list_to_plot`` has regulons.
    KeyError
        If a label in ``list_to_plot`` is not in the plotted data.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(4, 4))
    if max_n is None:
        max_n = rss.shape[1]

    # Descending ranking for the requested group, truncated to max_n entries.
    data = rss.T[cell_type].sort_values(ascending=False).iloc[:max_n]
    ax.plot(np.arange(len(data)), data, '.')
    # Round the y-limits outwards to two decimals.
    ax.set_ylim([floor(data.min() * 100.0) / 100.0, ceil(data.max() * 100.0) / 100.0])
    ax.set_ylabel('RSS')
    ax.set_xlabel('Regulon')
    ax.set_title(cell_type)
    ax.set_xticklabels([])

    # Horizontal gap between a highlighted point and its label.
    label_offset = max_n / 25

    font = {
        'color': 'red',
        'weight': 'normal',
        'size': 2,
    }
    top = data.iloc[:top_n]
    for idx, (regulon_name, rss_val) in enumerate(zip(top.index, top.values)):
        ax.plot([idx, idx], [rss_val, rss_val], 'r.')
        ax.text(
            idx + label_offset,
            rss_val,
            regulon_name,
            fontdict=font,
            horizontalalignment='left',
            verticalalignment='center',
        )

    font1 = {
        'color': 'green',
        'weight': 'normal',
        'size': 1,
    }
    extra = data.loc[list_to_plot]
    if ind is None:
        # Use each regulon's rank in the data that is actually plotted.
        rank_of = {name: pos for pos, name in enumerate(data.index)}
        positions = [rank_of[name] for name in extra.index]
    else:
        positions = ind
        if len(positions) < len(extra):
            raise IndexError(
                'ind has %d positions but list_to_plot has %d regulons'
                % (len(positions), len(extra))
            )

    # Positions are paired with regulons directly, replacing the manual
    # counter that shadowed the loop index.
    for idx, regulon_name, rss_val in zip(positions, extra.index, extra.values):
        # NOTE(review): the marker is red while the label is green — confirm intended.
        ax.plot([idx, idx], [rss_val, rss_val], 'r.')
        ax.text(
            idx + label_offset,
            rss_val,
            regulon_name,
            fontdict=font1,
            horizontalalignment='left',
            verticalalignment='center',
        )

In [None]:
import matplotlib.pyplot as plt
from adjustText import adjust_text
from pyscenic.plotting import plot_rss
# Customised RSS panel for 'after_steroid', drawn with plot_rss1 so that the
# regulons in additional_plot are labelled as well; saved as a PDF.
plt.rcParams.update({'font.size': 18})
cats = ['after_steroid']

fig = plt.figure(figsize=(8, 8))
for num, c in enumerate(cats, start=1):
    x = rss_cellType.T[c]
    lower, upper = x.min(), x.max()
    margin = (upper - lower) * 0.05

    # The panel is placed in a 1x2 grid, as in the original layout.
    ax = fig.add_subplot(1, 2, num)
    plot_rss1(rss_cellType, c, ind, additional_plot, top_n=5, max_n=None, ax=ax)
    ax.set_ylim(lower - margin, upper + margin)
    for label in ax.texts:
        label.set_fontsize(12)
    # Per-panel axis labels are blanked; shared labels are drawn on the figure below.
    ax.set_ylabel('')
    ax.set_xlabel('')
    adjust_text(
        ax.texts,
        autoalign='xy',
        ha='right',
        va='bottom',
        arrowprops=dict(arrowstyle='-', color='lightgrey'),
        precision=0.001,
    )

fig.text(0.5, 0.0, 'Regulon', ha='center', va='center', size='x-large')
fig.text(
    0.00,
    0.5,
    'Regulon specificity score (RSS)',
    ha='center',
    va='center',
    rotation='vertical',
    size='x-large',
)
plt.tight_layout()
plt.rcParams.update({
    'figure.autolayout': True,
    'figure.titlesize': 'large',
    'axes.labelsize': 'large',
    'axes.titlesize': 'large',
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
})

plt.savefig('regulon_combined.pdf', bbox_inches="tight", dpi=300)