In [1]:
import os, glob, re, pickle
from functools import partial
from collections import OrderedDict
import operator as op
from cytoolz import compose

import pandas as pd
import seaborn as sns
import numpy as np
import scanpy as sc
import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt
from pyscenic.export import export2loom, add_scenic_metadata
from pyscenic.utils import load_motifs
from pyscenic.transform import df2regulons
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_binarization, plot_rss

from IPython.display import HTML, display

In [2]:
# Set maximum number of jobs for Scanpy.
# The Scanpy option is spelled `n_jobs`; `sc.settings.njobs` is not a recognised
# setting, so assigning to it leaves the job count at its default.
sc.settings.n_jobs = 48

Folder structure.

In [3]:
# Directory layout, relative to the notebook's working directory.
RESOURCES_FOLDERNAME = "./SCENIC/resources/"    # input data (annotations, expression matrix)
AUXILLIARIES_FOLDERNAME = "./pySCENIC_use/"     # TF list, ranking databases, motif annotations
RESULTS_FOLDERNAME = "./SCENIC/results/"        # files produced by this notebook
# Figures live in a sub-folder of the results folder.
FIGURES_FOLDERNAME = os.path.join(RESULTS_FOLDERNAME, "figures/")

In [4]:
# Point Scanpy's figure directory at the same folder used by savesvg().
sc.settings.figdir = FIGURES_FOLDERNAME

Auxiliary functions.

In [5]:
# URL prefix for motif logo images; display_logos() appends "<motif id>.png" to it.
BASE_URL = "http://motifcollections.aertslab.org/v9/logos/"
# Name of the column (under "Enrichment") that display_logos() adds with the logo <img> tag.
COLUMN_NAME_LOGO = "MotifLogo"
# Name of the index level that holds the motif identifiers.
COLUMN_NAME_MOTIF_ID = "MotifID"
# Name of the column (under "Enrichment") that holds the target genes of each motif.
COLUMN_NAME_TARGETS = "TargetGenes"

In [6]:
def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Save figure as vector-based SVG image format.

    :param fname: File name of the image (joined onto ``folder``).
    :param fig: Figure object providing ``tight_layout()`` and ``savefig()``.
    :param folder: Destination folder; it is created when it does not exist yet.
    """
    # The figures folder is not created anywhere else in this notebook, so make
    # sure it exists; an empty folder means "current directory" and needs no creation.
    if folder:
        os.makedirs(folder, exist_ok=True)
    fig.tight_layout()
    fig.savefig(os.path.join(folder, fname), format='svg')

In [7]:
def display_logos(df: pd.DataFrame, top_target_genes: int = 3, base_url: str = BASE_URL):
    """
    Display the first rows of a motif table as HTML, with a sequence logo per motif.

    :param df: Motif table. Its index must have a level named ``MotifID`` and it must
        contain an ``("Enrichment", "TargetGenes")`` column whose values are sequences
        of indexable items (they are sorted on their second element). Not modified.
    :param top_target_genes: Number of target genes kept per row in the displayed table.
    :param base_url: URL prefix of the logo images; ``<motif id>.png`` is appended to it.
    """
    # Make sure the original dataframe is not altered.
    df = df.copy()

    # Add column with an <img> tag pointing to the sequence logo of each motif.
    def create_url(motif_id):
        return '<img src="{}{}.png" style="max-height:124px;"></img>'.format(base_url, motif_id)
    df[("Enrichment", COLUMN_NAME_LOGO)] = list(map(create_url, df.index.get_level_values(COLUMN_NAME_MOTIF_ID)))

    # Truncate TargetGenes.
    def truncate(col_val):
        # NOTE(review): the sort is ascending on the second element, so the entries
        # with the LOWEST values are kept -- confirm this is the intended "top".
        return sorted(col_val, key=op.itemgetter(1))[:top_target_genes]
    df[("Enrichment", COLUMN_NAME_TARGETS)] = list(map(truncate, df[("Enrichment", COLUMN_NAME_TARGETS)]))

    # Lift the column-width limit so the <img> tags are not cut off. `None` means
    # unlimited; the former `-1` is deprecated since pandas 1.0 and rejected by
    # pandas 2.x. The context manager restores the previous value even on error.
    with pd.option_context('display.max_colwidth', None):
        display(HTML(df.head().to_html(escape=False)))

Auxiliary data sets.

In [8]:
# Downloaded fromm pySCENIC github repo: https://github.com/aertslab/pySCENIC/tree/master/resources
# Transcription factor list (passed to `pyscenic grn`).
# Downloaded from the pySCENIC GitHub repo: https://github.com/aertslab/pySCENIC/tree/master/resources
DMEL_TFS_FNAME = os.path.join(AUXILLIARIES_FOLDERNAME, 'hh_total_tfs.txt')

# Ranking databases. Downloaded from cisTargetDB: https://resources.aertslab.org/cistarget/
# Previously used hg19 databases, kept for reference:
#   'hg19-tss-centered-10kb-10species.mc9nr.feather'
#   'hg19-500bp-upstream-10species.mc9nr.feather'
#   'hg19-tss-centered-5kb-10species.mc9nr.feather'
RANKING_DBS_FNAMES = [
    os.path.join(AUXILLIARIES_FOLDERNAME, db_fname)
    for db_fname in (
        'hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather',
        'hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather',
    )
]

# Motif annotations. Downloaded from cisTargetDB: https://resources.aertslab.org/cistarget/
MOTIF_ANNOTATIONS_FNAME = os.path.join(AUXILLIARIES_FOLDERNAME, 'motifs-v9-nr.hgnc-m0.001-o0.0.tbl')

In [9]:
# Identifier used as the file-name prefix of every result file written by this notebook.
DATASET_ID = "PC_Bcell"
# Unused leftover, kept disabled:
#TCGA_CODE = 'SKCM'

Resources downloaded.

In [10]:
# Downloaded from GEO on 28 FEB 2019.
CELL_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDERNAME, "samplecluster_counts_19294cell_1122_anno.csv")
# Downloaded from Cell Journal website on 1 MAR 2019.
#SAMPLE_METADATA_FNAME = os.path.join(RESOURCES_FOLDERNAME, "1-s2.0-S0092867418311784-mmc1.xlsx")
# Downloaded from GEO on 1 MAR 2019.
EXP_MTX_TPM_FNAME = os.path.join(RESOURCES_FOLDERNAME, 'samplecluster_counts_19294cell_1122.csv')
#EXP_MTX_COUNTS_FNAME = os.path.join(RESOURCES_FOLDERNAME, 'GSE115978_counts.csv')

Results created.

In [11]:
# Output files; every name is "<DATASET_ID>.<suffix>" inside the results folder.
METADATA_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.metadata.csv')
EXP_MTX_QC_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.qc.tpm.csv')
ADJACENCIES_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.adjacencies.tsv')
MOTIFS_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.motifs.csv')
REGULONS_DAT_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.regulons.dat')
AUCELL_MTX_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.auc.csv')
BIN_MTX_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.bin.csv')
THR_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.thresholds.csv')
ANNDATA_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.h5ad')
LOOM_FNAME = os.path.join(RESULTS_FOLDERNAME, f'{DATASET_ID}.loom')

### STEP 0: Preprocessing

__METADATA CLEANING__

In [39]:
# Load the per-cell annotations; the first CSV column becomes the index.
df_annotations = pd.read_csv(CELL_ANNOTATIONS_FNAME,index_col=0)
df_annotations

Unnamed: 0,celltype_l7,cancer,type,cellID
COAD20.CGAACATCAGCTGCTG-1_1,B.c01.TCL1A+naïveB,COAD,Cancer,COAD20.CGAACATCAGCTGCTG-1_1
CRLM_P8_Colon_P_TAACACGAGTTGTCAC,B.c01.TCL1A+naïveB,COAD,Adjacent,CRLM_P8_Colon_P_TAACACGAGTTGTCAC
COAD20.TACTCATCATGCCTAA-1_1,B.c01.TCL1A+naïveB,COAD,Cancer_PBMC,COAD20.TACTCATCATGCCTAA-1_1
COAD21.TGGTTCCGTCTGGTCG-1_1,B.c01.TCL1A+naïveB,COAD,Cancer,COAD21.TGGTTCCGTCTGGTCG-1_1
COAD15B_T_GCTGGGTCACAGACTT-1,B.c01.TCL1A+naïveB,COAD,Cancer,COAD15B_T_GCTGGGTCACAGACTT-1
...,...,...,...,...
GIST2.CCTCTGAAGGTGCAAC-1_1,B.c10.LMO2+LZGC,GIST,Cancer,GIST2.CCTCTGAAGGTGCAAC-1_1
GIST2.GACCTGGAGCCACGCT-1_1,B.c10.LMO2+LZGC,GIST,Cancer,GIST2.GACCTGGAGCCACGCT-1_1
GIST2.TAGCCGGTCAGCTGGC-1_1,B.c10.LMO2+LZGC,GIST,Cancer,GIST2.TAGCCGGTCAGCTGGC-1_1
GIST2.TCGTACCTCTATCCTA-1_1,B.c10.LMO2+LZGC,GIST,Cancer,GIST2.TCGTACCTCTATCCTA-1_1


In [40]:
# This is the first file written into the results folder, which nothing else in the
# notebook creates; make sure it exists so a fresh run does not fail here.
os.makedirs(RESULTS_FOLDERNAME, exist_ok=True)
# Work on an independent copy so later changes to the metadata never alter df_annotations.
df_metadata = df_annotations.copy()
# The index is not written; the cell IDs are still exported through the `cellID` column.
df_metadata.to_csv(METADATA_FNAME, index=False)
df_metadata.head()

Unnamed: 0,celltype_l7,cancer,type,cellID
COAD20.CGAACATCAGCTGCTG-1_1,B.c01.TCL1A+naïveB,COAD,Cancer,COAD20.CGAACATCAGCTGCTG-1_1
CRLM_P8_Colon_P_TAACACGAGTTGTCAC,B.c01.TCL1A+naïveB,COAD,Adjacent,CRLM_P8_Colon_P_TAACACGAGTTGTCAC
COAD20.TACTCATCATGCCTAA-1_1,B.c01.TCL1A+naïveB,COAD,Cancer_PBMC,COAD20.TACTCATCATGCCTAA-1_1
COAD21.TGGTTCCGTCTGGTCG-1_1,B.c01.TCL1A+naïveB,COAD,Cancer,COAD21.TGGTTCCGTCTGGTCG-1_1
COAD15B_T_GCTGGGTCACAGACTT-1,B.c01.TCL1A+naïveB,COAD,Cancer,COAD15B_T_GCTGGGTCACAGACTT-1


__EXPRESSION MATRIX QC__

In [44]:
# Load the expression matrix; the first CSV column becomes the index.
# The frame is transposed further down before building the AnnData object, so rows are
# expected to be genes and columns cells.
df_tpm = pd.read_csv(EXP_MTX_TPM_FNAME, index_col=0)
df_tpm

Unnamed: 0,COAD20.CGAACATCAGCTGCTG-1_1,CRLM_P8_Colon_P_TAACACGAGTTGTCAC,COAD20.TACTCATCATGCCTAA-1_1,COAD21.TGGTTCCGTCTGGTCG-1_1,COAD15B_T_GCTGGGTCACAGACTT-1,CRLM_P8_Colon_P_CTACGGGTCTGAGATC,PCall_COAD2_T_CGATTGAGTCCGTCAG-1_3,COAD21.CGTGTAATCTGACCTC-1_1,CRLM_P2_Colon_P_AGCTCAACATCAGCGC,COAD21.AACCATGAGAGTTGGC-1_1,...,GIST2.GGGCATCTCCAGGGCT-1_1,GIST2.GTACTCCAGACAATAC-1_1,GIST2.GTCACGGGTGGACGAT-1_1,GIST2.TAGTTGGCAGGAATGC-1_1,GIST2.TGACGGCAGCCCGAAA-1_1,GIST2.CCTCTGAAGGTGCAAC-1_1,GIST2.GACCTGGAGCCACGCT-1_1,GIST2.TAGCCGGTCAGCTGGC-1_1,GIST2.TCGTACCTCTATCCTA-1_1,GIST2.TGAAAGAAGCGTAGTG-1_1
AL627309.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AL669831.5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
FAM87B,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LINC00115,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
FAM41C,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AC106037.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AL357078.3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AC110048.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AC093525.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
#df_counts = pd.read_csv(EXP_MTX_COUNTS_FNAME, index_col=0)
#df_counts.shape

In [48]:
# Build the AnnData object with cells as observations (rows, sorted by cell ID) and
# genes as variables (columns).
adata = ad.AnnData(X=df_tpm.T.sort_index())
df_obs = df_metadata[['celltype_l7',"cancer","type","cellID"]].set_index('cellID')
# Attach the metadata by cell ID instead of by sorted position, so a difference between
# the annotation table and the expression matrix cannot silently mislabel cells.
missing_cells = adata.obs_names.difference(df_obs.index)
if len(missing_cells) > 0:
    raise ValueError(
        "{} cells of the expression matrix have no annotation, e.g. {}".format(
            len(missing_cells), list(missing_cells[:5])))
adata.obs = df_obs.loc[adata.obs_names]
adata.var_names_make_unique()
# min_genes=0 removes no cell; the call is only used to add the `n_genes` column to obs.
sc.pp.filter_cells(adata, min_genes=0)
# Drop genes detected in fewer than 3 cells.
sc.pp.filter_genes(adata, min_cells=3)
# Store non-log transformed data as raw. This data can be used via the use_raw parameters available for many functions.
# In the scanpy's tutorials this is used to stored all genes in log-transformed counts before retaining only Highly Variable Genes (HVG).
# Here it keeps the un-normalised, non-log values of the genes that passed the filter above.
adata.raw = adata
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
adata

  """Entry point for launching an IPython kernel.


AnnData object with n_obs × n_vars = 19294 × 21472
    obs: 'celltype_l7', 'cancer', 'type', 'n_genes'
    var: 'n_cells'
    uns: 'log1p'

In [50]:
# Persist the normalised, log-transformed AnnData object (with the raw values attached).
adata.write_h5ad(ANNDATA_FNAME) # Categorical dtypes are created.

In [51]:
# Export the normalised, log-transformed matrix (cells x genes) as CSV; this file is the
# expression input of the `pyscenic grn` and `pyscenic ctx` commands below.
adata.to_df().to_csv(EXP_MTX_QC_FNAME)

### STEP 1: Network inference based on GRNBoost2 from CLI

For this step the CLI version of SCENIC is used. This step can be deployed on a High-Performance Computing (HPC) system.

_Output:_ List of adjacencies between a TF and its targets stored in `ADJACENCIES_FNAME`.

Note: this step must be run in a Python environment in which the `pyscenic` command-line tool is installed.

In [15]:
# Infer TF-target adjacencies from the QC'ed expression matrix and the TF list, and
# write them to ADJACENCIES_FNAME. `--num_workers` is hard-coded; adjust it to the machine.
!pyscenic grn {EXP_MTX_QC_FNAME} {DMEL_TFS_FNAME} -o {ADJACENCIES_FNAME} --num_workers 48


2022-11-22 20:47:54,571 - pyscenic.cli.pyscenic - INFO - Loading expression matrix.

2022-11-22 20:51:05,975 - pyscenic.cli.pyscenic - INFO - Inferring regulatory networks.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 41343 instead
Numba: Attempted to fork from a non-main thread, the TBB library may be in an invalid state in the child process.
Numba: Attempted to fork from a non-main thread, the TBB library may be in an invalid state in the child process.
Numba: Attempted to fork from a non-main thread, the TBB library may be in an invalid state in the child process.
Numba: Attempted to fork from a non-main thread, the TBB library may be in an invalid state in the child process.
Numba: Attempted to fork from a non-main thread, the TBB library may be in an invalid state in the child process.
Numba: Attempted to fork from a non-main thread, the TBB library may be in an invalid state in the child process.
Numba: Attempted to fork from a non-main thread, the

In [13]:
# Environment check: numba version used for the run above.
# NOTE(review): imported here only for the version check, outside the import cell at the top.
import numba
numba.__version__

'0.55.0'

In [14]:
# Environment check: pySCENIC and dask versions used for the run above.
# NOTE(review): imported here only for the version check, outside the import cell at the top.
import pyscenic
import dask
print(pyscenic.__version__)
print(dask.__version__)

0.11.2
2022.02.0


```
2019-04-25 11:22:20,360 - pyscenic.cli.pyscenic - INFO - Loading expression matrix.
2019-04-25 11:23:37,612 - pyscenic.cli.pyscenic - INFO - Inferring regulatory networks.
preparing dask client
parsing input
creating dask graph
4 partitions
computing dask graph
not shutting down client, client was created externally
finished
2019-04-26 03:58:01,096 - pyscenic.cli.pyscenic - INFO - Writing results to file.
```

### STEP 2-3: Regulon prediction aka cisTarget from CLI

For this step the CLI version of SCENIC is used. This step can be deployed on a High-Performance Computing (HPC) system.

_Output:_ Table of enriched motifs with their target genes (the basis for the regulons), stored in `MOTIFS_FNAME`.

In [16]:
# Space-separated list of ranking database paths, interpolated into the shell command below.
# Because the separator is a space, paths that contain spaces would be split by the shell.
DBS_PARAM = ' '.join(RANKING_DBS_FNAMES)
DBS_PARAM

'/media/ggj/ggjlab/pySCENIC_use/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather /media/ggj/ggjlab/pySCENIC_use/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather'

In [17]:
# Run `pyscenic ctx` on the adjacencies with the ranking databases, the motif annotations
# and the QC'ed expression matrix; the result is written to MOTIFS_FNAME.
# NOTE(review): the executable is an absolute, machine-specific path (the GRN step calls
# plain `pyscenic`); adjust it to the local environment before re-running.
!/home/ggj/anaconda3/envs/pyscenic-test/bin/pyscenic ctx {ADJACENCIES_FNAME} {DBS_PARAM} \
            --annotations_fname {MOTIF_ANNOTATIONS_FNAME} \
            --expression_mtx_fname {EXP_MTX_QC_FNAME} \
            --output {MOTIFS_FNAME} \
            --num_workers 26 \
            --frac_mapping_module 0.6 


2022-11-22 23:57:59,247 - pyscenic.cli.pyscenic - INFO - Creating modules.

2022-11-22 23:58:00,659 - pyscenic.cli.pyscenic - INFO - Loading expression matrix.

2022-11-23 00:01:10,361 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].

2022-11-23 00:06:36,018 - pyscenic.utils - INFO - Creating modules.

2022-11-23 00:10:06,778 - pyscenic.cli.pyscenic - INFO - Loading databases.

2022-11-23 00:10:07,355 - pyscenic.cli.pyscenic - INFO - Calculating regulons.
[                                        ] | 0% Completed |  3min 17.0s
[                                        ] | 0% Completed |  3min 21.5s
[                                        ] | 0% Completed |  3min 22.4s
[                                        ] | 0% Completed |  3min 24.8s
[                                        ] | 0% Completed |  3min 29.5s
[                                        ] | 0% Completed |  3min 30.8s
[                                        ] | 0% C

[                                        ] | 0% Completed |  4min  3.8s
[                                        ] | 0% Completed |  4min  4.7s
[                                        ] | 0% Completed |  4min  6.0s
[                                        ] | 0% Completed |  4min  9.4s
[                                        ] | 0% Completed |  4min 10.4s
[                                        ] | 0% Completed |  4min 11.2s
[                                        ] | 0% Completed |  4min 13.8s
[                                        ] | 0% Completed |  4min 17.9s

[                                        ] | 0% Completed |  4min 18.1s
[                                        ] | 0% Completed |  4min 18.3s
[                                        ] | 0% Completed |  4min 18.7s
[                                        ] | 0% Completed |  4min 19.5s
[                                        ] | 0% Completed |  4min 20.7s
[                                        ] | 0% Completed |  4m

[                                        ] | 0% Completed |  4min 38.0s
[                                        ] | 0% Completed |  4min 39.2s
[                                        ] | 0% Completed |  4min 39.4s
[                                        ] | 0% Completed |  4min 39.8s
[                                        ] | 0% Completed |  4min 41.3s

[                                        ] | 0% Completed |  4min 42.0s

[                                        ] | 0% Completed |  4min 42.8s
[                                        ] | 0% Completed |  4min 44.2s
[                                        ] | 0% Completed |  4min 44.9s
[                                        ] | 0% Completed |  4min 46.0s
[                                        ] | 0% Completed |  4min 46.5s
[                                        ] | 0% Completed |  4min 47.2s

[                                        ] | 0% Completed |  4min 48.2s
[                                        ] | 0% Completed |  


[                                        ] | 0% Completed |  4min 54.6s
[                                        ] | 0% Completed |  4min 54.8s
[                                        ] | 0% Completed |  4min 55.1s
[                                        ] | 0% Completed |  4min 55.3s
[                                        ] | 0% Completed |  4min 55.7s


[                                        ] | 0% Completed |  4min 55.9s
[                                        ] | 0% Completed |  4min 56.0s
[                                        ] | 0% Completed |  4min 56.2s
[                                        ] | 0% Completed |  4min 56.3s
[                                        ] | 0% Completed |  4min 56.8s
[                                        ] | 0% Completed |  4min 57.7s

[                                        ] | 0% Completed |  4min 58.0s
[                                        ] | 0% Completed |  4min 58.8s
[                                        ] | 0% Completed | 

[                                        ] | 0% Completed |  5min  2.9s
[                                        ] | 0% Completed |  5min  3.0s
[                                        ] | 0% Completed |  5min  3.6s


[                                        ] | 0% Completed |  5min  3.7s
[                                        ] | 0% Completed |  5min  4.2s
[                                        ] | 0% Completed |  5min  4.9s
[                                        ] | 0% Completed |  5min  5.4s
[                                        ] | 0% Completed |  5min  5.6s
[                                        ] | 0% Completed |  5min  6.0s
[                                        ] | 0% Completed |  5min  6.6s

[                                        ] | 0% Completed |  5min  7.0s
[                                        ] | 0% Completed |  5min  7.2s

[                                        ] | 0% Completed |  5min  7.4s
[                                        ] | 0% Completed | 

[                                        ] | 0% Completed |  5min 11.6s

[                                        ] | 0% Completed |  5min 11.8s
[                                        ] | 0% Completed |  5min 12.1s

[                                        ] | 0% Completed |  5min 12.3s

[                                        ] | 0% Completed |  5min 12.4s
[                                        ] | 0% Completed |  5min 12.9s
[                                        ] | 0% Completed |  5min 13.0s
[                                        ] | 0% Completed |  5min 13.1s

[                                        ] | 0% Completed |  5min 13.4s
[                                        ] | 0% Completed |  5min 13.7s
[                                        ] | 0% Completed |  5min 13.8s
[                                        ] | 0% Completed |  5min 14.0s
[                                        ] | 0% Completed |  5min 14.2s

[                                        ] | 0% Completed |

[                                        ] | 0% Completed |  5min 26.5s
[                                        ] | 0% Completed |  5min 26.7s
[                                        ] | 0% Completed |  5min 26.9s
[                                        ] | 0% Completed |  5min 27.0s
[                                        ] | 0% Completed |  5min 27.4s

[                                        ] | 0% Completed |  5min 28.3s
[                                        ] | 0% Completed |  5min 28.7s
[                                        ] | 0% Completed |  5min 29.0s
[                                        ] | 0% Completed |  5min 29.2s
[                                        ] | 0% Completed |  5min 29.5s

[                                        ] | 0% Completed |  5min 29.9s
[                                        ] | 0% Completed |  5min 30.2s
[                                        ] | 0% Completed |  5min 30.5s

[                                        ] | 0% Completed |  

[                                        ] | 0% Completed |  5min 37.0s
[                                        ] | 0% Completed |  5min 37.3s
[                                        ] | 0% Completed |  5min 37.5s
[                                        ] | 0% Completed |  5min 38.7s
[                                        ] | 0% Completed |  5min 38.9s
[                                        ] | 0% Completed |  5min 39.0s
[                                        ] | 0% Completed |  5min 39.1s

[                                        ] | 0% Completed |  5min 39.5s
[                                        ] | 0% Completed |  5min 39.6s
[                                        ] | 0% Completed |  5min 39.8s
[                                        ] | 0% Completed |  5min 40.1s
[                                        ] | 0% Completed |  5min 40.3s
[                                        ] | 0% Completed |  5min 40.6s
[                                        ] | 0% Completed |  5m

[                                        ] | 0% Completed |  5min 52.1s
[                                        ] | 0% Completed |  5min 52.6s

[                                        ] | 0% Completed |  5min 52.8s

[                                        ] | 0% Completed |  5min 53.2s
[                                        ] | 0% Completed |  5min 53.3s
[                                        ] | 0% Completed |  5min 53.7s
[                                        ] | 0% Completed |  5min 53.8s
[                                        ] | 0% Completed |  5min 54.4s
[                                        ] | 0% Completed |  5min 54.6s
[                                        ] | 0% Completed |  5min 55.6s
[                                        ] | 0% Completed |  5min 55.8s
[                                        ] | 0% Completed |  5min 56.0s
[                                        ] | 0% Completed |  5min 57.0s
[                                        ] | 0% Completed |  5



[                                        ] | 0% Completed |  6min  0.0s
[                                        ] | 0% Completed |  6min  0.2s
[                                        ] | 0% Completed |  6min  0.5s

[                                        ] | 0% Completed |  6min  0.7s
[                                        ] | 0% Completed |  6min  0.8s
[                                        ] | 0% Completed |  6min  1.0s
[                                        ] | 0% Completed |  6min  2.1s
[                                        ] | 0% Completed |  6min  2.6s
[                                        ] | 0% Completed |  6min  2.7s
[                                        ] | 0% Completed |  6min  2.9s
[                                        ] | 0% Completed |  6min  3.0s
[                                        ] | 0% Completed |  6min  3.3s
[                                        ] | 0% Completed |  6min  3.8s
[                                        ] | 0% Completed |  

[                                        ] | 0% Completed |  6min  7.9s
[                                        ] | 0% Completed |  6min  9.2s
[                                        ] | 0% Completed |  6min  9.7s
[                                        ] | 0% Completed |  6min 10.3s
[                                        ] | 0% Completed |  6min 10.5s
[                                        ] | 0% Completed |  6min 10.6s
[                                        ] | 0% Completed |  6min 10.8s
[                                        ] | 0% Completed |  6min 11.3s
[                                        ] | 0% Completed |  6min 11.6s
[                                        ] | 0% Completed |  6min 12.0s

[                                        ] | 0% Completed |  6min 12.1s
[                                        ] | 0% Completed |  6min 12.7s
[                                        ] | 0% Completed |  6min 13.0s
[                                        ] | 0% Completed |  6m

[                                        ] | 0% Completed |  6min 20.3s
[                                        ] | 0% Completed |  6min 20.5s
[                                        ] | 0% Completed |  6min 20.7s

[                                        ] | 0% Completed |  6min 20.8s
[                                        ] | 0% Completed |  6min 21.6s
[                                        ] | 0% Completed |  6min 22.0s
[                                        ] | 0% Completed |  6min 22.7s
[                                        ] | 0% Completed |  6min 22.8s
[                                        ] | 0% Completed |  6min 22.9s
[                                        ] | 0% Completed |  6min 23.5s
[                                        ] | 0% Completed |  6min 24.8s
[                                        ] | 0% Completed |  6min 25.1s
[                                        ] | 0% Completed |  6min 25.3s

[                                        ] | 0% Completed |  6


[                                        ] | 0% Completed |  6min 37.7s


[                                        ] | 0% Completed |  6min 37.8s

[                                        ] | 0% Completed |  6min 38.5s
[                                        ] | 0% Completed |  6min 39.0s


[                                        ] | 0% Completed |  6min 39.2s

[                                        ] | 0% Completed |  6min 39.3s
[                                        ] | 0% Completed |  6min 39.4s
[                                        ] | 0% Completed |  6min 39.8s
[                                        ] | 0% Completed |  6min 40.0s
[                                        ] | 0% Completed |  6min 40.2s
[                                        ] | 0% Completed |  6min 40.5s
[                                        ] | 0% Completed |  6min 40.7s

[                                        ] | 0% Completed |  6min 41.2s
[                                        ] | 0% Complete

[                                        ] | 0% Completed |  6min 44.1s
[                                        ] | 0% Completed |  6min 44.4s
[                                        ] | 0% Completed |  6min 45.5s
[                                        ] | 0% Completed |  6min 45.8s
[                                        ] | 0% Completed |  6min 46.2s
[                                        ] | 0% Completed |  6min 46.3s
[                                        ] | 0% Completed |  6min 46.8s
[                                        ] | 0% Completed |  6min 47.8s
[                                        ] | 0% Completed |  6min 48.5s
[                                        ] | 0% Completed |  6min 49.1s
[                                        ] | 0% Completed |  6min 49.9s

[                                        ] | 0% Completed |  6min 50.5s
[                                        ] | 0% Completed |  6min 50.8s
[                                        ] | 0% Completed |  6m

[                                        ] | 0% Completed |  6min 54.9s
[                                        ] | 0% Completed |  6min 55.4s
[                                        ] | 0% Completed |  6min 55.7s
[                                        ] | 0% Completed |  6min 55.8s
[                                        ] | 0% Completed |  6min 55.9s

[                                        ] | 0% Completed |  6min 56.0s
[                                        ] | 0% Completed |  6min 56.2s
[                                        ] | 0% Completed |  6min 56.6s
[                                        ] | 0% Completed |  6min 56.7s
[                                        ] | 0% Completed |  6min 57.1s

[                                        ] | 0% Completed |  6min 57.5s
[                                        ] | 0% Completed |  6min 57.8s
[                                        ] | 0% Completed |  6min 57.9s
[                                        ] | 0% Completed |  6

[                                        ] | 0% Completed |  7min  2.5s
[                                        ] | 0% Completed |  7min  2.8s
[                                        ] | 0% Completed |  7min  3.1s
[                                        ] | 0% Completed |  7min  3.2s
[                                        ] | 0% Completed |  7min  3.5s
[                                        ] | 0% Completed |  7min  3.7s
[                                        ] | 0% Completed |  7min  4.2s

[                                        ] | 0% Completed |  7min  4.3s
[                                        ] | 0% Completed |  7min  4.6s
[                                        ] | 0% Completed |  7min  4.7s
[                                        ] | 0% Completed |  7min  4.9s
[                                        ] | 0% Completed |  7min  5.0s
[                                        ] | 0% Completed |  7min  5.2s

[                                        ] | 0% Completed |  7

[                                        ] | 0% Completed |  7min  9.2s
[                                        ] | 0% Completed |  7min  9.9s
[                                        ] | 0% Completed |  7min 10.4s
[                                        ] | 0% Completed |  7min 10.9s
[                                        ] | 0% Completed |  7min 11.0s
[                                        ] | 0% Completed |  7min 11.2s
[                                        ] | 0% Completed |  7min 12.0s
[                                        ] | 0% Completed |  7min 12.1s
[                                        ] | 0% Completed |  7min 12.2s

[                                        ] | 0% Completed |  7min 12.3s
[                                        ] | 0% Completed |  7min 12.5s

[                                        ] | 0% Completed |  7min 12.9s

[                                        ] | 0% Completed |  7min 13.6s
[                                        ] | 0% Completed |  

[                                        ] | 0% Completed |  7min 18.1s
[                                        ] | 0% Completed |  7min 18.6s
[                                        ] | 0% Completed |  7min 18.9s

[                                        ] | 0% Completed |  7min 19.1s
[                                        ] | 0% Completed |  7min 19.2s
[                                        ] | 0% Completed |  7min 19.5s
[                                        ] | 0% Completed |  7min 19.8s
[                                        ] | 0% Completed |  7min 19.9s

[                                        ] | 0% Completed |  7min 20.1s
[                                        ] | 0% Completed |  7min 20.4s
[                                        ] | 0% Completed |  7min 21.0s
[                                        ] | 0% Completed |  7min 21.4s
[                                        ] | 0% Completed |  7min 21.7s
[                                        ] | 0% Completed |  7

[                                        ] | 0% Completed |  7min 27.7s

[                                        ] | 0% Completed |  7min 27.8s
[                                        ] | 0% Completed |  7min 28.1s
[                                        ] | 0% Completed |  7min 28.9s
[                                        ] | 0% Completed |  7min 29.2s
[                                        ] | 0% Completed |  7min 29.3s
[                                        ] | 0% Completed |  7min 29.6s
[                                        ] | 0% Completed |  7min 30.6s
[                                        ] | 0% Completed |  7min 30.7s
[                                        ] | 0% Completed |  7min 31.5s
[                                        ] | 0% Completed |  7min 31.9s
[                                        ] | 0% Completed |  7min 32.0s

[                                        ] | 0% Completed |  7min 32.7s
[                                        ] | 0% Completed |  7

[                                        ] | 0% Completed |  7min 39.8s
[                                        ] | 0% Completed |  7min 39.9s
[                                        ] | 0% Completed |  7min 40.0s
[                                        ] | 0% Completed |  7min 40.1s
[                                        ] | 0% Completed |  7min 40.7s
[                                        ] | 0% Completed |  7min 40.9s

[                                        ] | 0% Completed |  7min 41.5s
[                                        ] | 0% Completed |  7min 41.8s
[                                        ] | 0% Completed |  7min 42.8s
[                                        ] | 0% Completed |  7min 42.9s
[                                        ] | 0% Completed |  7min 43.2s
[                                        ] | 0% Completed |  7min 43.5s
[                                        ] | 0% Completed |  7min 43.8s
[                                        ] | 0% Completed |  7m

[                                        ] | 0% Completed |  7min 59.4s
[                                        ] | 0% Completed |  7min 59.6s
[                                        ] | 0% Completed |  8min  0.8s
[                                        ] | 0% Completed |  8min  1.8s
[                                        ] | 0% Completed |  8min  1.9s
[                                        ] | 0% Completed |  8min  2.1s
[                                        ] | 0% Completed |  8min  2.5s
[                                        ] | 0% Completed |  8min  2.8s
[                                        ] | 0% Completed |  8min  2.9s
[                                        ] | 0% Completed |  8min  3.0s
[                                        ] | 0% Completed |  8min  3.1s
[                                        ] | 0% Completed |  8min  4.9s
[                                        ] | 0% Completed |  8min  5.2s

[                                        ] | 0% Completed |  8m

[                                        ] | 0% Completed |  8min 15.0s

[                                        ] | 0% Completed |  8min 15.1s
[                                        ] | 0% Completed |  8min 15.3s
[                                        ] | 0% Completed |  8min 15.4s


[                                        ] | 0% Completed |  8min 15.7s
[                                        ] | 0% Completed |  8min 15.8s
[                                        ] | 0% Completed |  8min 16.3s
[                                        ] | 0% Completed |  8min 17.4s
[                                        ] | 0% Completed |  8min 17.8s
[                                        ] | 0% Completed |  8min 20.4s
[                                        ] | 0% Completed |  8min 21.1s
[                                        ] | 0% Completed |  8min 21.3s
[                                        ] | 0% Completed |  8min 21.6s
[                                        ] | 0% Completed |  

[                                        ] | 0% Completed |  8min 26.1s
[                                        ] | 0% Completed |  8min 26.8s
[                                        ] | 0% Completed |  8min 26.9s
[                                        ] | 0% Completed |  8min 27.0s
[                                        ] | 0% Completed |  8min 27.5s
[                                        ] | 0% Completed |  8min 27.7s
[                                        ] | 0% Completed |  8min 27.9s
[                                        ] | 0% Completed |  8min 29.9s

[                                        ] | 0% Completed |  8min 30.1s

[                                        ] | 0% Completed |  8min 30.4s
[                                        ] | 0% Completed |  8min 30.7s
[                                        ] | 0% Completed |  8min 31.6s
[                                        ] | 0% Completed |  8min 32.5s
[                                        ] | 0% Completed |  8

[                                        ] | 0% Completed |  8min 48.4s
[                                        ] | 0% Completed |  8min 49.2s
[                                        ] | 0% Completed |  8min 49.3s
[                                        ] | 0% Completed |  8min 49.6s

[                                        ] | 0% Completed |  8min 50.8s
[                                        ] | 0% Completed |  8min 50.9s
[                                        ] | 0% Completed |  8min 51.5s
[                                        ] | 0% Completed |  8min 51.8s
[                                        ] | 0% Completed |  8min 52.3s
[                                        ] | 0% Completed |  8min 52.4s
[                                        ] | 0% Completed |  8min 52.5s

[                                        ] | 0% Completed |  8min 52.6s
[                                        ] | 0% Completed |  8min 53.0s
[                                        ] | 0% Completed |  8

[                                        ] | 0% Completed |  9min  0.4s

[                                        ] | 0% Completed |  9min  1.7s
[                                        ] | 0% Completed |  9min  1.9s

[                                        ] | 0% Completed |  9min  2.7s
[                                        ] | 0% Completed |  9min  3.8s
[                                        ] | 0% Completed |  9min  3.9s
[                                        ] | 0% Completed |  9min  4.1s
[                                        ] | 0% Completed |  9min  4.2s
[                                        ] | 0% Completed |  9min  4.9s
[                                        ] | 0% Completed |  9min  5.2s
[                                        ] | 0% Completed |  9min  6.0s
[                                        ] | 0% Completed |  9min  7.0s
[                                        ] | 0% Completed |  9min  7.6s
[                                        ] | 0% Completed |  9

[                                        ] | 0% Completed |  9min 32.1s

[                                        ] | 0% Completed |  9min 33.4s
[                                        ] | 0% Completed |  9min 33.7s
[                                        ] | 0% Completed |  9min 34.2s
[                                        ] | 0% Completed |  9min 34.3s
[                                        ] | 0% Completed |  9min 35.2s

[                                        ] | 0% Completed |  9min 35.3s
[                                        ] | 0% Completed |  9min 35.5s
[                                        ] | 0% Completed |  9min 35.6s
[                                        ] | 0% Completed |  9min 36.1s
[                                        ] | 0% Completed |  9min 36.3s
[                                        ] | 0% Completed |  9min 36.8s
[                                        ] | 0% Completed |  9min 37.8s
[                                        ] | 0% Completed |  9

[#                                       ] | 3% Completed |  9min 51.0s

[#                                       ] | 3% Completed |  9min 51.5s

[#                                       ] | 3% Completed |  9min 53.9s

[#                                       ] | 3% Completed |  9min 54.6s
[#                                       ] | 3% Completed |  9min 54.9s
[##                                      ] | 5% Completed |  9min 55.5s
[##                                      ] | 5% Completed |  9min 56.0s
[##                                      ] | 7% Completed |  9min 56.3s
[##                                      ] | 7% Completed |  9min 57.1s
[##                                      ] | 7% Completed |  9min 58.6s
[##                                      ] | 7% Completed |  9min 59.6s
[##                                      ] | 7% Completed |  9min 59.8s
[##                                      ] | 7% Completed | 10min  0.0s
[##                                      ] | 7% Completed | 1

[####                                    ] | 11% Completed | 10min  7.8s

[####                                    ] | 11% Completed | 10min  8.2s
[####                                    ] | 11% Completed | 10min  8.7s
[####                                    ] | 11% Completed | 10min  9.0s
[####                                    ] | 11% Completed | 10min  9.8s
[####                                    ] | 11% Completed | 10min  9.9s
[#####                                   ] | 12% Completed | 10min 11.6s
[#####                                   ] | 14% Completed | 10min 11.8s
[#####                                   ] | 14% Completed | 10min 12.1s
[#####                                   ] | 14% Completed | 10min 12.8s
[#####                                   ] | 14% Completed | 10min 13.3s
[#####                                   ] | 14% Completed | 10min 13.6s
[#####                                   ] | 14% Completed | 10min 14.9s
[#####                                   ] | 14% C

[#####                                   ] | 14% Completed | 10min 39.7s
[#####                                   ] | 14% Completed | 10min 39.9s
[#####                                   ] | 14% Completed | 10min 40.2s
[#####                                   ] | 14% Completed | 10min 41.8s
[#####                                   ] | 14% Completed | 10min 42.5s
[#####                                   ] | 14% Completed | 10min 43.4s
[#####                                   ] | 14% Completed | 10min 46.0s
[#####                                   ] | 14% Completed | 10min 46.2s
[#####                                   ] | 14% Completed | 10min 47.8s
[#####                                   ] | 14% Completed | 10min 48.0s

[#####                                   ] | 14% Completed | 10min 48.7s
[#####                                   ] | 14% Completed | 10min 49.0s
[#####                                   ] | 14% Completed | 10min 49.1s
[#####                                   ] | 14% C

[#####                                   ] | 14% Completed | 10min 58.7s
[#####                                   ] | 14% Completed | 10min 58.9s
[#####                                   ] | 14% Completed | 10min 59.2s
[#####                                   ] | 14% Completed | 11min  3.3s
[#####                                   ] | 14% Completed | 11min  3.4s
[#####                                   ] | 14% Completed | 11min  3.6s
[#####                                   ] | 14% Completed | 11min  4.4s
[#####                                   ] | 14% Completed | 11min  8.4s
[#####                                   ] | 14% Completed | 11min  9.4s
[#####                                   ] | 14% Completed | 11min 10.7s
[#####                                   ] | 14% Completed | 11min 10.8s

[#####                                   ] | 14% Completed | 11min 11.0s
[#####                                   ] | 14% Completed | 11min 11.2s
[#####                                   ] | 14% C

[#####                                   ] | 14% Completed | 11min 21.0s
[#####                                   ] | 14% Completed | 11min 21.2s
[#####                                   ] | 14% Completed | 11min 21.6s
[######                                  ] | 16% Completed | 11min 22.5s
[######                                  ] | 16% Completed | 11min 22.7s
[######                                  ] | 16% Completed | 11min 22.9s
[######                                  ] | 16% Completed | 11min 23.7s
[######                                  ] | 16% Completed | 11min 23.9s
[#######                                 ] | 18% Completed | 11min 24.7s
[#######                                 ] | 18% Completed | 11min 24.8s
[#######                                 ] | 18% Completed | 11min 25.2s
[#######                                 ] | 18% Completed | 11min 25.3s
[#######                                 ] | 18% Completed | 11min 25.4s
[#######                                 ] | 18% Co

[#######                                 ] | 18% Completed | 11min 33.6s
[#######                                 ] | 18% Completed | 11min 34.0s
[#######                                 ] | 18% Completed | 11min 34.3s
[#######                                 ] | 18% Completed | 11min 35.3s
[#######                                 ] | 18% Completed | 11min 35.5s
[#######                                 ] | 18% Completed | 11min 36.2s
[#######                                 ] | 18% Completed | 11min 36.6s
[#######                                 ] | 18% Completed | 11min 36.9s

[#######                                 ] | 18% Completed | 11min 37.0s
[#######                                 ] | 18% Completed | 11min 37.1s
[#######                                 ] | 18% Completed | 11min 37.2s

[#######                                 ] | 18% Completed | 11min 37.3s
[#######                                 ] | 18% Completed | 11min 37.4s
[#######                                 ] | 18% 

[#######                                 ] | 18% Completed | 11min 44.5s
[#######                                 ] | 18% Completed | 11min 44.8s
[#######                                 ] | 18% Completed | 11min 45.0s
[#######                                 ] | 18% Completed | 11min 45.5s
[#######                                 ] | 18% Completed | 11min 46.4s
[#######                                 ] | 18% Completed | 11min 46.5s
[#######                                 ] | 18% Completed | 11min 46.6s
[#######                                 ] | 18% Completed | 11min 47.3s
[#######                                 ] | 18% Completed | 11min 47.5s
[#######                                 ] | 18% Completed | 11min 48.2s
[#######                                 ] | 18% Completed | 11min 48.4s
[#######                                 ] | 18% Completed | 11min 48.9s

[#######                                 ] | 18% Completed | 11min 50.7s
[#######                                 ] | 18% C

[#######                                 ] | 18% Completed | 11min 55.9s
[#######                                 ] | 18% Completed | 11min 56.1s
[#######                                 ] | 18% Completed | 11min 56.8s

[#######                                 ] | 18% Completed | 11min 57.4s
[#######                                 ] | 18% Completed | 11min 57.8s
[#######                                 ] | 18% Completed | 11min 57.9s

[#######                                 ] | 18% Completed | 11min 58.2s
[#######                                 ] | 18% Completed | 11min 58.6s
[#######                                 ] | 18% Completed | 11min 58.9s
[#######                                 ] | 18% Completed | 11min 59.2s
[#######                                 ] | 18% Completed | 11min 59.8s

[#######                                 ] | 18% Completed | 11min 59.9s
[#######                                 ] | 18% Completed | 12min  0.2s
[#######                                 ] | 18%

[########                                ] | 20% Completed | 12min 19.4s
[########                                ] | 20% Completed | 12min 20.1s
[########                                ] | 20% Completed | 12min 20.2s
[########                                ] | 22% Completed | 12min 21.3s
[########                                ] | 22% Completed | 12min 21.5s
[########                                ] | 22% Completed | 12min 22.0s
[########                                ] | 22% Completed | 12min 22.6s
[########                                ] | 22% Completed | 12min 23.2s
[########                                ] | 22% Completed | 12min 23.6s
[########                                ] | 22% Completed | 12min 25.4s
[########                                ] | 22% Completed | 12min 25.7s

[########                                ] | 22% Completed | 12min 26.0s
[########                                ] | 22% Completed | 12min 26.2s

[########                                ] | 22% 

[########                                ] | 22% Completed | 12min 33.3s
[########                                ] | 22% Completed | 12min 34.5s
[########                                ] | 22% Completed | 12min 34.6s
[########                                ] | 22% Completed | 12min 34.7s
[########                                ] | 22% Completed | 12min 34.9s
[########                                ] | 22% Completed | 12min 35.1s
[########                                ] | 22% Completed | 12min 35.4s
[#########                               ] | 24% Completed | 12min 38.1s
[#########                               ] | 24% Completed | 12min 38.3s
[#########                               ] | 24% Completed | 12min 38.7s
[#########                               ] | 24% Completed | 12min 38.8s
[#########                               ] | 24% Completed | 12min 39.3s
[#########                               ] | 24% Completed | 12min 39.5s
[##########                              ] | 25% Co

[#############                           ] | 33% Completed | 12min 59.1s
[#############                           ] | 33% Completed | 12min 59.3s
[#############                           ] | 33% Completed | 12min 59.6s

[#############                           ] | 33% Completed | 12min 60.0s
[#############                           ] | 33% Completed | 13min  0.2s
[#############                           ] | 33% Completed | 13min  2.6s
[#############                           ] | 33% Completed | 13min  2.9s
[#############                           ] | 33% Completed | 13min  3.4s
[#############                           ] | 33% Completed | 13min  4.2s
[#############                           ] | 33% Completed | 13min  5.3s
[#############                           ] | 33% Completed | 13min  5.7s
[#############                           ] | 33% Completed | 13min  7.0s
[#############                           ] | 33% Completed | 13min  7.3s
[#############                           ] | 33% C

[################                        ] | 40% Completed | 13min 37.2s
[################                        ] | 40% Completed | 13min 37.3s
[################                        ] | 40% Completed | 13min 37.7s
[################                        ] | 40% Completed | 13min 38.2s
[################                        ] | 40% Completed | 13min 40.5s
[################                        ] | 40% Completed | 13min 40.9s
[################                        ] | 40% Completed | 13min 41.1s
[################                        ] | 40% Completed | 13min 41.8s
[################                        ] | 40% Completed | 13min 42.3s
[################                        ] | 40% Completed | 13min 42.6s
[################                        ] | 40% Completed | 13min 42.7s
[################                        ] | 40% Completed | 13min 43.4s
[################                        ] | 40% Completed | 13min 45.0s
[################                        ] | 40% Co

[################                        ] | 40% Completed | 13min 58.4s
[################                        ] | 40% Completed | 13min 59.0s
[################                        ] | 40% Completed | 13min 59.4s
[################                        ] | 40% Completed | 14min  1.0s
[################                        ] | 40% Completed | 14min  1.3s
[################                        ] | 40% Completed | 14min  1.7s
[################                        ] | 40% Completed | 14min  2.1s
[#################                       ] | 44% Completed | 14min  2.9s
[#################                       ] | 44% Completed | 14min  4.6s
[#################                       ] | 44% Completed | 14min  7.6s
[#################                       ] | 44% Completed | 14min  8.2s
[#################                       ] | 44% Completed | 14min  9.2s
[#################                       ] | 44% Completed | 14min  9.7s
[#################                       ] | 44% Co

[#################                       ] | 44% Completed | 14min 18.0s

[#################                       ] | 44% Completed | 14min 18.2s
[#################                       ] | 44% Completed | 14min 18.4s
[#################                       ] | 44% Completed | 14min 18.5s
[#################                       ] | 44% Completed | 14min 18.6s

[#################                       ] | 44% Completed | 14min 18.8s
[#################                       ] | 44% Completed | 14min 19.9s
[#################                       ] | 44% Completed | 14min 21.1s
[#################                       ] | 44% Completed | 14min 21.9s
[#################                       ] | 44% Completed | 14min 23.5s
[#################                       ] | 44% Completed | 14min 23.6s
[#################                       ] | 44% Completed | 14min 24.9s
[#################                       ] | 44% Completed | 14min 25.2s

[#################                       ] | 44%

[#################                       ] | 44% Completed | 14min 34.8s
[##################                      ] | 46% Completed | 14min 38.0s
[##################                      ] | 46% Completed | 14min 38.1s
[##################                      ] | 46% Completed | 14min 38.2s
[##################                      ] | 46% Completed | 14min 38.3s
[##################                      ] | 46% Completed | 14min 38.6s
[###################                     ] | 48% Completed | 14min 39.1s
[###################                     ] | 48% Completed | 14min 40.1s
[###################                     ] | 48% Completed | 14min 40.7s
[###################                     ] | 48% Completed | 14min 42.1s
[###################                     ] | 48% Completed | 14min 42.3s
[###################                     ] | 48% Completed | 14min 43.8s
[###################                     ] | 48% Completed | 14min 44.2s
[###################                     ] | 48% Co

[####################                    ] | 51% Completed | 15min  1.1s
[####################                    ] | 51% Completed | 15min  2.0s
[####################                    ] | 51% Completed | 15min  2.8s
[####################                    ] | 51% Completed | 15min  3.8s
[####################                    ] | 51% Completed | 15min  3.9s
[####################                    ] | 51% Completed | 15min  5.1s
[####################                    ] | 51% Completed | 15min  5.7s
[####################                    ] | 51% Completed | 15min  5.8s
[#####################                   ] | 53% Completed | 15min  6.4s
[#####################                   ] | 53% Completed | 15min  6.6s
[######################                  ] | 55% Completed | 15min 10.3s
[######################                  ] | 57% Completed | 15min 11.3s
[#######################                 ] | 59% Completed | 15min 14.6s
[#######################                 ] | 59% Co

[#########################               ] | 62% Completed | 15min 43.4s
[#########################               ] | 62% Completed | 15min 44.7s
[#########################               ] | 62% Completed | 15min 45.2s
[#########################               ] | 62% Completed | 15min 46.0s
[#########################               ] | 62% Completed | 15min 46.9s
[#########################               ] | 62% Completed | 15min 48.7s
[#########################               ] | 62% Completed | 15min 49.3s

[#########################               ] | 62% Completed | 15min 49.6s
[#########################               ] | 62% Completed | 15min 53.3s
[#########################               ] | 62% Completed | 15min 58.5s
[#########################               ] | 62% Completed | 16min  0.1s
[#########################               ] | 62% Completed | 16min  0.6s
[#########################               ] | 62% Completed | 16min  1.5s
[#########################               ] | 62% C

[##########################              ] | 66% Completed | 16min 37.2s
[##########################              ] | 66% Completed | 16min 37.6s
[##########################              ] | 66% Completed | 16min 38.8s
[###########################             ] | 68% Completed | 16min 43.9s
[###########################             ] | 68% Completed | 16min 45.8s
[###########################             ] | 68% Completed | 16min 47.1s
[############################            ] | 72% Completed | 16min 51.5s
[############################            ] | 72% Completed | 16min 51.8s
[############################            ] | 72% Completed | 16min 55.3s
[#############################           ] | 74% Completed | 17min  0.9s
[#############################           ] | 74% Completed | 17min  4.3s
[#############################           ] | 74% Completed | 17min  6.1s
[#############################           ] | 74% Completed | 17min  6.7s
[#############################           ] | 74% Co

```
2019-04-26 10:23:50,395 - pyscenic.cli.pyscenic - INFO - Creating modules.
2019-04-26 10:23:56,229 - pyscenic.cli.pyscenic - INFO - Loading expression matrix.
2019-04-26 10:29:09,101 - pyscenic.cli.pyscenic - INFO - Loading databases.
2019-04-26 10:29:09,101 - pyscenic.cli.pyscenic - INFO - Calculating regulons.
2019-04-26 11:11:10,784 - pyscenic.cli.pyscenic - INFO - Writing results to file
```

The result is a list of enriched motifs for the modules.

| Column name | Description |
| ----------- | ----------- |
| TF | Transcription Factor (TF) for which an enriched motif is discovered. |
| MotifID | The identifier of the enriched motif. |
| AUC | Area Under the recovery Curve statistic for this enriched motif. |
| NES | Normalized Enrichment Score for this enriched motif. |
| Context | Collection of tags clarifying the origin of the module for this factor: e.g. ranking database, ... |
| Annotation | Verbose description of the annotation available for this motif. |
| MotifSimilarityQvalue | The TomTom derived Q-value for motif similarity (if used for assigning the factor to this enriched motif). |
| OrthologousIdentity | The Amino Acid Identity between factors (if used for assigning the factor to this enriched motif). |
| RankAtMax | The position of the Leading Edge which is used as a threshold on the whole genome ranking of the motif to decide if a gene in the input is a direct target of a TF that binds this motif. |
| TargetGenes | A list of pairs: genes and their associated weights from GENIE3/GRNBoost2. |

In [12]:
# Read the enriched motifs from MOTIFS_FNAME into a dataframe (pyscenic.utils.load_motifs).
df_motifs = load_motifs(MOTIFS_FNAME)

In [13]:
# Preview the first rows of the enriched-motif table.
df_motifs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment
Unnamed: 0_level_1,Unnamed: 1_level_1,AUC,NES,MotifSimilarityQvalue,OrthologousIdentity,Annotation,Context,TargetGenes,RankAtMax
TF,MotifID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
ARNT,transfac_pro__M07637,0.064583,3.000286,0.000158,1.0,gene is annotated for similar motif cisbp__M29...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...","[(RNF115, 1.0415439162281963), (CAMK2D, 2.3980...",462
ATF1,dbcorrdb__POLR2AphosphoS2__ENCSR000ECT_1__m1,0.086429,5.954049,0.000646,1.0,motif similar to dbcorrdb__ATF1__ENCSR000DNZ_1...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...","[(TRMT112, 4.55877743796283), (RTFDC1, 0.76266...",4307
ATF1,dbcorrdb__BRCA1__ENCSR000EDB_1__m1,0.059128,3.422009,0.000112,1.0,gene is annotated for similar motif dbcorrdb__...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...","[(WDR77, 1.3201455121052337), (ATG3, 2.0067897...",1430
ATF3,transfac_pro__M08931,0.090273,3.682984,7e-06,1.0,gene is annotated for similar motif homer__DAT...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...","[(EMP3, 4.909643910710178), (JUNB, 0.905215986...",304
ATF3,taipale_cyt_meth__XBP1_NRTGACGTCAYN_FL,0.099599,4.34671,0.0,1.0,motif similar to taipale_cyt_meth__ATF3_NRTGAY...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...","[(UBE2H, 5.350288172728071), (DUSP1, 6.6559829...",1739


Display the enriched motifs with their associated sequence logos.

In [14]:
# Render the first rows with an added MotifLogo column; display_logos works on a copy, so df_motifs is unchanged.
display_logos(df_motifs.head())



Unnamed: 0_level_0,Unnamed: 1_level_0,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment
Unnamed: 0_level_1,Unnamed: 1_level_1,AUC,NES,MotifSimilarityQvalue,OrthologousIdentity,Annotation,Context,TargetGenes,RankAtMax,MotifLogo
TF,MotifID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ARNT,transfac_pro__M07637,0.064583,3.000286,0.000158,1.0,"gene is annotated for similar motif cisbp__M2957 ('ARNT[gene ID: ""ENSG00000143437"" species: ""Homo sapiens"" TF status: ""direct"" TF family: ""bHLH"" DBDs: ""HLH""]'; q-value = 0.000158)","(weight>75.0%, hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings, activating)","[(OSBPL8, 0.507470363077843), (FOXP1, 0.5640840851535086), (ZNF608, 0.5985618354581687)]",462,
ATF1,dbcorrdb__POLR2AphosphoS2__ENCSR000ECT_1__m1,0.086429,5.954049,0.000646,1.0,"motif similar to dbcorrdb__ATF1__ENCSR000DNZ_1__m2 ('ATF1 (ENCSR000DNZ-1, motif 2)'; q-value = 0.000646) which is directly annotated","(weight>75.0%, hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings, activating)","[(WDR1, 0.4868600077288404), (TXN2, 0.4953049679210165), (GNB2, 0.4965805529424902)]",4307,
ATF1,dbcorrdb__BRCA1__ENCSR000EDB_1__m1,0.059128,3.422009,0.000112,1.0,"gene is annotated for similar motif dbcorrdb__ATF1__ENCSR000DNZ_1__m1 ('ATF1 (ENCSR000DNZ-1, motif 1)'; q-value = 0.000112)","(weight>75.0%, hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings, activating)","[(GNB2, 0.4965805529424902), (VDAC3, 0.6197368761566505), (PRDX1, 0.6378873997967587)]",1430,
ATF3,transfac_pro__M08931,0.090273,3.682984,7e-06,1.0,gene is annotated for similar motif homer__DATGASTCATHN_Atf3 ('Atf3(bZIP)/GBM-ATF3-ChIP-Seq(GSE33912)/Homer'; q-value = 6.91e-06),"(weight>75.0%, hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings, activating)","[(MAPK6, 0.5367071712988085), (ZBTB7A, 0.6153846663459299), (TNFRSF12A, 0.7695044720160424)]",304,
ATF3,taipale_cyt_meth__XBP1_NRTGACGTCAYN_FL,0.099599,4.34671,0.0,1.0,motif similar to taipale_cyt_meth__ATF3_NRTGAYGTCAYN_eDBD ('ATF3 [bZIP]'; q-value = 3.5e-08) which is directly annotated,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings, activating)","[(C6orf106, 0.4896699595115849), (BTG2, 0.4968094331879406), (SYNGR3, 0.5356063346964915)]",1739,


### STEP 4: Cellular enrichment aka AUCell

__REGULON CREATION__

Regulons can easily be created from this list of enriched motifs via `pyscenic.transform.df2regulons`. Here we provide an auxiliary function to carefully select the enriched motifs that contribute to the regulons.

In [31]:
def derive_regulons(motifs, db_names=('hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather',
                        'hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather'),
                    min_nes=3.0, min_genes=10):
    """
    Select enriched motifs and turn them into regulons.

    The input dataframe is not modified.

    :param motifs: Dataframe of enriched motifs. If its columns are a MultiIndex, the top level is dropped
        on a new frame before filtering; the columns 'Context', 'NES' and 'Annotation' are read afterwards.
    :param db_names: Names of ranking databases. Currently unused: the database filter is disabled, the
        parameter is kept so existing callers keep working.
    :param min_nes: Minimum NES an enriched motif needs to contribute to a regulon.
    :param min_genes: Minimum length (`len(regulon)`) a regulon needs to be kept.
    :return: List of regulons, each renamed to its transcription factor.
    """
    # droplevel returns a new frame, so the caller's dataframe keeps its original columns and the
    # function can be called repeatedly on the same input.
    if motifs.columns.nlevels > 1:
        motifs = motifs.droplevel(0, axis=1)

    def contains(*elems):
        def f(context):
            return any(elem in context for elem in elems)
        return f

    def context_mask(*elems):
        # Plain `bool` replaces the `np.bool` alias, which is deprecated since NumPy 1.20.
        return np.fromiter(map(contains(*elems), motifs['Context']), dtype=bool)

    # For the creation of regulons we only keep the activating modules. We also remove the enriched motifs
    # for the modules that were created using the method 'weight>50.0%' (because these modules are not part
    # of the default settings of modules_from_adjacencies anymore).
    # The filter on the ranking databases listed in db_names is intentionally not applied here.
    motifs = motifs[~context_mask('weight>50.0%') & context_mask('activating')]

    # We build regulons only using enriched motifs with a NES of min_nes or higher; we take only directly
    # annotated TFs or TFs annotated for an orthologous gene into account.
    directly_annotated = motifs['Annotation'] == 'gene is directly annotated'
    orthologous = (motifs['Annotation'].str.startswith('gene is orthologous to')
                   & motifs['Annotation'].str.endswith('which is directly annotated for motif'))
    selected = motifs[(motifs['NES'] >= min_nes) & (directly_annotated | orthologous)]

    # Only keep regulons with at least min_genes genes.
    regulons = [r for r in df2regulons(selected) if len(r) >= min_genes]

    # Rename regulons, i.e. remove suffix.
    return [r.rename(r.transcription_factor) for r in regulons]

In [32]:
# Derive the filtered regulons from the enriched motifs.
# NOTE(review): this cell's execution count (32) is out of order with the cells that follow (15-18), and a
# later cell rebinds `regulons` from df2regulons(df_motifs) — confirm which result is meant to be pickled.
regulons = derive_regulons(df_motifs)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  


Create regulons from a dataframe of enriched features.
Additional columns saved: []


In [15]:
import pyscenic

In [16]:
# Convert the full (unfiltered) motif table into regulons with the df2regulons helper imported at the top.
# NOTE(review): this rebinds `regulons`, discarding the filtered list produced by derive_regulons -- confirm
# that is intended.
regulons = df2regulons(df_motifs)

Create regulons from a dataframe of enriched features.
Additional columns saved: []


In [17]:
len(regulons)

481

In [18]:
# Serialize the regulon list to REGULONS_DAT_FNAME; the next cell reloads it from there.
with open(REGULONS_DAT_FNAME, mode='wb') as regulons_file:
    pickle.dump(regulons, regulons_file)

In [52]:

# Export the pickled regulons as a flat tab-separated table: TF <tab> target gene <tab> weight.
# Both files are handled with context managers so the handles are closed even if the loop raises.
# NOTE: pickle.load can execute arbitrary code; only load pickles produced by this notebook.
with open(REGULONS_DAT_FNAME, 'rb') as regulons_in:
    data = pickle.load(regulons_in)

with open(REGULONS_DAT_FNAME + '.txt', "w") as fileout:
    for d in data:
        tf, gene_d = (d.name, d.gene2weight)
        for gene, weight in gene_d.items():
            # Build the row once and reuse it for both the echo and the file.
            line = "%s\t%s\t%s" % (tf, gene, weight)
            # NOTE(review): echoing every row floods the cell output; consider dropping the print.
            print(line)
            fileout.write(line + "\n")

ARID3A	MARCH2	0.8501922425057341
ARID3A	RPS27	0.459157723563259
ARID3A	RAB13	0.7071241961090853
ARID3A	POU3F1	0.7881673338096377
ARID3A	KDM4B	1.092231902568085
ARID3A	RPL6	0.6228309664018286
ARID3A	CDK17	1.524732327508222
ARID3A	CCND2	1.0717372171595554
ARID3A	CD63	1.73205366637134
ARID3A	ANKRD11	1.365260763774328
ARID3A	ISOC2	2.1929861687036007
ARID3A	RPS10	1.9731570535461556
ARID3A	RPS10-NUDT3	3.9970761751955535
ARID3A	EIF4EBP1	0.5871821539338136
ARID3A	SLC4A11	0.8663659154496692
ARID3A	SYTL3	1.6098124342660285
ARID3A	TSPO	3.046936821454415
ARID3A	NME1-NME2	1.2658703917477647
ARID3A	FMNL2	0.5804306510499073
ARID3A	LINC01003	2.062921627086009
ARID3A	BCL2L1	0.417076605289418
ARID3A	PTMS	5.041378395441847
ARID3A	RAB27A	0.6312109284773404
ARID3A	CXXC5	2.8077910764615623
ARID3A	TRIM73	0.5294512506391859
ARID3A	SSU72	0.6131699945646564
ARID3A	BTG1	1.2375858657340768
ARID3A	PLS3	0.8713121230495252
ARID3A	GADD45B	0.4835014232291533
ARID3A	ANKRD9	0.3990743284929671
ARID3A	ZBTB7A	12.7891072030

IRF5	CBFA2T3	10.70451849603252
IRF5	TBX21	11.749399895743954
IRF5	IDH2	1.0415447925580632
IRF5	GAB2	3.907831650500432
IRF5	SLC25A22	0.4004363637919662
IRF5	TM9SF2	0.3760699937300538
IRF5	STAT5A	4.1225247184271625
IRF5	CSK	2.3514471480705845
IRF5	PSMB9	2.972328042479124
IRF5	C19orf66	3.412386067587037
IRF5	EPSTI1	4.43980483088305
IRF5	TLE1	0.5553570603725351
IRF5	MX1	0.675114842474956
IRF5	TNFRSF1B	5.964444472486042
IRF5	CCND1	0.6995688808776473
IRF5	CARD17	0.6333497302839508
IRF5	ACSS1	1.3690812230332423
IRF5	CASK	0.532662641762968
IRF5	H3F3A	1.750013896533498
IRF5	IFIH1	1.073389882060213
IRF5	INTS6L	0.4295175619028591
IRF5	ZEB2	0.3841176702812622
IRF5	LSM6	0.6367958908483509
IRF5	USP15	0.7585527000779861
IRF5	MESDC1	0.6105704387890525
IRF5	SETBP1	0.3688008247269533
IRF5	SMIM14	0.5411796650371486
IRF5	IL6R	0.9887989682224264
IRF5	PHACTR1	1.283753022132302
IRF5	JADE2	0.3738528540638134
IRF5	RXRA	0.7197797808438892
IRF5	STAMBPL1	1.706835869915308
IRF5	ETV6	0.7358279174232496
IRF5	RHOF	32

__AUCELL__

In [52]:
# Read the expression matrix from EXP_MTX_QC_FNAME, using the first CSV column as the row index.
# NOTE(review): `df_tpm` is rebuilt from `adata` in a later cell, so this frame is superseded there.
df_tpm = pd.read_csv(EXP_MTX_QC_FNAME, index_col=0)
df_tpm

Unnamed: 0_level_0,AL627309.1,AL669831.5,FAM87B,LINC00115,FAM41C,SAMD11,NOC2L,KLHL17,PLEKHN1,AL645608.8,...,AP002448.1,AC113398.2,CLCA4-AS1,LINC02050,LINC01953,AC009313.1,AC091769.2,AP002815.1,AL391152.1,SMIM40
cellID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
COAD14B_P_AAAGATGCAAGCGCTC-1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COAD14B_P_ATTTCTGTCCGCGTTT-1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COAD14B_P_CACAGTATCGAGGTAG-1,0.0,0.0,0.0,0.0,0.0,0.0,0.451142,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COAD14B_P_CCGTTCACACCCTATC-1,0.0,0.0,0.0,0.0,0.0,0.0,0.632082,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COAD14B_P_CGGGTCACAGCTGTTA-1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
THYM1.TTTATGCTCTGCGACG-1_1_1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
THYM1.TTTCCTCGTAAACACA-1_1_1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
THYM1.TTTCCTCGTCAGATAA-1_1_1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
THYM1.TTTGCGCAGCCCAACC-1_1_1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
# Load the AnnData object from an .h5ad file; the path is relative to the current working directory.
adata=sc.read_h5ad("objTN221115.h5ad")

In [24]:
# Normalize total counts per cell with scanpy's default settings. The result is not assigned, so `adata`
# is presumably modified in place (confirm). The last line displays the object summary.
sc.pp.normalize_total(adata)
adata

AnnData object with n_obs × n_vars = 342900 × 29331
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cellid', 'dataid', 'patient', 'site', 'type', 'cancer', 'treatment', 'celltype_raw_l1', 'celltype_raw_l2', 'nCount_SCT', 'nFeature_SCT', 'meta.cluster', 'celltype', 'percent.mt', 'DIG.Score1', 'S.Score', 'G2M.Score', 'Phase', 'RNA_snn_res.1', 'seurat_clusters', 'RNA_snn_res.1.5', 'celltypeNew', 'celltypeN', 'RNA_snn_res.0.6', 'RNA_snn_res.0.8', 'RNA_snn_res.0.5', 'RNA_snn_res.2', 'B_signature', 'celltype_l1', 'BCR', 'barcode', 'celltype_l2', 'RNA_snn_res.2.5', 'celltype_l5', 'celltype_l6', 'celltype_l7', 'celltype_l3', 'celltype_l4'
    var: '_index', 'features'

In [25]:
# Randomly keep 200,000 cells, modifying `adata` in place. The seed and copy flag are written out at
# their scanpy default values so the reproducibility of the draw is visible in the notebook.
sc.pp.subsample(adata, n_obs=200000, random_state=0, copy=False)

In [26]:
adata

AnnData object with n_obs × n_vars = 200000 × 29331
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cellid', 'dataid', 'patient', 'site', 'type', 'cancer', 'treatment', 'celltype_raw_l1', 'celltype_raw_l2', 'nCount_SCT', 'nFeature_SCT', 'meta.cluster', 'celltype', 'percent.mt', 'DIG.Score1', 'S.Score', 'G2M.Score', 'Phase', 'RNA_snn_res.1', 'seurat_clusters', 'RNA_snn_res.1.5', 'celltypeNew', 'celltypeN', 'RNA_snn_res.0.6', 'RNA_snn_res.0.8', 'RNA_snn_res.0.5', 'RNA_snn_res.2', 'B_signature', 'celltype_l1', 'BCR', 'barcode', 'celltype_l2', 'RNA_snn_res.2.5', 'celltype_l5', 'celltype_l6', 'celltype_l7', 'celltype_l3', 'celltype_l4'
    var: '_index', 'features'

In [27]:
# Densify the expression matrix and wrap it in a DataFrame in one step, so `df_tpm` never holds a
# non-DataFrame value. Row and column labels are attached in the next cell.
df_tpm = pd.DataFrame(adata.X.todense())

In [28]:
# Label the rows with the cell identifiers from adata.obs and the columns with the gene names stored in
# adata.var['features'], then display the labelled frame.
df_tpm.index=adata.obs.index
df_tpm.columns=adata.var.features
df_tpm

features,AL627309.1,AL669831.5,FAM87B,LINC00115,FAM41C,SAMD11,NOC2L,KLHL17,PLEKHN1,AL645608.8,...,AC012146.3,AC002347.1,AC015818.2,AC018521.7,AP001793.2,AC106037.2,AL357078.3,AC110048.2,AC093525.1,AC138932.2
PCall_CESC3_T_GAGCAGACATACTACG-1_3,0.0,0.0,0.0,0.0,0.0,0.0,0.118219,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PCall_LC10B_T_AGCGGTCCATAGGATA-1_3,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PCall_CESC4_T_GACGTTATCTAGAGTC-1_3,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PCall_OV3_T_ATCCGAAAGGCAATTA-1_3,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OA018_P23_T_P23T-I-GACACGCGTCCTCTTG,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LC12.TTGTAGGAGATACACA-1_1_1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OA008_P019_T_CAGTCCTAGTACACCT.Pre_P019_t,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLM_P17_Colon_P_CTGCATCGTTCATCGA-1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RCC5.TTATGCTTCGTTACGA-1_2_1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
%%time
# Score the regulons per cell with AUCell (2 workers) and save the resulting matrix as CSV.
# NOTE(review): this writes to a hard-coded "./results/" path rather than RESULTS_FOLDERNAME
# ("./SCENIC/results/") -- confirm the intended location.
auc_mtx = aucell(df_tpm, regulons, num_workers=2)
auc_mtx.to_csv("./results/auc_allcell.csv")

CPU times: user 5min 59s, sys: 2min 4s, total: 8min 3s
Wall time: 18min 26s


In [56]:
%%time
# Run AUCell with 12 workers and save the matrix to AUCELL_MTX_FNAME.
# NOTE(review): this repeats the computation of the previous cell and rebinds `auc_mtx`; the much shorter
# recorded wall time suggests it was run on a different input -- confirm which run is current.
auc_mtx = aucell(df_tpm, regulons, num_workers=12)
auc_mtx.to_csv(AUCELL_MTX_FNAME)

CPU times: user 24.7 s, sys: 7.61 s, total: 32.3 s
Wall time: 48 s


```
CPU times: user 23.1 s, sys: 10.7 s, total: 33.8 s
Wall time: 39.2 s
```

In [13]:
# Reload the AUCell matrix saved to AUCELL_MTX_FNAME, using the first CSV column as the row index.
auc_mtx = pd.read_csv(AUCELL_MTX_FNAME, index_col=0)

In [14]:
auc_mtx

Unnamed: 0_level_0,ADNP(+),AHR(+),ARID3A(+),ARNT(+),ARNT2(+),ARNTL(+),ARNTL2(+),ASCL2(+),ATF1(+),ATF2(+),...,ZNF835(+),ZNF84(+),ZNF841(+),ZNF85(+),ZNF878(+),ZNF91(+),ZSCAN12(+),ZSCAN20(+),ZSCAN23(+),ZXDA(+)
Cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
COAD14B_P_AAAGATGCAAGCGCTC-1,0.022967,0.051921,0.005078,0.081324,0.000547,0.013683,0.068882,0.0,0.060447,0.575311,...,0.0,0.018715,0.263556,0.000000,0.000000,0.150416,0.155128,0.0,0.000000,0.00000
COAD14B_P_ATTTCTGTCCGCGTTT-1,0.000000,0.054397,0.068345,0.032729,0.031555,0.042554,0.000000,0.0,0.046625,0.000000,...,0.0,0.038299,0.000000,0.000000,0.000000,0.093445,0.000000,0.0,0.000000,0.15359
COAD14B_P_CACAGTATCGAGGTAG-1,0.000000,0.002266,0.025809,0.016231,0.001634,0.057467,0.028010,0.0,0.042727,0.140320,...,0.0,0.000000,0.126259,0.000000,0.000000,0.110411,0.000000,0.0,0.000000,0.00000
COAD14B_P_CCGTTCACACCCTATC-1,0.000000,0.049241,0.089378,0.011352,0.047711,0.027040,0.000000,0.0,0.061852,0.000000,...,0.0,0.082745,0.013354,0.000000,0.000000,0.037306,0.000000,0.0,0.087206,0.00000
COAD14B_P_CGGGTCACAGCTGTTA-1,0.000000,0.045117,0.030392,0.032768,0.007898,0.037202,0.029704,0.0,0.064909,0.000000,...,0.0,0.011372,0.067616,0.000000,0.000000,0.083560,0.000000,0.0,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
THYM1.TTTATGCTCTGCGACG-1_1_1,0.000000,0.046485,0.023797,0.077468,0.000386,0.047397,0.000000,0.0,0.033361,0.000000,...,0.0,0.050028,0.062345,0.000000,0.000000,0.047504,0.000000,0.0,0.000000,0.00000
THYM1.TTTCCTCGTAAACACA-1_1_1,0.000000,0.005182,0.034181,0.064719,0.007796,0.031778,0.000000,0.0,0.038065,0.000000,...,0.0,0.000000,0.129784,0.125219,0.000000,0.047425,0.000000,0.0,0.000000,0.00000
THYM1.TTTCCTCGTCAGATAA-1_1_1,0.000000,0.087400,0.012718,0.043877,0.000723,0.063868,0.000000,0.0,0.078698,0.000000,...,0.0,0.045881,0.000000,0.000000,0.000000,0.109744,0.000000,0.0,0.000000,0.00000
THYM1.TTTGCGCAGCCCAACC-1_1_1,0.000000,0.000000,0.028467,0.023223,0.000811,0.055726,0.000000,0.0,0.047567,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.054042,0.000000,0.0,0.000000,0.00000


__CELL TYPE SPECIFIC REGULATORS - RSS__

In [24]:
# Load the all-cell AUCell matrix from ./results/auc_allcell.csv, using the first CSV column as the row index.
auc_mtx=pd.read_csv("./results/auc_allcell.csv",index_col=0)

In [25]:
auc_mtx

Unnamed: 0_level_0,ADNP(+),AHR(+),ARID3A(+),ARNT(+),ARNT2(+),ARNTL(+),ARNTL2(+),ASCL2(+),ATF1(+),ATF2(+),...,ZNF835(+),ZNF84(+),ZNF841(+),ZNF85(+),ZNF878(+),ZNF91(+),ZSCAN12(+),ZSCAN20(+),ZSCAN23(+),ZXDA(+)
Cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PCall_CESC3_T_GAGCAGACATACTACG-1_3,0.000000,0.054074,0.100749,0.010882,0.055852,0.047846,0.053790,0.009201,0.102654,0.000000,...,0.0,0.008390,0.034400,0.000000,0.000000,0.177764,0.000000,0.0,0.0,0.144336
PCall_LC10B_T_AGCGGTCCATAGGATA-1_3,0.044225,0.122180,0.047359,0.035439,0.004795,0.061226,0.000696,0.003357,0.111509,0.463422,...,0.0,0.028218,0.057536,0.000000,0.000000,0.142751,0.171502,0.0,0.0,0.000000
PCall_CESC4_T_GACGTTATCTAGAGTC-1_3,0.009231,0.095917,0.023704,0.000390,0.059742,0.022238,0.014428,0.000000,0.060849,0.000000,...,0.0,0.036850,0.121767,0.000000,0.000000,0.067949,0.152328,0.0,0.0,0.251832
PCall_OV3_T_ATCCGAAAGGCAATTA-1_3,0.149503,0.080355,0.048631,0.116808,0.001866,0.061416,0.001150,0.000000,0.089227,0.525060,...,0.0,0.025024,0.000000,0.394495,0.000000,0.162412,0.000000,0.0,0.0,0.021854
OA018_P23_T_P23T-I-GACACGCGTCCTCTTG,0.000000,0.020312,0.141958,0.022692,0.023880,0.012570,0.021280,0.000000,0.085315,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.071281,0.000000,0.0,0.0,0.030104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LC12.TTGTAGGAGATACACA-1_1_1,0.005439,0.111900,0.092982,0.000272,0.015920,0.030407,0.022548,0.000000,0.087092,0.000000,...,0.0,0.009990,0.063822,0.000000,0.000000,0.077878,0.000000,0.0,0.0,0.000000
OA008_P019_T_CAGTCCTAGTACACCT.Pre_P019_t,0.028731,0.084823,0.152370,0.067424,0.005658,0.028852,0.000000,0.000000,0.088801,0.025108,...,0.0,0.040804,0.051043,0.223724,0.000000,0.084745,0.000000,0.0,0.0,0.082550
CRLM_P17_Colon_P_CTGCATCGTTCATCGA-1,0.012991,0.128947,0.091748,0.031135,0.027930,0.013396,0.116199,0.023172,0.123922,0.000000,...,0.0,0.017414,0.038759,0.000000,0.000000,0.087404,0.000000,0.0,0.0,0.000000
RCC5.TTATGCTTCGTTACGA-1_2_1,0.000000,0.050563,0.045922,0.011636,0.016003,0.076804,0.000000,0.000000,0.059588,0.474266,...,0.0,0.000000,0.059593,0.274029,0.000000,0.116277,0.011262,0.0,0.0,0.000000


In [26]:
# Copy the row labels into an explicit 'cell' column; the next cell promotes it back to the index.
auc_mtx = auc_mtx.assign(cell=auc_mtx.index)

In [27]:
# Use the 'cell' column as the index, then order the rows by cell label.
auc_mtx = auc_mtx.set_index('cell')
auc_mtx = auc_mtx.sort_index()

In [28]:
auc_mtx

Unnamed: 0_level_0,ADNP(+),AHR(+),ARID3A(+),ARNT(+),ARNT2(+),ARNTL(+),ARNTL2(+),ASCL2(+),ATF1(+),ATF2(+),...,ZNF835(+),ZNF84(+),ZNF841(+),ZNF85(+),ZNF878(+),ZNF91(+),ZSCAN12(+),ZSCAN20(+),ZSCAN23(+),ZXDA(+)
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
COAD14B_P_AAAGATGCAAGCGCTC-1,0.036084,0.070752,0.017929,0.100900,0.020149,0.015353,0.084085,0.0,0.076689,0.618657,...,0.000000,0.035160,0.340637,0.000000,0.000000,0.183672,0.130098,0.0,0.000000,0.000000
COAD14B_P_AAAGATGCAGTCCTTC-1,0.018803,0.100066,0.083311,0.010707,0.033872,0.000000,0.039463,0.0,0.043760,0.000000,...,0.016786,0.023088,0.085617,0.000000,0.000000,0.044004,0.000000,0.0,0.000000,0.060645
COAD14B_P_AAAGATGGTACATCCA-1,0.148947,0.035007,0.078740,0.032657,0.001940,0.019059,0.030085,0.0,0.091288,0.513075,...,0.000000,0.000000,0.078316,0.000000,0.016288,0.118043,0.000000,0.0,0.231076,0.063117
COAD14B_P_AAAGTAGCACCTGGTG-1,0.000000,0.060683,0.033765,0.036320,0.014357,0.051519,0.025056,0.0,0.082776,0.061027,...,0.000000,0.018367,0.082986,0.000000,0.000000,0.183909,0.000000,0.0,0.000000,0.000000
COAD14B_P_AACCATGAGGTGTTAA-1,0.074425,0.032427,0.077468,0.044376,0.020433,0.075368,0.022240,0.0,0.054912,0.000000,...,0.000000,0.043024,0.054530,0.000000,0.000000,0.055912,0.000000,0.0,0.000000,0.221865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
THYM1.TTTGTCACAGGACGTA-1_1_1,0.000000,0.059179,0.064981,0.094129,0.000000,0.040542,0.000000,0.0,0.097238,0.000000,...,0.000000,0.026890,0.023591,0.000000,0.000000,0.065405,0.000000,0.0,0.000000,0.000000
THYM1.TTTGTCATCAGCTTAG-1_1_1,0.003328,0.166004,0.217817,0.025862,0.022882,0.049323,0.033152,0.0,0.076635,0.000000,...,0.000000,0.000000,0.090631,0.256819,0.000000,0.127014,0.004246,0.0,0.000000,0.105022
THYM1.TTTGTCATCCCGGATG-1_1_1,0.000000,0.126650,0.062128,0.048981,0.031074,0.037439,0.014167,0.0,0.042420,0.000000,...,0.000000,0.021837,0.051392,0.000000,0.000000,0.113596,0.000000,0.0,0.000000,0.035242
THYM1.TTTGTCATCGTAGGTT-1_1_1,0.000000,0.145232,0.187868,0.000000,0.011522,0.085936,0.000000,0.0,0.082097,0.000000,...,0.000000,0.027343,0.067639,0.000000,0.000000,0.107641,0.000000,0.0,0.000000,0.028566


In [31]:
auc_mtx.index

Index(['COAD14B_P_AAAGATGCAAGCGCTC-1', 'COAD14B_P_AAAGATGCAGTCCTTC-1',
       'COAD14B_P_AAAGATGGTACATCCA-1', 'COAD14B_P_AAAGTAGCACCTGGTG-1',
       'COAD14B_P_AACCATGAGGTGTTAA-1', 'COAD14B_P_AACCATGCACCCATTC-1',
       'COAD14B_P_AACCATGTCGTTTAGG-1', 'COAD14B_P_AACCGCGAGCTTCGCG-1',
       'COAD14B_P_AACCGCGCACCCATTC-1', 'COAD14B_P_AACCGCGTCATTGCGA-1',
       ...
       'THYM1.TTTGGTTAGAGCCTAG-1_1_1', 'THYM1.TTTGGTTAGAGGACGG-1_1_1',
       'THYM1.TTTGGTTAGCTAGTTC-1_1_1', 'THYM1.TTTGGTTCAAGCCTAT-1_1_1',
       'THYM1.TTTGGTTTCCACGTTC-1_1_1', 'THYM1.TTTGTCACAGGACGTA-1_1_1',
       'THYM1.TTTGTCATCAGCTTAG-1_1_1', 'THYM1.TTTGTCATCCCGGATG-1_1_1',
       'THYM1.TTTGTCATCGTAGGTT-1_1_1', 'THYM1.TTTGTCATCGTGGGAA-1_1_1'],
      dtype='object', name='cell', length=200000)

In [35]:
# Load the per-cell annotation table (first CSV column as the row index) and display it.
anno=pd.read_csv("./resources/data_527730cell_0506_anno.csv",index_col=0)
anno

Unnamed: 0,celltype_l3,cancer,dataid
CRLM_P17_Colon_T_AACGGGATCGAACACT-1,B_01_TCL1A_naïveB,COAD,IH001
CRLM_P17_Colon_T_AATCGTGTCTCTAGGA-1,B_c06_NR4A2_ACB2,COAD,IH001
CRLM_P17_Colon_T_AGTACTGAGGATTACA-1,B_c05_EGR1_ACB1,COAD,IH001
CRLM_P17_Colon_T_CATCCGTTCAGTGATC-1,B_01_TCL1A_naïveB,COAD,IH001
CRLM_P17_Colon_T_CCCATTGAGACATCCT-1,B_c08_ITGB1_SwBm,COAD,IH001
...,...,...,...
RCC9.TTTACTGGTCGAATCT-1,B_01_TCL1A_naïveB,RCC,PCall_new
RCC9.TTTATGCGTTCACGGC-1,B_01_TCL1A_naïveB,RCC,PCall_new
RCC9.TTTCCTCCAGGTTTCA-1,B_01_TCL1A_naïveB,RCC,PCall_new
RCC9.TTTGCGCGTGAGCGAT-1,B_01_TCL1A_naïveB,RCC,PCall_new


In [38]:
# Restrict the AUCell matrix to annotated cells, keeping the annotation table's row order.
# The AUC matrix was computed on a subsample of cells, so some annotated cells may be absent from it.
# Selecting missing labels with .loc raises KeyError, so only labels present in both tables are used.
# When every annotated cell is present this is identical to auc_mtx.loc[anno.index, :].
shared_cells = anno.index[anno.index.isin(auc_mtx.index)]
auc_mtx = auc_mtx.loc[shared_cells, :]

In [39]:
auc_mtx

Unnamed: 0,ADNP(+),AHR(+),ARID3A(+),ARNT(+),ARNT2(+),ARNTL(+),ARNTL2(+),ASCL2(+),ATF1(+),ATF2(+),...,ZNF835(+),ZNF84(+),ZNF841(+),ZNF85(+),ZNF878(+),ZNF91(+),ZSCAN12(+),ZSCAN20(+),ZSCAN23(+),ZXDA(+)
CRLM_P17_Colon_T_AACGGGATCGAACACT-1,0.025237,0.030721,0.037626,0.014823,0.042495,0.009692,0.066912,0.000000,0.058959,0.000000,...,0.037089,0.037694,0.000000,0.000000,0.000000,0.013822,0.0,0.0,0.000000,0.000000
CRLM_P17_Colon_T_AATCGTGTCTCTAGGA-1,0.021538,0.141311,0.032191,0.000773,0.037561,0.000000,0.025896,0.000000,0.066058,0.000000,...,0.025530,0.027935,0.052484,0.000000,0.000000,0.019828,0.0,0.0,0.000000,0.063457
CRLM_P17_Colon_T_AGTACTGAGGATTACA-1,0.021010,0.026366,0.031432,0.064159,0.036815,0.078695,0.025379,0.000000,0.101967,0.000000,...,0.024425,0.041291,0.000000,0.000000,0.020879,0.035362,0.0,0.0,0.000000,0.000000
CRLM_P17_Colon_T_CATCCGTTCAGTGATC-1,0.023372,0.057344,0.070036,0.035863,0.040007,0.000000,0.027592,0.000000,0.073795,0.000000,...,0.031863,0.033089,0.000000,0.000000,0.000000,0.043920,0.0,0.0,0.000000,0.000000
CRLM_P17_Colon_T_CCCATTGAGACATCCT-1,0.025548,0.030982,0.049938,0.000895,0.042868,0.018695,0.029575,0.000000,0.063779,0.000000,...,0.038798,0.038832,0.000000,0.000000,0.000000,0.030997,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
RCC9.TTTACTGGTCGAATCT-1,0.011282,0.075204,0.083918,0.030452,0.023756,0.045005,0.016325,0.025729,0.066858,0.000000,...,0.000000,0.053260,0.035983,0.379271,0.000000,0.120798,0.0,0.0,0.000000,0.000000
RCC9.TTTATGCGTTCACGGC-1,0.005159,0.077453,0.126265,0.009275,0.015423,0.077371,0.010548,0.000000,0.098505,0.000000,...,0.000000,0.009688,0.000000,0.000000,0.000000,0.143925,0.0,0.0,0.000000,0.000000
RCC9.TTTCCTCCAGGTTTCA-1,0.007894,0.099945,0.032090,0.149467,0.019237,0.033326,0.013193,0.000000,0.069843,0.000000,...,0.000000,0.012464,0.092161,0.000000,0.114957,0.103259,0.0,0.0,0.185576,0.039992
RCC9.TTTGCGCGTGAGCGAT-1,0.008765,0.108492,0.013281,0.036131,0.020398,0.063885,0.045354,0.000000,0.053388,0.647193,...,0.000000,0.036910,0.214569,0.000000,0.000000,0.228672,0.0,0.0,0.000000,0.000000


In [40]:
# Select the annotation rows by the AUCell matrix index so both tables are aligned row by row.
anno=anno.loc[auc_mtx.index]

In [41]:
# Compute regulon specificity scores against the celltype_l3 labels and display the resulting table.
cell_type_labels = anno['celltype_l3']
rss = regulon_specificity_scores(auc_mtx, cell_type_labels)
rss

Unnamed: 0,ADNP(+),AHR(+),ARID3A(+),ARNT(+),ARNT2(+),ARNTL(+),ARNTL2(+),ASCL2(+),ATF1(+),ATF2(+),...,ZNF835(+),ZNF84(+),ZNF841(+),ZNF85(+),ZNF878(+),ZNF91(+),ZSCAN12(+),ZSCAN20(+),ZSCAN23(+),ZXDA(+)
B_01_TCL1A_naïveB,0.298151,0.308764,0.29424,0.347656,0.335332,0.328712,0.296384,0.209831,0.327768,0.216532,...,0.232253,0.328236,0.316707,0.226488,0.197548,0.339018,0.227004,0.172662,0.221772,0.274383
B_c06_NR4A2_ACB2,0.278701,0.318474,0.30155,0.31032,0.285009,0.330861,0.25697,0.204989,0.298597,0.238184,...,0.192274,0.324069,0.316797,0.246925,0.202743,0.326586,0.229732,0.17999,0.206623,0.283273
B_c05_EGR1_ACB1,0.244989,0.246539,0.230337,0.232677,0.235028,0.243566,0.218288,0.188804,0.230685,0.210407,...,0.206715,0.244777,0.239658,0.203777,0.185974,0.241039,0.20847,0.176284,0.192946,0.225561
B_c08_ITGB1_SwBm,0.266019,0.338157,0.324989,0.301899,0.29198,0.308884,0.270587,0.213447,0.31898,0.233791,...,0.196761,0.303092,0.289517,0.225269,0.215763,0.32188,0.225744,0.179283,0.212306,0.274321
B_c09_DUSP4_AtM,0.206613,0.241436,0.246725,0.242707,0.209623,0.235828,0.205527,0.193805,0.23005,0.206532,...,0.177994,0.227067,0.227996,0.204683,0.200098,0.239838,0.204217,0.180774,0.18879,0.22419
B_02_IFIT3_B,0.200955,0.207326,0.202678,0.214436,0.205139,0.207641,0.196731,0.182284,0.210946,0.190949,...,0.18182,0.207795,0.206546,0.195037,0.18315,0.212493,0.202314,0.172117,0.189343,0.199042
B_c07_CCR7_ACB3,0.215292,0.239256,0.230896,0.219686,0.209955,0.231581,0.202095,0.185519,0.216751,0.210135,...,0.178443,0.232062,0.224191,0.209671,0.187149,0.227447,0.193752,0.181274,0.181147,0.221116
B_c03_HSP_B,0.220224,0.230737,0.215145,0.227626,0.218513,0.226666,0.206075,0.186718,0.226948,0.20481,...,0.197348,0.228295,0.225691,0.201028,0.186707,0.230805,0.222472,0.181895,0.192565,0.216657
B_14_MZB1_rASC,0.252516,0.269334,0.352302,0.236719,0.315929,0.263639,0.425333,0.276471,0.330977,0.195926,...,0.23733,0.241457,0.220238,0.200383,0.277707,0.277802,0.204984,0.173201,0.25051,0.241183
B_13_STMN1_PB,0.183977,0.198444,0.194163,0.204153,0.194711,0.200862,0.206533,0.198121,0.224538,0.192394,...,0.174101,0.18558,0.187882,0.191277,0.185015,0.202617,0.177051,0.171133,0.188518,0.191197


In [42]:
# Save the regulon specificity score table as CSV.
rss.to_csv("./results/rss.celltype_allcell_hg38_0506.csv")