- 1000 cell: done
- 2000 cell: done
- 4000 cell: in progress

In [1]:
#STEP 1: importing all needed moduels

import os, glob, re, pickle
from functools import partial
from collections import OrderedDict
import operator as op
from cytoolz import compose

import pandas as pd
import seaborn as sns
import numpy as np
import scanpy as sc
import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt
import skmisc

from pyscenic.export import export2loom, add_scenic_metadata
from pyscenic.utils import load_motifs
from pyscenic.transform import df2regulons
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_binarization, plot_rss

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2
from pyscenic.utils import modules_from_adjacencies
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell
from dask.diagnostics import ProgressBar
from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase

#STEP 1+: Set maximum number of jobs
# scanpy reads its parallelism from `settings.n_jobs`; `njobs` is not a scanpy
# setting, so assigning to it never changed the number of jobs scanpy uses.
sc.settings.n_jobs = 32

  def twobit_to_dna(twobit: int, size: int) -> str:
  def dna_to_twobit(dna: str) -> int:
  def twobit_1hamming(twobit: int, size: int) -> List[int]:


Split each file by state, then run network reconstruction on each state separately.
- A is patient pretreatment pdx
- B is patient posttreatment pdx

In [2]:
# Run configuration: the dataset to process and the number of highly variable
# genes to keep (passed as n_top_genes in the HVG step).
DATASET_ID = "lx33"
GENE_NUM = 2_000

In [3]:
#STEP 1++: path constants shared by the rest of the notebook

# Input data for the selected dataset (lives outside this project's tree).
SOHRAB_RESOURCES_FOLDERNAME = "/work/shah/users/salehis/projects/cdm/data/sclc/{}".format(DATASET_ID)

# All project-owned folders hang off a single root, so relocating the project
# is a one-line change instead of five edits.
PROJECT_FOLDERNAME = "/home/linl5/project/SCLC"
RESULTS_FOLDERNAME = os.path.join(PROJECT_FOLDERNAME, "results", DATASET_ID)
FIGURES_FOLDERNAME = os.path.join(PROJECT_FOLDERNAME, "figures")
AUXILLIARIES_FOLDERNAME = os.path.join(PROJECT_FOLDERNAME, "auxilliaries")
RESOURCES_FOLDERNAME = os.path.join(PROJECT_FOLDERNAME, "resources")
DATA_FOLDERNAME = os.path.join(PROJECT_FOLDERNAME, "data", DATASET_ID)

In [4]:
# Transcription-factor list downloaded from the pySCENIC GitHub repo on Aug-1-2023:
# https://github.com/aertslab/pySCENIC/tree/master/resources
RESOURCES_FOLDERNAME = "/home/linl5/project/SCLC/resources"
HUMAN_TFS_FNAME = os.path.join(
    RESOURCES_FOLDERNAME,
    "allTFs_hg38.txt",
)

In [5]:
#STEP 2: Locating the expression matrix (metadata travels inside the same file)
#input: one .h5ad file per dataset

# Cell-by-gene matrix; values are read counts per cell.
COUNTS_MTX_FNAME = os.path.join(
    SOHRAB_RESOURCES_FOLDERNAME,
    "rna.h5ad",
)
COUNTS_MTX_FNAME

'/work/shah/users/salehis/projects/cdm/data/sclc/lx33/rna.h5ad'

In [6]:
#STEP 3: Importing the RNA data

# Read the H5AD file using anndata. `ad.read` is a deprecated alias;
# `read_h5ad` is the supported reader for .h5ad files.
adata = ad.read_h5ad(COUNTS_MTX_FNAME)
COUNTS_MTX_FNAME

'/work/shah/users/salehis/projects/cdm/data/sclc/lx33/rna.h5ad'

In [7]:
#STEP4: Understanding Dataset

# Summary of the AnnData object (dimensions plus obs/var annotation names)
print(adata)

# Dimensions as (n_obs, n_vars)
print("Shape of expression matrix: ", adata.shape)

# Per-variable (column) annotations of the expression matrix
print("Columns (variables):", adata.var, sep="\n")

# Per-observation (row) annotations of the expression matrix
print("\nRows (observations):", adata.obs, sep="\n")


AnnData object with n_obs × n_vars = 23691 × 33538
    obs: 'timepoint', 'datatag', 'batch', 'state'
    var: 'gene_ids', 'feature_types'
Shape of expression matrix:  (23691, 33538)
Columns (variables):
                    gene_ids    feature_types
MIR1302-2HG  ENSG00000243485  Gene Expression
FAM138A      ENSG00000237613  Gene Expression
OR4F5        ENSG00000186092  Gene Expression
AL627309.1   ENSG00000238009  Gene Expression
AL627309.3   ENSG00000239945  Gene Expression
...                      ...              ...
AC233755.2   ENSG00000277856  Gene Expression
AC233755.1   ENSG00000275063  Gene Expression
AC240274.1   ENSG00000271254  Gene Expression
AC213203.1   ENSG00000277475  Gene Expression
FAM231C      ENSG00000268674  Gene Expression

[33538 rows x 2 columns]

Rows (observations):
                             timepoint datatag batch state
AAACCCAAGAAAGTCT-1_Lx33_UTTU      UTTU    Lx33     0  UTTU
AAACCCAAGACGGAAA-1_Lx33_UTTU      UTTU    Lx33     0  UTTU
AAACCCAAGGTCACCC-1_L

In [8]:
#STEP 5: preprocessing and filtering

#make gene names unique
adata.var_names_make_unique()

#report the shape before filtering from the data itself; a number typed into a
#comment goes stale as soon as DATASET_ID changes
print("Pre Filter: ", adata.shape)

#pre-filter: min_genes applies to cells, min_cells applies to genes
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

#keep a snapshot of the filtered counts in .raw
adata.raw = adata

print("Post Filter: ", adata.shape)


Post Filter:  (23051, 22899)


LX599: 
- Pre-Filter: 33207 × 33538
- Post Filter:  (33108, 28701)


Lx108: 
- Pre-Filter: 18481 × 33538
- Post Filter: (18436, 22723)

Lx33:
- pre filter: 23691 × 33538
- Post Filter:  (23051, 22899)

In [10]:
# STEP 6: Prepping for timepoint splitting
# Store an independent copy of the counts: assigning adata.X itself may leave the
# layer sharing memory with X, so a later in-place change to X (normalisation,
# log-transform) could silently change the saved counts as well.
adata.layers['counts'] = adata.X.copy()
adata.raw = adata


In [11]:
#STEP 7: select highly variable genes on the full sample, then split by state
#(IDEA -> filter by HVG in the original sample, split into states, then cluster and subsample from each cluster)

# keep the top GENE_NUM genes; subset=True restricts adata to them in place
sc.pp.highly_variable_genes(adata, flavor='seurat_v3', n_top_genes=GENE_NUM, subset=True)

# distinct values of the 'state' annotation
unique_state = adata.obs['state'].unique()
print(unique_state)

# one independent AnnData copy per state, keyed by the state label
adata_by_state = {}
for state in unique_state:
    in_state = adata.obs['state'] == state
    adata_subset = adata[in_state].copy()
    adata_by_state[state] = adata_subset
    print(state, ": ", adata_subset.shape)

    

['UTTU', 'UU', 'UUUT', 'UUUU']
Categories (4, object): ['UTTU', 'UU', 'UUUT', 'UUUU']
UTTU :  (4021, 2000)
UU :  (6176, 2000)
UUUT :  (6619, 2000)
UUUU :  (6235, 2000)


lx33 (post cluster):
- UTTU :  (4021, 2000)
- UU :  (6176, 2000)
- UUUT :  (6619, 2000)
- UUUU :  (6235, 2000)

lx108
- UUUU :  (4128, 2000)
- UUUT :  (4564, 2000)
- UU :  (5035, 2000)
- UTTU :  (4709, 2000)

The Leiden algorithm improves upon Louvain by using the "agglomerative" approach to optimize a different quality function known as the "improved modularity." Improved modularity has a resolution parameter that allows Leiden to control the granularity of the clustering solution. It also uses a smart local move algorithm to avoid some of the resolution limit issues present in Louvain. A higher resolution parameter yields more (and smaller) communities.

#The Leiden algorithm is a hierarchical clustering algorithm, that recursively merges communities into single nodes by greedily optimizing the modularity and the process repeats in the condensed graph.
#The Leiden algorithm improves upon Louvain by using the "agglomerative" approach to optimize a different quality function known as the "improved modularity." Improved modularity has a resolution parameter that allows Leiden to control the granularity of the clustering solution. It also uses a smart local move algorithm to avoid some of the resolution limit issues present in Louvain.

In [14]:
#STEP 8: cluster definition

def cluster_rna(bdata, resolution=2):
    """Normalise, embed and Leiden-cluster one AnnData object.

    Filtering and HVG selection are expected to have been done on the main
    AnnData beforehand, so that every state shares the same gene set.

    Parameters
    ----------
    bdata
        AnnData object to process. It is modified in place: X is
        normalised and log1p-transformed, and the PCA, neighbour-graph,
        UMAP and Leiden results are added to it.
    resolution
        Resolution passed to ``sc.tl.leiden``. Defaults to 2, the value
        that used to be hard-coded here.

    Returns
    -------
    The same ``bdata`` object that was passed in (not a copy). Later steps
    read the cluster labels from ``bdata.obs['leiden']``.
    """
    sc.pp.normalize_total(bdata)
    sc.pp.log1p(bdata)
    sc.pp.pca(bdata)
    sc.pp.neighbors(bdata)
    sc.tl.umap(bdata)
    sc.tl.leiden(bdata, resolution=resolution)
    return bdata

In [15]:
#STEP 9: Clustering

# cluster every state separately and store the processed object under its state label
for state in unique_state:
    clustered = cluster_rna(adata_by_state[state])
    adata_by_state[state] = clustered

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


In [20]:
#STEP 10: Subsampling

from tqdm import tqdm

# fraction of each retained Leiden cluster to keep (1 keeps every cell of it)
frac_cells = 1
data_sub = {}

#Consistent randomization
np.random.seed(0)


def _sample_cluster_cells(state_adata, fraction):
    """Return cell names sampled without replacement from each Leiden cluster.

    Clusters with 5 or fewer cells are skipped entirely. From every other
    cluster, ``int(cluster_size * fraction)`` cells are drawn with NumPy's
    global random state, one cluster after another.
    """
    picked = []
    for clust in tqdm(state_adata.obs['leiden'].unique()):
        members = state_adata.obs_names[state_adata.obs['leiden'] == clust]
        if len(members) <= 5:
            continue
        n_keep = int(len(members) * fraction)
        picked.extend(np.random.choice(members, size=n_keep, replace=False))
    return picked


for state in unique_state:
    sub_cells = _sample_cluster_cells(adata_by_state[state], frac_cells)
    data_sub[state] = adata_by_state[state][sub_cells, :].copy()

100%|██████████| 21/21 [00:00<00:00, 1747.97it/s]
100%|██████████| 19/19 [00:00<00:00, 3192.91it/s]
100%|██████████| 22/22 [00:00<00:00, 2553.75it/s]
100%|██████████| 19/19 [00:00<00:00, 3171.18it/s]


In [21]:
#checking the subsampled populations

# each state's gene set is compared against that of the second state in unique_state
for state in unique_state:
    print(state, data_sub[state].shape)
    reference_state = unique_state[1]
    state_genes = set(data_sub[state].var_names)
    reference_genes = set(data_sub[reference_state].var_names)
    n_shared = len(state_genes & reference_genes)
    print("Same Gene set check: ", state, reference_state, n_shared)

print(COUNTS_MTX_FNAME)

UUUU (4128, 2000)
Same Gene set check:  UUUU UUUT 2000
UUUT (4564, 2000)
Same Gene set check:  UUUT UUUT 2000
UU (5035, 2000)
Same Gene set check:  UU UUUT 2000
UTTU (4706, 2000)
Same Gene set check:  UTTU UUUT 2000
/work/shah/users/salehis/projects/cdm/data/sclc/lx108/rna.h5ad


LX599

10%
UUa (715, 2000)
Same Gene set check:  UUa UUa  2000
UTb (515, 2000)
Same Gene set check:  UTb UUa  2000
UUb (991, 2000)
Same Gene set check:  UUb UUa  2000
UTa (1042, 2000)
Same Gene set check:  UTa UUa  2000

25%
UUa (1812, 2000)
Same Gene set check:  UUa UUa  2000
UTb (1307, 2000)
Same Gene set check:  UTb UUa  2000
UUb (2500, 2000)
Same Gene set check:  UUb UUa  2000
UTa (2618, 2000)
Same Gene set check:  UTa UUa  2000

50%
UUa (3633, 2000)
Same Gene set check:  UUa UUa  2000
UTb (2633, 2000)
Same Gene set check:  UTb UUa  2000
UUb (5011, 2000)
Same Gene set check:  UUb UUa  2000
UTa (5248, 2000)
Same Gene set check:  UTa UUa  2000

LX108

20%
UUUU (818, 2000)
Same Gene set check:  UUUU UUUT 2000
UUUT (904, 2000)
Same Gene set check:  UUUT UUUT 2000
UU (998, 2000)
Same Gene set check:  UU UUUT 2000
UTTU (934, 2000)
Same Gene set check:  UTTU UUUT 2000

40%
UUUU (1643, 2000)
Same Gene set check:  UUUU UUUT 2000
UUUT (1818, 2000)
Same Gene set check:  UUUT UUUT 2000
UU (2004, 2000)
Same Gene set check:  UU UUUT 2000
UTTU (1878, 2000)
Same Gene set check:  UTTU UUUT 2000

100%
UUUU (4128, 2000)
Same Gene set check:  UUUU UUUT 2000
UUUT (4564, 2000)
Same Gene set check:  UUUT UUUT 2000
UU (5035, 2000)
Same Gene set check:  UU UUUT 2000
UTTU (4706, 2000)
Same Gene set check:  UTTU UUUT 2000

LX33

15%
UTTU (594, 2000)
Same Gene set check:  UTTU UU 2000
UU (917, 2000)
Same Gene set check:  UU UU 2000
UUUT (981, 2000)
Same Gene set check:  UUUT UU 2000
UUUU (922, 2000)
Same Gene set check:  UUUU UU 2000

33%
UTTU (1316, 2000)
Same Gene set check:  UTTU UU 2000
UU (2026, 2000)
Same Gene set check:  UU UU 2000
UUUT (2171, 2000)
Same Gene set check:  UUUT UU 2000
UUUU (2044, 2000)
Same Gene set check:  UUUU UU 2000


66%
UTTU (2644, 2000)
Same Gene set check:  UTTU UU 2000
UU (4065, 2000)
Same Gene set check:  UU UU 2000
UUUT (4357, 2000)
Same Gene set check:  UUUT UU 2000
UUUU (4102, 2000)
Same Gene set check:  UUUU UU 2000

100%
UTTU (4021, 2000)
Same Gene set check:  UTTU UU 2000
UU (6176, 2000)
Same Gene set check:  UU UU 2000
UUUT (6619, 2000)
Same Gene set check:  UUUT UU 2000
UUUU (6235, 2000)
Same Gene set check:  UUUU UU 2000
/work/shah/users/salehis/projects/cdm/data/sclc/lx33/rna.h5ad

In [22]:
#STEP 11: copying the raw counts back into X

# clustering normalised and log-transformed X, so restore the saved 'counts' layer
for state in unique_state:
    state_adata = data_sub[state]
    state_adata.X = state_adata.layers['counts'].copy()

#check for sample output
data_sub[unique_state[1]].to_df()

Unnamed: 0,FAM87B,LINC00115,HES4,ISG15,ATAD3C,GABRD,AL590822.2,HES5,AL139246.5,AL139246.3,...,BACE2,TFF1,AP001626.1,ERVH48-1,FRGCA,AIRE,TRPM2,C21orf58,PCNT,MT-ND6
TATGTTCAGGGACACT-1_Lx108_UUUT,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
TTTGGAGGTCAGGAGT-1_Lx108_UUUT,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
ATTCCATAGGTTCCGC-1_Lx108_UUUT,0.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,6.0
AGGGTTTAGGGTGAGG-1_Lx108_UUUT,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
TTCAATCTCGTCTCAC-1_Lx108_UUUT,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCATTGTTCATTGAGC-1_Lx108_UUUT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0
CAATCGATCTTTACAC-1_Lx108_UUUT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ACGTAGTAGTTAGTAG-1_Lx108_UUUT,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
TTATTGCGTCTGTGTA-1_Lx108_UUUT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
#STEP 12: output the state-specific count matrix after subsampling by state

# to_csv does not create missing folders, and RESULTS_FOLDERNAME changes with
# DATASET_ID, so make sure the folder exists before writing.
os.makedirs(RESULTS_FOLDERNAME, exist_ok=True)

for state in unique_state:
    EXP_MTX_QC_FNAME = os.path.join(RESULTS_FOLDERNAME, '{}.{}.{}.qc.tpm.csv'.format(DATASET_ID, state, frac_cells))
    # tab-separated despite the .csv suffix; the cell-name index is not written
    data_sub[state].to_df().to_csv(EXP_MTX_QC_FNAME, index=False, sep='\t')
    print("Finish with: ", EXP_MTX_QC_FNAME, " Shape: ", data_sub[state].shape)


Finish with:  /home/linl5/project/SCLC/results/lx108/lx108.UUUU.1.qc.tpm.csv  Shape:  (4128, 2000)
Finish with:  /home/linl5/project/SCLC/results/lx108/lx108.UUUT.1.qc.tpm.csv  Shape:  (4564, 2000)
Finish with:  /home/linl5/project/SCLC/results/lx108/lx108.UU.1.qc.tpm.csv  Shape:  (5035, 2000)
Finish with:  /home/linl5/project/SCLC/results/lx108/lx108.UTTU.1.qc.tpm.csv  Shape:  (4706, 2000)


In [12]:
# STEP 13: prepping for GRN, loading the TF list and the expression matrices

#Loading TF
tf_names = load_tf_names(HUMAN_TFS_FNAME)
print(HUMAN_TFS_FNAME, ": Size of TF list", len(tf_names))

#selecting !!!! CHANGE
# NOTE(review): this replaces the frac_cells value set in the subsampling step, so
# the '<dataset>.<state>.<frac>.qc.tpm.csv' files for this fraction must already
# exist on disk - confirm before running.
frac_cells = 0.66

# Create the output folder up front so a long GRNBoost2 run is not lost to a
# failed write at the very end.
os.makedirs(DATA_FOLDERNAME, exist_ok=True)

for state in unique_state:
    #expression matrix
    EXP_MTX_QC_FNAME = os.path.join(RESULTS_FOLDERNAME, '{}.{}.{}.qc.tpm.csv'.format(DATASET_ID, state, frac_cells))
    ex_matrix = pd.read_csv(EXP_MTX_QC_FNAME, sep='\t')

    #Input Checking
    print("\nExpression matrix shape for", DATASET_ID, state, ex_matrix.shape)

    #STEP 14: Running GRNBOOST2 for coexpression modules
    adjacencies = grnboost2(expression_data=ex_matrix, tf_names=tf_names, verbose=True)
    print("\nCOMPLETED GRNBOOST2 RUNNING FOR", DATASET_ID, state)
    print("\n", adjacencies.head())

    #writing the TF-target adjacencies for this state
    ADJACENCIES_FNAME = os.path.join(DATA_FOLDERNAME, "{}.{}.{}.adjacencies.tsv".format(DATASET_ID, state, frac_cells))
    adjacencies.to_csv(ADJACENCIES_FNAME, index=False, sep='\t')
    print("SUCCESSFUL WRITING TO", ADJACENCIES_FNAME, "\n")
    

/home/linl5/project/SCLC/resources/allTFs_hg38.txt : Size of TF list 1892

Expression matrix shape for lx33 UTTU (2644, 2000)
preparing dask client
parsing input
creating dask graph
8 partitions
computing dask graph
shutting down client and local cluster
finished

COMPLETED GRNBOOST2 RUNNING FOR lx33 UTTU

      TF  target  importance
2  ENO1  IGFBP2  328.750580
2  ENO1   GAPDH  293.824415
2  ENO1   BNIP3  266.833032
2  ENO1    PGK1  258.374602
2  ENO1    LDHA  251.407131
SUCCESSFUL WRITING TO /home/linl5/project/SCLC/data/lx33/lx33.UTTU.0.66.adjacencies.tsv 


Expression matrix shape for lx33 UU (4065, 2000)
preparing dask client
parsing input
creating dask graph
8 partitions
computing dask graph
shutting down client and local cluster
finished

COMPLETED GRNBOOST2 RUNNING FOR lx33 UU

      TF  target  importance
2  ENO1   BNIP3  360.877834
2  ENO1  IGFBP2  266.158488
2  ENO1    NRN1  240.807875
2  ENO1    PGK1  234.456980
2  ENO1  COX4I2  225.749251
SUCCESSFUL WRITING TO /home/linl5/

In [13]:
#STEP 15: Prepping for RCistarget: locating the ranking databases

DATABASE_FOLDER = "/home/linl5/project/SCLC/auxilliaries/"
DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "hg38_*.mc9nr.genes_vs_motifs.rankings.feather")

# glob.glob returns matches in arbitrary order; sort them so the databases are
# always used in the same order from run to run.
db_fnames = sorted(glob.glob(DATABASES_GLOB))
def name(fname):
    """Return the file name of ``fname`` without its directory and final extension."""
    base = os.path.basename(fname)
    stem, _ext = os.path.splitext(base)
    return stem
# One ranking database object per feather file found, named after the file.
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]

# Fail here with a clear message rather than later inside the pruning step.
if not dbs:
    raise FileNotFoundError("No ranking databases matched {}".format(DATABASES_GLOB))

# Motif-to-TF annotation table used together with the ranking databases.
MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDERNAME,"motifs-v9-nr.hgnc-m0.001-o0.0.tbl")

In [14]:
#STEP 16: Running RCistarget

for state in unique_state:
    # every file of this run shares the <dataset>.<state>.<fraction> prefix
    run_tag = (DATASET_ID, state, frac_cells)

    # inputs written by the earlier steps
    ADJACENCIES_FNAME = os.path.join(DATA_FOLDERNAME, "{}.{}.{}.adjacencies.tsv".format(*run_tag))
    EXP_MTX_QC_FNAME = os.path.join(RESULTS_FOLDERNAME, '{}.{}.{}.qc.tpm.csv'.format(*run_tag))

    # outputs of this step
    MODULES_FNAME = os.path.join(DATA_FOLDERNAME, '{}.{}.{}.modules.p'.format(*run_tag))
    MOTIFS_FNAME = os.path.join(DATA_FOLDERNAME, '{}.{}.{}.motifs.csv'.format(*run_tag))
    REGULONS_FNAME = os.path.join(DATA_FOLDERNAME, '{}.{}.{}.regulons.p'.format(*run_tag))

    # load the adjacencies and the expression matrix for this state
    adjacencies = pd.read_csv(ADJACENCIES_FNAME, sep='\t')
    print("\nFINISHED READING ADJACENCIES FILE", ADJACENCIES_FNAME,"\n")
    ex_matrix = pd.read_csv(EXP_MTX_QC_FNAME, sep='\t')
    print("\nFINISHED READING EXPRESSION MATRIX", EXP_MTX_QC_FNAME,"\n")

    # build the co-expression modules and pickle them
    modules = list(modules_from_adjacencies(adjacencies, ex_matrix))
    with open(MODULES_FNAME, 'wb') as modules_file:
        pickle.dump(modules, modules_file)
    print("\nCOMPLETED COEXPRESSION MODULE WRITING:", MODULES_FNAME,"\n")

    # motif enrichment / target pruning, with a dask progress bar
    with ProgressBar():
        df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME, client_or_address="dask_multiprocessing")

    # save the enriched motifs together with their candidate targets
    df.to_csv(MOTIFS_FNAME)
    print("\nCOMPLETED WRITING ENRICHED MOTIFS", MOTIFS_FNAME,"\n")
    print(df.head())

    # turn the enrichment table into regulon objects and pickle them
    regulons = df2regulons(df)
    with open(REGULONS_FNAME, 'wb') as regulons_file:
        pickle.dump(regulons, regulons_file)
    print("\nCOMPLETED WRITING DISCOVERED REGULON", REGULONS_FNAME,"\n")


FINISHED READING ADJACENCIES FILE /home/linl5/project/SCLC/data/lx33/lx33.UTTU.0.66.adjacencies.tsv 




2023-08-10 12:28:42,048 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].



FINISHED READING EXPRESSION MATRIX /home/linl5/project/SCLC/results/lx33/lx33.UTTU.0.66.qc.tpm.csv 




2023-08-10 12:28:43,420 - pyscenic.utils - INFO - Creating modules.



COMPLETED COEXPRESSION MODULE WRITING: /home/linl5/project/SCLC/data/lx33/lx33.UTTU.0.66.modules.p 

[                                        ] | 0% Completed | 17.97 sms




[                                        ] | 0% Completed | 21.43 s





[                                        ] | 0% Completed | 26.92 s




[                                        ] | 0% Completed | 27.13 s




[                                        ] | 0% Completed | 32.11 s




[                                        ] | 0% Completed | 34.25 s





[                                        ] | 0% Completed | 34.66 s




[                                        ] | 0% Completed | 38.83 s




[                                        ] | 0% Completed | 39.44 s





[                                        ] | 0% Completed | 44.52 s





[                                        ] | 0% Completed | 45.85 s




[                                        ] | 0% Completed | 46.15 s




[                                        ] | 0% Completed | 47.38 s




[                                        ] | 0% Completed | 48.70 s




[                                        ] | 0% Completed | 49.31 s




[                                        ] | 0% Completed | 49.92 s




[                                        ] | 0% Completed | 72.70 s




[                                        ] | 0% Completed | 74.53 s




[                                        ] | 0% Completed | 77.38 s




[                                        ] | 0% Completed | 90.60 s





[                                        ] | 0% Completed | 94.06 s




[                                        ] | 0% Completed | 94.57 s





[                                        ] | 0% Completed | 94.98 s




[                                        ] | 0% Completed | 99.46 s




[                                        ] | 0% Completed | 102.71 s




[                                        ] | 0% Completed | 107.19 s




[                                        ] | 0% Completed | 107.40 s




[                                        ] | 0% Completed | 107.80 s




[                                        ] | 0% Completed | 108.41 s




[                                        ] | 0% Completed | 109.33 s




[                                        ] | 0% Completed | 110.04 s




[                                        ] | 0% Completed | 110.75 s




[                                        ] | 0% Completed | 110.95 s




[                                        ] | 0% Completed | 115.94 s





[                                        ] | 0% Completed | 116.34 s




[                                        ] | 0% Completed | 117.16 s




[                                        ] | 0% Completed | 117.87 s




[                                        ] | 0% Completed | 118.38 s





[                                        ] | 0% Completed | 120.72 s




[                                        ] | 0% Completed | 127.02 s




[                                        ] | 0% Completed | 140.96 s




[                                        ] | 0% Completed | 142.28 s





[                                        ] | 0% Completed | 151.74 s




[                                        ] | 0% Completed | 151.94 s




[                                        ] | 0% Completed | 156.21 s




[                                        ] | 0% Completed | 157.33 s





[                                        ] | 0% Completed | 158.45 s




[                                        ] | 0% Completed | 159.87 s




[                                        ] | 0% Completed | 160.48 s





[                                        ] | 0% Completed | 160.69 s




[                                        ] | 0% Completed | 162.72 s





[                                        ] | 0% Completed | 164.76 s




[                                        ] | 0% Completed | 165.67 s




[                                        ] | 0% Completed | 167.71 s




[                                        ] | 0% Completed | 168.11 s




[                                        ] | 0% Completed | 168.62 s




[                                        ] | 0% Completed | 183.55 s




[                                        ] | 0% Completed | 185.49 s




[                                        ] | 0% Completed | 188.23 s




[                                        ] | 0% Completed | 195.76 s





[                                        ] | 0% Completed | 197.18 s




[                                        ] | 0% Completed | 197.59 s





[                                        ] | 0% Completed | 198.10 s




[                                        ] | 0% Completed | 201.76 s




[                                        ] | 0% Completed | 205.01 s




[                                        ] | 0% Completed | 209.69 s




[                                        ] | 0% Completed | 209.89 s




[                                        ] | 0% Completed | 210.30 s




[                                        ] | 0% Completed | 211.82 s




[                                        ] | 0% Completed | 213.55 s




[                                        ] | 0% Completed | 214.77 s





[                                        ] | 0% Completed | 215.08 s




[                                        ] | 0% Completed | 217.32 s





[                                        ] | 0% Completed | 217.62 s





[                                        ] | 0% Completed | 218.33 s




[                                        ] | 0% Completed | 218.84 s




[                                        ] | 0% Completed | 219.45 s




[                                        ] | 0% Completed | 221.69 s




[                                        ] | 0% Completed | 225.25 s




[########################################] | 100% Completed | 232.00 s

COMPLETED WRITING ENRICHED MOTIFS /home/linl5/project/SCLC/data/lx33/lx33.UTTU.0.66.motifs.csv 

                                                   Enrichment            \
                                                          AUC       NES   
TF   MotifID                                                              
ATF3 taipale_cyt_meth__CREM_NRTGAYGTCAYN_eDBD_meth   0.090750  3.068023   
     cisbp__M3088                                    0.094489  3.317156   
     taipale__ATF4_DBD_NNATGAYGCAATN                 0.090975  3.083038   
     cisbp__M5292                                    0.091564  3.122243   
     cisbp__M0300                                    0.095490  3.383888   

                                                                          \
                                                   MotifSimilarityQvalue   
TF   MotifID                                                               
AT


2023-08-10 12:32:45,415 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].



FINISHED READING EXPRESSION MATRIX /home/linl5/project/SCLC/results/lx33/lx33.UU.0.66.qc.tpm.csv 




2023-08-10 12:32:46,377 - pyscenic.utils - INFO - Creating modules.



COMPLETED COEXPRESSION MODULE WRITING: /home/linl5/project/SCLC/data/lx33/lx33.UU.0.66.modules.p 

[                                        ] | 0% Completed | 15.08 sms




[                                        ] | 0% Completed | 16.30 s




[                                        ] | 0% Completed | 16.60 s




[                                        ] | 0% Completed | 18.23 s




[                                        ] | 0% Completed | 19.25 s




[                                        ] | 0% Completed | 20.06 s




[                                        ] | 0% Completed | 25.55 s




[                                        ] | 0% Completed | 25.96 s




[                                        ] | 0% Completed | 28.20 s





[                                        ] | 0% Completed | 28.50 s




[                                        ] | 0% Completed | 31.15 s




[                                        ] | 0% Completed | 35.52 s




[                                        ] | 0% Completed | 38.47 s




[                                        ] | 0% Completed | 39.89 s




[                                        ] | 0% Completed | 62.67 s




[                                        ] | 0% Completed | 64.60 s




[                                        ] | 0% Completed | 78.22 s




[                                        ] | 0% Completed | 78.43 s




[                                        ] | 0% Completed | 78.83 s




[                                        ] | 0% Completed | 79.14 s





[                                        ] | 0% Completed | 86.16 s




[                                        ] | 0% Completed | 90.23 s




[                                        ] | 0% Completed | 90.43 s




[                                        ] | 0% Completed | 96.84 s




[                                        ] | 0% Completed | 98.77 s




[                                        ] | 0% Completed | 98.97 s




[                                        ] | 0% Completed | 104.87 s




[                                        ] | 0% Completed | 107.51 s




[                                        ] | 0% Completed | 108.02 s




[                                        ] | 0% Completed | 108.74 s




[                                        ] | 0% Completed | 115.65 s




[                                        ] | 0% Completed | 131.61 s




[                                        ] | 0% Completed | 133.55 s




[                                        ] | 0% Completed | 135.07 s




[                                        ] | 0% Completed | 136.60 s




[                                        ] | 0% Completed | 141.89 s




[                                        ] | 0% Completed | 142.91 s




[                                        ] | 0% Completed | 143.72 s




[                                        ] | 0% Completed | 148.20 s




[                                        ] | 0% Completed | 148.60 s




[                                        ] | 0% Completed | 150.23 s





[                                        ] | 0% Completed | 150.64 s




[                                        ] | 0% Completed | 152.88 s




[                                        ] | 0% Completed | 162.34 s




[                                        ] | 0% Completed | 163.76 s




[                                        ] | 0% Completed | 177.80 s




[                                        ] | 0% Completed | 180.14 s




[                                        ] | 0% Completed | 194.79 s




[                                        ] | 0% Completed | 195.10 s




[                                        ] | 0% Completed | 195.40 s




[                                        ] | 0% Completed | 195.71 s





[                                        ] | 0% Completed | 202.62 s




[                                        ] | 0% Completed | 206.80 s




[                                        ] | 0% Completed | 207.10 s




[                                        ] | 0% Completed | 210.65 s




[                                        ] | 0% Completed | 212.28 s




[                                        ] | 0% Completed | 212.99 s




[                                        ] | 0% Completed | 217.16 s




[                                        ] | 0% Completed | 218.08 s




[                                        ] | 0% Completed | 218.69 s




[                                        ] | 0% Completed | 219.50 s




[                                        ] | 0% Completed | 224.99 s




[########################################] | 100% Completed | 235.55 s

COMPLETED WRITING ENRICHED MOTIFS /home/linl5/project/SCLC/data/lx33/lx33.UU.0.66.motifs.csv 

                                                  Enrichment            \
                                                         AUC       NES   
TF    MotifID                                                            
ASCL2 dbcorrdb__RCOR1__ENCSR000EFG_1__m4            0.090957  3.292014   
ATF3  cisbp__M4526                                  0.096851  3.842853   
      dbcorrdb__FOS__ENCSR000DOO_1__m1              0.086374  3.035263   
      dbcorrdb__JUND__ENCSR000EBZ_1__m1             0.098540  3.973059   
      swissregulon__hs__FOS_FOS_B_L1__JUN_B_D_.p2   0.094247  3.642186   

                                                                         \
                                                  MotifSimilarityQvalue   
TF    MotifID                                                             
ASCL2 dbcorrdb_


2023-08-10 12:36:52,309 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].



FINISHED READING EXPRESSION MATRIX /home/linl5/project/SCLC/results/lx33/lx33.UUUT.0.66.qc.tpm.csv 




2023-08-10 12:36:53,422 - pyscenic.utils - INFO - Creating modules.



COMPLETED COEXPRESSION MODULE WRITING: /home/linl5/project/SCLC/data/lx33/lx33.UUUT.0.66.modules.p 

[                                        ] | 0% Completed | 9.90 s ms




[                                        ] | 0% Completed | 10.21 s





[                                        ] | 0% Completed | 11.12 s




[                                        ] | 0% Completed | 15.39 s




[                                        ] | 0% Completed | 17.43 s




[                                        ] | 0% Completed | 19.06 s




[                                        ] | 0% Completed | 30.35 s




[                                        ] | 0% Completed | 37.58 s





[                                        ] | 0% Completed | 39.72 s




[                                        ] | 0% Completed | 40.33 s




[                                        ] | 0% Completed | 49.28 s





[                                        ] | 0% Completed | 52.03 s




[####                                    ] | 10% Completed | 60.08 s




[####                                    ] | 10% Completed | 62.62 s




[####                                    ] | 10% Completed | 63.03 s




[####                                    ] | 10% Completed | 65.98 s




[####                                    ] | 10% Completed | 68.73 s




[####                                    ] | 10% Completed | 70.66 s




[####                                    ] | 10% Completed | 72.69 s




[####                                    ] | 10% Completed | 76.66 s





[####                                    ] | 10% Completed | 105.34 s




[####                                    ] | 10% Completed | 108.29 s




[####                                    ] | 10% Completed | 113.68 s





[####                                    ] | 10% Completed | 114.50 s




[####                                    ] | 10% Completed | 115.41 s




[####                                    ] | 10% Completed | 116.22 s




[####                                    ] | 10% Completed | 118.97 s





[####                                    ] | 10% Completed | 121.71 s




[####                                    ] | 10% Completed | 122.12 s




[####                                    ] | 10% Completed | 125.99 s




[####                                    ] | 10% Completed | 135.95 s




[####                                    ] | 10% Completed | 141.54 s




[####                                    ] | 10% Completed | 142.15 s




[####                                    ] | 10% Completed | 143.17 s




[####                                    ] | 10% Completed | 144.59 s




[####                                    ] | 10% Completed | 145.31 s






[####                                    ] | 10% Completed | 145.92 s




[####                                    ] | 10% Completed | 147.65 s




[####                                    ] | 10% Completed | 148.16 s




[####                                    ] | 10% Completed | 148.36 s





[####                                    ] | 10% Completed | 149.38 s




[####                                    ] | 10% Completed | 155.38 s




[####                                    ] | 10% Completed | 159.45 s




[####                                    ] | 10% Completed | 170.13 s




[####                                    ] | 10% Completed | 175.83 s





[####                                    ] | 10% Completed | 177.05 s





[####                                    ] | 10% Completed | 183.05 s




[####                                    ] | 10% Completed | 187.63 s




[####                                    ] | 10% Completed | 197.09 s




[####                                    ] | 10% Completed | 199.43 s




[####                                    ] | 10% Completed | 200.76 s




[####                                    ] | 10% Completed | 204.12 s




[####                                    ] | 10% Completed | 206.15 s




[####                                    ] | 10% Completed | 206.46 s




[####                                    ] | 10% Completed | 206.97 s




[####                                    ] | 10% Completed | 210.02 s





[####                                    ] | 10% Completed | 212.56 s




[####                                    ] | 10% Completed | 214.59 s




[################                        ] | 40% Completed | 225.74 s




[################                        ] | 40% Completed | 227.57 s




[############################            ] | 70% Completed | 230.06 s




[############################            ] | 70% Completed | 230.67 s




[############################            ] | 70% Completed | 231.89 s





[############################            ] | 70% Completed | 234.44 s




[############################            ] | 70% Completed | 234.74 s




[############################            ] | 70% Completed | 238.00 s




[############################            ] | 70% Completed | 247.98 s




[############################            ] | 70% Completed | 251.03 s




[############################            ] | 70% Completed | 251.64 s




[############################            ] | 70% Completed | 253.06 s




[############################            ] | 70% Completed | 255.91 s




[############################            ] | 70% Completed | 256.62 s






[############################            ] | 70% Completed | 257.74 s




[########################################] | 100% Completed | 258.49 s

COMPLETED WRITING ENRICHED MOTIFS /home/linl5/project/SCLC/data/lx33/lx33.UUUT.0.66.motifs.csv 

                                         Enrichment            \
                                                AUC       NES   
TF    MotifID                                                   
ASCL2 hocomoco__PTF1A_HUMAN.H11MO.1.B      0.096394  3.153788   
      dbcorrdb__TCF12__ENCSR000BGZ_1__m1   0.135656  5.082400   
      transfac_pro__M02841                 0.100746  3.367574   
      taipale__Ascl2_DBD_RRCAGCTGYY_repr   0.097265  3.196545   
      transfac_pro__M02737                 0.112801  3.959697   

                                                                \
                                         MotifSimilarityQvalue   
TF    MotifID                                                    
ASCL2 hocomoco__PTF1A_HUMAN.H11MO.1.B                 0.000846   
      dbcorrdb__TCF12__ENCSR000BGZ_1__m1      


2023-08-10 12:41:22,283 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].



FINISHED READING EXPRESSION MATRIX /home/linl5/project/SCLC/results/lx33/lx33.UUUU.0.66.qc.tpm.csv 




2023-08-10 12:41:23,472 - pyscenic.utils - INFO - Creating modules.



COMPLETED COEXPRESSION MODULE WRITING: /home/linl5/project/SCLC/data/lx33/lx33.UUUU.0.66.modules.p 

[                                        ] | 0% Completed | 13.87 sms




[                                        ] | 0% Completed | 14.89 s




[                                        ] | 0% Completed | 15.50 s




[                                        ] | 0% Completed | 23.03 s




[                                        ] | 0% Completed | 25.57 s




[                                        ] | 0% Completed | 26.08 s




[                                        ] | 0% Completed | 26.69 s





[                                        ] | 0% Completed | 28.12 s




[                                        ] | 0% Completed | 32.50 s




[                                        ] | 0% Completed | 36.26 s





[                                        ] | 0% Completed | 40.44 s




[                                        ] | 0% Completed | 40.85 s




[                                        ] | 0% Completed | 42.78 s




[                                        ] | 0% Completed | 44.92 s




[                                        ] | 0% Completed | 45.23 s





[                                        ] | 0% Completed | 45.53 s




[                                        ] | 0% Completed | 47.98 s




[                                        ] | 0% Completed | 50.52 s




[                                        ] | 0% Completed | 51.54 s




[                                        ] | 0% Completed | 52.35 s




[                                        ] | 0% Completed | 55.09 s




[                                        ] | 0% Completed | 58.96 s




[####                                    ] | 10% Completed | 69.03 s




[####                                    ] | 10% Completed | 92.13 s




[####                                    ] | 10% Completed | 121.11 s




[####                                    ] | 10% Completed | 121.32 s




[####                                    ] | 10% Completed | 126.00 s




[####                                    ] | 10% Completed | 133.53 s




[####                                    ] | 10% Completed | 138.93 s




[####                                    ] | 10% Completed | 140.15 s





[####                                    ] | 10% Completed | 142.59 s




[####                                    ] | 10% Completed | 144.22 s




[####                                    ] | 10% Completed | 146.15 s




[####                                    ] | 10% Completed | 153.98 s




[####                                    ] | 10% Completed | 154.19 s




[####                                    ] | 10% Completed | 156.12 s




[####                                    ] | 10% Completed | 161.31 s





[####                                    ] | 10% Completed | 161.72 s




[####                                    ] | 10% Completed | 163.04 s




[####                                    ] | 10% Completed | 166.91 s





[####                                    ] | 10% Completed | 167.21 s




[####                                    ] | 10% Completed | 167.92 s




[####                                    ] | 10% Completed | 170.06 s




[####                                    ] | 10% Completed | 172.30 s




[####                                    ] | 10% Completed | 178.91 s




[####                                    ] | 10% Completed | 181.25 s




[####                                    ] | 10% Completed | 185.72 s




[####                                    ] | 10% Completed | 186.02 s




[####                                    ] | 10% Completed | 187.96 s




[####                                    ] | 10% Completed | 189.89 s





[####                                    ] | 10% Completed | 194.06 s




[####                                    ] | 10% Completed | 194.87 s





[####                                    ] | 10% Completed | 195.48 s





[####                                    ] | 10% Completed | 196.80 s




[####                                    ] | 10% Completed | 198.13 s




[####                                    ] | 10% Completed | 199.14 s





[####                                    ] | 10% Completed | 202.60 s




[####                                    ] | 10% Completed | 203.00 s




[####                                    ] | 10% Completed | 204.23 s




[####                                    ] | 10% Completed | 204.43 s




[####                                    ] | 10% Completed | 206.36 s





[####                                    ] | 10% Completed | 210.73 s




[##########                              ] | 25% Completed | 211.28 s




[################                        ] | 40% Completed | 212.40 s





[################                        ] | 40% Completed | 212.60 s




[################                        ] | 40% Completed | 215.65 s




[################                        ] | 40% Completed | 216.16 s




[################                        ] | 40% Completed | 216.67 s





[################                        ] | 40% Completed | 216.98 s




[################                        ] | 40% Completed | 219.42 s




[################                        ] | 40% Completed | 220.13 s




[################                        ] | 40% Completed | 220.54 s




[################                        ] | 40% Completed | 221.96 s




[################                        ] | 40% Completed | 222.47 s





[################                        ] | 40% Completed | 222.68 s





[################                        ] | 40% Completed | 223.59 s




[################                        ] | 40% Completed | 224.71 s




[################                        ] | 40% Completed | 226.03 s




[################                        ] | 40% Completed | 226.64 s




[################                        ] | 40% Completed | 226.85 s




[################                        ] | 40% Completed | 227.15 s




[############################            ] | 70% Completed | 230.44 s




[############################            ] | 70% Completed | 232.98 s




[########################################] | 100% Completed | 261.86 s

COMPLETED WRITING ENRICHED MOTIFS /home/linl5/project/SCLC/data/lx33/lx33.UUUU.0.66.motifs.csv 

                                                 Enrichment            \
                                                        AUC       NES   
TF   MotifID                                                            
ATF3 transfac_pro__M07414                          0.095289  3.232038   
     cisbp__M4526                                  0.093634  3.112977   
     swissregulon__hs__FOS_FOS_B_L1__JUN_B_D_.p2   0.097139  3.365172   
     jaspar__MA0476.1                              0.095333  3.235200   
     cisbp__M2278                                  0.095027  3.213222   

                                                                        \
                                                 MotifSimilarityQvalue   
TF   MotifID                                                             
ATF3 transfac_pro__M0741

After grouping the data by timepoints
- How is clustering in this case different from just making an AnnData subset selected by timepoint? (pin)
- Do I do PCA after clustering? -> PCA changes the expression matrix, so the data passed to GENIE3 is not PCA-transformed: just raw, unnormalized, unperturbed data apart from basic filtering
- If you have more than one condition, it’s often helpful to perform integration to align the cells -> and then within each timepoint there would be three batches, so I should find a way to remove this batch effect, right?