<h1 align="center">Assembling or loading anndata object</h1> 
<hr style="border:2px solid black">

# A: set up

In [1]:
#####################IMPORTING PACKAGES################################
import sctoolbox
from sctoolbox.utilities import *
from sctoolbox.checker import *
from sctoolbox.creators import *
from sctoolbox.file_converter import *
from sctoolbox.assemblers import *

# ------------------------- Test number -------------------------
test = "Test1"

# ------------------------- Strategy ----------------------------
# Each flag switches on one of the assembling cells further down.
assembling_10_velocity = True  # cell C
assembling_10_public = False  # cell D
convert_seurat = False  # cell E

# ------------------------- Output path -------------------------
# Location under which the folders results/Test* are created for the exported results.
# NOTE: remember the trailing "/"
path_out = "pipeline_output/"

# ------------------------- Inputs ------------------------------
# 10X assembling for velocity (cell C):
# directory where the quant folder from the snakemake preprocessing is located.
# NOTE: give the path including the /quant folder, e.g. /path/quant
path_solo_quant = "/mnt/agnerds/loosolab_SC_RNA_framework/examples/assembling_10_velocity/quant/"

# 10X assembling from public data (cell D)
path_public_10X = "/mnt/agnerds/loosolab_SC_RNA_framework/examples/assembling_10_public/"

# Converting from Seurat to anndata (cell E): path to the Seurat (.rds) file
path_rds = "/mnt/agnerds/loosolab_SC_RNA_framework/examples/convert_seurat/GSE135893_ILD_annotated_fullsize.rds"

# B: checking and creating the output directory

In [2]:
# Create the directories that will hold the outputs of this test run;
# the call also stores that information so it can be read back below.
create_dir(path_out, test)
result_path = fetch_info_txt()  # load the output path

Output directory is ready: pipeline_output/results/Test1


# C: assembling 10X anndata for velocity

In [5]:
# Assemble an anndata object for velocity analysis.
# Runs only when `assembling_10_velocity` is switched on in the setup cell.
if assembling_10_velocity:
    # Set up the information below.
    # Entries appear to follow a "sample:key:value" layout -- confirm against from_quant.
    # NOTE(review): "smooke_exposure" looks like a typo for "smoke_exposure"; it is a
    # data label, so confirm with downstream users before renaming it.
    the_10X_yml = [
        "sample1:condition:room_air",
        "sample2:condition:smooke_exposure",
    ]

    adata = sctoolbox.assemblers.from_quant(path_solo_quant, the_10X_yml)
    display(adata)

    # Add the input location and the chosen strategy to the infoprocess
    build_infor(adata, "Input_for_assembling", path_solo_quant)
    build_infor(adata, "Strategy", "Assembling for velocity")

Found samples: ['sample1', 'sample2']
Assembling sample 'sample1'
Setting up adata from solo files
Adding velocity information from spliced/unspliced/ambiguous
Assembling sample 'sample2'
Setting up adata from solo files
Adding velocity information from spliced/unspliced/ambiguous
Concatenating anndata objects


AnnData object with n_obs × n_vars = 20192 × 55359
    obs: 'sample', 'condition', 'batch'
    var: 'gene', 'type'
    layers: 'spliced', 'unspliced', 'ambiguous'

# D: assembling an anndata object from 10X public data

In [6]:
# Assemble an anndata object from 10X public data.
# Set up the information below
mtx = ["path_mtx_file"]  # Path to matrix file(s)
barcodes = ["path_barcode_file"]  # Path to barcode file(s)
genes = ["path_genes_file"]  # Path to genes file(s)

# Runs only when `assembling_10_public` is switched on in the setup cell.
if assembling_10_public:
    # Only the three file lists are passed. The previous call also forwarded
    # **kwargs, but no `kwargs` name exists at notebook level, so it raised a
    # NameError as soon as this branch was enabled.
    adata = from_mtx(mtx, barcodes, genes)
    display(adata)

    # Add the input location and the chosen strategy to the infoprocess
    build_infor(adata, "Input_for_assembling", path_public_10X)
    build_infor(adata, "Strategy", "Assembling from public 10X data")

# E: converting from Seurat to anndata object

In [7]:
# Convert a Seurat (.rds) object to an anndata object.
# Runs only when `convert_seurat` is switched on in the setup cell.
if convert_seurat:
    # Call the converter through the package namespace: the setup cell imports
    # `sctoolbox` and `sctoolbox.file_converter`, whereas the bare name `converter`
    # used previously is never defined in this notebook.
    # NOTE(review): assumes convertToAdata lives in sctoolbox.file_converter -- confirm.
    adata = sctoolbox.file_converter.convertToAdata(file=path_rds)
    display(adata)

    # Add information to the infoprocess:
    # anndata, key and value for anndata.uns["infoprocess"]
    build_infor(adata, "Input_for_assembling", path_rds)
    build_infor(adata, "Strategy", "Assembling from RDS data")

# F: saving anndata object

In [8]:
# Record the test number and the output location in the infoprocess,
# then write the anndata object to an .h5ad file inside the result folder.
build_infor(adata, "Test_number", test)
build_infor(adata, "Anndata_path", result_path)
adata_output = "/".join([result_path, "anndata_1_" + test + ".h5ad"])
adata.write(filename=adata_output)

... storing 'sample' as categorical
... storing 'condition' as categorical
... storing 'gene' as categorical
... storing 'type' as categorical
