# Assembling or loading anndata object
<hr style="border:2px solid black">

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
############### DEFINING RUN ID ######################################
run_id = "Run1"

############### DEFINING STRATEGY ####################################
# Set exactly ONE of the following options to True
from_h5ad = True #option 1
assembling_10_velocity = False #option 2
assembling_10_public = False #option 3
convert_seurat = False #option 4

############### INPUT DATA ###########################################

#For option 1: The path to an existing .h5ad file
h5ad_path = "test_data/adata_rna.h5ad"

#For option 2: This is the directory of ext_* or dst_* experiment
path_solo_quant = "/mnt/agnerds/loosolab_SC_RNA_framework/examples/assembling_10_velocity/quant/"

#For option 3: 10X assembling from public data
path_public_10X = "/mnt/agnerds/loosolab_SC_RNA_framework/examples/assembling_10_public/"

#For option 4: This is the path to the Seurat (.rds) file
path_rds = "/mnt/agnerds/loosolab_SC_RNA_framework/examples/convert_seurat/GSE135893_ILD_annotated_fullsize.rds"

############### OUTPUT PATH ##########################################
path_out = "pipeline_output/" #The path where the folders results/<run_id> will be created to export your results
#NOTE: Remember the trailing "/"

############### SANITY CHECK #########################################
# Exactly one strategy must be active: with none selected no `adata` object is
# created, and with several selected each option overwrites the `adata` of the
# previous one.
_strategies = {
    "from_h5ad": from_h5ad,
    "assembling_10_velocity": assembling_10_velocity,
    "assembling_10_public": assembling_10_public,
    "convert_seurat": convert_seurat,
}
_selected = [name for name, flag in _strategies.items() if flag]
if len(_selected) != 1:
    raise ValueError(
        "Exactly one strategy must be set to True, but {} are selected: {}".format(
            len(_selected), _selected
        )
    )

<hr style="border:2px solid black">

## Setup

In [None]:
##################### IMPORTING PACKAGES ################################
import os
import scanpy as sc
import sctoolbox
import sctoolbox.utilities as utils
import sctoolbox.checker as ch
import sctoolbox.creators as cr
import sctoolbox.assemblers as assembler
import sctoolbox.file_converter as converter
import sctoolbox.generalized_tree as gentree

In [None]:
'''
currently available paths:

run (mandatory)
processing_dir (mandatory)
processed_run_dir
assemble_dir
assembled_anndata_dir
assembled_anndata
qc_dir
norm_correction_dir
clustering_dir
annotation_dir
'''

# Init directory tree object (exposes the path attributes listed above)
tree = gentree.Tree()
# Set processing_dir (mandatory) to the user-defined output path
tree.processing_dir = path_out
# Set run (mandatory) and build directories
# NOTE(review): presumably the directories are derived from processing_dir, so
# it has to be assigned before run - confirm in sctoolbox.generalized_tree
tree.run = run_id

In [None]:
#Creating the directory <path_out>/results/<run_id> to store the outputs and storing this information
output_dir = os.path.join(path_out, "results", run_id)
utils.create_dir(output_dir)

# The output dir is handed to the info.txt helper and immediately read back;
# result_path is what the saving cell at the end of this notebook uses.
# NOTE(review): presumably info.txt lets follow-up notebooks find the same output dir - confirm in sctoolbox.checker
ch.write_info_txt(path_value=output_dir)  # Writing the output dir to the info.txt
result_path = ch.fetch_info_txt()         # Loading the output path

---------

## Read in data

### Option 1: Read from h5ad

In [None]:
# Option 1: load an already existing .h5ad file
if from_h5ad:
    adata = sc.read_h5ad(h5ad_path)

    # Record the input file and the chosen strategy in the infoprocess
    for info_key, info_value in (
        ("Input_for_assembling", h5ad_path),
        ("Strategy", "Read from h5ad"),
    ):
        cr.build_infor(adata, info_key, info_value)

### Option 2: Assemble from preprocessing pipeline 'quant' folder

In [None]:
#Assembling an anndata object for velocity analysis from the 'quant' folder
if assembling_10_velocity:

    #Set up the information below
    # NOTE(review): entries look like "<sample>:<key>:<value>" - confirm the
    # expected format in assembler.from_quant
    # NOTE(review): "smooke_exposure" looks like a typo of "smoke_exposure";
    # left unchanged because it is a runtime label - confirm before renaming
    the_10X_yml = [
        "sample1:condition:room_air",
        "sample2:condition:smooke_exposure",
    ]

    adata = assembler.from_quant(path_solo_quant, the_10X_yml)
    display(adata)

    #Add information to the infoprocess
    cr.build_infor(adata, "Input_for_assembling", path_solo_quant)
    cr.build_infor(adata, "Strategy", "Assembling for velocity")

### Option 3: Assemble an anndata object from 10X public data (mtx, barcodes, genes)

In [None]:
#Assembling an anndata object from 10X public data found in path_public_10X

if assembling_10_public:
    adata = assembler.from_mtx(path_public_10X)
    display(adata)

    #Add information to the infoprocess
    cr.build_infor(adata, "Input_for_assembling", path_public_10X)
    cr.build_infor(adata, "Strategy", "Assembling from public 10X data")

### Option 4: Convert from Seurat to anndata object

In [None]:
# Converting the Seurat (.rds) file given in path_rds to an anndata object
if convert_seurat:
    adata = converter.convertToAdata(file=path_rds)

    #Add information to the infoprocess
    #Anndata, key and value for anndata.uns["infoprocess"]
    cr.build_infor(adata, "Input_for_assembling", path_rds)
    cr.build_infor(adata, "Strategy", "Assembling from RDS data")

------------

## Saving the loaded anndata object

In [None]:
#Overview of the loaded adata, i.e. the object created by the option selected above
display(adata)

In [None]:
# HACK: additionally store the run id under the "Test_number" key of the infoprocess
# NOTE(review): presumably needed by code that still reads "Test_number" - confirm and remove once unused
# Assumes adata.uns["infoprocess"] already exists at this point - TODO confirm it is created by cr.build_infor
adata.uns["infoprocess"]["Test_number"] = run_id # hack

In [None]:
#Saving the data
# Record the run id and the output location in the infoprocess before writing
cr.build_infor(adata, "Run_id", run_id)
cr.build_infor(adata, "Anndata_path", result_path)

# Build the file path with os.path.join instead of string concatenation, so the
# separator is handled correctly even if result_path ends with a "/"
adata_output = os.path.join(result_path, "anndata_1_" + run_id + ".h5ad")
adata.write(filename=adata_output)