# Assembling

This notebook is used to assemble a processable h5ad object for subsequent notebooks.

## Fill in input data, output and settings

In [1]:
####################### TEST NAME ###################################
# Name of this run; it is assigned to `tree.run` when the path handling
# object is set up and therefore ends up in the output path.
# NOTE(review): the run is called 'Aorta' while the input file name below
# mentions "esophagus_muscularis_mucosa" -- confirm this is intended.
test = 'Aorta'
################### DEFINING STRATEGY ###############################
# Set exactly ONE of the following flags to True.
from_h5ad = True #option 1
assembling_10_velocity = False #option 2
assembling_10_public = False #option 3
convert_seurat = False #option 4

# Fail early when no strategy or several strategies are selected, instead of
# hitting an undefined `adata` (or an ambiguous result) further down.
_strategy_flags = {
    'from_h5ad': from_h5ad,
    'assembling_10_velocity': assembling_10_velocity,
    'assembling_10_public': assembling_10_public,
    'convert_seurat': convert_seurat,
}
_selected_strategies = [name for name, enabled in _strategy_flags.items() if enabled]
if len(_selected_strategies) != 1:
    raise ValueError(
        "Exactly one assembling strategy must be set to True, "
        f"but {len(_selected_strategies)} are selected: {_selected_strategies}"
    )

###################### INPUT DATA ###################################

#For option 1: The path to an existing .h5ad file
h5ad_path = '/home/rstudio/data/anndata/ENC-1JKYN-146-SM-A8CPH_snATAC_esophagus_muscularis_mucosa_Rep1.h5ad'

##################### OUTPUT DATA ###################################
# Base directory that is handed to the path handling object as processing directory.
output_dir = '/home/rstudio/processed_data'

## Import modules

In [2]:
# sctoolbox modules 
import sctoolbox.atac_tree as sub_tree
import sctoolbox.creators as cr
# import episcanpy
import episcanpy as epi

## Set up the path-handling object

In [3]:
# Create the path-handling object (ATAC_tree from sctoolbox.atac_tree).
tree = sub_tree.ATAC_tree()
# Use the output directory from the settings cell as processing directory.
tree.processing_dir = output_dir
# Register the run name from the settings cell. The cell output reports
# <output_dir>/<test>/assembling/anndata as "NEWLY SETUP".
# NOTE(review): which of the two assignments triggers the directory setup is
# not visible here -- keep this order.
tree.run = test

/home/rstudio/processed_data/Aorta/assembling/anndata: NEWLY SETUP


## Read in data

### Option 1: Read from .h5ad

In [4]:
if from_h5ad:
    # Option 1: load the already existing AnnData file given in the settings.
    adata = epi.read_h5ad(h5ad_path)

    # Record where the data came from and how it was loaded.
    provenance = (
        ("Input_for_assembling", h5ad_path),
        ("Strategy", "Read from h5ad"),
    )
    for info_key, info_value in provenance:
        cr.build_infor(adata, info_key, info_value)

## Inspect adata

In [5]:
# Show the summary of the loaded AnnData object (dimensions and annotation keys).
display(adata)

AnnData object with n_obs × n_vars = 73652 × 150152
    obs: 'barcode', 'TN', 'UM', 'PP', 'UQ', 'CM', 'file', 'sample'
    uns: 'infoprocess', 'color_set'

In [6]:
# Show the per-variable annotation table.
# NOTE(review): the rendered output appears to contain only a numeric index
# and no annotation columns -- confirm this is expected for this input.
display(adata.var)

0
1
2
3
4
...
150147
150148
150149
150150
150151


In [7]:
# Show the per-observation annotation table (one row per barcode).
display(adata.obs)

Unnamed: 0,barcode,TN,UM,PP,UQ,CM,file,sample
1,AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
2,AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
3,AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
4,AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
5,AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
...,...,...,...,...,...,...,...,...
73648,TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
73649,TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
73650,TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
73651,TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB


## Check for QC related columns

In [8]:
# TODO: not implemented yet -- check for the QC-related columns here.

## Calculate missing columns

In [9]:
# TODO: not implemented yet -- calculate any missing columns here.

## Inspect adata.obs

In [10]:
# Show the observation table again before saving; a bare last expression is
# rendered as the cell output.
adata.obs

Unnamed: 0,barcode,TN,UM,PP,UQ,CM,file,sample
1,AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
2,AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
3,AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
4,AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
5,AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
...,...,...,...,...,...,...,...,...
73648,TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
73649,TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
73650,TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB
73651,TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/home/rstudio/data/snap/ENC-1JKYN-146-SM-A8CPH...,MB


## Save adata to .h5ad

In [11]:
# Target path of the assembled .h5ad file, as provided by the path-handling object.
adata_output = tree.assembled_anndata
# Echo the path so it can be checked before anything is written.
adata_output

'/home/rstudio/processed_data/Aorta/assembling/anndata/Aorta.h5ad'

In [12]:
# Store the run name and the output directory alongside the data before saving.
run_metadata = (
    ("Test_number", test),
    ("Anndata_path", output_dir),
)
for info_key, info_value in run_metadata:
    cr.build_infor(adata, info_key, info_value)

# Resolve the target file from the path-handling object and write the AnnData object to it.
adata_output = tree.assembled_anndata
adata.write(filename=adata_output)