# Assembling

This notebook is used to assemble a processable h5ad object for subsequent notebooks.

## Fill in input data, output and settings

In [1]:
#Path related settings (these should be the same as for the previous notebook)
output_dir = '/mnt/workspace/jdetlef/processed_data' # specify the output directory of the processing should be the same in every notebook
test = 'Esophagus_146_0.01' # sample name, this should be the same for all the notebooks

In [2]:
################### DEFINING STRATEGY ###############################
from_h5ad = True #option 1
assembling_10_velocity = False #option 2
assembling_10_public = False #option 3
convert_seurat = False #option 4

###################### INPUT DATA ###################################

# For option 1: The path to an existing .h5ad file
h5ad_files = ['/mnt/workspace/jdetlef/data/anndata/esophagus_muscularis_146_0.01.h5ad']
#h5ad_files = ['/mnt/agnerds/PROJECTS/extern/ext442_scATAC_Glaser_11_22/preprocessing_output/data/all_annotated_peaks.h5ad']
merge_column='Sample_new' # (str) if multiple h5ad files merged this is an identifier from which file it originates
coordinate_cols=None # (list:str) columns where peak location data is stored (['chr', 'start', 'end'])
set_index=True # (boolean) should the adata.var index be formatted, that it matches chr:start-stop
index_from="name" # (str) should the index be generated from a certain column. Otherwise this is None (SnapATAC: name)

# Manually set existing QC Columns
n_features_by_counts = None
log1p_n_features_by_counts = None
total_counts = None
log1p_total_counts = None
mean_insertsize = None
insertsize_count = None
n_total_fragments = None
n_fragments_in_promoters = None
pct_fragments_in_promoters = None
blacklist_overlaps = None
# total_number_of_fragments
TN = None # 'TN'
# uniquely_mapped_fragments
UM = None # 'UM'
# properly_paired_fragments
PP = None # 'PP'
# uniq_fragments
UQ = None # 'UQ'
# chrM_fragments
CM = None # 'CM'

#################### Calc QC Columns ###############################
# set available number of threads
n_threads = 8

# Step 1: QC Metrics
calc_qc_metrics = True # set True if the QC metrics should be calculated 

# Step 2 & 3
use_bam = True # False if fragments should be used
barcode_tag = 'CB' # set the tag where to find the barcode to use
# if Step 2, 3 or both is executed specify either a BAM file or a fragments file:
bam_file =  '/mnt/workspace/jdetlef/data/bamfiles/sorted_esophagus_muscularis_146_0.01.bam' # specify bamfile of the corresponding alignment
fragments_file = '/mnt/workspace/jdetlef/data/bamfiles/fragments_esophagus_muscularis_146_0.01.bed' # specify fragments file (if this is possible use the fragments file directly)

# Step 2: Mean Insertsizes
calc_mean_insertsize = True # set True if the mean insertsize should be calculated

# Step 3: Promotor-enrichment
calc_promotor_enrichment = True # set True if the promotor-enrichment should be calculated
# specify promotors_gtf if another promoter than the fixed ones should be used
# if promotors is None please specify species
promoters_gtf =  '/mnt/workspace/jdetlef/data/homo_sapiens.104.promoters2000.gtf' # '/mnt/workspace/jdetlef/ext_ana/mus_musculus.104.promoters2000.gtf'
species = None

## Import modules

In [3]:
# sctoolbox modules 
import sctoolbox.atac_tree as sub_tree
import sctoolbox.creators as cr
import sctoolbox.fragment_length as fragments
import sctoolbox.atac as atac
import sctoolbox.calc_overlap_pct as overlap
import sctoolbox.analyser 
import sctoolbox.nucleosome_utils as nuc_utils
from sctoolbox.qc_filter import *
from sctoolbox.atac_utils import *
# import episcanpy
import episcanpy as epi

## Setup path handling object 

In [4]:
# make an instance of the class
tree = sub_tree.ATAC_tree()
# set processing/output directory
tree.processing_dir = output_dir
# set sample/experiment.. 
tree.run = test

all directories existing
all directories existing


## Read in data

### Option 1: Read from .h5ad

In [5]:
qc_columns = {}
qc_columns["n_features_by_counts"] = n_features_by_counts
qc_columns["log1p_n_features_by_counts"] = log1p_n_features_by_counts
qc_columns["total_counts"] = total_counts
qc_columns["log1p_total_counts"] = log1p_total_counts
qc_columns["mean_insertsize"] = mean_insertsize
qc_columns['n_total_fragments'] = n_total_fragments
qc_columns['n_fragments_in_promoters'] = n_fragments_in_promoters
qc_columns['pct_fragments_in_promoters'] = pct_fragments_in_promoters
qc_columns["blacklist_overlaps"] = blacklist_overlaps
qc_columns["TN"] = TN
qc_columns["UM"] = UM
qc_columns["PP"] = PP
qc_columns["UQ"] = UQ
qc_columns["CM"] = CM

In [6]:
if from_h5ad:
    
    adata = assemble_from_h5ad(h5ad_files,
                       qc_columns,
                       merge_column=merge_column,
                       coordinate_cols=coordinate_cols,
                       set_index=set_index,
                       index_from=index_from)



Index(['peak_chr', 'peak_start', 'peak_end'], dtype='object')
add existing adata.obs columns to infoprocess:

setting adata.obs.index = adata.obs[barcode]


## Inspect adata

In [7]:
display(adata)

AnnData object with n_obs × n_vars = 73652 × 174262
    obs: 'TN', 'UM', 'PP', 'UQ', 'CM', 'file', 'sample', 'Sample_new'
    var: 'peak_chr', 'peak_start', 'peak_end'
    uns: 'infoprocess', 'color_set'

In [8]:
display(adata.obs)

Unnamed: 0,TN,UM,PP,UQ,CM,file,sample,Sample_new
AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
...,...,...,...,...,...,...,...,...
TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1
TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1


## Calculate missing columns

### 1. calculate scanpy qc_metrics (n_features)

In [9]:
if qc_columns['n_features_by_counts'] is None or qc_columns['log1p_n_features_by_counts'] is None and calc_qc_metrics:
    adata = analyser.calculate_qc_metrics(adata, var_type='features')
    
    qc_columns['n_features_by_counts'] = 'n_features_by_counts'
    qc_columns['log1p_n_features_by_counts'] = 'log1p_n_features_by_counts'
    qc_columns['total_counts'] = 'total_counts'
    qc_columns['log1p_total_counts'] = 'log1p_total_counts'
    
    build_legend(adata, 'n_features_by_counts', 'n_features_by_counts')
    build_legend(adata, 'log1p_n_features_by_counts', 'log1p_n_features_by_counts')
    build_legend(adata, 'total_counts', 'total_counts')
    build_legend(adata, 'log1p_total_counts', 'log1p_total_counts')

adata.obs

Unnamed: 0,TN,UM,PP,UQ,CM,file,sample,Sample_new,n_features_by_counts,log1p_n_features_by_counts,total_counts,log1p_total_counts
AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,5.0,1.791759
AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,17,2.890372,17.0,2.890372
AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,10,2.397895,10.0,2.397895
AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,1648,7.407924,1791.0,7.491087
AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,13,2.639057,13.0,2.639057
...,...,...,...,...,...,...,...,...,...,...,...,...
TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,3.0,1.386294
TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,3.0,1.386294
TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,0,0.000000,0.0,0.000000
TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,5.0,1.791759


### check barcode tag

In [10]:
check_barcode_tag(adata, bam_file, barcode_tag)

Barcode tag: OK


### 2. calc mean insertsize and count if missing 

In [11]:
adata = nuc_utils.add_chromatin_conditions(adata, fragments_file)

loading fragments...
Time for loading fragments:  108.36348223686218
calculate distribution...
Time for calculating distribution:  24.630712747573853
calculate maxima...
Time for calculating maxima:  7.017499208450317
calculate score...




Time for calculating score:  17.567495346069336


In [13]:
adata.obs

Unnamed: 0,TN,UM,PP,UQ,CM,file,sample,Sample_new,n_features_by_counts,log1p_n_features_by_counts,total_counts,log1p_total_counts,Mean-Fragment-Length,Median-Fragment-Length,Fragments,Fragment-Count,Distribution,Maxima,Maxima-Count,nucleosomal-score
AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,5.0,1.791759,82.36,49,"[123, 46, 90, 49, 201, 61, 178, 44, 34, 42, 38]",11.0,"[9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,145.544305
AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,17,2.890372,17.0,2.890372,170.38,138.5,"[72, 417, 148, 40, 36, 129, 207, 89, 166, 111,...",26.0,"[16, 7, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[0],1.0,102.513844
AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,10,2.397895,10.0,2.397895,161.17,74.0,"[407, 78, 50, 52, 26, 70, 81, 178, 496, 28, 53...",12.0,"[8, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,140.045502
AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,1648,7.407924,1791.0,7.491087,144.03,106,"[60, 54, 31, 201, 41, 267, 49, 259, 275, 77, 6...",3475.0,"[2351, 871, 229, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0...",[0],1.0,19.623836
AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,13,2.639057,13.0,2.639057,160.5,131.5,"[56, 350, 37, 391, 212, 40, 207, 48, 49, 215]",10.0,"[5, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,152.003069
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,3.0,1.386294,125.8,94,"[91, 94, 199, 77, 168]",5.0,"[4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,220.573902
TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,3.0,1.386294,87.0,58,"[58, 147, 56]",3.0,"[3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,324.955404
TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,0,0.000000,0.0,0.000000,86.0,46,"[176, 36, 46]",3.0,"[2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,324.955404
TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,5.0,1.791759,163.5,163.5,"[266, 61]",2.0,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[],0.0,inf


In [14]:
#if (qc_columns['mean_insertsize'] is None or qc_columns['insertsize_count'] is None) and calc_mean_insertsize:
#    
#    if use_bam:
#        atac.add_insertsize(adata, bam=bam_file, barcode_tag=barcode_tag)     
#    else:
#        atac.add_insertsize(adata, fragments=fragments_file, barcode_tag=barcode_tag)
#        
#    # update column dictionary
#    qc_columns['mean_insertsize'] = 'mean_insertsize'
#    qc_columns['insertsize_count'] = 'insertsize_count'
#    
#    # update infoprocess table
#    build_legend(adata, 'mean_insertsize', 'mean_insertsize')
#    build_legend(adata, 'insertsize_count', 'insertsize_count')
#    
#    atac.plot_insertsize(adata, barcodes=None)
#    
#adata.obs

### 3. promotor enrichment 

In [15]:
if qc_columns['n_fragments_in_promoters'] is None and calc_promotor_enrichment:

    if use_bam:
        overlap.pct_fragments_in_promoters(adata, gtf_file=promoters_gtf, species=species, bam_file=bam_file, cb_col=None, nproc=1, cb_tag=barcode_tag)
    else:
        overlap.pct_fragments_in_promoters(adata, gtf_file=promoters_gtf, species=species, fragments_file=fragments_file, cb_col=None, nproc=1, cb_tag=barcode_tag)

    # n_total_fragments, n_fragments_in_promoters, pct_fragments_in_promoters
    # update column dictionary
    qc_columns['n_total_fragments'] = 'n_total_fragments'
    qc_columns['n_fragments_in_promoters'] = 'n_fragments_in_promoters'
    qc_columns['pct_fragments_in_promoters'] = 'pct_fragments_in_promoters'
    # update infoprocess table
    build_legend(adata, 'n_total_fragments', 'n_total_fragments')
    build_legend(adata, 'n_fragments_in_promoters', 'n_fragments_in_promoters')
    build_legend(adata, 'pct_fragments_in_promoters', 'pct_fragments_in_promoters')


Converting GTF to BED...
Converting BAM to fragments file! This may take a while...
Finished creating fragments file. Now sorting...
Finished sorting fragments
Finding overlaps...
Calculating percentage...


Progress:   0%|          | 0/15 [00:00<?, ?it/s]

Progress:   0%|          | 0/76 [00:00<?, ?it/s]

Adding results to adata object...
Done


## Save adata to .h5ad

In [16]:
adata_output = tree.assembled_anndata
adata_output

'/mnt/workspace/jdetlef/processed_data/Esophagus_146_0.01/assembling/anndata/Esophagus_146_0.01.h5ad'

In [19]:
adata.obs

Unnamed: 0,TN,UM,PP,UQ,CM,file,sample,Sample_new,n_features_by_counts,log1p_n_features_by_counts,...,Median-Fragment-Length,Fragments,Fragment-Count,Distribution,Maxima,Maxima-Count,nucleosomal-score,n_fragments_in_promoters,n_total_fragments,pct_fragments_in_promoters
AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,...,49,"[123, 46, 90, 49, 201, 61, 178, 44, 34, 42, 38]",11.0,"[9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,145.544305,4.0,16.0,0.250000
AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,17,2.890372,...,138.5,"[72, 417, 148, 40, 36, 129, 207, 89, 166, 111,...",26.0,"[16, 7, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[0],1.0,102.513844,5.0,28.0,0.178571
AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,10,2.397895,...,74.0,"[407, 78, 50, 52, 26, 70, 81, 178, 496, 28, 53...",12.0,"[8, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,140.045502,5.0,19.0,0.263158
AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,1648,7.407924,...,106,"[60, 54, 31, 201, 41, 267, 49, 259, 275, 77, 6...",3475.0,"[2351, 871, 229, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0...",[0],1.0,19.623836,1296.0,5264.0,0.246201
AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,13,2.639057,...,131.5,"[56, 350, 37, 391, 212, 40, 207, 48, 49, 215]",10.0,"[5, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,152.003069,6.0,19.0,0.315789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,...,94,"[91, 94, 199, 77, 168]",5.0,"[4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,220.573902,1.0,6.0,0.166667
TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,...,58,"[58, 147, 56]",3.0,"[3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,324.955404,3.0,7.0,0.428571
TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,0,0.000000,...,46,"[176, 36, 46]",3.0,"[2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[0],1.0,324.955404,4.0,9.0,0.444444
TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,...,163.5,"[266, 61]",2.0,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[],0.0,inf,1.0,5.0,0.200000


In [25]:
adata.obs['Fragments'] = adata.obs['Fragments'].astype(str)
adata.obs['Distribution'] = adata.obs['Distribution'].astype(str)
#adata.obs['Distribution'] = adata.obs['Distribution'].astype(str)

In [27]:
adata.obs['Maxima'] = adata.obs['Maxima'].astype(str)

In [30]:
for entry in adata.obs:
    print(adata.obs[entry])

AAACTACCAGAAACCCGAGATA      33.0
AAACTACCAGAAACCTAAGTGG      52.0
AAACTACCAGAAACGGATCAGT      27.0
AAACTACCAGAAACGTCCCGTT    6223.0
AAACTACCAGAAACTAGCCCTA      41.0
                           ...  
TTCCATCCTCTTTCGCGTGTAA       9.0
TTCCATCCTCTTTCTGCAGACT      10.0
TTCCATCCTCTTTCTGGCGCAG      17.0
TTCCATCCTCTTTGCCGGAAGG       8.0
TTCCATCCTCTTTGTGTTACCG      19.0
Name: TN, Length: 73652, dtype: float64
AAACTACCAGAAACCCGAGATA      18.0
AAACTACCAGAAACCTAAGTGG      36.0
AAACTACCAGAAACGGATCAGT      19.0
AAACTACCAGAAACGTCCCGTT    5231.0
AAACTACCAGAAACTAGCCCTA      29.0
                           ...  
TTCCATCCTCTTTCGCGTGTAA       6.0
TTCCATCCTCTTTCTGCAGACT       6.0
TTCCATCCTCTTTCTGGCGCAG       5.0
TTCCATCCTCTTTGCCGGAAGG       7.0
TTCCATCCTCTTTGTGTTACCG       9.0
Name: UM, Length: 73652, dtype: float64
AAACTACCAGAAACCCGAGATA      18.0
AAACTACCAGAAACCTAAGTGG      35.0
AAACTACCAGAAACGGATCAGT      19.0
AAACTACCAGAAACGTCCCGTT    5213.0
AAACTACCAGAAACTAGCCCTA      29.0
                           ..

In [45]:
#Saving the data
cr.build_infor(adata, "Test_number", test)
cr.build_infor(adata, "Anndata_path", output_dir)

adata_output = tree.assembled_anndata
adata.write(filename=adata_output)

TypeError: Can't implicitly convert non-string objects to strings

Above error raised while writing key 'Mean-Fragment-Length' of <class 'h5py._hl.group.Group'> to /

In [None]:
import os
import shutil
repo_path = os.getcwd()
notebook_name = '1_assembling.ipynb'
notebook_path = os.path.join(repo_path, notebook_name)
notebook_copy = os.path.join(tree.assemble_dir , notebook_name)
shutil.copyfile(notebook_path, notebook_copy)

In [43]:
adata.obs.pop("Maxima")

AAACTACCAGAAACCCGAGATA    [0]
AAACTACCAGAAACCTAAGTGG    [0]
AAACTACCAGAAACGGATCAGT    [0]
AAACTACCAGAAACGTCCCGTT    [0]
AAACTACCAGAAACTAGCCCTA    [0]
                         ... 
TTCCATCCTCTTTCGCGTGTAA    [0]
TTCCATCCTCTTTCTGCAGACT    [0]
TTCCATCCTCTTTCTGGCGCAG    [0]
TTCCATCCTCTTTGCCGGAAGG     []
TTCCATCCTCTTTGTGTTACCG    [0]
Name: Maxima, Length: 73652, dtype: category
Categories (89, object): ['[0, 3]', '[0, 4]', '[0, 5]', '[0, 6, 9]', ..., '[6]', '[9]', '[]', 'nan']

In [44]:
adata.obs >= 

Unnamed: 0,TN,UM,PP,UQ,CM,file,sample,Sample_new,n_features_by_counts,log1p_n_features_by_counts,total_counts,log1p_total_counts,Mean-Fragment-Length,Median-Fragment-Length,Fragment-Count,Maxima-Count,nucleosomal-score,n_fragments_in_promoters,n_total_fragments,pct_fragments_in_promoters
AAACTACCAGAAACCCGAGATA,33.0,18.0,18.0,15.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,5.0,1.791759,82.36,49,11.0,1.0,145.544305,4.0,16.0,0.250000
AAACTACCAGAAACCTAAGTGG,52.0,36.0,35.0,32.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,17,2.890372,17.0,2.890372,170.38,138.5,26.0,1.0,102.513844,5.0,28.0,0.178571
AAACTACCAGAAACGGATCAGT,27.0,19.0,19.0,19.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,10,2.397895,10.0,2.397895,161.17,74.0,12.0,1.0,140.045502,5.0,19.0,0.263158
AAACTACCAGAAACGTCCCGTT,6223.0,5231.0,5213.0,3779.0,18.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,1648,7.407924,1791.0,7.491087,144.03,106,3475.0,1.0,19.623836,1296.0,5264.0,0.246201
AAACTACCAGAAACTAGCCCTA,41.0,29.0,29.0,26.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,13,2.639057,13.0,2.639057,160.5,131.5,10.0,1.0,152.003069,6.0,19.0,0.315789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTCCATCCTCTTTCGCGTGTAA,9.0,6.0,6.0,6.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,3.0,1.386294,125.8,94,5.0,1.0,220.573902,1.0,6.0,0.166667
TTCCATCCTCTTTCTGCAGACT,10.0,6.0,6.0,6.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,3,1.386294,3.0,1.386294,87.0,58,3.0,1.0,324.955404,3.0,7.0,0.428571
TTCCATCCTCTTTCTGGCGCAG,17.0,5.0,5.0,5.0,1.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,0,0.000000,0.0,0.000000,86.0,46,3.0,1.0,324.955404,4.0,9.0,0.444444
TTCCATCCTCTTTGCCGGAAGG,8.0,7.0,7.0,7.0,0.0,/mnt/workspace/jdetlef/data/anndata/esophagus_...,MB,sample1,5,1.791759,5.0,1.791759,163.5,163.5,2.0,0.0,inf,1.0,5.0,0.200000
