In [1]:
import pandas as pd
from pybedtools import BedTool
import pathlib
import anndata
import numpy as np
from scipy.sparse import coo_matrix, hstack

In [2]:
# Default path of the DMR BED file to annotate.
# NOTE(review): the "# Parameters" cell that follows reassigns `dmr_bed`, so this
# value only takes effect when that cell is absent or skipped.
dmr_bed = '/home/hanliu/project/mouse_rostral_brain/DMR/DGmCHGroup/DMR/TotalDMR.nofilter.bed'


In [3]:
# Parameters
# NOTE(review): looks like a papermill-style injected parameters cell — confirm.
# It overrides the default `dmr_bed` assigned in the previous cell.
dmr_bed = "/home/hanliu/project/mouse_rostral_brain/DMR/SubType/raw/SubType-chr5/TotalDMR.nofilter.bed"


In [4]:
# All results are written to an 'Annotation' folder next to the input DMR BED file.
output_dir = pathlib.Path(dmr_bed).parent.joinpath('Annotation')

In [5]:
# genome features
# Every genome-feature BED lives in one directory and ends in '.merge.sort.bed';
# only the stem in front of that suffix differs per feature.
genome_feature_dir = '/home/hanliu/ref/mouse/genome_feature/sort_and_merge'
genome_feature_stems = [
    # (feature name, file stem)
    ('CGI_promoter', 'CGI_promoter.all'),
    ('exon', 'exon.all'),
    ('exon_first', 'exon.first'),
    ('gene_all', 'gene.all'),
    ('gene_lincRNA', 'gene.lincRNA'),
    ('gene_protein_coding', 'gene.protein_coding'),
    ('intron', 'intron.all'),
    ('intron_first', 'intron.first'),
    ('Non_CGI_promoter', 'Non_CGI_promoter.all'),
    ('promoter', 'promoter.all'),
    ('start_codon', 'start_codon.all'),
    ('stop_codon', 'stop_codon.all'),
    ('transcript', 'transcript.all'),
    ('TSS', 'TSS.all'),
    ('UTR3', 'UTR3.all'),
    ('UTR5', 'UTR5.all'),
    ('CGI', 'CGI'),
    ('CGI_Shore', 'CGI_Shore'),
    ('splicing_site_slop100', 'splicing_site_slop100'),
    ('splicing_site_slop1000', 'splicing_site_slop1000'),
]
# Maps feature name -> absolute BED path, in the order listed above.
genome_features = {
    feature: f'{genome_feature_dir}/{stem}.merge.sort.bed'
    for feature, stem in genome_feature_stems
}

# different kinds of TEs
# Each BED file found in the TE directory is keyed by the first two
# dot-separated tokens of its file name; the value is the pathlib.Path itself.
te_beds = {
    '.'.join(te_path.name.split('.')[:2]): te_path
    for te_path in pathlib.Path('/home/hanliu/ref/mouse/ucsc/TE_Beds').glob('*bed')
}

# bulk published DMRs
# Maps a short DMR-set name to the BED file holding that set.
bulk_dmr = dict(
    feDMR='/home/hanliu/ref/inhouse/He_2020_Nature_Mouse_Tissue_Developmental/feDMR_NT_FB_MB_HB.bed',
    adultDMR='/home/hanliu/ref/inhouse/Hon_Adult_Mouse_Tissue_DMR/Non-NS.no_overlap_with_NS.mm10.bed',
)

# atac peaks
# Each BED file found in the peak directory is keyed by the first three
# dot-separated tokens of its file name; the value is the pathlib.Path itself.
peak_beds = {
    '.'.join(peak_path.name.split('.')[:3]): peak_path
    for peak_path in pathlib.Path(
        '/home/hanliu/project/mouse_rostral_brain/ATAC/peak/').glob('*bed')
}

In [6]:
# Wrap the input path in a pybedtools BedTool for the intersections below.
# NOTE(review): this rebinds `dmr_bed` from a path string to a BedTool, so cells
# that treat `dmr_bed` as a path (e.g. the `output_dir` cell) must run before it.
dmr_bed = BedTool(dmr_bed)

## Intersect

In [7]:
# Annotation sources grouped by category; the category name doubles as the
# sub-directory of `output_dir` that receives the intersect results.
dicts = {
    'GenomeFeature': genome_features,
    'TE': te_beds,
    'BulkDMR': bulk_dmr,
    'ATACPeak': peak_beds
}

for category, feature_paths in dicts.items():
    category_dir = pathlib.Path(output_dir) / category
    category_dir.mkdir(exist_ok=True, parents=True)

    for feature_name, feature_path in feature_paths.items():
        print(category, feature_name)
        feature_bed = BedTool(str(feature_path))
        # bedtools intersect -wa -u: keep the original DMR record, written once,
        # whenever it overlaps at least one interval of the feature BED.
        overlapping_dmrs = dmr_bed.intersect(feature_bed, wa=True, u=True)
        overlapping_dmrs.saveas(f'{category_dir}/{feature_name}.bed')


GenomeFeature CGI_promoter


GenomeFeature exon


GenomeFeature exon_first


GenomeFeature gene_all


GenomeFeature gene_lincRNA
GenomeFeature gene_protein_coding


GenomeFeature intron


GenomeFeature intron_first


GenomeFeature Non_CGI_promoter


GenomeFeature promoter


GenomeFeature start_codon


GenomeFeature stop_codon


GenomeFeature transcript


GenomeFeature TSS


GenomeFeature UTR3


GenomeFeature UTR5


GenomeFeature CGI


GenomeFeature CGI_Shore


GenomeFeature splicing_site_slop100


GenomeFeature splicing_site_slop1000


TE DNA.DNA
TE DNA.MULE-MuDR


TE DNA.MuDR
TE DNA.PiggyBac


TE DNA.TcMar
TE DNA.TcMar-Mariner


TE DNA.TcMar-Pogo
TE DNA.TcMar-Tc2


TE DNA.TcMar-Tigger
TE DNA.hAT


TE DNA.hAT-Blackjack


TE DNA.hAT-Charlie


TE DNA.hAT-Tip100
TE LINE.CR1


TE LINE.Dong-R4
TE LINE.L1


TE LINE.L2


TE LINE.RTE-BovB
TE LINE.RTE-X


TE LTR.ERV1


TE LTR.ERVK


TE LTR.ERVL


TE LTR.ERVL-MaLR


TE LTR.Gypsy
TE LTR.LTR


TE SINE.Alu


TE SINE.B2


TE SINE.B4


TE SINE.Deu
TE SINE.ID


TE SINE.MIR


TE SINE.tRNA
BulkDMR feDMR


BulkDMR adultDMR


ATACPeak GABA.CRc.CRc


ATACPeak GABA.Cge.Cge3


ATACPeak GABA.Cge.Cge5


ATACPeak GABA.Cge.Cge6


ATACPeak GABA.Cge.CgeHip


ATACPeak GABA.Cge.Lamp5


ATACPeak GABA.Cge.Vip


ATACPeak GABA.Hip.Hip


ATACPeak GABA.Mge.Mge1


ATACPeak GABA.Mge.Mge2


ATACPeak GABA.Mge.Mge3


ATACPeak GABA.Mge.Mge4


ATACPeak GABA.Mge.Mge5


ATACPeak GABA.Mge.Mge6


ATACPeak GABA.Mge.Mge7


ATACPeak GABA.Mge.Mge8


ATACPeak GABA.Mge.Mge9


ATACPeak GABA.Mge.Mge10


ATACPeak GABA.Mge.Mge11


ATACPeak GABA.Mge.Mge12


ATACPeak GABA.Msn.D1


ATACPeak GABA.Msn.D2


ATACPeak GABA.Msn.Foxp2


ATACPeak GABA.Msn.MSNOLF


ATACPeak GABA.Mxd1.Mxd1


ATACPeak GABA.Olf.OBDOP
ATACPeak GABA.Olf.OBGC


ATACPeak GABA.Olf.OBGLO
ATACPeak GABA.Olf.OBIGC


ATACPeak GABA.Olf.OBNBL


ATACPeak GABA.Sept.LSX


ATACPeak GABA.Sept.MA


ATACPeak GABA.Sept.MS


ATACPeak GABA.Sept.Sept1


ATACPeak GABA.Unk.Unk
ATACPeak Glutamate.GC.GC


ATACPeak NonN.Asc.AscNt


ATACPeak NonN.Asc.AscT


ATACPeak NonN.Asc.RGDG


ATACPeak NonN.Mgc.Mgc


ATACPeak NonN.Ogc.Mfol


ATACPeak NonN.Ogc.Mol


ATACPeak NonN.Opc.Opc


ATACPeak NonN.Vc.Vec1


ATACPeak NonN.Vc.Vec3


ATACPeak NonN.Vc.Vec5


ATACPeak NonN.Vc.Vlmc


ATACPeak NonN.Vc.Vpia


ATACPeak Glutamate.CLA.CLA


ATACPeak Glutamate.CT.CT1


ATACPeak Glutamate.CT.CT2


ATACPeak Glutamate.CT.L6b


ATACPeak Glutamate.HIP.CA1


ATACPeak Glutamate.HIP.CA1p


ATACPeak Glutamate.HIP.CA2


ATACPeak Glutamate.HIP.CA3


ATACPeak Glutamate.HIP.CA4


ATACPeak Glutamate.HIP.CRc


ATACPeak Glutamate.HIP.Mossy


ATACPeak Glutamate.HIP.NBL


ATACPeak Glutamate.HIP.unk1


ATACPeak Glutamate.HIP.unk2
ATACPeak Glutamate.IT.L4


ATACPeak Glutamate.IT.L5


ATACPeak Glutamate.IT.L6


ATACPeak Glutamate.IT.L23


ATACPeak Glutamate.NP.NP1


ATACPeak Glutamate.NP.NP2


ATACPeak Glutamate.NP.NPHIP
ATACPeak Glutamate.PIR.OLFdeep


ATACPeak Glutamate.PIR.Pir1


ATACPeak Glutamate.PIR.Pir2


ATACPeak Glutamate.PIR.Pir3


ATACPeak Glutamate.PIR.Pir4


ATACPeak Glutamate.PT.Pt


ATACPeak Glutamate.PT.Ptlsx


ATACPeak NonN.Asc.Myoc


ATACPeak NonN.Asc.NIPC


ATACPeak NonN.Asc.RGSZ


ATACPeak NonN.Ogc.Cop


ATACPeak NonN.Ogc.Nfol


ATACPeak NonN.Vc.Per


ATACPeak NonN.Vc.Vec2


ATACPeak NonN.Vc.Vec4


ATACPeak rs1atac.merged.reproduced


## Reverse intersect

In [8]:
# dicts = {
#     'GenomeFeature': genome_features,
#     'TE': te_beds,
#     'BulkDMR': bulk_dmr,
#     'ATACPeak': peak_beds
# }
# 
# for name, path_dict in dicts.items():
#     output_dir = f'reverse_intersect/{name}'
#     pathlib.Path(output_dir).mkdir(exist_ok=True)
#     
#     for fname, path in path_dict.items():
#         print(name, fname)
#         this_bed = BedTool(str(path))
#         this_bed.intersect(dmr_bed, wa=True, u=True).saveas(f'{output_dir}/{fname}.bed')
# 

## Assemble Adata
- Genome features, ATAC peaks for each cell type, motif hits, bulk DMRs, and TEs


In [9]:
# Tabulate the DMR BED, then map every value of its 'name' column to the index
# label of the row it sits on (used later as the matrix row number).
dmr_bed_df = dmr_bed.to_dataframe()
dmr_int_map = {
    dmr_name: row_label
    for row_label, dmr_name in dmr_bed_df['name'].items()
}

In [10]:
# Collect every intersect result written under output_dir/<category>/ as
# {file name without its last four characters ('.bed'): path string}.
beds = {
    result_path.name[:-4]: str(result_path)
    for category in dicts
    for result_path in (pathlib.Path(output_dir) / category).glob('*bed')
}

In [11]:
# Collect COO triplets (row = DMR, column = feature, value = True) for every
# DMR listed in each per-feature intersect result.
names_int_map = {}  # feature name -> column number
xs = []             # per-feature arrays of row numbers
ys = []             # per-feature arrays of column numbers
datas = []          # per-feature arrays of True values
for i, (name, bed) in enumerate(beds.items()):
    print(name)
    # Register the column before reading, so a feature whose result file is
    # empty still gets a column (with no entries) in the final matrix.
    names_int_map[name] = i
    try:
        # The last column of each result file is used as the index; it is
        # looked up in dmr_int_map below, i.e. it is expected to hold DMR names.
        bed_df = pd.read_csv(bed, header=None, sep='\t', index_col=-1)
    except pd.errors.EmptyDataError:
        continue

    row_idx = bed_df.index.map(dmr_int_map)
    # Fail here with a readable message instead of letting NaN row numbers
    # reach coo_matrix, where they surface as an opaque index error.
    unmapped = row_idx.isna()
    if unmapped.any():
        examples = bed_df.index[unmapped].unique().tolist()[:5]
        raise KeyError(
            f'{bed}: {int(unmapped.sum())} record(s) carry DMR ids that are '
            f'not present in dmr_int_map, e.g. {examples}')

    n = len(row_idx)
    xs.append(row_idx.values)
    # np.full / np.ones avoid building throwaway Python lists of length n and
    # keep integer / bool dtypes even when n == 0.
    ys.append(np.full(n, i))
    datas.append(np.ones(n, dtype=bool))

CGI_promoter


exon


exon_first


gene_all


gene_lincRNA


gene_protein_coding


intron


intron_first


Non_CGI_promoter


promoter


start_codon


stop_codon


transcript


TSS


UTR3


UTR5


CGI


CGI_Shore


splicing_site_slop100


splicing_site_slop1000


DNA.DNA


DNA.MULE-MuDR
DNA.MuDR


DNA.PiggyBac


DNA.TcMar


DNA.TcMar-Mariner


DNA.TcMar-Pogo
DNA.TcMar-Tc2


DNA.TcMar-Tigger


DNA.hAT


DNA.hAT-Blackjack


DNA.hAT-Charlie


DNA.hAT-Tip100


LINE.CR1


LINE.Dong-R4


LINE.L1


LINE.L2


LINE.RTE-BovB


LINE.RTE-X


LTR.ERV1


LTR.ERVK


LTR.ERVL


LTR.ERVL-MaLR


LTR.Gypsy


LTR.LTR


SINE.Alu


SINE.B2


SINE.B4


SINE.Deu


SINE.ID


SINE.MIR


SINE.tRNA


feDMR


adultDMR


GABA.CRc.CRc


GABA.Cge.Cge3


GABA.Cge.Cge5


GABA.Cge.Cge6


GABA.Cge.CgeHip


GABA.Cge.Lamp5


GABA.Cge.Vip


GABA.Hip.Hip


GABA.Mge.Mge1


GABA.Mge.Mge2


GABA.Mge.Mge3


GABA.Mge.Mge4


GABA.Mge.Mge5


GABA.Mge.Mge6


GABA.Mge.Mge7


GABA.Mge.Mge8


GABA.Mge.Mge9


GABA.Mge.Mge10


GABA.Mge.Mge11


GABA.Mge.Mge12


GABA.Msn.D1


GABA.Msn.D2


GABA.Msn.Foxp2


GABA.Msn.MSNOLF


GABA.Mxd1.Mxd1


GABA.Olf.OBDOP


GABA.Olf.OBGC


GABA.Olf.OBGLO


GABA.Olf.OBIGC


GABA.Olf.OBNBL


GABA.Sept.LSX


GABA.Sept.MA


GABA.Sept.MS


GABA.Sept.Sept1


GABA.Unk.Unk


Glutamate.GC.GC


NonN.Asc.AscNt


NonN.Asc.AscT


NonN.Asc.RGDG


NonN.Mgc.Mgc


NonN.Ogc.Mfol


NonN.Ogc.Mol


NonN.Opc.Opc


NonN.Vc.Vec1


NonN.Vc.Vec3


NonN.Vc.Vec5


NonN.Vc.Vlmc


NonN.Vc.Vpia


Glutamate.CLA.CLA


Glutamate.CT.CT1


Glutamate.CT.CT2


Glutamate.CT.L6b


Glutamate.HIP.CA1


Glutamate.HIP.CA1p


Glutamate.HIP.CA2


Glutamate.HIP.CA3


Glutamate.HIP.CA4


Glutamate.HIP.CRc


Glutamate.HIP.Mossy


Glutamate.HIP.NBL


Glutamate.HIP.unk1


Glutamate.HIP.unk2


Glutamate.IT.L4


Glutamate.IT.L5


Glutamate.IT.L6


Glutamate.IT.L23


Glutamate.NP.NP1


Glutamate.NP.NP2


Glutamate.NP.NPHIP


Glutamate.PIR.OLFdeep


Glutamate.PIR.Pir1


Glutamate.PIR.Pir2


Glutamate.PIR.Pir3


Glutamate.PIR.Pir4


Glutamate.PT.Pt


Glutamate.PT.Ptlsx


NonN.Asc.Myoc


NonN.Asc.NIPC


NonN.Asc.RGSZ


NonN.Ogc.Cop


NonN.Ogc.Nfol


NonN.Vc.Per


NonN.Vc.Vec2


NonN.Vc.Vec4


rs1atac.merged.reproduced


In [12]:
# Flatten the per-feature triplets and build the DMR-by-feature sparse matrix.
row_idx = np.concatenate(xs)
col_idx = np.concatenate(ys)
values = np.concatenate(datas)
matrix_shape = (len(dmr_int_map), len(names_int_map))
csr = coo_matrix((values, (row_idx, col_idx)), shape=matrix_shape).tocsr()

# Sorting each name -> integer map by its integer puts the labels in the same
# order as the matrix rows (DMRs) and columns (features).
obs_names = pd.Series(dmr_int_map).sort_values().index
var_names = pd.Series(names_int_map).sort_values().index
feature_adata = anndata.AnnData(X=csr,
                                obs=pd.DataFrame([], index=obs_names),
                                var=pd.DataFrame([], index=var_names))


In [13]:
def _build_feature_annot(genome_features, te_beds, bulk_dmr, peak_beds,
                         merged_peak_name='rs1atac.merged.reproduced'):
    """Derive the feature-name -> FeatureType mapping from the source dicts.

    Deriving the mapping (instead of listing every feature by hand) keeps it
    in sync with whatever BED files the TE / peak directories contain, so a
    newly added file no longer ends up with a missing FeatureType.

    Parameters
    ----------
    genome_features, te_beds, bulk_dmr, peak_beds : dict
        The annotation-source dicts; only their keys (feature names) are used.
    merged_peak_name : str
        The one key of ``peak_beds`` that is labelled 'ATACPeakAll' instead of
        'ATACPeak'.

    Returns
    -------
    dict
        feature name -> one of 'GenomeFeature', 'TE-<prefix>', 'BulkDMR',
        'ATACPeak' or 'ATACPeakAll', where <prefix> is the part of the TE key
        before its first '.' (e.g. 'DNA.hAT' -> 'TE-DNA').
    """
    annot = {}
    annot.update({name: 'GenomeFeature' for name in genome_features})
    annot.update({name: 'TE-' + name.split('.')[0] for name in te_beds})
    annot.update({name: 'BulkDMR' for name in bulk_dmr})
    annot.update({
        name: 'ATACPeakAll' if name == merged_peak_name else 'ATACPeak'
        for name in peak_beds
    })
    return annot


feature_annot = _build_feature_annot(genome_features, te_beds, bulk_dmr,
                                     peak_beds)

# Feature columns with no entry in feature_annot get a missing FeatureType.
feature_adata.var['FeatureType'] = feature_adata.var_names.map(feature_annot)

In [14]:
# `total_adata` is another name for the same AnnData object as `feature_adata`
# (no copy is made); it is written to DMRAnnotation.h5ad inside output_dir.
total_adata = feature_adata
h5ad_path = pathlib.Path(output_dir) / 'DMRAnnotation.h5ad'
total_adata.write_h5ad(h5ad_path)

... storing 'FeatureType' as categorical


In [15]:
# Display the AnnData summary as the cell output.
total_adata

AnnData object with n_obs × n_vars = 283678 × 139 
    var: 'FeatureType'

In [16]:
# Count how many feature columns carry each FeatureType label.
total_adata.var['FeatureType'].value_counts()

ATACPeak         84
GenomeFeature    20
TE-DNA           13
TE-SINE           7
TE-LTR            6
TE-LINE           6
BulkDMR           2
ATACPeakAll       1
Name: FeatureType, dtype: int64