In [None]:
import matplotlib.pyplot as plt
import anndata
import scanpy as sc
import snapatac2 as snap
import numpy as np
import pandas as pd
import os
import scanpy.external as sce
#adata_concat = snap.read_dataset('output/motif/motif/motif/motif/motif/mouse_brain.h5ads')

In [None]:
# Input AnnData file. NOTE(review): absolute, machine-specific path — the
# notebook will not run elsewhere unless this is adjusted.
file = "/data2st1/junyi/output/mouse_brain_dar.h5ad"

In [None]:
# Read the .h5ad file into an AnnData object used by all later cells.
adata = anndata.read_h5ad(file)

In [None]:
# Static (non-interactive) UMAP of all cells, coloured by the
# "celltype.L1.tab" column of adata.obs.
snap.pl.umap(adata, color="celltype.L1.tab", interactive=False,  marker_size=3)


In [None]:
# Distinct values of the "celltype.L1.tab" annotation.
celltypes = adata.obs["celltype.L1.tab"].unique()
# Candidate region codes (a list). NOTE(review): presumably these are the
# region tags embedded in the sample names — confirm against adata.obs['sample'].
region = ['AMY','HIP','PFC']
print(celltypes)
print(region)

In [None]:
# Parameters for this run: which region / cell type to subset on.
# The (misspelled) name `rergion` is kept because later cells reference it.
rergion = 'HIP'
celltype = 'Neuron'
# Prefix for output figure names. Must use the scalar `rergion`, not the
# `region` list of all candidate regions, otherwise the list repr ends up
# in the file name.
base_name = f"{rergion}_{celltype}"

In [None]:
# Keep cells whose sample name contains `rergion` and whose cell-type label
# contains `celltype`; .copy() makes the subset independent of `adata`.
# NOTE: the variable is called "AMY_neuron" regardless of the region / cell
# type actually selected above.
adata_AMY_neuron = adata[(adata.obs['sample'].str.contains(rergion)) & (adata.obs['celltype.L1.tab'].str.contains(celltype)) ].copy()
adata_AMY_neuron


In [None]:
# Group label = first two characters of the sample name. Later cells query
# the groups 'MC' and 'MW' from this column (spelled 'expriment' throughout).
adata_AMY_neuron.obs['expriment'] = adata_AMY_neuron.obs['sample'].str[:2]


In [None]:
# UMAP of the subset, coloured by sample.
snap.pl.umap(adata_AMY_neuron, color="sample", interactive=False,  marker_size=3)


In [None]:
%%time
# Run MACS3 peak calling grouped by 'expriment'; the result is read back
# from adata_AMY_neuron.uns['macs3'] two cells below.
snap.tl.macs3(adata_AMY_neuron, groupby='expriment')


In [None]:
adata_AMY_neuron

In [None]:
%%time
# Merge the per-group peak calls into a single table, using the GRCm39
# genome definition shipped with snapatac2.
peaks = snap.tl.merge_peaks(adata_AMY_neuron.uns['macs3'], snap.genome.GRCm39)
peaks.head()


In [None]:
%%time
# Build the peak matrix for the subset over the merged peak list
# (the 'Peaks' column of the table above).
peak_mat = snap.pp.make_peak_matrix(adata_AMY_neuron, use_rep=peaks['Peaks'])
peak_mat


In [None]:
# Keep a copy of X as produced by make_peak_matrix; the next two cells
# overwrite X with normalised / log-transformed values.
peak_mat.layers['raw'] = peak_mat.X.copy()

In [None]:
# Normalise each cell's total (scanpy defaults).
sc.pp.normalize_total(peak_mat)

In [None]:
# log(1 + x) transform of X.
sc.pp.log1p(peak_mat)

In [None]:
%%time
# Wilcoxon rank-sum test per 'expriment' group; results are stored under
# peak_mat.uns['rank_genes_groups'] and read by the cells below.
# pts=True additionally requests the per-group fraction of expressing cells.
sc.tl.rank_genes_groups(peak_mat, groupby='expriment', method='wilcoxon',pts=True)

In [None]:
# Copy the UMAP coordinates from the subset object so the peak matrix can
# be plotted on the same embedding.
peak_mat.obsm['X_umap'] = adata_AMY_neuron.obsm['X_umap']

In [None]:
sc.pl.umap(peak_mat, color='expriment', size=50)

In [None]:
# Violin plot of one hard-coded peak, split by group.
# NOTE(review): this peak ID must exist in peak_mat.var_names for the
# current region / cell-type selection — confirm when parameters change.
sc.pl.violin(peak_mat, keys='chr6:145133176-145133677', groupby='expriment')

In [None]:
# Test results for group 'MC' as a DataFrame, filtered with
# pval_cutoff=0.05. `df` is reused further down to build marker_peaks.
df = sc.get.rank_genes_groups_df(peak_mat, group='MC', key='rank_genes_groups',pval_cutoff=0.05)

In [None]:
# Write the MC table; the output/motif/ directory must already exist.
df.to_csv(f"output/motif/{rergion}_{celltype}_wilcoxon.csv")
#df.to_csv("output/AMY_neuron_MC_wilcoxon.csv")

In [None]:
#df.sort_values('logfoldchanges',ascending=False).names.str.split(r'[-:]', expand=True).to_csv("output/AMY_neuron_MC_wilcoxon.bed",sep='\t', header=False, index=False) 

In [None]:
# UMAP coloured by each of the ten peaks with the largest log fold change.
sc.pl.umap(peak_mat, color=df.sort_values('logfoldchanges',ascending=False).head(10).names, size=50)

In [None]:
# Inspect the raw result structure written by sc.tl.rank_genes_groups.
peak_mat.uns['rank_genes_groups']

In [None]:
# Create a pandas dataframe with one '<group>_names' and one
# '<group>_scores' column per tested group.
# The result dict was never bound to a name in this notebook, so fetch it
# explicitly instead of relying on leftover kernel state.
rank_gene_groups = peak_mat.uns['rank_genes_groups']
# The dict's own keys are result fields ('params', 'names', 'scores', ...);
# the group labels are the field names of the structured 'names' array.
group_labels = rank_gene_groups['names'].dtype.names
# Stored under its own name so the MC result table `df`, which a later cell
# still needs (it reads df.logfoldchanges), is not overwritten.
rank_summary_df = pd.DataFrame({group + '_' + key: rank_gene_groups[key][group]
                                for group in group_labels
                                for key in ['names', 'scores']})
rank_summary_df.head()

In [None]:
# Same extraction as for 'MC', here for group 'MW'.
df_mw = sc.get.rank_genes_groups_df(peak_mat, group='MW', key='rank_genes_groups',pval_cutoff=0.05)

In [None]:
# Write the MW table; the output/motif/ directory must already exist.
df_mw.to_csv(f"output/motif/{rergion}_{celltype}_MW.csv")

In [None]:
# %%time
# marker_peaks = snap.tl.marker_regions(peak_mat, groupby='expriment', pvalue=0.05)


In [None]:
# For each group, keep the peaks with a positive log fold change and expose
# their names as an Index (the region sets handed to motif enrichment).
marker_peaks = {
    label: table.loc[table.logfoldchanges > 0].set_index('names').index
    for label, table in (("MC", df), ("MW", df_mw))
}

In [None]:
# Display the selected peak sets for a quick sanity check.
marker_peaks

In [None]:
from snapatac2._snapatac2 import read_motifs, PyDNAMotif

def cis_bp_mouse(unique: bool = True , path="data/motifdb/Mus_musculus.meme") -> list[PyDNAMotif]:
    """Load motifs from a MEME file and give each one a short name.

    Every motif's ``name`` is set to the part of its ``id`` before the
    first ``'+'``.

    Parameters
    ----------
    unique
        If True, keep a single motif per name: the one whose
        ``info_content()`` is largest (on ties the first one read wins).
        If False, return every motif that was read.
    path
        Location of the MEME-format motif file passed to ``read_motifs``.

    Returns
    -------
    list[PyDNAMotif]
        The renamed motifs, de-duplicated by name when ``unique`` is True.
    """
    loaded = read_motifs(path)
    for entry in loaded:
        entry.name = entry.id.split('+')[0]

    if not unique:
        return loaded

    # Insertion-ordered map: name -> most informative motif seen so far.
    best_by_name = {}
    for entry in loaded:
        key = entry.name
        incumbent = best_by_name.get(key)
        if incumbent is None or incumbent.info_content() < entry.info_content():
            best_by_name[key] = entry
    return list(best_by_name.values())


In [None]:
# Motif enrichment of the MC / MW marker peaks against the motifs read
# from the Mus_musculus.meme file, using GRCm39 sequences.
motifs = snap.tl.motif_enrichment(
    motifs=cis_bp_mouse(unique=True,path="data/motifdb/Mus_musculus.meme"),
    regions=marker_peaks,
    genome_fasta=snap.genome.GRCm39,
)


In [None]:
# Only the 'MC' result table is written out here.
motifs['MC'].write_csv(f"output/motif/{rergion}_{celltype}_MC_cisbp_motif.csv")

In [None]:
# Static enrichment plot for everything in `motifs`, restricted by max_fdr.
fig = snap.pl.motif_enrichment(motifs, max_fdr=0.0001, height=4000, interactive=False)
with open(f"output/motif/{base_name}_MC_cisbp_motif.png", 'wb') as f:
    f.write(fig.data)  # assumes `fig` exposes the encoded image bytes as .data — TODO confirm


In [None]:
# Same enrichment as above, but with motifs read from uniprobe_mouse.meme.
# NOTE: this rebinds `motifs`, replacing the previous enrichment result.
motifs = snap.tl.motif_enrichment(
    motifs=cis_bp_mouse(unique=True,path="data/motifdb/uniprobe_mouse.meme"),
    regions=marker_peaks,
    genome_fasta=snap.genome.GRCm39,
)


In [None]:
# Static enrichment plot for everything in `motifs`, restricted by max_fdr.
fig = snap.pl.motif_enrichment(motifs, max_fdr=0.0001, height=4000, interactive=False)
with open(f"output/motif/{base_name}_MC_uniprod_motif.png", 'wb') as f:
    f.write(fig.data)  # assumes `fig` exposes the encoded image bytes as .data — TODO confirm


In [None]:
#motifs['MC'].write_csv("output/motif/AMY_neuron_MC_motif_uniprot.csv")
# Only the 'MC' result table is written out here.
motifs['MC'].write_csv(f"output/motif/{rergion}_{celltype}_MC_motif_uniprot.csv")