## Mannens et al., bioRxiv (2023)

#### https://github.com/linnarsson-lab/fetal_brain_multiomics?tab=readme-ov-file
#### https://github.com/linnarsson-lab/fetal_brain_multiomics/blob/main/files/supplementals/Extended_data_2_Cluster_annotation.xlsx

In [None]:
import anndata
from pybedtools import BedTool
import pandas as pd
import numpy as np

In [None]:
# Read the loom file into an AnnData object; the bare name below displays its
# summary in the notebook.
peak = anndata.read_loom("Pool_peaks.agg.loom")
peak

In [None]:
# Load the cluster annotation workbook and drop its first column in one step.
clust_annotation = pd.read_excel("Extended_data_2_Cluster_annotation.xlsx").iloc[:, 1:]

In [None]:
# Display the annotation table (first column already removed) for inspection.
clust_annotation

In [None]:
# Keep only the annotation rows whose MostCommonRegion column equals
# "Telencephalon", then display them.
Telencephalon_clust = clust_annotation[clust_annotation["MostCommonRegion"] == "Telencephalon"]
Telencephalon_clust

In [None]:
# Boolean mask over the loom rows: True where the row's ClusterName is one of
# the cluster names selected in Telencephalon_clust.
in_telencephalon = peak.obs['ClusterName'].isin(Telencephalon_clust['Cluster name'])
cerebrum_peaks = peak[in_telencephalon]
# Show the NPeaks value recorded for each retained row.
cerebrum_peaks.obs['NPeaks']

In [None]:
# Dense copy of the 'binary' layer: 1 = peak present, 0 = peak absent.
cerebrum_peaks_mat = cerebrum_peaks.layers['binary'].todense()
# Preview: column positions flagged 1 in the first row, then the matching
# entries of the peak table.
first_row_hits = cerebrum_peaks_mat[0] == 1
clust_peak_idx = list(np.where(first_row_hits)[1])
cerebrum_peaks.var.iloc[clust_peak_idx]

In [None]:
# Build one long table (Chr, Start, End, Cluster name) listing, for every row
# of the binary matrix, the peaks flagged as present in that row.
#
# The cluster label is taken from cerebrum_peaks.obs, which is row-aligned with
# cerebrum_peaks_mat (both come from the same AnnData subset). Indexing
# Telencephalon_clust by position instead would silently mislabel peaks if the
# loom rows are ordered differently from the annotation sheet, and would raise
# IndexError if the loom contains fewer/more of the clusters than the sheet.
peak_coords = pd.DataFrame(cerebrum_peaks.var).loc[:, ['Chr', 'Start', 'End']]
row_cluster_names = cerebrum_peaks.obs['ClusterName'].tolist()

per_cluster_beds = []
for i, c in enumerate(row_cluster_names):
    # The matrix row is 2-D (1 x n_peaks), so the column indices are element [1].
    clust_peak_idx = list(np.where(cerebrum_peaks_mat[i] == 1)[1])
    # .assign returns a new frame, so no column is written onto a slice of var.
    clust_peak_bed = peak_coords.iloc[clust_peak_idx].assign(**{'Cluster name': c})
    per_cluster_beds.append(clust_peak_bed)

# Concatenate once at the end rather than growing a frame inside the loop.
if per_cluster_beds:
    cerebrum_peak_beds = pd.concat(per_cluster_beds)
else:
    # No rows selected: keep the expected columns so later steps still find them.
    cerebrum_peak_beds = pd.DataFrame(columns=['Chr', 'Start', 'End', 'Cluster name'])
    

In [None]:
# Display the combined per-cluster peak table for inspection.
cerebrum_peak_beds

In [None]:
# Attach a coarse cell-type label ("Cluster") to each telencephalon cluster.
# NOTE(review): labels are matched to clusters purely by row position in
# Telencephalon_clust — re-check the order whenever the annotation sheet changes.
cluster_labels = ['MGE','LGE.DGE','GABA','GluN1','GluN2','Neuroblast','Neuroblast','GluN3','RG','vRG','dRG','RG']
if len(cluster_labels) != len(Telencephalon_clust):
    raise ValueError(
        "Expected {} telencephalon clusters for the hand-written labels, found {}".format(
            len(cluster_labels), len(Telencephalon_clust)
        )
    )
# .assign builds a new frame instead of writing a column into a selection of
# Telencephalon_clust.
clust_annot = Telencephalon_clust.loc[:, ["Cluster name", "Full Name"]].assign(Cluster=cluster_labels)
clust_annot

In [None]:
# Add the "Full Name" and "Cluster" columns to every peak row by joining on the
# shared "Cluster name" column (inner join, the pandas default).
cerebrum_peak_beds = cerebrum_peak_beds.merge(clust_annot, how="inner", on="Cluster name")

In [None]:
import os

# Write one coordinate-sorted, headerless, tab-separated BED file per value of
# the "Cluster" column.
out_dir = "Mannens2023"
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

celltypes = cerebrum_peak_beds['Cluster'].unique().tolist()

for c in celltypes:
    c_peak_beds = cerebrum_peak_beds.loc[cerebrum_peak_beds['Cluster'] == c, ["Chr", "Start", "End"]]
    # Several source clusters share one label (e.g. 'Neuroblast', 'RG'), so a
    # peak present in more than one of them would otherwise be written twice.
    c_peak_beds = c_peak_beds.drop_duplicates()
    c_peak_beds = BedTool.from_dataframe(c_peak_beds).sort().to_dataframe()

    c_peak_beds.to_csv("{}/Mannens_{}_regulatory_elements.sorted.bed".format(out_dir, c), sep="\t", index=False, header=False)