In [None]:
import os
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
import scanpy as sc
import anndata


In [None]:
import h5py

# Peek at the top-level layout of ctx_results.hdf5. The file is opened read-only
# and the context manager closes the handle as soon as the listing is printed.
with h5py.File(
    "/mnt/windows/extradata/meiotic_cells_2/scplus_pipeline/Snakemake/ctx_results.hdf5",
    'r',
) as ctx_file:
    print("Available keys in the HDF5 file:", list(ctx_file.keys()), sep="\n")

In [None]:
# Count and list every pandas object stored in ctx_results.hdf5.
# mode="r" is passed explicitly: HDFStore defaults to append mode ("a"), which
# opens the pipeline output read/write and silently creates an empty store when
# the path does not exist. This cell only inspects the file.
with pd.HDFStore(
    "/mnt/windows/extradata/meiotic_cells_2/scplus_pipeline/Snakemake/ctx_results.hdf5",
    mode="r",
) as store:
    keys = store.keys()
print(len(keys))
print(keys)

In [None]:
# Open dem_results.hdf5 and keep the handle in `store` for the cells that follow.
# mode="r": the default ("a") would open the file read/write and create it if the
# path were wrong; the cells below only read from it.
# NOTE(review): no visible cell closes this handle — call store.close() once the
# tables have been loaded.
store = pd.HDFStore(
    "/mnt/windows/extradata/meiotic_cells_2/scplus_pipeline/Snakemake/dem_results.hdf5",
    mode="r",
)

In [None]:
# List the keys available in the opened store.
store.keys()

In [None]:
# Load the "regions_to_db" table stored under the oogonia_STRA8-vs-all DARs key
# and display it.
dars_oogonia_STRA8_regions = store.select(
    key="DARs_cell_type_oogonia_STRA8_vs_all/regions_to_db"
)
dars_oogonia_STRA8_regions

In [None]:
# Motif-enrichment table stored under the oogonia_STRA8-vs-all DARs key.
dars_oogonia_STRA8 = store.select(
    key="DARs_cell_type_oogonia_STRA8_vs_all/motif_enrichment"
)

In [None]:
# Motif-enrichment table stored under the oogonia_meiotic-vs-all DARs key.
dars_oogonia_meiotic = store.select(
    key="DARs_cell_type_oogonia_meiotic_vs_all/motif_enrichment"
)

In [None]:
# Stack the two motif-enrichment tables row-wise. Row labels are kept as-is
# (ignore_index is left at its default), so a label present in both inputs
# appears twice in the result.
dars_combined = pd.concat(
    [dars_oogonia_STRA8, dars_oogonia_meiotic],
    axis=0,
)

In [None]:
# Display the concatenated motif-enrichment table.
dars_combined

In [None]:
# Order rows by descending Log2FC and preview the first 20.
# The sort order matters further down: the per-motif aggregation takes the
# 'first' TF/Logo value in each group, i.e. the one from the highest-Log2FC row.
dars_combined = dars_combined.sort_values(by="Log2FC", ascending=False)
dars_combined.head(20)

In [None]:
import numpy as np

# One TF label per row: use the direct annotation when present, otherwise fall
# back to the orthology-based annotation.
has_direct_annot = dars_combined['Direct_annot'].notna()
dars_combined['TF'] = np.where(
    has_direct_annot,
    dars_combined['Direct_annot'],
    dars_combined['Orthology_annot'],
)
# The label is deliberately left as the full comma-separated string here; it is
# only trimmed to its first entry in the plotting cell.

# Collapse rows that share an index label: average Log2FC and keep the first TF
# and Logo of each group. The index is then re-established from the 'motifs'
# column and the rows are ranked by descending mean Log2FC.
dars_combined_avg = (
    dars_combined
    .groupby(dars_combined.index)
    .agg({'Log2FC': 'mean', 'TF': 'first', 'Logo': 'first'})
    .reset_index()
    .set_index('motifs')
    .sort_values(by="Log2FC", ascending=False)
)
dars_combined_avg_top_20 = dars_combined_avg.head(20)

In [None]:
# Display the 20 rows with the highest mean Log2FC.
dars_combined_avg_top_20

In [None]:
import matplotlib.pyplot as plt

# Bar chart of the 20 rows with the highest mean Log2FC, labelled by TF, saved as SVG.

# Keep only the first entry of each comma-separated TF annotation as the label.
# .assign() rebinds the name to a new frame rather than writing a column into the
# .head(20) slice, which is what raised pandas' SettingWithCopyWarning.
dars_combined_avg_top_20 = dars_combined_avg_top_20.assign(
    TF=dars_combined_avg_top_20['TF'].str.split(',').str[0]
)

# Place one bar per row at an integer position and attach the TF names as tick
# labels. Passing the TF strings directly as x puts them on a categorical axis,
# where rows sharing a TF name land on the same position and are drawn on top
# of each other.
bar_positions = list(range(len(dars_combined_avg_top_20)))
tick_labels = dars_combined_avg_top_20['TF'].tolist()

plt.figure(figsize=(12, 6))
plt.bar(bar_positions, dars_combined_avg_top_20['Log2FC'])
plt.xticks(bar_positions, tick_labels, rotation=90)
plt.ylabel('Log2FC')
plt.xlabel('Transcription Factors')
plt.title('Transcription Factor Enrichment in Meiotic Cells DARs')
plt.tight_layout()
plt.savefig('/mnt/storage/outputs/garcia_ATAC/outputs/top_motifs_meiotic_DARs.svg', format='svg')
plt.show()



In [None]:
# Write the full per-motif summary (all rows of dars_combined_avg, including the
# Logo column) to a comma-separated file.
dars_combined_avg.to_csv(
    '/mnt/storage/outputs/garcia_ATAC/outputs/top_motifs_meiotic_DARs_with_logos.csv',
    sep=",",
)

In [None]:
# Round-trip check: re-read the CSV written by the previous cell.
# The original read '/mnt/storage/outputs/garcia_ATAC/top_motifs_meiotic_DARs.csv',
# a different directory and file name from the one just written, so on a fresh
# run it inspected a stale (or missing) file instead of this notebook's output.
# No index_col is given, so rows get a default 0..n-1 index.
test = pd.read_csv(
    '/mnt/storage/outputs/garcia_ATAC/outputs/top_motifs_meiotic_DARs_with_logos.csv',
    sep=",",
)
test

In [None]:
# Show the Logo value of the row labelled 0 (the first row of the re-read CSV,
# which carries the default integer index).
test.loc[0, "Logo"]