In [None]:
import os
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
import scanpy as sc
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
load_dotenv()

# Root folder for this analysis: "<OUTPUT_PATH>/garcia_ATAC", where OUTPUT_PATH
# is read from the environment (load_dotenv() above may populate it from a .env file).
# Fail early with an explicit message: Path(None) would otherwise raise an
# opaque TypeError that does not mention the missing variable.
_output_root = os.getenv("OUTPUT_PATH")
if _output_root is None:
    raise RuntimeError(
        "Environment variable OUTPUT_PATH is not set; "
        "define it in the environment or in the .env file."
    )
DATA_PATH = Path(_output_root) / 'garcia_ATAC'

In [None]:
import pickle

# Load the pickled cisTopic object produced by the combined ATAC preprocessing.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by a trusted pipeline.
# The `with` block guarantees the file handle is closed after loading
# (the bare open(...) used previously was never closed).
with open(DATA_PATH / 'atac_preprocessing_combined/cistopic_obj.pkl', "rb") as cistopic_file:
    cistopic_obj = pickle.load(cistopic_file)

In [None]:
# Keep only the cells whose `celltype` annotation is not missing.
annotated_mask = cistopic_obj.cell_data.celltype.notna()
has_celltype = cistopic_obj.cell_data.loc[annotated_mask].index.tolist()
cistopic_obj = cistopic_obj.subset(cells=has_celltype, copy=True)

In [None]:
# Drop every cell annotated as 'pre_oocyte' and keep the rest.
# NOTE(review): despite its name, `is_pre_oocyte` holds the cells that are
# NOT 'pre_oocyte'. Confirm the filter direction is intended; the name is
# kept unchanged in case other cells reference it.
not_pre_oocyte_mask = cistopic_obj.cell_data.celltype.ne('pre_oocyte')
is_pre_oocyte = cistopic_obj.cell_data.loc[not_pre_oocyte_mask].index.tolist()
cistopic_obj = cistopic_obj.subset(cells=is_pre_oocyte, copy=True)

In [None]:
# Sanity check: dimensions of the fragment matrix after the two filtering steps.
fragment_matrix_shape = cistopic_obj.fragment_matrix.shape
print(fragment_matrix_shape)

In [None]:
# List the distinct values of the `sample` column among the remaining cells.
unique_samples = cistopic_obj.cell_data["sample"].unique()
print(unique_samples)

In [None]:
# Build `celltype_2`: the cell-type label prefixed with its dataset of origin.
# Cells whose sample name starts with '24047' get the "our_meiotic_cells_"
# prefix; every other cell gets "atlas_".
# NOTE(review): a missing `sample` value would make startswith() return NaN,
# which np.where treats as truthy — presumably `sample` has no NaNs; confirm.
sample_column = cistopic_obj.cell_data['sample']
from_24047 = sample_column.str.startswith('24047')
origin_prefix = np.where(from_24047, "our_meiotic_cells_", "atlas_")
cistopic_obj.cell_data['celltype_2'] = origin_prefix + cistopic_obj.cell_data['celltype']

In [None]:
# Display the distinct origin-prefixed labels stored in `celltype_2`.
cistopic_obj.cell_data['celltype_2'].unique()

In [None]:
import anndata as ad

# Wrap the fragment counts in an AnnData container. The fragment matrix is
# transposed so that rows are labelled by cell names (obs) and columns by
# region names (var).
cell_frame = pd.DataFrame(index=cistopic_obj.cell_names)
region_frame = pd.DataFrame(index=cistopic_obj.region_names)
adata = ad.AnnData(
    X=cistopic_obj.fragment_matrix.T,
    obs=cell_frame,
    var=region_frame,
)

In [None]:
# Attach the origin-prefixed labels to the AnnData observations.
# pandas aligns the assigned Series on its index, so this assumes the
# cell_data index matches the cell names used for adata.obs — TODO confirm.
celltype_labels = cistopic_obj.cell_data['celltype_2']
adata.obs['celltype'] = celltype_labels

In [None]:
# Persist the AnnData object to a temporary .h5ad file under DATA_PATH.
temp_adata_path = DATA_PATH / "temp_adata.h5ad"
adata.write_h5ad(temp_adata_path)

In [None]:
# Reload the AnnData object from the temporary .h5ad file under DATA_PATH.
temp_adata_path = DATA_PATH / "temp_adata.h5ad"
adata = sc.read_h5ad(temp_adata_path)

In [None]:
# Print the summary of the reloaded AnnData object as a sanity check.
print(adata)

In [None]:
# Collapse cells into one pseudobulk profile per `celltype` label.
# The aggregation name doubles as the key of the result layer read downstream.
aggregate_func = "sum"
pseudobulk = sc.get.aggregate(
    adata,
    by=["celltype"],
    func=aggregate_func,
)


In [None]:
# Display the observation index of the pseudobulk object (one entry per aggregated group).
pseudobulk.obs.index

In [None]:
# Export the aggregated layer as a DataFrame for the correlation analysis:
# rows are the pseudobulk observations, columns are the variable names.
corr_df = pseudobulk.to_df(layer=aggregate_func)


In [None]:
# Display the pseudobulk table that feeds the correlation step.
corr_df

In [None]:
# Spearman correlation between pseudobulk profiles. DataFrame.corr works
# column-wise, so the table is transposed first to put one profile per column.
profiles_as_columns = corr_df.T
correlation_matrix_combined = profiles_as_columns.corr(method='spearman')

In [None]:
# Display the Spearman correlation matrix computed from the pseudobulk profiles.
correlation_matrix_combined


In [None]:
# Annotated heatmap of the pairwise Spearman correlations, drawn on an
# explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    correlation_matrix_combined,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    cbar_kws={'label': 'Spearman Correlation'},
    # Pass the labels explicitly so that every row/column is labelled.
    xticklabels=correlation_matrix_combined.columns,
    yticklabels=correlation_matrix_combined.index,
    ax=ax,
)
ax.set_title("Spearman Correlation Among Cell Types (Both Samples)")
ax.set_xlabel("Cell Types")
ax.set_ylabel("Cell Types")
fig.tight_layout()
plt.show()
