In [None]:
import os
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
import scanpy as sc
import anndata
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Load variables from a local .env file (if present) into the process
# environment so that os.getenv below can see them.
load_dotenv()

# Hard-coded root of the meiotic-cell data.
# NOTE(review): this Python constant shares its name with the DATA_PATH
# environment variable read below, but the two are independent.
DATA_PATH = Path('/mnt/windows/extradata/')

# Root of the Garcia data, taken from the DATA_PATH environment variable.
# Fail early with an explicit message: Path(None) would raise an opaque
# TypeError, and an empty value would silently resolve relative to the CWD.
_garcia_root = os.getenv("DATA_PATH")
if not _garcia_root:
    raise RuntimeError(
        "Environment variable DATA_PATH is not set (or is empty); "
        "define it in the environment or in a .env file."
    )
DATA_PATH_2 = Path(_garcia_root) / "garcia_ATAC"

In [None]:
# Locations of the preprocessed meiotic expression matrix and of the
# per-cell annotation table.
meiotic_h5ad_path = DATA_PATH / 'meiotic_cells/meiotic_merged/preprocessed/merged_preprocessed_data.h5ad'
meiotic_csv_path = DATA_PATH / 'meiotic_cells/meiotic_merged/preprocessed/cell_data.csv'

meiotic_cells = sc.read_h5ad(meiotic_h5ad_path)
# The first CSV column becomes the index.
meiotic_cell_types = pd.read_csv(meiotic_csv_path, index_col=0)

# Attach the cell-type label to every cell. pandas aligns the Series on the
# obs index, so cells absent from the CSV end up with NaN.
# NOTE(review): assumes the CSV index holds the same barcodes as
# meiotic_cells.obs_names -- confirm.
meiotic_cells.obs['celltype'] = meiotic_cell_types['celltype']

In [None]:
# Read the Garcia combined-samples AnnData file.
garcia_h5ad_path = DATA_PATH_2 / 'combined_samples.h5ad'
garcia_cells = sc.read_h5ad(garcia_h5ad_path)

# De-duplicate observation names in place.
# NOTE(review): presumably barcodes repeat across the combined samples -- confirm.
garcia_cells.obs_names_make_unique()


In [None]:
# Restrict both datasets to the genes they share so that they can be
# concatenated on a common feature space.
common_genes = meiotic_cells.var_names.intersection(garcia_cells.var_names)
if len(common_genes) == 0:
    raise ValueError(
        "No genes are shared between meiotic_cells and garcia_cells; "
        "check that both use the same gene identifiers."
    )

# Slicing an AnnData returns a view. The following cells modify these objects
# in place (normalisation, new obs columns), so materialise real copies here
# instead of relying on implicit view-to-actual conversion and its warnings.
garcia_cells = garcia_cells[:, common_genes].copy()
meiotic_cells = meiotic_cells[:, common_genes].copy()

In [None]:
# Scale every Garcia cell to a total of 1e4, then apply log1p.
# Only garcia_cells is transformed in this cell.
# NOTE(review): presumably the "preprocessed" meiotic data is already
# normalised the same way -- confirm.
# Both calls modify garcia_cells in place, so re-running this cell without
# reloading the data applies the transformation a second time.
sc.pp.normalize_total(
    garcia_cells,
    target_sum=1e4,
)
sc.pp.log1p(garcia_cells)

In [None]:
# Tag every cell with its dataset of origin, then stack the two datasets
# along the observation axis (Garcia cells first, meiotic cells second).
for source_adata, source_label in (
    (garcia_cells, "garcia_RNA"),
    (meiotic_cells, "our_meiotic_cells"),
):
    source_adata.obs["dataset"] = source_label

# NOTE(review): anndata.concat defaults to join="inner"; obs columns that are
# not present in both inputs may be dropped -- confirm that 'celltype'
# survives in combined.obs, since the plotting cell colours by it.
combined = anndata.concat([garcia_cells, meiotic_cells])

In [None]:
# Compute the PCA on the combined matrix explicitly. Without this,
# sc.pp.neighbors either reuses whatever 'X_pca' was carried over from the
# inputs by the concatenation (computed per dataset, not jointly) or falls
# back to an implicit default PCA with a warning.
sc.pp.pca(combined)

# Build the neighbour graph on the joint PCA, then embed it with UMAP.
sc.pp.neighbors(combined, use_rep="X_pca")
sc.tl.umap(combined)

In [None]:
# Joint UMAP, two panels side by side: the left one coloured by dataset of
# origin, the right one by annotated cell type.
# NOTE(review): colouring by "celltype" requires that column to exist in
# combined.obs -- confirm it is kept by the concatenation.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
sc.pl.umap(combined, color="dataset", ax=ax1, show=False, title='Datasets')
sc.pl.umap(combined, color="celltype", ax=ax2, show=False, title='Cell Types')
fig.tight_layout()

# Create the output directory first so that saving does not fail with
# FileNotFoundError when it does not exist yet.
umap_png_path = Path('/mnt/storage/outputs/garcia_ATAC/outputs/rna_joint_UMAP.png')
umap_png_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(umap_png_path, format='png')
plt.show()