In [None]:
import scanpy as sc
import numpy as np
import os
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.stats import entropy, itemfreq
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture as GMM
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.metrics import normalized_mutual_info_score as NMI
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Per-dataset configuration, keyed by a short alias:
#   name          -> stem used to build file and directory paths
#   batch_key     -> obs column used as the batch label
#   cell_type_key -> obs column used as the cell-type label
DATASETS = {
    "pancreas": dict(name="pancreas", batch_key="study", cell_type_key="cell_type"),
    "brain": dict(name="mouse_brain", batch_key="study", cell_type_key="cell_type"),
}

In [None]:
# Turn on scanpy's autosave option so plotting calls write their figures to
# `sc.settings.figdir` (per the scanpy settings documentation).
sc.settings.autosave = True

# Harmony

In [None]:
# For every dataset, compute a PCA embedding and export it together with the
# batch column and the full obs table as CSV files under ./data/<name>/.
for data in ["brain", "pancreas"]:
    data_dict = DATASETS[data]
    data_name, batch_key, cell_type_key = (
        data_dict[field] for field in ("name", "batch_key", "cell_type_key")
    )

    adata = sc.read(f"./data/{data_name}_normalized.h5ad")

    # Mirror the cell-type annotation under the fixed column name 'cell_types'.
    adata.obs['cell_types'] = adata.obs[cell_type_key]

    # Make sure the per-dataset Harmony results directory exists.
    os.makedirs(f"./results/Harmony/{data_name}/", exist_ok=True)

    # 20-component PCA; the embedding is read back from adata.obsm['X_pca'] below.
    sc.tl.pca(adata, svd_solver="arpack", n_comps=20)

    export_dir = f"./data/{data_name}/"
    os.makedirs(export_dir, exist_ok=True)

    pca_embedding = pd.DataFrame(adata.obsm['X_pca'])
    pca_embedding.to_csv(export_dir + "X.csv")
    adata.obs[batch_key].to_frame().to_csv(export_dir + "Batch.csv")
    adata.obs.to_csv(export_dir + "Obs.csv")
            
            
            


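### Now run the `harmony-FI.R` script, then continue

The next cell expects the script to have written `./results/Harmony/<dataset>/harmonyCorrected.csv` for each dataset.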

<br>

In [None]:
# For every dataset, wrap the Harmony-corrected matrix and the exported obs
# table in an AnnData object, compute neighbors + UMAP, plot it coloured by
# batch and cell type, and write the object to the results directory.
for data in ["brain", "pancreas"]:
    data_dict = DATASETS[data]
    data_name, batch_key, cell_type_key = (
        data_dict[field] for field in ("name", "batch_key", "cell_type_key")
    )

    # NOTE(review): `adata` is not referenced again inside this cell; confirm
    # whether a later cell relies on it before dropping this load.
    adata = sc.read(f"./data/{data_name}_normalized.h5ad")
    adata.obs['cell_types'] = adata.obs[cell_type_key]

    results_dir = f"./results/Harmony/{data_name}/"

    obs = pd.read_csv(f'./data/{data_name}/Obs.csv', index_col=0)
    x = pd.read_csv(results_dir + "harmonyCorrected.csv", index_col=0)
    # Relabel the corrected rows with the obs index (raises if the lengths differ).
    x.index = obs.index

    final_adata = sc.AnnData(X=x, obs=obs)

    print("after")
    sc.pp.neighbors(final_adata)
    sc.tl.umap(final_adata)

    # Point scanpy's figure directory at this dataset before plotting.
    sc.settings.figdir = results_dir + "after"
    sc.pl.umap(final_adata, color=[batch_key, cell_type_key], wspace=0.5)

    final_adata.write(results_dir + "result_adata.h5ad")