In [None]:
!pip install git+https://github.com/reutd/ABC.git
!pip install matplotlib==3.6

In [None]:
from ABC import ABC
import os
from pathlib import Path
import scanpy as sc
import pandas as pd
import time
import sys


# When running on Colab, add the Drive modules directory to sys.path so the saved modules can be imported
sys.path.append('/content/drive/MyDrive/modules/')
from datasets_dict import datasets

# path to the original datasets (after subset to 3000 highly variable genes)
base_path = '/content/drive/MyDrive/Colab Notebooks/integrationDatasets/'

# Output directory for the integrated datasets and the timing report.
# It does not depend on the dataset, so it is created once before the loop;
# this also keeps `outPath` defined for the CSV export at the end of the cell
# when the list of datasets is empty.
outPath = os.path.join(base_path, 'integratedDatasets', 'ABC')
Path(outPath).mkdir(parents=True, exist_ok=True)

# Datasets to integrate. Use `list(datasets)` to process every dataset
# declared in `datasets_dict`.
dataset_names = ['small_atac_windows']

# dataset name -> integration duration in seconds
execution_times = {}

for dataset_name in dataset_names:

    # get dataset parameters
    dataset_params = datasets[dataset_name]
    label_key = dataset_params['label_key']
    batch_key = dataset_params['batch_key']
    # not used by the integration below; kept so the names remain available
    # to other cells of the notebook
    ATAC = dataset_params['ATAC']
    log_orig = dataset_params['log']

    # read the original dataset
    inPath = os.path.join(base_path, f"{dataset_name}_hvg.h5ad")
    adata = sc.read(inPath)

    # integrate the dataset; perf_counter() is the clock intended for measuring
    # elapsed durations, unlike time.time() which can jump if the system clock
    # is adjusted during the run
    start_time = time.perf_counter()
    model = ABC(adata, batch_key, label_key)
    integrated = model.batch_correction()
    end_time = time.perf_counter()

    # save integration duration (seconds)
    elapsed_time = end_time - start_time
    minutes, seconds = divmod(elapsed_time, 60)
    execution_times[dataset_name] = elapsed_time

    print("Integrated: ", dataset_name)
    # divmod on a float returns floats: show whole minutes and rounded seconds
    print(f"Duration: {int(minutes)} minutes and {seconds:.2f} seconds")

    # write integrated data
    sc.write(os.path.join(outPath, f"{dataset_name}_integrated.h5ad"), integrated)
    print("Integrated data saved")


# write execution times data (one row per dataset integrated in this run)
df = pd.DataFrame(list(execution_times.items()), columns=['Dataset', 'Execution Time'])
df.to_csv(os.path.join(outPath, 'execution_times.csv'), index=False)



In [None]:
# Mount Google Drive into the Colab filesystem. The integration cell reads its
# modules and datasets from under this mount point, so on a fresh runtime this
# cell has to be run before it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)