In [None]:
# Environment setup for the notebook: scanpy and matplotlib are pinned to
# fixed versions, tensorflow is upgraded to the latest release.
!pip install scanpy==1.9.1
!pip install matplotlib==3.6
!pip install --upgrade tensorflow
# The PyPI distribution is named `scikit-learn`; the `sklearn` alias is
# deprecated and can no longer be installed.
!pip install numpy pandas scikit-learn matplotlib
# AutoClass is imported from this checkout (AutoClass.AutoClass.AutoClass).
!git clone https://github.com/datapplab/AutoClass

In [None]:
import os
from pathlib import Path
import scanpy as sc
import time
import sys
from AutoClass.AutoClass.AutoClass import AutoClassImpute, take_norm
import pandas as pd


# When using colab, set the path to the modules directory to use saved modules
sys.path.append('/content/drive/MyDrive/modules/')
from datasets_dict import datasets

# path to the original dataset (after subset to 3000 highly variable genes)
base_path = '/content/drive/MyDrive/Colab Notebooks/integrationDatasets/'

# Output directory shared by every dataset. It is set up before the loop
# because the execution-times CSV written after the loop needs it as well,
# even when no dataset ends up being processed.
outPath = os.path.join(base_path, 'integratedDatasets', 'AutoClass')

# create the output directory if it does not exist
Path(outPath).mkdir(parents=True, exist_ok=True)

# elapsed seconds of the AutoClass step, keyed by dataset name
execution_times = {}


# To process every registered dataset, iterate over `datasets.keys()` instead
# of the explicit list below.
for dataset_name in ['small_atac_windows']:

  # get dataset parameters
  label_key = datasets[dataset_name]['label_key']
  # batch_key is looked up but not used by the AutoClass call in this cell
  batch_key = datasets[dataset_name]['batch_key']

  # path of the input file for this dataset
  inPath = os.path.join(base_path, f"{dataset_name}_hvg.h5ad")

  # read the original dataset
  adata = sc.read(inPath)
  cell_type_labels = adata.obs[label_key]

  # integrate the dataset; perf_counter is a monotonic clock, so the measured
  # duration cannot be skewed by system clock adjustments
  # NOTE(review): adata.X may be a sparse matrix depending on how the h5ad was
  # written -- confirm AutoClassImpute accepts that input type.
  start_time = time.perf_counter()
  res = AutoClassImpute(adata.X, cellwise_norm=False, log1p=False,
                        truelabel=cell_type_labels)
  # replace the expression matrix with the 'imp' entry of the AutoClass result
  adata.X = res['imp']
  end_time = time.perf_counter()

  # save integration duration time (raw seconds go to the CSV)
  elapsed_time = end_time - start_time
  minutes, seconds = divmod(elapsed_time, 60)
  execution_times[dataset_name] = elapsed_time

  print("Integrated: ", dataset_name)
  # divmod on a float yields floats; show whole minutes and rounded seconds
  print(f"Duration: {int(minutes)} minutes and {seconds:.2f} seconds")

  # write integrated data
  sc.write(os.path.join(outPath, f"{dataset_name}_integrated.h5ad"), adata)
  print("Integrated data saved")

# write execution times data (one row per processed dataset)
df = pd.DataFrame(list(execution_times.items()), columns=['Dataset', 'Execution Time'])
df.to_csv(os.path.join(outPath, 'execution_times.csv'), index=False)
