# Run ECLARE on sample paired data

Set the environment variables (as src/eclare/\_\_init\_\_.py does) by running config/export_env_variables.sh with config/config.yaml

In [20]:
#!source config/export_env_variables.sh config/config.yaml
import os
# Ensure that the environment variables set by the script are accessible in this notebook
import subprocess
import sys

# Run the export_env_variables.sh script and capture the output.
# NOTE: the script path is relative, so this cell only works while the
# kernel's working directory is the one that contains config/.
result = subprocess.run(['bash', '-c', 'config/export_env_variables.sh config/config.yaml'], capture_output=True, text=True)

# Surface script failures instead of silently printing "None" below.
# A warning (not an exception) is used because a shell script may exit
# non-zero even though it printed every variable.
if result.returncode != 0:
    print(
        f"WARNING: export_env_variables.sh exited with status {result.returncode}:\n{result.stderr}",
        file=sys.stderr,
    )

# Parse the output and set the environment variables in the current Python environment.
# The first two output lines are skipped, as before (presumably a header -- confirm
# against the script). Lines that are not of the form KEY=VALUE are ignored
# rather than raising ValueError on unpacking.
for line in result.stdout.splitlines()[2:]:
    key, sep, value = line.partition('=')
    if not sep or not key:
        continue
    os.environ[key] = value

# Verify that the environment variables are set
print("ECLARE_ROOT:", os.environ.get("ECLARE_ROOT"))
print("OUTPATH:", os.environ.get("OUTPATH"))
print("DATAPATH:", os.environ.get("DATAPATH"))


ECLARE_ROOT: /home/mcb/users/dmannk/scMultiCLIP/ECLARE
OUTPATH: /home/mcb/users/dmannk/scMultiCLIP/outputs
DATAPATH: /home/mcb/users/dmannk/scMultiCLIP/data


In [21]:
# Remember the original DATAPATH (it is needed again once DATAPATH is
# overwritten) and make it the working directory.
DATAPATH_TMP = os.environ["DATAPATH"]
os.chdir(DATAPATH_TMP)

Download sample data from Zenodo (only run once)

In [None]:
# The commands below are wrapped in a string literal, so this cell is a no-op
# as written. Remove the surrounding ''' to download the data (only needed once).
# The URL is quoted because it contains "?", which the shell would otherwise
# treat as a glob character.
'''
# Download the data from the DOI link
!wget "https://zenodo.org/records/14799100/files/eclare_sample_zenodo.zip?download=1" -O eclare_data.zip

# Unzip the downloaded data
!unzip eclare_data.zip -d eclare_data
!unzip eclare_data/eclare_sample_zenodo.zip  # takes about 15 minutes @ 5.67 Mb/s
'''

Overwrite the DATAPATH environment variable with the path of the downloaded data


In [22]:
# Redirect DATAPATH to the extracted sample data, located under the original
# data directory saved in DATAPATH_TMP.
# generally, os.environ["DATAPATH"] = os.path.join("/path/to/sample/data", "eclare_sample_zenodo")
os.environ.update(
    DATAPATH=os.path.join(DATAPATH_TMP, "eclare_data", "eclare_sample_zenodo")
)

print("DATAPATH: ", os.environ["DATAPATH"])

DATAPATH:  /home/mcb/users/dmannk/scMultiCLIP/data/eclare_data/eclare_sample_zenodo


### Step 1: train CLIP teacher models

In [4]:
# Go to ECLARE_ROOT: make it the kernel's working directory before the
# training script is launched.
os.chdir(os.environ["ECLARE_ROOT"])

In [None]:
# Run clip_run.py
# NOTE(review): the command below actually invokes clip_samples.sh; presumably
# that script wraps clip_run.py -- confirm.
# N_EPOCHS is set in os.environ so that the shell command launched by `!`
# can read it as $N_EPOCHS.
os.environ['N_EPOCHS'] = '1'
!${ECLARE_ROOT}/scripts/clip_scripts/clip_samples.sh $N_EPOCHS

### Step 2: perform multi-teacher distillation (ECLARE)

In [8]:
# Go to ECLARE_ROOT (in case not already there)
os.chdir(os.environ["ECLARE_ROOT"])

Identify the job ID of the CLIP teacher models. It is shown in the first line of output from clip_samples.sh, e.g.:<br>

Job ID: clip_03173230


You can also run the code below to identify the most recent "clip_" directory in OUTPATH:

In [23]:
# Get most recent directory in OUTPATH that starts with "clip_"
from glob import glob

# Keep directories only: the glob pattern also matches regular files named
# clip_* (logs, archives), which could otherwise be picked as the newest entry.
clip_dirs = [
    path
    for path in glob(os.path.join(os.environ["OUTPATH"], "clip_*"))
    if os.path.isdir(path)
]
if clip_dirs:
    # "Most recent" means the largest modification time.
    latest_clip_dir = max(clip_dirs, key=os.path.getmtime)
    clip_job_id = os.path.basename(latest_clip_dir)
    print(f"Most recent CLIP job directory, assigned to clip_job_id: {clip_job_id}")
else:
    # clip_job_id is left unassigned by this cell in this case.
    print("No CLIP job directories found in OUTPATH")


Most recent CLIP job directory, assigned to clip_job_id: clip_03173230


In [27]:
# Run eclare_run.py
# NOTE(review): the command below actually invokes eclare_samples.sh; presumably
# that script wraps eclare_run.py -- confirm.
# Requires clip_job_id to be defined already (e.g. "clip_03173230").
# Both values are set in os.environ so that the shell command launched by `!`
# can read them as $N_EPOCHS and $CLIP_JOB_ID.
os.environ['N_EPOCHS'] = '1'
os.environ['CLIP_JOB_ID'] = clip_job_id.split('_')[1]  # only keep digits (the token after the first underscore, e.g. "03173230")
!${ECLARE_ROOT}/scripts/eclare_scripts/eclare_samples.sh $N_EPOCHS $CLIP_JOB_ID

Job ID: eclare_04165045
Total epochs: 1
CLIP job ID: 03173230
=== Target dataset: PFC_Zhu ===
=== Random state: 17772 ===
  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
CUDA available
Allocated CPUs: 1
Extracting data
model checkpoint
Number of peaks and genes remaining: 55284 peaks & 6816 genes
model checkpoint
/home/mcb/users/dmannk/scMultiCLIP/outputs/clip_03173230/PFC_Zhu/DLPFC_Ma/0/model.pt
model checkpoint
DLPFC_Ma
Number of peaks and genes remaining: 57263 peaks & 6920 genes
/home/mcb/users/dmannk/scMultiCLIP/outputs/clip_03173230/PFC_Zhu/DLPFC_Anderson/0/model.pt
model checkpoint
DLPFC_Anderson
Number of peaks and genes remaining: 50937 peaks & 7189 genes
Iterating over epochs, batches & datasets
Unable to initialize backend 'cuda': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
Unable to initialize backend 'tpu': INVALID_ARGUMENT: TpuPlatfo