# LabTOP Training on Google Colab TPU

**IMPORTANT: Before running, select Runtime → Change runtime type → TPU v2.**


## Step 1: Mount Google Drive & Setup Directories


In [None]:
from google.colab import drive
drive.mount('/content/drive')

!mkdir -p /content/drive/MyDrive/mimiciv/icu
!mkdir -p /content/drive/MyDrive/mimiciv/hosp


## Step 2: Download MIMIC-IV Data (Skip if already downloaded)


In [None]:
import getpass
import os
import shutil

print("Logging into PhysioNet")

# SECURITY: credentials must never be stored in the notebook. They are read
# from the PHYSIONET_USER / PHYSIONET_PASSWORD environment variables when set,
# otherwise prompted for (the password prompt does not echo).
# Any password that was previously committed in this cell must be treated as
# compromised and rotated on PhysioNet.
user = os.environ.get("PHYSIONET_USER") or input("PhysioNet username: ").strip()
password = os.environ.get("PHYSIONET_PASSWORD") or getpass.getpass("PhysioNet password: ")

if not user or not password:
    raise ValueError("PhysioNet username and password are both required.")
# The netrc entries below are written as whitespace-separated tokens, so a
# value containing whitespace would produce a corrupt entry.
if any(ch.isspace() for ch in user + password):
    raise ValueError("PhysioNet credentials must not contain whitespace.")

# Create ~/.netrc for wget authentication.
# The file is created with mode 0600 from the start so the password is never
# readable by other users, not even briefly.
netrc_path = "/root/.netrc"
netrc_fd = os.open(netrc_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(netrc_fd, "w") as f:
    f.write(f"machine physionet.org login {user} password {password}\n")
    f.write(f"machine content.physionet.org login {user} password {password}\n")
# The mode passed to os.open only applies to newly created files; tighten a
# pre-existing file as well.
os.chmod(netrc_path, 0o600)
print("✅ Authentication configured.")

# Files to download, as {file name: URL}. Every URL has the same shape:
# <release root>/<module>/<file name>, where module is "icu" or "hosp".
_MIMIC_RELEASE_URL = "https://physionet.org/files/mimiciv/2.2"


def _urls_for(module, filenames):
    """Map each file name to its download URL inside the given module folder."""
    return {fname: f"{_MIMIC_RELEASE_URL}/{module}/{fname}" for fname in filenames}


icu_files = _urls_for(
    "icu",
    [
        "icustays.csv.gz",
        "inputevents.csv.gz",
        "procedureevents.csv.gz",
        "outputevents.csv.gz",
        "d_items.csv.gz",
    ],
)

hosp_files = _urls_for(
    "hosp",
    [
        "admissions.csv.gz",
        "patients.csv.gz",
        "labevents.csv.gz",
        "d_labitems.csv.gz",
    ],
)

# Local download dirs (fast): files are fetched into these two folders first.
local_icu = "/content/mimiciv/icu"
local_hosp = "/content/mimiciv/hosp"
for _local_dir in (local_icu, local_hosp):
    # exist_ok=True keeps the cell re-runnable.
    os.makedirs(_local_dir, exist_ok=True)
# The status emoji was stored as mis-decoded bytes; restored to the intended character.
print("📁 Local download folders prepared.")

# Download helper
def download_files(file_dict, out_dir):
    """Download every entry of ``file_dict`` into ``out_dir`` with wget.

    Args:
        file_dict: Mapping of output file name -> source URL.
        out_dir: Directory that receives the files; it must already exist.

    Raises:
        subprocess.CalledProcessError: If wget exits with a non-zero status
            for any file. The remaining files are not attempted.
        FileNotFoundError: If the ``wget`` executable is not available.
    """
    # Function-scope import so the helper does not depend on another cell.
    import subprocess

    for name, url in file_dict.items():
        print(f"\n⬇️ Downloading {name} ...")
        target = f"{out_dir}/{name}"
        # An argument list (no shell) means the path and URL are passed to wget
        # verbatim instead of being re-parsed by a shell.
        # -c resumes a partial file; -O sets the output file name.
        # check=True turns a failed download into an exception; previously the
        # exit status was ignored and "Completed" was printed regardless.
        subprocess.run(
            ["wget", "--progress=bar:force", "-c", "-O", target, url],
            check=True,
        )
        print(f"   ✔ Completed: {name}")

print("\n=== 🏥 Downloading ICU files ===")
download_files(icu_files, local_icu)

print("\n=== 🧬 Downloading HOSP files ===")
download_files(hosp_files, local_hosp)

# Verify the result before announcing success or copying anything to Drive:
# every expected file must exist and be non-empty. A rejected request (for
# example, bad credentials) can leave a zero-byte file behind.
_expected_paths = [
    f"{folder}/{name}"
    for files, folder in ((icu_files, local_icu), (hosp_files, local_hosp))
    for name in files
]
_incomplete = [
    path
    for path in _expected_paths
    if not os.path.isfile(path) or os.path.getsize(path) == 0
]
if _incomplete:
    raise RuntimeError(
        "Download failed or produced empty files:\n  " + "\n  ".join(_incomplete)
    )

print("\n🎉 All downloads completed successfully!")

# Copy to Google Drive (the local copies are kept).
drive_root = "/content/drive/MyDrive/mimiciv"
drive_icu  = f"{drive_root}/icu"
drive_hosp = f"{drive_root}/hosp"
os.makedirs(drive_icu, exist_ok=True)
os.makedirs(drive_hosp, exist_ok=True)

print("\n📦 Copying results to Google Drive... (this may take 1–3 minutes)")
# dirs_exist_ok=True lets the copy merge into the existing Drive folders.
shutil.copytree(local_icu, drive_icu, dirs_exist_ok=True)
shutil.copytree(local_hosp, drive_hosp, dirs_exist_ok=True)

print("\n✅ Files copied to Google Drive at:")
# Printed from the variables so the message cannot drift from the real targets.
print(f"   {drive_icu}")
print(f"   {drive_hosp}")


## Step 3: Clone Repository


In [None]:
# Work from /content so the checkout always lands at /content/labtop-reproduction.
%cd /content
# Delete any previous checkout so that re-running this cell yields a fresh clone.
!rm -rf labtop-reproduction
# NOTE(review): this clones the default branch at its current HEAD; pin a tag or
# commit if the run needs to be exactly reproducible.
!git clone https://github.com/kiotov2/labtop-reproduction.git
# Make the repository root the working directory.
%cd labtop-reproduction


## Step 4: Install PyTorch XLA for TPU Support


In [None]:
# Install PyTorch XLA for TPU support
!pip install torch~=2.5.0 torch_xla[tpu]~=2.5.0 -f https://storage.googleapis.com/libtpu-releases/index.html

# Install other dependencies
!pip install accelerate transformers hydra-core omegaconf pandas numpy scipy scikit-learn tqdm datasets tokenizers safetensors huggingface-hub


## Step 5: Verify TPU is Available


In [None]:
import torch
import torch_xla
import torch_xla.core.xla_model as xm

# Runtime helpers; imported here so this check does not rely on another cell.
import torch_xla.runtime as xr

# Check TPU availability: acquiring the XLA device fails if no TPU is attached.
device = xm.xla_device()
print(f"TPU Device: {device}")
# Report the number of TPU devices visible to the runtime. The previous call,
# xm.xrt_world_size(), returns the number of participating *processes* (1 in a
# single-process notebook), not the core count, and is deprecated in recent
# torch_xla releases.
print(f"Number of TPU cores: {xr.global_runtime_device_count()}")

# Test tensor on TPU: create on the host, then move it to the XLA device.
test_tensor = torch.randn(3, 3).to(device)
print(f"Test tensor created on TPU: {test_tensor.device}")
print("✅ TPU is ready!")


## Step 6: Slice MIMIC-IV Data


In [None]:
# Run from the repository root: the script path and --dest below are relative to it.
%cd /content/labtop-reproduction

# Run the repository's slicing script, reading the MIMIC-IV copy on Drive and
# writing its output to ./data_small.
# NOTE(review): --n_stays 200 presumably limits the subset to 200 ICU stays;
# confirm against scripts/slice_mimic.py.
!python scripts/slice_mimic.py \
    --source /content/drive/MyDrive/mimiciv \
    --dest ./data_small \
    --n_stays 200


## Step 7: Create TPU-Optimized Configs


In [None]:
import os
# Resolve the config locations from the repository root rather than from the
# kernel's current directory. Later cells %cd into labtop/, so a relative path
# would write these files to labtop/labtop/src/config if this cell were re-run
# after them.
REPO_ROOT = "/content/labtop-reproduction"
CONFIG_ROOT = os.path.join(REPO_ROOT, "labtop", "src", "config")
DATA_CONFIG_PATH = os.path.join(CONFIG_ROOT, "data", "mimiciv_small.yaml")
TRAIN_CONFIG_PATH = os.path.join(CONFIG_ROOT, "train", "train_small_tpu.yaml")

os.makedirs(os.path.dirname(DATA_CONFIG_PATH), exist_ok=True)
os.makedirs(os.path.dirname(TRAIN_CONFIG_PATH), exist_ok=True)

# Data config: extends the "mimiciv" defaults and points raw_data_path at the
# sliced dataset directory.
with open(DATA_CONFIG_PATH, "w") as f:
    f.write("""defaults:
  - mimiciv

raw_data_path: /content/labtop-reproduction/data_small
min_los: 1
debug_table_sample_ratio: 1.0
""")

# Train config - TPU optimized
# TPUs work best with batch sizes divisible by 8 (per core)
# With 8 TPU cores, effective batch size = batch_size * gradient_accumulation_steps * 8
# NOTE(review): the 256 figure assumes training really runs on all 8 cores;
# confirm how train.py is launched.
with open(TRAIN_CONFIG_PATH, "w") as f:
    f.write("""defaults:
  - train_base

epochs: 2
batch_size: 8        # Per-core batch size (8 cores * 8 = 64 total)
gradient_accumulation_steps: 4  # Effective batch size = 256
use_wandb: false
patience: 1
max_seq_len: 512
lr: 1e-4
""")

print("✅ Configs created for TPU training")
print("   - Batch size: 8 per core")
print("   - Gradient accumulation: 4 steps")
print("   - Effective batch size: ~256 (with 8 TPU cores)")


## Step 8: Preprocess Data


In [None]:
# Run from the labtop/ package directory: the script path below is relative to it.
%cd /content/labtop-reproduction/labtop

# Preprocess using the "mimiciv_small" data config written in Step 7.
# NOTE(review): the key=value arguments look like Hydra overrides (hydra-core is
# installed in Step 4); confirm in src/scripts/preprocess.py.
!python src/scripts/preprocess.py \
    data=mimiciv_small \
    max_seq_len=512


## Step 9: Train on TPU 🚀


In [None]:
# Run from the labtop/ package directory: the script path below is relative to it.
%cd /content/labtop-reproduction/labtop

# Train with TPU - per the original note, Accelerate is expected to detect the TPU automatically.
# NOTE(review): the script is started with plain `python`; whether that uses all
# TPU cores or only one depends on train.py, which is not visible here. Confirm,
# and consider a multi-process launcher if only one core is used.
# Uses the data and train configs written in Step 7.
!python src/scripts/train.py \
    data=mimiciv_small \
    train=train_small_tpu \
    max_seq_len=512


## Step 10: Evaluate


In [None]:
# Run from the labtop/ package directory: the script path below is relative to it.
%cd /content/labtop-reproduction/labtop

# Evaluate with the same data config, train config and max_seq_len as the
# training cell, so both steps refer to the same setup.
!python src/scripts/evaluate.py \
    data=mimiciv_small \
    train=train_small_tpu \
    max_seq_len=512


## Monitor TPU Utilization (Optional)


In [None]:
import torch_xla.debug.metrics as met

# Build the XLA debug metrics report as text, then display it.
# NOTE(review): training is launched with `!python` in its own process, so this
# report presumably covers only XLA work done inside the notebook kernel - confirm.
tpu_metrics_report = met.metrics_report()
print(tpu_metrics_report)
