In [None]:
import os
import subprocess

# 1) Define your token, repo URL and local directory name.
# The token is read from the GIT_TOKEN environment variable so it never has to be
# pasted into (and saved with) the notebook. Leave it unset for anonymous access.
GIT_TOKEN = os.environ.get("GIT_TOKEN", "")
REPO_URL  = (
    f"https://{GIT_TOKEN}@github.com/semilleroCV/BreastCATT.git"
    if GIT_TOKEN
    else "https://github.com/semilleroCV/BreastCATT.git"
)
REPO_DIR  = "BreastCATT"


def _run_git(*args, cwd=None):
    """Run `git <args>` (inside `cwd` when given) and return git's exit status."""
    return subprocess.run(["git", *args], cwd=cwd).returncode


# 2) Clone or pull depending on whether the folder exists.
# Step 3 moves the kernel into the checkout, so when this cell is re-run the
# relative REPO_DIR no longer exists from the new working directory. Detect that
# case explicitly; otherwise a second copy would be cloned inside the repository.
inside_repo = os.path.basename(os.getcwd()) == REPO_DIR and os.path.isdir(".git")

if inside_repo or os.path.isdir(REPO_DIR):
    print(f"Directory '{REPO_DIR}' already exists. Pulling latest changes…")
    pull_cwd = None if inside_repo else REPO_DIR
    # If your origin remote wasn’t set with the token, you could uncomment:
    # _run_git("remote", "set-url", "origin", REPO_URL, cwd=pull_cwd)
    if _run_git("pull", cwd=pull_cwd) != 0:
        # The existing checkout is still usable, so a failed pull is only reported.
        print("Warning: 'git pull' failed; continuing with the existing checkout.")
else:
    print(f"Cloning repository into ./{REPO_DIR}…")
    if _run_git("clone", REPO_URL, REPO_DIR) != 0:
        # Do not echo the command here: REPO_URL may contain the access token.
        raise RuntimeError("'git clone' failed; see the git output above.")

# 3) Change into the repo directory (skipped when the kernel is already there)
if not inside_repo:
    os.chdir(REPO_DIR)
print("Current working directory:", os.getcwd())

In [59]:
!git fetch origin
!git checkout feat/medclip-probe
!git pull origin feat/medclip-probe

remote: Enumerating objects: 5, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (1/1), done.[K
remote: Total 3 (delta 2), reused 3 (delta 2), pack-reused 0 (from 0)[K
Unpacking objects: 100% (3/3), 335 bytes | 167.00 KiB/s, done.
From https://github.com/semilleroCV/BreastCATT
   8fdbaf2..52c7f61  feat/medclip-probe -> origin/feat/medclip-probe
Already on 'feat/medclip-probe'
Your branch is behind 'origin/feat/medclip-probe' by 1 commit, and can be fast-forwarded.
  (use "git pull" to update your local branch)
From https://github.com/semilleroCV/BreastCATT
 * branch            feat/medclip-probe -> FETCH_HEAD
Updating 8fdbaf2..52c7f61
Fast-forward
 train.py | 2 [32m+[m[31m-[m
 1 file changed, 1 insertion(+), 1 deletion(-)


In [None]:
# every time you do a factory reset you have to run this
!pip install -r requirements.txt -q

In [None]:
# Log in to the Hugging Face Hub to load data and models.
import os

from huggingface_hub import login

# The access token is read from the HF_TOKEN environment variable so it is never
# stored in the notebook. When the variable is unset or empty, None is passed and
# login() is expected to prompt for the token instead.
login(token=os.environ.get("HF_TOKEN") or None, add_to_git_credential=True)

In [None]:
import os
os.environ["WANDB_ENTITY"] = "ai-uis"

# Log in to Weights & Biases to track the experiments.
import wandb

# The API key is read from the WANDB_API_KEY environment variable so it is never
# stored in the notebook. When the variable is unset or empty, None is passed and
# wandb.login() is expected to prompt for the key instead.
wandb.login(key=os.environ.get("WANDB_API_KEY") or None, relogin=True)

In [None]:
from accelerate.utils import write_basic_config

# Configure accelerate
write_basic_config(mixed_precision="fp16")
!cat /root/.cache/huggingface/accelerate/default_config.yaml

In [None]:
# Run the `accelerate test` CLI command as a sanity check before launching training.
! accelerate test

## Running training script

In [None]:
import os
from huggingface_hub import hf_hub_download

# Folder that will hold the segmentation checkpoint (created if missing)
save_path = "/kaggle/working/BreastCATT/checkpoints/segmentation"
os.makedirs(save_path, exist_ok=True)

# Fetch the checkpoint file from the Hub into save_path
download_args = {
    "repo_id": "SemilleroCV/transunet-breast-cancer",
    "filename": "lucky-sweep-6_0.4937.pth",
    "local_dir": save_path,
}
checkpoint_path = hf_hub_download(**download_args)

### Configure sh file

In [63]:
import os

# Define the script content.
# - The text starts right after the opening quotes so that "#!/bin/bash" is the
#   very first line of the file; a shebang is only honoured on line 1.
# - A raw string keeps the trailing "\" line continuations of the training
#   command in the written file. In a regular string Python would silently drop
#   every backslash-newline pair and collapse the command onto one line.
script_content = r"""#!/bin/bash
set -e  # exit on error

########################################
# 1. Basic Configuration
########################################

VIT_VERSION="small"               # Options: small, base, large, huge
DATASET_NAME="SemilleroCV/DMR-IR"

NUM_TRAIN_EPOCHS=30
TRAIN_BATCH_SIZE=32
EVAL_BATCH_SIZE=32
GRADIENT_ACCUMULATION_STEPS=1

LEARNING_RATE=1e-4
WEIGHT_DECAY=1e-4
CHECKPOINTING_STEPS=1000

WITH_TRACKING="--with_tracking"
REPORT_TO="wandb"
WANDB_PROJECT="colcaci"
SEED=3407

########################################
# 2. Derived Variables Calculation
########################################

# Total number of training samples
TOTAL_TRAIN_SAMPLES=5671

# Effective batch size considering gradient accumulation
EFFECTIVE_BATCH_SIZE=$(( TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS ))

# Total training steps
MAX_TRAIN_STEPS=$(( TOTAL_TRAIN_SAMPLES * NUM_TRAIN_EPOCHS / EFFECTIVE_BATCH_SIZE ))

# Warm‑up steps = 5% of total training steps
NUM_WARMUP_STEPS=$(( MAX_TRAIN_STEPS * 5 / 100 ))

########################################
# 3. Dynamic Flags (Segmentation / Cross-Attention)
########################################

USE_SEGMENTATION=true
USE_CROSS_ATTN=true

SEG_FLAG=""
CROSS_ATTN_FLAG=""
SCRIPT_TAGS=""

if [ "$USE_SEGMENTATION" = true ]; then
  SEG_FLAG="--use_segmentation"
  SCRIPT_TAGS="${SCRIPT_TAGS}_seg"
fi

if [ "$USE_CROSS_ATTN" = true ]; then
  CROSS_ATTN_FLAG="--use_cross_attn"
  SCRIPT_TAGS="${SCRIPT_TAGS}_text"
fi

########################################
# 4. Directories and Script Naming
########################################

OUTPUT_DIR="fvit-${VIT_VERSION}${SCRIPT_TAGS}"
NEW_SCRIPT_NAME="${OUTPUT_DIR}_1x${EFFECTIVE_BATCH_SIZE}-${MAX_TRAIN_STEPS}.sh"

PUSH_TO_HUB="--push_to_hub"       # Use "--push_to_hub" if you want to push the model
HUB_MODEL_ID=""                   # e.g. "SemilleroCV/${OUTPUT_DIR}"
HUB_TOKEN=""                      # your Hugging Face token

########################################
# 5. Training Command
########################################

accelerate launch train.py \
    --vit_version "$VIT_VERSION" \
    --dataset_name "$DATASET_NAME" \
    --output_dir "$OUTPUT_DIR" \
    --num_train_epochs $NUM_TRAIN_EPOCHS \
    --per_device_train_batch_size $TRAIN_BATCH_SIZE \
    --per_device_eval_batch_size $EVAL_BATCH_SIZE \
    --checkpointing_steps $CHECKPOINTING_STEPS \
    --learning_rate $LEARNING_RATE \
    --weight_decay $WEIGHT_DECAY \
    --gradient_accumulation_steps $GRADIENT_ACCUMULATION_STEPS \
    --num_warmup_steps $NUM_WARMUP_STEPS \
    $WITH_TRACKING \
    --report_to $REPORT_TO \
    --wandb_project "$WANDB_PROJECT" \
    --seed $SEED \
    $SEG_FLAG \
    $CROSS_ATTN_FLAG \
    $PUSH_TO_HUB \
    ${HUB_MODEL_ID:+--hub_model_id "$HUB_MODEL_ID"} \
    ${HUB_TOKEN:+--hub_token "$HUB_TOKEN"}

########################################
# 6. Rename Script
########################################

mkdir -p scripts
mv "$0" "scripts/$NEW_SCRIPT_NAME"
"""

# Create the folder if it doesn't exist
os.makedirs("scripts", exist_ok=True)

# Write the script to a file inside the scripts folder.
# The encoding is explicit because the script text contains a non-ASCII character.
with open('scripts/script.sh', 'w', encoding='utf-8') as f:
    f.write(script_content)

# Make the script executable (adds the execute bits, like `chmod +x`)
os.chmod('scripts/script.sh', os.stat('scripts/script.sh').st_mode | 0o111)

# List files to confirm it's created
print("\n".join(sorted(os.listdir("scripts"))))

fvit-base_1x8-epochs1.sh     fvit-small_seg_text_1x16-epochs2.sh
fvit-large_1x16-epochs30.sh  fvit-small_seg_text_1x8-epochs2.sh
fvit-small_1x16-epochs30.sh  fvit-small_text_1x32-epochs60.sh
fvit-small_1x8-epochs2.sh    script.sh
fvit-small_1x8-epochs60.sh   vit-base_base_1x8-epochs10.sh


- Fixed the `zero_division` error.
- Partially fixed `ValueError: Default process group has not been initialized, please make sure to call init_process_group` by commenting out the line of code where the error occurred.

To-do
- Test `resume_from_checkpoint`.

In [61]:
# Recursively delete the fvit-small_text directory inside the repository checkout
# (presumably the output folder of an earlier training run).
# NOTE(review): the script defined in this notebook currently builds
# OUTPUT_DIR="fvit-small_seg_text" (both flags enabled), which is a different
# directory from the one removed here. Confirm this is still the intended target.
! rm -rf /kaggle/working/BreastCATT/fvit-small_text

In [None]:
!sh /kaggle/working/BreastCATT/scripts/script.sh

2025-04-19 01:03:23.564996: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745024603.591838    3393 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745024603.600212    3393 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-19 01:03:23.894316: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745024603.919735    3392 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745024603.927291    3392 cuda_blas.cc:1