In [19]:
import os
import sys
# Make modules in the notebook's working directory and in the sibling ``src``
# directory importable. Each entry is appended only when it is not already on
# sys.path, so re-running this cell does not pile up duplicate entries.
for _extra_path in (os.getcwd(), os.path.join(os.getcwd(), '..', 'src')):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)


In [20]:
import logging
import multiprocessing
import pickle
from concurrent.futures import ThreadPoolExecutor

import wandb


In [21]:
# Root logger setup: records at INFO and above go to the notebook output
# (stdout) and to the file ``download.log`` in the current working directory.
_log_handlers = [logging.StreamHandler(sys.stdout), logging.FileHandler('download.log')]
logging.basicConfig(
    handlers=_log_handlers,
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Select the 'spawn' start method for multiprocessing in this session;
# force=True overrides a start method that has already been set.
multiprocessing.set_start_method("spawn", force=True)

In [22]:
# --- Weights & Biases source: which runs to fetch ---
project_name = "oho_exps"
entity = "wlp9800-new-york-university"
group_name = "fixed_weight_seed-1_cf21e420f71a4529bed03b4c48fda84c"  # run group to filter on

# --- Local download settings ---
download_dir = "/scratch/downloaded_artifacts"  # root folder for downloaded artifacts
max_download_workers = 20  # number of threads used for parallel downloads

In [23]:
# Create the download root (and any missing parent directories) up front.
# exist_ok=True means no error is raised when the directory already exists,
# so this cell can be re-run safely.
os.makedirs(download_dir, exist_ok=True)

In [24]:
# Function to download a single run's artifact
def download_artifact(run_data):
    """Download the ``logs_<run_id>:v0`` artifact of one W&B run.

    The artifact is looked up as ``{entity}/{project_name}/logs_{run_id}:v0``
    and downloaded into ``<download_dir>/<artifact.name>``.

    Args:
        run_data: Mapping with the keys ``"id"`` (the run id used to build the
            artifact reference) and ``"config"`` (returned unchanged in the
            result).

    Returns:
        A dict with the keys ``"run_id"``, ``"artifact_dir"``, ``"config"``
        and ``"status"``. On success ``"status"`` is ``"success"`` and
        ``"artifact_dir"`` is the download directory. On failure
        ``"artifact_dir"`` is ``None`` and ``"status"`` is
        ``"error: <exception message>"``.

    Raises:
        KeyError: If ``run_data`` has no ``"id"`` or ``"config"`` key; these
            are read before the ``try`` block. Any exception raised while
            looking up or downloading the artifact is caught, logged and
            reported through ``"status"`` instead of being raised.
    """
    run_id = run_data["id"]
    config = run_data["config"]
    try:
        # Each call builds its own Api client rather than reusing a shared one.
        api = wandb.Api()
        artifact = api.artifact(f'{entity}/{project_name}/logs_{run_id}:v0')
        artifact_dir = os.path.join(download_dir, artifact.name)
        artifact.download(root=artifact_dir)
        logger.info(f"Downloaded {artifact.name} to {artifact_dir}")
        return {
            "run_id": run_id,
            "artifact_dir": artifact_dir,
            "config": config,
            "status": "success"
        }
    except Exception as e:
        # Best-effort by design: one failed run must not abort the whole batch.
        # logger.exception logs at ERROR level like logger.error, but also
        # records the traceback so the failure can be diagnosed from the log.
        logger.exception(f"Error downloading artifact for run {run_id}: {e}")
        return {
            "run_id": run_id,
            "artifact_dir": None,
            "config": config,
            "status": f"error: {e}"
        }

In [25]:
# Query the W&B project for every run that belongs to the configured group.
api = wandb.Api()
runs = api.runs(path=f"{entity}/{project_name}", filters={"group": group_name})

# Keep only the id and config of each run; that is all the download step reads.
run_data = []
for run in runs:
    run_data.append({"id": run.id, "config": run.config})
logger.info(f"Found {len(run_data)} runs to download")


2025-04-20 15:04:03,424 - INFO - Found 40 runs to download


In [26]:
# Run the downloads on a bounded thread pool. map() yields the results in the
# same order as run_data, and leaving the with-block shuts the pool down.
with ThreadPoolExecutor(max_workers=max_download_workers) as executor:
    download_results = [outcome for outcome in executor.map(download_artifact, run_data)]

[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:04,834 - INFO - Downloaded logs_u4nlwnuv:v0 to /scratch/downloaded_artifacts/logs_u4nlwnuv:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:06,128 - INFO - Downloaded logs_vi4afo9g:v0 to /scratch/downloaded_artifacts/logs_vi4afo9g:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:06,903 - INFO - Downloaded logs_b66fmkn2:v0 to /scratch/downloaded_artifacts/logs_b66fmkn2:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:06,926 - INFO - Downloaded logs_ovdris5a:v0 to /scratch/downloaded_artifacts/logs_ovdris5a:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:06,929 - INFO - Downloaded logs_ja4iixbt:v0 to /scratch/downloaded_artifacts/logs_ja4iixbt:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:06,934 - INFO - Downloaded logs_cllug4l5:v0 to /scratch/downloaded_artifacts/logs_cllug4l5:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,209 - INFO - Downloaded logs_ho3ybktm:v0 to /scratch/downloaded_artifacts/logs_ho3ybktm:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,244 - INFO - Downloaded logs_jxjrnh8v:v0 to /scratch/downloaded_artifacts/logs_jxjrnh8v:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,269 - INFO - Downloaded logs_76qsyq10:v0 to /scratch/downloaded_artifacts/logs_76qsyq10:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,324 - INFO - Downloaded logs_115k739i:v0 to /scratch/downloaded_artifacts/logs_115k739i:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,444 - INFO - Downloaded logs_4npcw0w2:v0 to /scratch/downloaded_artifacts/logs_4npcw0w2:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,451 - INFO - Downloaded logs_styjzmmg:v0 to /scratch/downloaded_artifacts/logs_styjzmmg:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,467 - INFO - Downloaded logs_hxdsp5qn:v0 to /scratch/downloaded_artifacts/logs_hxdsp5qn:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,488 - INFO - Downloaded logs_ggabyes2:v0 to /scratch/downloaded_artifacts/logs_ggabyes2:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,500 - INFO - Downloaded logs_tcizhmbz:v0 to /scratch/downloaded_artifacts/logs_tcizhmbz:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,507 - INFO - Downloaded logs_ysgqllrd:v0 to /scratch/downloaded_artifacts/logs_ysgqllrd:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,554 - INFO - Downloaded logs_it5bydoi:v0 to /scratch/downloaded_artifacts/logs_it5bydoi:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,570 - INFO - Downloaded logs_697to2he:v0 to /scratch/downloaded_artifacts/logs_697to2he:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,587 - INFO - Downloaded logs_ro6yvvi7:v0 to /scratch/downloaded_artifacts/logs_ro6yvvi7:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,610 - INFO - Downloaded logs_g8tlxpq8:v0 to /scratch/downloaded_artifacts/logs_g8tlxpq8:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,616 - INFO - Downloaded logs_mzh2tsx3:v0 to /scratch/downloaded_artifacts/logs_mzh2tsx3:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:07,636 - INFO - Downloaded logs_mnyj1qrz:v0 to /scratch/downloaded_artifacts/logs_mnyj1qrz:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,113 - INFO - Downloaded logs_boxul4lu:v0 to /scratch/downloaded_artifacts/logs_boxul4lu:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,165 - INFO - Downloaded logs_m33zrght:v0 to /scratch/downloaded_artifacts/logs_m33zrght:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,185 - INFO - Downloaded logs_1q1yzror:v0 to /scratch/downloaded_artifacts/logs_1q1yzror:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,403 - INFO - Downloaded logs_2l9uzx6i:v0 to /scratch/downloaded_artifacts/logs_2l9uzx6i:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,615 - INFO - Downloaded logs_0uxy4ev2:v0 to /scratch/downloaded_artifacts/logs_0uxy4ev2:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,658 - INFO - Downloaded logs_pgp8nvgi:v0 to /scratch/downloaded_artifacts/logs_pgp8nvgi:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,679 - INFO - Downloaded logs_vrjqozfn:v0 to /scratch/downloaded_artifacts/logs_vrjqozfn:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,750 - INFO - Downloaded logs_1ec5uo3u:v0 to /scratch/downloaded_artifacts/logs_1ec5uo3u:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,842 - INFO - Downloaded logs_oolhxgv0:v0 to /scratch/downloaded_artifacts/logs_oolhxgv0:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,885 - INFO - Downloaded logs_uz8smreq:v0 to /scratch/downloaded_artifacts/logs_uz8smreq:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,889 - INFO - Downloaded logs_61qbqcxz:v0 to /scratch/downloaded_artifacts/logs_61qbqcxz:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,951 - INFO - Downloaded logs_9zi7jc0t:v0 to /scratch/downloaded_artifacts/logs_9zi7jc0t:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,955 - INFO - Downloaded logs_cp53gcwe:v0 to /scratch/downloaded_artifacts/logs_cp53gcwe:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:08,974 - INFO - Downloaded logs_zul4qzka:v0 to /scratch/downloaded_artifacts/logs_zul4qzka:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:09,002 - INFO - Downloaded logs_qtxajn6q:v0 to /scratch/downloaded_artifacts/logs_qtxajn6q:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:09,009 - INFO - Downloaded logs_lgc3gwab:v0 to /scratch/downloaded_artifacts/logs_lgc3gwab:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:09,011 - INFO - Downloaded logs_a6fk8oxn:v0 to /scratch/downloaded_artifacts/logs_a6fk8oxn:v0


[34m[1mwandb[0m:   1 of 1 files downloaded.  


2025-04-20 15:04:09,020 - INFO - Downloaded logs_qludfphl:v0 to /scratch/downloaded_artifacts/logs_qludfphl:v0


In [27]:
# Surface partial failures before handing the results on: download_artifact
# reports errors through the "status" field instead of raising, so without
# this summary a batch with failed runs would look like a clean one.
failed_runs = [result["run_id"] for result in download_results if result["status"] != "success"]
if failed_runs:
    logger.warning(f"{len(failed_runs)} of {len(download_results)} downloads failed: {failed_runs}")

# Save download results for the processing script. The group name is part of
# the file name, so each group gets its own results file.
results_file = os.path.join(download_dir, f'download_results_{group_name}.pkl')
with open(results_file, 'wb') as f:
    pickle.dump(download_results, f)
logger.info(f"Saved download results to {results_file}")

2025-04-20 15:04:09,075 - INFO - Saved download results to /scratch/downloaded_artifacts/download_results_fixed_weight_seed-1_cf21e420f71a4529bed03b4c48fda84c.pkl
