<a href="https://colab.research.google.com/github/marcusvdl/healthier/blob/main/healthier_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# etl_data_processed

In [2]:
# aging_datasets_downloader.py

import os
import requests
from pathlib import Path

# Every dataset is stored somewhere below this folder, which is created
# here if it does not exist yet (relative to the current working directory).
BASE_DIR = Path("datasets")
os.makedirs(BASE_DIR, exist_ok=True)

# Helper function for downloading files

def download_file(name, url, subdir="general", timeout=60):
    """Download ``url`` into ``BASE_DIR / subdir`` and return the target path.

    The file name is the last segment of the URL path; any query string or
    fragment is ignored. The body is streamed to a temporary ``.part`` file
    and renamed on success, so a failed transfer never leaves a truncated
    file under the final name. Failures are reported on stdout instead of
    being raised, so one dead link does not abort the remaining downloads.

    Args:
        name: Human-readable dataset label used in the log messages.
        url: Direct download URL.
        subdir: Folder below ``BASE_DIR`` that receives the file.
        timeout: Seconds to wait for the server to connect / send data
            before the request is abandoned.

    Returns:
        The destination path as a string. It is returned whether or not the
        download succeeded; check the log line or the file's existence.
    """
    from urllib.parse import urlsplit

    folder = BASE_DIR / subdir
    folder.mkdir(parents=True, exist_ok=True)
    # Only the URL path names the file, so "?token=..." is never part of it;
    # fall back to a fixed name when the path ends in "/" or is empty.
    filename = urlsplit(url).path.rstrip("/").split("/")[-1] or "download"
    path = folder / filename
    partial = path.with_name(path.name + ".part")

    try:
        # stream=True avoids holding a whole (possibly multi-GB) dataset in
        # memory; timeout keeps a stalled server from hanging the run.
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        partial.replace(path)
        print(f"[OK] {name} saved in {path}")
    except Exception as e:
        # Deliberately broad: this is a best-effort bulk downloader and every
        # failure is logged rather than propagated.
        if partial.exists():
            partial.unlink()
        print(f"[ERROR] {name}: {e}")

    return str(path)


# -----------------------------
# MULTI-OMICS / MULTI-TISSUES
# -----------------------------

# Aging Atlas: display label -> spreadsheet URL
# NOTE(review): the recorded run output shows all four URLs answering
# 404 -- verify the current download links.
aging_atlas_files = {
    "Transcriptome Bulk":
        "https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Bulk_Transcriptome.xlsx",
    "Single-cell Transcriptome":
        "https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Single_Cell.xlsx",
    "Proteome":
        "https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Proteome.xlsx",
    "Pharmacogenomics":
        "https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Pharmacogenomics.xlsx",
}

for name, url in aging_atlas_files.items():
    download_file(name=name, url=url, subdir="aging_atlas")

# GTEx: nothing is fetched, only a pointer is printed
print("[INFO] GTEx requires registration and must be downloaded manually: https://gtexportal.org")

# Tabula Muris (figshare)
# NOTE(review): the recorded run output shows this URL answering 404 too.
download_file(
    name="Tabula Muris Senis Raw Data",
    url="https://figshare.com/ndownloader/files/22921744",
    subdir="tabula_muris",
)

# Portal-only sources: nothing is fetched, only a pointer is printed
for portal_notice in (
    # Tabula Sapiens
    "[INFO] Tabula Sapiens: access https://tabula-sapiens-portal.ds.czbiohub.org for personalized download.",
    # ENCODE
    "[INFO] ENCODE datasets must be downloaded via the interface: https://www.encodeproject.org",
    # Human Cell Atlas
    "[INFO] HCA offers API and interface: https://data.humancellatlas.org",
    # Roadmap Epigenomics
    "[INFO] Roadmap Epigenomics: download by https://egg2.wustl.edu/roadmap/web_portal/",
):
    print(portal_notice)

# AFCA (supplementary archive attached to the article)
download_file(
    name="Aging Fly Cell Atlas (AFCA)",
    url="https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-022-05597-y/MediaObjects/41586_2022_5597_MOESM3_ESM.zip",
    subdir="afca",
)

# Single Cell Portal
print("[INFO] Broad Institute Single Cell Portal: https://singlecell.broadinstitute.org/single_cell")


# -----------------------------
# LONGITUDINAL / AGE CLOCKS
# -----------------------------

# ClockBase and ROSMAP: nothing is fetched, only a pointer is printed
for access_notice in (
    # ClockBase
    "[INFO] ClockBase requires access via https://clockbase.org",
    # ROSMAP
    "[INFO] ROSMAP available via Synapse: https://www.synapse.org/#!Synapse:syn3219045",
):
    print(access_notice)

# Direct downloads, as (label, URL, target sub-folder)
for label, link, folder in (
    # GEO (example: GSE201338)
    (
        "GSE201338 metadata",
        "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE201nnn/GSE201338/matrix/GSE201338_series_matrix.txt.gz",
        "geo",
    ),
    # Mammalian Aging Cell Atlas (MACA)
    (
        "MACA (Mouse Aging Single-Cell)",
        "https://figshare.com/ndownloader/files/33835075",
        "maca",
    ),
):
    download_file(label, link, subdir=folder)

# Remaining sources of this section: pointers only
for access_notice in (
    # SenNet
    "[INFO] SenNet data hub: https://sennetconsortium.org",
    # Multi-Omics Longitudinal Fibroblasts
    "[INFO] Download from the study's supplementary repository: https://www.nature.com/articles/s41586-020-2326-1",
    # BrainSpan / Allen Brain Atlas
    "[INFO] BrainSpan download: https://www.brainspan.org/static/download.html",
    # LifeTime Initiative
    "[INFO] LifeTime resources: https://lifetime-fetflagship.eu",
    # Framingham Heart Study
    "[INFO] FHS data via dbGaP: https://www.nhlbi.nih.gov/science/framingham-heart-study",
):
    print(access_notice)


# -----------------------------
# REJUVENATION / INTERVENTIONS
# -----------------------------

# No source in this section is fetched automatically; each entry is a
# pointer telling the user where the data can be obtained.
_rejuvenation_notices = (
    # SINGULAR (Cell Rejuvenation Atlas)
    "[INFO] Access the Cell Rejuvenation Atlas (SINGULAR): https://rejuvenome.org",
    # TPE-IVIG Study
    "[INFO] Supplementary dataset via original article: https://www.nature.com/articles/s41467-023-38028-0",
    # Rejuvenation Roadmap
    "[INFO] Rejuvenation Roadmap datasets: https://www.lifespan.io/road-maps/the-rejuvenation-roadmap/",
    # iPSC datasets
    "[INFO] Access iPSC repositories via GEO or https://stemcellcommons.org",
    # DGIdb
    "[INFO] DGIdb API and downloads: https://www.dgidb.org",
    # DrugBank
    "[INFO] DrugBank requires registration to download: https://go.drugbank.com/releases/latest",
)
print("\n".join(_rejuvenation_notices))


# -----------------------------
# Wrap-up
# -----------------------------
print("\n[✔] Script finished. Available datasets have been downloaded. See directories at ./datasets/")

[ERROR] Transcriptome Bulk: 404 Client Error:  for url: https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Bulk_Transcriptome.xlsx
[ERROR] Single-cell Transcriptome: 404 Client Error:  for url: https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Single_Cell.xlsx
[ERROR] Proteome: 404 Client Error:  for url: https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Proteome.xlsx
[ERROR] Pharmacogenomics: 404 Client Error:  for url: https://ngdc.cncb.ac.cn/aging/download/data/Aging_Atlas_Pharmacogenomics.xlsx
[INFO] GTEx requires registration and must be downloaded manually: https://gtexportal.org
[ERROR] Tabula Muris Senis Raw Data: 404 Client Error: Not Found for url: https://figshare.com/ndownloader/files/22921744
[INFO] Tabula Sapiens: access https://tabula-sapiens-portal.ds.czbiohub.org for personalized download.
[INFO] ENCODE datasets must be downloaded via the interface: https://www.encodeproject.org
[INFO] HCA offers API and interface: https://data.humancellatlas.org
[INFO