In [7]:
import boto3
# Connectivity smoke test: open a session with the named profile and region,
# then print the name of every bucket returned by list_buckets().
session = boto3.Session(
    profile_name="nhs-ods",
    region_name="eu-west-2",
)
s3 = session.client("s3")
print([bucket["Name"] for bucket in s3.list_buckets()["Buckets"]])  # should list buckets you can see


['aws-glue-assets-736116164248-eu-west-2', 'nhs-fingertips-data', 'test-athena-results-fingertips', 'test-nhs-fingertips', 'test-nhs-ods']


In [8]:
import threading
import time
from pathlib import Path

import boto3
from boto3.s3.transfer import TransferConfig
from botocore.config import Config
from tqdm.auto import tqdm  # auto = nice in Jupyter or terminal

# --- AWS destination ---------------------------------------------------------
PROFILE_NAME = "nhs-ods"
REGION = "eu-west-2"
S3_BUCKET = "test-nhs-ods"
S3_PREFIX = "nhs-ods/curated"

# --- Local source directories ------------------------------------------------
LOCAL_DIR_LATEST = Path(r"C:\Users\NikhilYadav\Desktop\NHS ODS\bronze\ods\extracts\curated\latest")
LOCAL_DIR_SNAPSHOT = Path(r"C:\Users\NikhilYadav\Desktop\NHS ODS\bronze\ods\extracts\curated\2025-10-03_1703")

# --- S3 client ---------------------------------------------------------------
# Retries use botocore's "standard" mode with at most 10 attempts per request.
session = boto3.Session(profile_name=PROFILE_NAME, region_name=REGION)
s3 = session.client(
    "s3",
    config=Config(retries={"mode": "standard", "max_attempts": 10}),
)

# --- Transfer tuning ---------------------------------------------------------
# Multipart threshold and part size are both 16 MiB; up to 4 worker threads
# are allowed per transfer.
tx_cfg = TransferConfig(
    multipart_threshold=16 * 1024 * 1024,
    multipart_chunksize=16 * 1024 * 1024,
    max_concurrency=4,
    use_threads=True,
)

def iter_files(root: Path):
    """Lazily yield each path beneath ``root`` (searched recursively) that is a file.

    Directories are walked but never yielded themselves.
    """
    yield from (entry for entry in root.rglob("*") if entry.is_file())

class ProgressThrottle:
    """Throttle S3 Callback -> tqdm updates so Jupyter won't spam new lines.

    Instances are callables suitable for the ``Callback`` argument of
    ``upload_file``. Byte counts are buffered and forwarded to ``bar.update``
    only once ``step_bytes`` have accumulated or ``min_interval`` seconds have
    passed since the last forwarded update.

    The transfer in this notebook runs with ``use_threads=True``, so the
    callback can be invoked from several worker threads; all state changes are
    therefore guarded by a lock.

    Args:
        bar: Object exposing ``update(n)`` (e.g. a tqdm bar).
        step_bytes: Forward an update once at least this many bytes are buffered.
        min_interval: ...or once this many seconds have elapsed.
    """

    def __init__(self, bar, step_bytes=4*1024**2, min_interval=0.25):
        self.bar = bar
        self.step = step_bytes           # update at least every 4 MB
        self.min_interval = min_interval # ...or every 250 ms
        self._accum = 0                  # bytes received but not yet shown on the bar
        # Monotonic clock: interval measurement must not be affected by
        # wall-clock adjustments.
        self._last_time = time.monotonic()
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Record ``bytes_amount`` transferred bytes; update the bar if a threshold is hit."""
        with self._lock:
            self._accum += bytes_amount
            now = time.monotonic()
            if self._accum < self.step and (now - self._last_time) < self.min_interval:
                return
            delta, self._accum = self._accum, 0
            self._last_time = now
            # Updated while holding the lock so concurrent callbacks cannot
            # interleave their bar updates.
            self.bar.update(delta)

    def flush(self):
        """Forward any buffered bytes to the bar and reset the buffer."""
        with self._lock:
            delta, self._accum = self._accum, 0
            if delta:
                self.bar.update(delta)

def upload_one(local_path: Path, bucket: str, key: str):
    """Upload a single local file to ``s3://bucket/key`` with a byte-level progress bar.

    The bar's total is the file's size on disk, and it is removed once the
    upload finishes (``leave=False``). Progress callbacks are routed through a
    ProgressThrottle so the bar is refreshed at most every 4 MB / 250 ms.
    """
    size_in_bytes = local_path.stat().st_size
    bar_options = dict(
        total=size_in_bytes,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
        desc=f"{local_path.name}",
        leave=False,
    )
    with tqdm(**bar_options) as byte_bar:
        throttle = ProgressThrottle(byte_bar, step_bytes=4*1024**2, min_interval=0.25)
        s3.upload_file(
            Filename=str(local_path),
            Bucket=bucket,
            Key=key,
            ExtraArgs={"ACL": "bucket-owner-full-control"},
            Callback=throttle,
            Config=tx_cfg,
        )
        # Whatever the throttle is still holding back gets pushed to the bar
        # so it ends on the full byte count.
        leftover = throttle._accum
        if leftover:
            byte_bar.update(leftover)

def upload_dir(local_dir: Path, bucket: str, prefix: str):
    """Upload every file under ``local_dir`` to ``bucket`` beneath ``prefix``.

    Each object key is the prefix joined with the file's path relative to
    ``local_dir`` (POSIX separators). Leading/trailing slashes on ``prefix``
    are stripped and an empty prefix is allowed, so keys never start with
    ``/`` or contain an accidental ``//``.

    Args:
        local_dir: Directory searched recursively via ``iter_files``.
        bucket: Destination bucket name.
        prefix: Key prefix for the uploaded objects; may be empty.

    Raises:
        Whatever ``upload_one`` raises; the first failure stops the run (the
        file counter is still advanced for the failing file).
    """
    files = list(iter_files(local_dir))
    if not files:
        print(f"(nothing to upload in {local_dir})")
        return

    # S3 keys are literal strings: "a//b" and "/a/b" are different objects
    # from "a/b", so normalise the prefix once up front.
    base = prefix.strip("/")

    print(f"\nUploading {local_dir} → s3://{bucket}/{base}  (files: {len(files)})")
    with tqdm(total=len(files), desc="Files", unit="file") as file_bar:
        for p in files:
            rel = p.relative_to(local_dir).as_posix()
            key = f"{base}/{rel}" if base else rel
            try:
                upload_one(p, bucket, key)
            finally:
                # Count the file even when its upload raised.
                file_bar.update(1)

# Echo the configured source directories and the destination before any transfer starts.
print(f"Local sources:\n- latest:   {LOCAL_DIR_LATEST}\n- snapshot: {LOCAL_DIR_SNAPSHOT}")
print(f"S3 target:  s3://{S3_BUCKET}/{S3_PREFIX}")

# The "latest" directory is always attempted and lands under <S3_PREFIX>/latest.
upload_dir(LOCAL_DIR_LATEST, S3_BUCKET, f"{S3_PREFIX}/latest")
# The snapshot is uploaded only when its directory exists, under a prefix
# named after that directory.
if LOCAL_DIR_SNAPSHOT.exists():
    upload_dir(LOCAL_DIR_SNAPSHOT, S3_BUCKET, f"{S3_PREFIX}/{LOCAL_DIR_SNAPSHOT.name}")

print("Done.")


Local sources:
- latest:   C:\Users\NikhilYadav\Desktop\NHS ODS\bronze\ods\extracts\curated\latest
- snapshot: C:\Users\NikhilYadav\Desktop\NHS ODS\bronze\ods\extracts\curated\2025-10-03_1703
S3 target:  s3://test-nhs-ods/nhs-ods/curated

Uploading C:\Users\NikhilYadav\Desktop\NHS ODS\bronze\ods\extracts\curated\latest → s3://test-nhs-ods/nhs-ods/curated/latest  (files: 99)


Files:   0%|          | 0/99 [00:00<?, ?file/s]

coverage_report.txt:   0%|          | 0.00/182 [00:00<?, ?B/s]

Integrated_Care_Boards_(December_2024)_Names_and_Codes_in_EN.csv:   0%|          | 0.00/2.78k [00:00<?, ?B/s]

ONSPD_MAY_2025_UK.csv:   0%|          | 0.00/1.35G [00:00<?, ?B/s]

orgs.csv:   0%|          | 0.00/113M [00:00<?, ?B/s]

orgs.parquet:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

orgs_gp.csv:   0%|          | 0.00/7.15M [00:00<?, ?B/s]

org_dates.csv:   0%|          | 0.00/10.6M [00:00<?, ?B/s]

org_rels.csv:   0%|          | 0.00/40.0M [00:00<?, ?B/s]

org_roles.csv:   0%|          | 0.00/23.9M [00:00<?, ?B/s]

org_succs.csv:   0%|          | 0.00/614k [00:00<?, ?B/s]

Contents - Full Package.docx:   0%|          | 0.00/17.6k [00:00<?, ?B/s]

aug25oth.csv:   0%|          | 0.00/5.81M [00:00<?, ?B/s]

aug25wsn.csv:   0%|          | 0.00/160M [00:00<?, ?B/s]

aug25y56.csv:   0%|          | 0.00/125M [00:00<?, ?B/s]

aug25y58.csv:   0%|          | 0.00/94.8M [00:00<?, ?B/s]

aug25y59.csv:   0%|          | 0.00/152M [00:00<?, ?B/s]

aug25y60.csv:   0%|          | 0.00/142M [00:00<?, ?B/s]

aug25y61.csv:   0%|          | 0.00/99.2M [00:00<?, ?B/s]

aug25y62.csv:   0%|          | 0.00/109M [00:00<?, ?B/s]

aug25y63.csv:   0%|          | 0.00/127M [00:00<?, ?B/s]

nhg25aug.csv:   0%|          | 0.00/0.99G [00:00<?, ?B/s]

NHSPD User Guide Aug 2025.odt:   0%|          | 0.00/287k [00:00<?, ?B/s]

NHSPD User Guide Aug 2025.pdf:   0%|          | 0.00/610k [00:00<?, ?B/s]

NHSPD CHP lookup SC as at 04_12.txt:   0%|          | 0.00/2.26k [00:00<?, ?B/s]

NHSPD Health Board lookup SC as at 08_16.txt:   0%|          | 0.00/592 [00:00<?, ?B/s]

Pseudo Country 1 (country order) Feb 2019.xlsx:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

Pseudo Country 2 (code order) Feb 2019.xlsx:   0%|          | 0.00/49.8k [00:00<?, ?B/s]

Cancer Alliance names and codes EN as at 04_24.csv:   0%|          | 0.00/633 [00:00<?, ?B/s]

Cancer Alliance names and codes EN as at 04_24.xlsx:   0%|          | 0.00/13.8k [00:00<?, ?B/s]

Cancer Network names and codes EW as at 04_11.csv:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

Cancer Network names and codes EW as at 04_11.xlsx:   0%|          | 0.00/9.34k [00:00<?, ?B/s]

Cancer Registry names and codes EW as at 01_08.csv:   0%|          | 0.00/433 [00:00<?, ?B/s]

Cancer Registry names and codes EW as at 01_08.xlsx:   0%|          | 0.00/8.68k [00:00<?, ?B/s]

CHP names and codes SC as at 04_12.csv:   0%|          | 0.00/2.06k [00:00<?, ?B/s]

CHP names and codes SC as at 04_12.xlsx:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Country names and codes UK as at 08_12.csv:   0%|          | 0.00/241 [00:00<?, ?B/s]

Country names and codes UK as at 08_12.xlsx:   0%|          | 0.00/11.5k [00:00<?, ?B/s]

County names and codes EN as at 12_23.csv:   0%|          | 0.00/531 [00:00<?, ?B/s]

County names and codes EN as at 12_23.xlsx:   0%|          | 0.00/19.2k [00:00<?, ?B/s]

County names and codes EW as at 21_04_91.csv:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

County names and codes EW as at 21_04_91.xlsx:   0%|          | 0.00/12.8k [00:00<?, ?B/s]

District names and codes EW as at 21_4_91.csv:   0%|          | 0.00/10.8k [00:00<?, ?B/s]

District names and codes EW as at 21_4_91.xlsx:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

Health Authority names and codes EN as at 12_01.csv:   0%|          | 0.00/2.01k [00:00<?, ?B/s]

Health Authority names and codes EN as at 12_01.xlsx:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

Health Authority names and codes WA as at 12_01.csv:   0%|          | 0.00/91.0 [00:00<?, ?B/s]

Health Authority names and codes WA as at 12_01.xlsx:   0%|          | 0.00/11.8k [00:00<?, ?B/s]

Health Board  names and codes SC as at 04_19.csv:   0%|          | 0.00/405 [00:00<?, ?B/s]

Health Board  names and codes SC as at 04_19.xlsx:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Health Board names and codes NI as at 2003.csv:   0%|          | 0.00/124 [00:00<?, ?B/s]

Health Board names and codes NI as at 2003.xlsx:   0%|          | 0.00/9.13k [00:00<?, ?B/s]

HSCB names and codes NI as at 12_10.csv:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

HSCB names and codes NI as at 12_10.xlsx:   0%|          | 0.00/10.9k [00:00<?, ?B/s]

ICB names and codes EN as at 04_23.csv:   0%|          | 0.00/2.70k [00:00<?, ?B/s]

ICB names and codes EN as at 04_23.xlsx:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

IT Cluster names and codes EN as at 01_07_03.csv:   0%|          | 0.00/132 [00:00<?, ?B/s]

IT Cluster names and codes EN as at 01_07_03.xlsx:   0%|          | 0.00/9.59k [00:00<?, ?B/s]

LA_UA names and codes UK as at 04_25.csv:   0%|          | 0.00/8.92k [00:00<?, ?B/s]

LA_UA names and codes UK as at 04_25.xlsx:   0%|          | 0.00/22.0k [00:00<?, ?B/s]

LCG names and codes NI as at 12_10.csv:   0%|          | 0.00/107 [00:00<?, ?B/s]

LCG names and codes NI as at 12_10.xlsx:   0%|          | 0.00/11.0k [00:00<?, ?B/s]

LHB names and codes WA as at 04_19.csv:   0%|          | 0.00/657 [00:00<?, ?B/s]

LHB names and codes WA as at 04_19.xlsx:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

LSOA (2001) names and codes EW & NI as at 02_05.csv:   0%|          | 0.00/940k [00:00<?, ?B/s]

LSOA (2001) names and codes EW & NI as at 02_05.xlsx:   0%|          | 0.00/722k [00:00<?, ?B/s]

LSOA (2011) names and codes UK as at 12_12.csv:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

LSOA (2011) names and codes UK as at 12_12.xlsx:   0%|          | 0.00/850k [00:00<?, ?B/s]

LSOA (2021) names and codes EW as at 12_21.csv:   0%|          | 0.00/970k [00:00<?, ?B/s]

LSOA (2021) names and codes EW as at 12_21.xlsx:   0%|          | 0.00/702k [00:00<?, ?B/s]

MSOA (2011) names and codes UK as at 12_12.csv:   0%|          | 0.00/224k [00:00<?, ?B/s]

MSOA (2011) names and codes UK as at 12_12.xlsx:   0%|          | 0.00/179k [00:00<?, ?B/s]

MSOA (2021) names and codes EW as at 12_21.csv:   0%|          | 0.00/190k [00:00<?, ?B/s]

MSOA (2021) names and codes EW as at 12_21.xlsx:   0%|          | 0.00/150k [00:00<?, ?B/s]

NHSER names and codes EN as at 04_24.csv:   0%|          | 0.00/227 [00:00<?, ?B/s]

NHSER names and codes EN as at 04_24.xlsx:   0%|          | 0.00/13.3k [00:00<?, ?B/s]

NHSRLO names and codes EN as at 04_19.csv:   0%|          | 0.00/866 [00:00<?, ?B/s]

NHSRLO names and codes EN as at 04_19.xlsx:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

Pan SHA names and codes EN as at 12_10.csv:   0%|          | 0.00/201 [00:00<?, ?B/s]

Pan SHA names and codes EN as at 12_10.xlsx:   0%|          | 0.00/11.4k [00:00<?, ?B/s]

PCO names and codes EN as at 04_11.csv:   0%|          | 0.00/4.53k [00:00<?, ?B/s]

PCO names and codes EN as at 04_11.xlsx:   0%|          | 0.00/17.6k [00:00<?, ?B/s]

PCO names and codes EN as at 10_05.csv:   0%|          | 0.00/6.79k [00:00<?, ?B/s]

PCO names and codes EN as at 10_05.xlsx:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

Region names and codes EN as at 12_20 (RGN).csv:   0%|          | 0.00/592 [00:00<?, ?B/s]

Region names and codes EN as at 12_20 (RGN).xlsx:   0%|          | 0.00/13.0k [00:00<?, ?B/s]

SCN names and codes EN as at 04_13.csv:   0%|          | 0.00/346 [00:00<?, ?B/s]

SCN names and codes EN as at 04_13.xlsx:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

SHA names and codes EN as at 09_02_04.csv:   0%|          | 0.00/824 [00:00<?, ?B/s]

SHA names and codes EN as at 09_02_04.xlsx:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

SHA names and codes EN as at 12_10.csv:   0%|          | 0.00/319 [00:00<?, ?B/s]

SHA names and codes EN as at 12_10.xlsx:   0%|          | 0.00/12.8k [00:00<?, ?B/s]

SICBL names and codes EN as at 04_23.csv:   0%|          | 0.00/5.65k [00:00<?, ?B/s]

SICBL names and codes EN as at 04_23.xlsx:   0%|          | 0.00/21.7k [00:00<?, ?B/s]

smhpc_nc_alpha.csv:   0%|          | 0.00/2.52k [00:00<?, ?B/s]

smhpc_nc_code.csv:   0%|          | 0.00/2.52k [00:00<?, ?B/s]

Ward names and codes UK as at 05_25.csv:   0%|          | 0.00/231k [00:00<?, ?B/s]

Ward names and codes UK as at 05_25.xlsx:   0%|          | 0.00/234k [00:00<?, ?B/s]

Westminster Parliamentary Constituency names and codes UK as at 12_24.csv:   0%|          | 0.00/19.7k [00:00<…

Westminster Parliamentary Constituency names and codes UK as at 12_24.xlsx:   0%|          | 0.00/34.6k [00:00…


Uploading C:\Users\NikhilYadav\Desktop\NHS ODS\bronze\ods\extracts\curated\2025-10-03_1703 → s3://test-nhs-ods/nhs-ods/curated/2025-10-03_1703  (files: 8)


Files:   0%|          | 0/8 [00:00<?, ?file/s]

coverage_report.txt:   0%|          | 0.00/182 [00:00<?, ?B/s]

orgs.csv:   0%|          | 0.00/113M [00:00<?, ?B/s]

orgs.parquet:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

orgs_gp.csv:   0%|          | 0.00/7.15M [00:00<?, ?B/s]

org_dates.csv:   0%|          | 0.00/10.6M [00:00<?, ?B/s]

org_rels.csv:   0%|          | 0.00/40.0M [00:00<?, ?B/s]

org_roles.csv:   0%|          | 0.00/23.9M [00:00<?, ?B/s]

org_succs.csv:   0%|          | 0.00/614k [00:00<?, ?B/s]

Done.
