In [1]:
# Colab environment bootstrap: mount Google Drive, switch into the project
# folder, and make the project's src/ directory importable.
import os
import sys

import tensorflow as tf
from google.colab import drive

# Attach Google Drive to the Colab VM's filesystem.
drive.mount("/content/drive")

PROJECT_DIR = "/content/drive/MyDrive/sepsis-timeline-project"
SRC_DIR = PROJECT_DIR + "/src"

# Make the project root the current working directory.
os.chdir(PROJECT_DIR)

# Put src/ at the front of the import path; the membership check keeps a
# re-run of this cell from inserting a duplicate entry.
if sys.path.count(SRC_DIR) == 0:
    sys.path.insert(0, SRC_DIR)

# Sanity report: working directory, import path, TensorFlow version, GPUs.
current_dir = os.getcwd()
print("‚úÖ CWD:", current_dir)
src_on_path = SRC_DIR in sys.path
print("‚úÖ SRC in path:", src_on_path)
tf_version = tf.__version__
print("‚úÖ TF:", tf_version)
gpu_devices = tf.config.list_physical_devices("GPU")
print("‚úÖ GPU:", gpu_devices)


Mounted at /content/drive
‚úÖ CWD: /content/drive/MyDrive/sepsis-timeline-project
‚úÖ SRC in path: True
‚úÖ TF: 2.19.0
‚úÖ GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Create the project's data folder layout on Drive (raw / interim / processed).
import os

PROJECT_DIR = "/content/drive/MyDrive/sepsis-timeline-project"
RAW_DIR = PROJECT_DIR + "/data/raw/physionet2019"

# exist_ok=True makes each call a no-op when the folder is already there,
# so this cell can be re-run freely.
for data_dir in (
    RAW_DIR,
    PROJECT_DIR + "/data/interim",
    PROJECT_DIR + "/data/processed",
):
    os.makedirs(data_dir, exist_ok=True)

print("‚úÖ Created:", RAW_DIR)


‚úÖ Created: /content/drive/MyDrive/sepsis-timeline-project/data/raw/physionet2019


In [3]:
# Ensure the destination folder for PhysioNet 2019 training set A exists.
import os

BASE_DIR = "/content/drive/MyDrive/sepsis-timeline-project/data/raw/physionet2019"
TRAIN_A = BASE_DIR + "/training_setA"

# No-op if the folder is already present.
os.makedirs(name=TRAIN_A, exist_ok=True)
print("‚úÖ Klar:", TRAIN_A)


‚úÖ Klar: /content/drive/MyDrive/sepsis-timeline-project/data/raw/physionet2019/training_setA


In [7]:
# Download the first N patient files (p000001.psv .. p{N:06d}.psv) of PhysioNet
# Challenge 2019 training set A into OUT_DIR.
#
# The cell is resumable: files already present in OUT_DIR are skipped. To keep
# that skip check trustworthy, each file is first written to "<name>.part" and
# only renamed to its final name after the transfer finished without error, so
# an interrupted or timed-out download can never leave a truncated .psv behind
# that a later run would mistake for a complete file.
import os
import time
import urllib.request
import socket

BASE_URL = "https://physionet.org/files/challenge-2019/1.0.0/training/training_setA"
OUT_DIR = "/content/drive/MyDrive/sepsis-timeline-project/data/raw/physionet2019/training_setA"

N = 5000          # number of patient files to fetch
MAX_ATTEMPTS = 3  # tries per file before it is recorded as failed

os.makedirs(OUT_DIR, exist_ok=True)

# Process-wide socket timeout so a stalled connection cannot hang the loop.
socket.setdefaulttimeout(30)

downloaded = 0
skipped = 0
failed = []  # (filename, error text) for files that failed every attempt

for i in range(1, N + 1):
    pid = f"p{i:06d}.psv"
    url = f"{BASE_URL}/{pid}?download="
    out_path = os.path.join(OUT_DIR, pid)

    # Skip files that a previous run already completed.
    if os.path.exists(out_path):
        skipped += 1
        continue

    # Temporary target; its ".part" suffix keeps it out of "*.psv" globs.
    tmp_path = out_path + ".part"

    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            urllib.request.urlretrieve(url, tmp_path)
            # Promote the finished download to its final name in one step.
            os.replace(tmp_path, out_path)
            downloaded += 1
            break
        except Exception as e:
            # Best-effort removal of whatever was partially written; a cleanup
            # failure must not abort the whole loop, and the next attempt
            # overwrites the .part file anyway.
            try:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except OSError:
                pass

            if attempt == MAX_ATTEMPTS:
                failed.append((pid, str(e)))
            else:
                time.sleep(1.0 * attempt)  # linear backoff: 1s, 2s, ...

    # Every 200 files: report progress and pause briefly to go easy on the server.
    if (i % 200) == 0:
        print(f"Progress: {i}/{N} | downloaded={downloaded} | skipped={skipped} | failed={len(failed)}")
        time.sleep(0.5)

print(f"‚úÖ Nedladdade: {downloaded}")
print(f"‚Ü©Ô∏è Skippade (fanns redan): {skipped}")
print(f"‚ùå Misslyckades: {len(failed)}")
print("üìÇ Totalt i mappen:", len(os.listdir(OUT_DIR)))

# Persist the list of failed files so they can be retried later.
if failed:
    fail_path = os.path.join(OUT_DIR, "failed_downloads.txt")
    with open(fail_path, "w", encoding="utf-8") as f:
        for pid, err in failed:
            f.write(f"{pid}\t{err}\n")
    print("üìù Sparade failed-lista:", fail_path)


Progress: 1600/5000 | downloaded=17 | skipped=1583 | failed=0
Progress: 1800/5000 | downloaded=217 | skipped=1583 | failed=0
Progress: 2000/5000 | downloaded=417 | skipped=1583 | failed=0
Progress: 2200/5000 | downloaded=617 | skipped=1583 | failed=0
Progress: 2400/5000 | downloaded=817 | skipped=1583 | failed=0
Progress: 2600/5000 | downloaded=1017 | skipped=1583 | failed=0
Progress: 2800/5000 | downloaded=1217 | skipped=1583 | failed=0
Progress: 3000/5000 | downloaded=1417 | skipped=1583 | failed=0
Progress: 3200/5000 | downloaded=1617 | skipped=1583 | failed=0
Progress: 3400/5000 | downloaded=1817 | skipped=1583 | failed=0
Progress: 3600/5000 | downloaded=2017 | skipped=1583 | failed=0
Progress: 3800/5000 | downloaded=2217 | skipped=1583 | failed=0
Progress: 4000/5000 | downloaded=2417 | skipped=1583 | failed=0
Progress: 4200/5000 | downloaded=2617 | skipped=1583 | failed=0
Progress: 4400/5000 | downloaded=2817 | skipped=1583 | failed=0
Progress: 4600/5000 | downloaded=3017 | skippe

In [8]:
# Verify the download: count the .psv files in OUT_DIR (defined in the download
# cell) and show the lexicographically first and last file names.
import glob, os

files = glob.glob(os.path.join(OUT_DIR, "*.psv"))
print("PSV count:", len(files))

if files:
    # min()/max() yield the same paths as sorted(files)[0] / sorted(files)[-1]
    # without sorting the whole list twice.
    print("Example:", os.path.basename(min(files)), os.path.basename(max(files)))
else:
    # An empty folder previously crashed with IndexError on sorted(files)[0].
    print("Example: no .psv files found in", OUT_DIR)


PSV count: 5000
Example: p000001.psv p005000.psv
