# Homework 1 — IMERG Global Precipitation (May 2024)
# Author: Mohammed Fawaz Nawaz
# Course: ESDP1 WS 2025/26
# Notebook: load_imerg_mnawaz_may2024.ipynb

"""
This notebook downloads a small sample of IMERG Global Precipitation data
for May 1–3, 2024.

Notebook Purpose — IMERG Precipitation Download (May 2024)

This notebook demonstrates how to access and download sample IMERG Half-Hourly precipitation data from NASA GES DISC.
For this assignment, the time period 1–3 May 2024 is selected to meet the requirement of downloading data from multiple days and implementing a date loop (“outer loop”).

Steps shown in this notebook:

- Define the date range (May 1–3, 2024)

- Construct the correct IMERG directory structure, which is organised by:

  - Year (YYYY)

  - Day of year (DOY, zero-padded to three digits, e.g. 122 for 1 May 2024)

- Build filenames following the IMERG naming convention

- Authenticate using a NASA Earthdata Login (EDL) bearer token

  - Required to avoid HTTP 401 (Unauthorized) errors

- Download one file per day (the first half-hourly granule, 00:00:00–00:29:59) as a demonstration

- Save files to the local folder

This notebook serves as a working example of:

- Accessing NASA GES DISC data

- Handling authentication

- Implementing a date loop

- Downloading HDF5 files programmatically

"""


In [1]:
import os
import requests
from datetime import datetime, timedelta


In [None]:
# ---- Configuration ----

# Local directory for downloaded files; it is created immediately (no error if
# it already exists).
# NOTE(review): the download cell further down reassigns output_folder to
# "imerg_data", so this directory ends up unused — confirm which name is intended.
output_folder = "imerg_may2024"
os.makedirs(output_folder, exist_ok=True)

# First and last day of the download window; both ends are included.
start_date = datetime(year=2024, month=5, day=1)
end_date = datetime(year=2024, month=5, day=3)



In [None]:
import getpass
import os
from datetime import datetime, timedelta

import requests

# ---- Authentication ----
# The Earthdata Login (EDL) bearer token is a personal credential and must never
# be stored in the notebook. It is read from the EARTHDATA_TOKEN environment
# variable; if that is not set, it is requested interactively (input is hidden).
# Any token that was previously saved or shared inside a notebook should be
# revoked and regenerated in the Earthdata Login profile.
EDL_TOKEN = os.environ.get("EARTHDATA_TOKEN") or getpass.getpass("Earthdata Login token: ")
EDL_TOKEN = EDL_TOKEN.strip()
if not EDL_TOKEN:
    raise ValueError(
        "No Earthdata Login token provided; set EARTHDATA_TOKEN or enter one when prompted."
    )

# Output folder
output_folder = "imerg_data"
os.makedirs(output_folder, exist_ok=True)

# Date range (both ends inclusive). Repeated here so this cell also runs on its own.
start_date = datetime(2024, 5, 1)
end_date   = datetime(2024, 5, 3)

# Create session with token authentication: every request sent through this
# session carries the bearer token in its Authorization header.
session = requests.Session()
session.headers.update({
    "Authorization": f"Bearer {EDL_TOKEN}"
})

current = start_date

# Outer date loop: one iteration per calendar day in [start_date, end_date].
while current <= end_date:
    date_str = current.strftime("%Y%m%d")
    doy = current.strftime("%j")  # zero-padded day of year, e.g. "122"

    # Only the first half-hourly granule of each day (S000000-E002959, minute
    # offset 0000) is requested; the remote path is <year>/<day-of-year>/<file>.
    filename = f"3B-HHR.MS.MRG.3IMERG.{date_str}-S000000-E002959.0000.V07B.HDF5"
    url = f"https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/{current.year}/{doy}/{filename}"
    save_path = os.path.join(output_folder, filename)
    part_path = save_path + ".part"

    print(f"\nProcessing {current.date()}")
    print("URL:", url)

    # The context manager closes the streamed response (and releases its
    # connection) on every path, including non-200 replies whose body is never read.
    with session.get(url, stream=True, timeout=60) as r:
        if r.status_code == 200:
            # Stream into a temporary ".part" file and rename only after the
            # transfer finished, so an interrupted download never leaves a
            # truncated file under the final name.
            try:
                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
                os.replace(part_path, save_path)
            finally:
                # No-op after a successful rename; removes the leftover otherwise.
                if os.path.exists(part_path):
                    os.remove(part_path)
            print("Downloaded successfully!")
        else:
            print("Failed:", r.status_code)

    current += timedelta(days=1)



Processing 2024-05-01
URL: https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/2024/122/3B-HHR.MS.MRG.3IMERG.20240501-S000000-E002959.0000.V07B.HDF5
Downloaded successfully!

Processing 2024-05-02
URL: https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/2024/123/3B-HHR.MS.MRG.3IMERG.20240502-S000000-E002959.0000.V07B.HDF5
Downloaded successfully!

Processing 2024-05-03
URL: https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/2024/124/3B-HHR.MS.MRG.3IMERG.20240503-S000000-E002959.0000.V07B.HDF5
Downloaded successfully!


In [7]:
import os

# Print every entry of the download folder together with its size, as a quick
# check of what the download loop wrote.
print("Files in output folder:", output_folder)
# os.listdir() returns entries in arbitrary order; sorting keeps the listing
# stable across runs and platforms.
for fname in sorted(os.listdir(output_folder)):
    fpath = os.path.join(output_folder, fname)
    size_kb = os.path.getsize(fpath) / 1024  # bytes -> units of 1024 bytes
    print(f" - {fname} ({size_kb:.1f} KB)")


Files in output folder: imerg_data
 - 3B-HHR.MS.MRG.3IMERG.20240501-S000000-E002959.0000.V07B.HDF5 (7659.0 KB)
 - 3B-HHR.MS.MRG.3IMERG.20240502-S000000-E002959.0000.V07B.HDF5 (7893.9 KB)
 - 3B-HHR.MS.MRG.3IMERG.20240503-S000000-E002959.0000.V07B.HDF5 (8202.5 KB)
