# Get GFS data from the source
- Data dictionary: https://rda.ucar.edu/datasets/ds084.1/#metadata/grib2.html?_do=y
- Data access: https://rda.ucar.edu/datasets/ds084.1/#!access

## Imports

In [None]:
# IPython autoreload in mode 2: imported modules are re-imported before each
# cell runs, so edits to local helper modules are picked up without a kernel
# restart.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable

import httpx
import xarray as xr
from dotenv import load_dotenv

from download import download_file

In [None]:
# Load variables from a local .env file into the process environment; the
# login cell below reads UCAR_EMAIL and UCAR_PASS from os.environ.
load_dotenv()

## Authenticate and get cookie

In [None]:
# Log in to the UCAR RDA with credentials taken from the environment.
# A missing UCAR_EMAIL / UCAR_PASS variable raises KeyError right here.
auth_url = "https://rda.ucar.edu/cgi-bin/login"
auth_data = {
    "email": os.environ["UCAR_EMAIL"],
    "passwd": os.environ["UCAR_PASS"],
    "action": "login",
}
res = httpx.post(auth_url, data=auth_data)
# Check the status explicitly instead of with `assert`: assertions are
# skipped under `python -O` and would not report which status came back.
if res.status_code != 200:
    raise RuntimeError(f"UCAR login failed: HTTP {res.status_code}")
# Cookies set by the login response; handed to download_file in later cells.
cookies = res.cookies

## Build path and download

In [None]:
def build_url(date: datetime, fc: int) -> str:
    """Return the ds084.1 download URL of a single GFS GRIB2 file.

    The URL has the shape
    ``<base>/<year>/<YYYYMMDD>/gfs.0p25.<YYYYMMDD><HH>.f<FFF>.grib2``.

    Args:
        date: Timestamp supplying the year, the ``YYYYMMDD`` day stamp and
            the two-digit hour ``HH``; minutes and seconds are ignored.
        fc: Integer rendered as the zero-padded three-digit ``FFF`` suffix
            (presumably the forecast hour -- confirm against the data
            dictionary).

    Returns:
        The absolute URL as a string.
    """
    day_stamp = date.strftime("%Y%m%d")
    file_name = f"gfs.0p25.{day_stamp}{date.hour:02d}.f{fc:03d}.grib2"
    url_parts = (
        "https://rda.ucar.edu/data/ds084.1",
        str(date.year),
        day_stamp,
        file_name,
    )
    return "/".join(url_parts)

Download a single file:

In [None]:
# Example: fetch the file for 2022-08-13 00:00 with fc=0.
# `file` is reused by the xarray cell at the end of the notebook.
data_url = build_url(date=datetime(year=2022, month=8, day=13), fc=0)
file = download_file(data_url, cookies=cookies)

## Download multiple files

In [None]:
def download_range(
    start_date: datetime, end_date: datetime
) -> Iterable[tuple[datetime, int, Path]]:
    """Lazily download every file between two timestamps, inclusive.

    Starting at ``start_date`` and advancing in 6-hour steps while the
    current timestamp is not later than ``end_date``, the files for
    ``fc`` values 0, 3 and 6 are downloaded one at a time. Being a
    generator, nothing is fetched until the result is iterated.

    Relies on the notebook-level ``cookies`` for authentication.

    Args:
        start_date: First timestamp to download.
        end_date: Last timestamp to download (included when a 6-hour step
            lands exactly on it).

    Yields:
        ``(timestamp, fc, path)`` for each downloaded file, where ``path``
        is the ``download_file`` result wrapped in ``Path``.
    """
    six_hours = timedelta(hours=6)
    forecast_steps = (0, 3, 6)

    current = start_date
    while current <= end_date:
        for step in forecast_steps:
            target = build_url(current, step)
            local_path = Path(download_file(target, cookies=cookies))
            yield current, step, local_path
        current = current + six_hours

In [None]:
# Both bounds are inclusive. download_range is a generator, so this cell
# only sets up the iteration -- no file is downloaded until `files` is
# consumed.
start_date = datetime(year=2021, month=1, day=1)
end_date = datetime(year=2023, month=1, day=1)
files = download_range(start_date=start_date, end_date=end_date)

In [None]:
# Usage: iterating over `files` triggers the downloads, one file per step.
# for date, fc, path in files: pass

## Load into xarray

Possible values for `typeOfLevel`:
- meanSea
- hybrid
- atmosphere
- surface
- planetaryBoundaryLayer
- isobaricInPa
- isobaricInhPa
- heightAboveGround
- depthBelowLandLayer
- heightAboveSea
- atmosphereSingleLayer
- lowCloudLayer
- middleCloudLayer
- highCloudLayer
- cloudCeiling
- heightAboveGroundLayer
- tropopause
- maxWind
- isothermZero
- highestTroposphericFreezing
- pressureFromGroundLayer
- sigmaLayer
- sigma
- potentialVorticity

In [None]:
# Open the single file downloaded earlier with the cfgrib engine, keeping
# only the GRIB messages whose typeOfLevel is "sigma" (other possible values
# are listed above).
ds = xr.open_dataset(
    file,
    backend_kwargs=dict(filter_by_keys={"typeOfLevel": "sigma"}),
    engine="cfgrib",
)
# Bare expression so the notebook renders the dataset summary.
ds