In [144]:
import io
import os
import subprocess
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Iterable, Mapping, Optional, Sequence, TypeAlias, Union

import dateparser
import numpy as np
import numpy.typing as npt
import pandas as pd
import psycopg
import pygrib
import requests
import xarray as xr
from psycopg import sql
from requests.adapters import HTTPAdapter
from urllib3 import Retry

Query: TypeAlias = Union[bytes, "sql.SQL", "sql.Composed"]
Params: TypeAlias = Union[Sequence[Any], Mapping[str, Any]]

"CMC_hrdps_domain_Variable_LevelType_level_ps2.5km_YYYYMMDDHH_Phhh-mm.grib2"  
"CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022121900_P000-00.grib2 "  
"https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/000/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022121900_P000-00.grib2"  

In [None]:
def create_urls() -> list[list[str]]:
    base_url = "https://dd.weather.gc.ca/model_hrdps/continental/grib2/"
    model_runs = [f"{(i):0>2}" for i in range(0, 24, 6)]
    forecast_hours = [f"{(i):0>3}" for i in range(49)]
    prefix = "CMC"
    model = "hrdps"
    domain = "continental"
    variable = "SNOD"
    level_type = "SFC"
    level = "0"
    resolution = "ps2.5km"
    date = datetime.now(timezone.utc).strftime("%Y%m%d")
    minutes = "00"
    extension = "grib2"

    model_run_urls = []
    for model_run in model_runs:
        prediction_urls = []
        for forecast_hour in forecast_hours:
            filename = f"{prefix}_{model}_{domain}_{variable}_{level_type}_{level}_{resolution}_{date}{model_run}_P{forecast_hour}-{minutes}.{extension}"
            download_url = f"{base_url}{model_run}/{forecast_hour}/{filename}"
            prediction_urls.append(download_url)
        model_run_urls.append(prediction_urls)
    
    return model_run_urls

In [None]:
model_run_urls = create_urls()

In [None]:
def find_latest_run(model_run_urls: list[list[str]]) -> int:
    """Find the latest model run by issuing a HEAD http request to the first url of each model run and comparing the "Last-Modified" field."""
    latest_time = datetime.fromisoformat("0001-01-01 00:00:00.000+00:00")
    latest_idx = 0
    for idx, prediction_urls in enumerate(model_run_urls):
        res = requests.head(prediction_urls[0])
        if not res.ok:
            continue

        modified_date = dateparser.parse(res.headers["Last-Modified"])
        if not modified_date:
            continue

        if modified_date > latest_time:
            latest_time = modified_date
            latest_idx = idx

    return latest_idx


latest_id = find_latest_run(model_run_urls)

In [None]:
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def download_predictions(model_run_urls: list[list[str]], model_idx:int = 0, savepath:str = "./") -> list[str]:
    filepaths = []

    retry_strategy = Retry(
        total=3,
        backoff_factor=0.3,
        status_forcelist=[404, 429, 500, 502, 503, 504],
        method_whitelist=["HEAD", "GET", "OPTIONS"]
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    for prediction_url in model_run_urls[model_idx]:
        filename = prediction_url.split("/")[-1]
        print(f"Downloading {prediction_url}")
        res = http.get(prediction_url)
        filepath = os.path.join(savepath, filename)
        with open(filepath, "wb") as f:
            f.write(res.content)
        filepaths.append(filepath)
    return filepaths

paths = download_predictions(model_run_urls, latest_id, "../data/")

In [None]:
def download_predictions(model_run_urls: list[list[str]], model_idx:int = 0, savepath:str = "./") -> list[str]:
    filepaths = []
    for prediction_url in model_run_urls[model_idx]:
        filename = prediction_url.split("/")[-1]
        print(f"Processing {prediction_url}")
        res = requests.get(prediction_url)
        filepath = os.path.join(savepath, filename)
        with open(filepath, "wb") as f:
            f.write(res.content)
        filepaths.append(filepath)
    return filepaths

paths = download_predictions(model_run_urls, latest_id, "../data/")

In [None]:
# Get current time in UTC
# for most recent to oldest model run:
#   Is date and model run newer than existing data in DB?
#       NO > raise error & stop
#   retrieve current date's forecast hour folder listing:
#       Contains all forecast hours?
#            YES > download
#            NO or 404 > attempt earlier model run
#   Nothing downloaded > go back one day

In [2]:
def query_latest(
    forecast_hour: int,
    url_path: str = "WXO-DD/model_hrdps/continental/grib2",
    domain: str = "https://hpfx.collab.science.gc.ca",
) -> tuple[str, str, str] | None:
    """Find the latest model run.
    Starting at the current date working backwards 5 days, for each day for forecasts 18,12,06,00 a HEAD request is sent to the forecast_hour.
    First 200 OK response is returned"""
    current_date = datetime.now(timezone.utc)

    # url parameters
    forecasts = [f"{i:02}" for i in range(0, 24, 6)][::-1]
    dates = [(current_date - timedelta(days=i)).strftime("%Y%m%d") for i in range(5)]

    for date in dates:
        for forecast in forecasts:
            baseurl = f"{domain.strip('/')}/{date}/{url_path.strip('/')}/{forecast}/"
            request_url = f"{baseurl}{forecast_hour:03}/"
            res = requests.head(request_url)
            if res.status_code == 200:
                print("Success:", baseurl, date, forecast)
                return baseurl, date, forecast
            else:
                print("Status code:", res.status_code, request_url)


baseurl, date, model_run = query_latest(48)

Status code: 404 https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/18/048/
Success: https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/12/ 20221221 12


In [3]:
def create_urls(
    base_url:str = "https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/06/",
    forecast_hours:list[str] = [f"{(i):03}" for i in range(49)],
    prefix:str = "CMC",
    model:str = "hrdps",
    domain:str = "continental",
    variable:str = "SNOD",
    level_type:str = "SFC",
    level:str = "0",
    resolution:str = "ps2.5km",
    date:str = datetime.now(timezone.utc).strftime("%Y%m%d"),
    model_run:str = "00",
    minutes:str = "00",
    extension:str = "grib2",
    ) -> list[str]:
    """Create list of URLs given inputs.
    URL structure: {base_url}/{forecast_hour}/{filename}
    Filename structure: {prefix}_{model}_{domain}_{variable}_{level_type}_{level}_{resolution}_{date}{model_run}_P{forecast_hour}-{minutes}.{extension}"""
    urls = []
    for forecast_hour in forecast_hours:
        filename = f"{prefix}_{model}_{domain}_{variable}_{level_type}_{level}_{resolution}_{date}{model_run:02}_P{forecast_hour:03}-{minutes:02}.{extension}"
        download_url = f"{base_url.strip('/')}/{forecast_hour.strip('/')}/{filename}"
        urls.append(download_url)
    
    return urls

download_urls = create_urls(base_url=baseurl, date=date, model_run=model_run)

In [4]:
def download_predictions(download_urls: list[str], savepath: str = "./") -> list[str]:
    """Download list of urls to savepath"""
    filepaths = []

    retry_strategy = Retry(
        total=3,
        backoff_factor=0.3,
        status_forcelist=[404, 429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "OPTIONS"],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    for url in download_urls:
        filename = url.split("/")[-1]
        print(f"Downloading {url}", end=" | ")
        res = http.get(url)
        filepath = os.path.join(savepath, filename)
        with open(filepath, "wb") as f:
            f.write(res.content)
        print("File size:", res.headers["Content-Length"])
        filepaths.append(filepath)
    return filepaths


paths = download_predictions(download_urls, "../data/")

Downloading https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/12/000/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P000-00.grib2 | File size: 2912404
Downloading https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/12/001/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P001-00.grib2 | File size: 2917492
Downloading https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/12/002/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P002-00.grib2 | File size: 2916855
Downloading https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/12/003/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P003-00.grib2 | File size: 2915974
Downloading https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hrdps/continental/grib2/12/004/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P004-00.grib2 | File size: 2915240
Downloading https://hpfx.collab.science.gc.ca/20221221/WXO-DD/model_hr

In [None]:
def read_grib_predictions(filepaths:list[str]) -> list[npt.NDArray]:
    predictions:list[npt.NDArray] = []
    for path in filepaths:
        print(f"Reading {path}")
        gribs = pygrib.open(path)
        data = np.ma.filled(gribs[1].values, 0)
        predictions.append(data.reshape(-1))
    return predictions

data = read_grib_predictions(paths)

In [17]:
def read_grib_to_df(path: str) -> dict[str, pd.DataFrame | datetime | timedelta]:
    """Reads GRIB2 file and returns a dictionary with the data in a dataframe and details about the GRIB file.
    Multi-message GRIB2 files are not supported."""
    grib = pygrib.open(path)[1]
    data = np.around(
        np.stack(
            [
                *grib.latlons(),
                np.ma.filled(grib.values, 0),
            ],
            axis=-1,
        ).reshape((-1, 3)),
        decimals=6,
    )
    # lat, lon = grib.latlons()
    forecast_reference_time = datetime.strptime(
        f"{grib['dataDate']}{grib['dataTime']:04} +0000", "%Y%m%d%H%M %z"
    )
    forecast_validity_time = datetime.strptime(
        f"{grib['validityDate']}{grib['validityTime']:04} +0000", "%Y%m%d%H%M %z"
    )
    forecast_step = forecast_validity_time - forecast_reference_time
    return {
        "data": pd.DataFrame(data, columns=["latitude", "longitude", "value"]),
        "forecast_reference_time": forecast_reference_time,
        "forecast_validity_time": forecast_validity_time,
        "forecast_step": forecast_step,
        "short_name": grib["shortName"],
        "long_name": grib["name"],
        "units": grib["units"],
    }

In [130]:
# Define connection details
pg_connection_dict = {
    'dbname': "mydb",
    'user': "myn",
    'password': r"2)2K9zJCKZv7pLUd",
    'port': "5432",
    'host': "terraform-20221222010822007100000002.c2x7llrlmsr3.us-east-2.rds.amazonaws.com"
}

with psycopg.connect(**pg_connection_dict, autocommit=True) as conn:
    with conn.cursor() as curr:
        print(conn.info.encoding)
        print(curr.execute(r"SELECT now()").fetchall())

utf-8
[(datetime.datetime(2022, 12, 22, 1, 12, 48, 538132, tzinfo=datetime.timezone.utc),)]


In [66]:
def execute_sql_as_dataframe(
    conn_details: dict[str, str],
    sql_query: Query,
    params: Optional[Params] = None,
) -> pd.DataFrame:
    """Execute SQL query and return results in a DataFrame"""
    with psycopg.connect(**pg_connection_dict, autocommit=True) as conn:
        with conn.cursor() as curr:
            res = curr.execute(sql_query, params).fetchall()
            print(f"Rows impacted: {curr.rowcount}")

            if curr.description:
                columns = [col.name for col in curr.description]
                return pd.DataFrame(res, columns=columns)
            return pd.DataFrame()

In [50]:
def execute_sql_file(conn_details: dict[str, str], filepath: str) -> None:
    """Execute sql contained in a file using the conn_details"""
    with psycopg.connect(**conn_details, autocommit=True) as conn:
        with conn.cursor() as curr:
            with open(filepath, "rt") as f:
                contents: LiteralString = f.read()
            if contents:
                curr.execute(sql.SQL(contents))
                print(f"Rows impacted: {curr.rowcount}")

In [128]:
def execute_sql_statement(
    conn_details: dict[str, str],
    sql_statement: Query,
    params: Optional[Params] = None,
) -> None:
    """Execute sql provided using the conn_details"""
    with psycopg.connect(**conn_details, autocommit=True) as conn:
        with conn.cursor() as curr:
            curr.execute(sql_statement, params)
            print(f"Rows impacted: {curr.rowcount}")

In [63]:
def write_coordinates(conn_details: dict[str, str], coordinated_path: str):
    """Parse coordinates file from WSC and insert the data into the coordinates table"""
    df = pd.read_csv(
        coordinated_path, sep=" ", names=["i", "j", "latitude", "longitude"], skiprows=1
    )

    columns = ["latitude", "longitude"]
    write_query = sql.SQL("COPY {table} ({columns}) FROM STDIN").format(
        table=sql.Identifier("public", "coordinates"),
        columns=sql.SQL(", ").join(map(sql.Identifier, columns)),
    )

    with psycopg.connect(**conn_details, autocommit=True) as conn:
        with conn.cursor() as curr:
            with curr.copy(write_query) as copy:
                for record in df[["latitude", "longitude"]].values:
                    copy.write_row(record)
                print(f"Rows impacted: {curr.rowcount}")

In [115]:
def execute_copy_statement_using_df(
    conn_details: dict[str, str],
    sql_statement: Query,
    df: pd.DataFrame,
    params: Optional[Params] = None,
) -> None:
    """Execute sql copy statement against the conn_details using the DataFrame provided"""
    with psycopg.connect(**conn_details, autocommit=True) as conn:
        with conn.cursor() as curr:
            with curr.copy(sql_statement, params) as copy:
                for record in df.itertuples(index=False):
                    copy.write_row(record)
            print(f"Rows impacted: {curr.rowcount}")

In [148]:
def execute_many_statement(
    conn_details: dict[str, str],
    sql_statement: Query,
    params: Iterable[Params],
) -> None:
    """Execute sql statements against the conn_details using the DataFrame provided"""
    with psycopg.connect(**conn_details, autocommit=True) as conn:
        with conn.cursor() as curr:
            print(sql_statement.as_string(curr))
            curr.executemany(sql_statement, params)
            print(f"Rows impacted: {curr.rowcount}")

In [86]:
def get_forecast_id(
    conn_details: dict[str, str],
    model: str,
    forecast_reference_time: datetime,
    forecast_step: timedelta,
):
    """Select forecast_id where model, forecast_reference_time and forecast_step"""
    fields = ["forecast_id"]
    sql_statement = sql.SQL(
        """
        SELECT {fields} 
        FROM {table} 
        WHERE
            model = {model}
            and forecast_reference_time = {time}
            and forecast_step = {step};"""
    ).format(
        table=sql.Identifier("public", "forecasts"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields)),
        model=sql.Literal(model),
        time=sql.Literal(forecast_reference_time),
        step=sql.Literal(forecast_step),
    )

    df = execute_sql_as_dataframe(conn_details, sql_statement)

    return int(df.iloc[0, 0])

In [87]:
def get_variable_id(
    conn_details: dict[str, str],
    short_name: str
):
    """Select variable_id where short_name"""
    fields = ["variable_id"]
    sql_statement = sql.SQL(
        """
        SELECT {fields} 
        FROM {table} 
        WHERE short_name = {name};"""
    ).format(
        table=sql.Identifier("public", "variables"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields)),
        name=sql.Literal(short_name),
    )

    df = execute_sql_as_dataframe(conn_details, sql_statement)

    return int(df.iloc[0, 0])

In [77]:
def get_coordinates(
    conn_details: dict[str, str],
):
    """Select coordinates into a DataFrame"""
    fields = ["coord_id", "latitude", "longitude"]
    sql_statement = sql.SQL(
        """
        SELECT {fields} 
        FROM {table};"""
    ).format(
        table=sql.Identifier("public", "coordinates"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields)),
    )

    df = execute_sql_as_dataframe(conn_details, sql_statement)

    return df

In [None]:
def write_data_to_temp_table(grib_paths:list[str], model_name:str = "hrdps"):
    with psycopg.connect(**pg_connection_dict, autocommit=True) as conn:
        with conn.cursor() as curr:

            # get parameters from 1st grib file
            grib_data = read_grib_to_df(grib_paths[0])
            forecast = {
                "model": model_name,
                "forecast_reference_time": grib_data["forecast_reference_time"],
                "forecast_step": grib_data["forecast_step"],
            }
            variable = {
                "short_name": grib_data["short_name"]
            }

            # get forecast_id
            with conn.transaction():
                forecast_id = curr.execute(
                    """
                    SELECT forecast_id
                    FROM public.forecasts
                    WHERE 
                        model = %(model)s AND
                        forecast_reference_time = %(forecast_reference_time)s AND
                        forecast_step = %(step)s
                """,
                    forecast,
                ).fetchone()
            if not forecast_id:
                raise ValueError("No forecast_id obtained")

            # get variable_id
            with conn.transaction():
                variable_id = curr.execute(
                    """
                    SELECT variable_id
                    FROM public.variables
                    WHERE short_name = %s
                """,
                    [variable["short_name"]],
                ).fetchone()
            if not variable_id:
                raise ValueError("No variable_id obtained")

            # get coord_ids
            with conn.transaction():
                variable_id = curr.execute(
                    """
                    SELECT variable_id
                    FROM public.variables
                    WHERE short_name = %s
                """,
                    [variable["short_name"]],
                ).fetchone()
            if not variable_id:
                raise ValueError("No variable_id obtained")

            # Prepare data to upload

            curr.executemany(write_query, list(df.itertuples(index=False)))

In [131]:
# Create fresh tables
print("Creating fresh tables", end="\t")
execute_sql_file(pg_connection_dict, "../database.sql")

# Populate coordinates
print("Populating coordinates table", end="\t")
write_coordinates(pg_connection_dict, "../data/coordinates/hrdps_continential.txt")
df_coords = get_coordinates(pg_connection_dict)

model = "hrdps"

for idx, path in enumerate(paths):
    print(f"{idx:02}: {path}")

    # Get grib meta data from first GRIB file
    print("\tReading GRIB meta data")
    grib_info = read_grib_to_df(path)

    # Populate variables
    print("\tPopulating variables table", end="\t")
    fields = ["short_name", "long_name", "unit"]
    rfields = ["variable_id"]
    sql_statement = sql.SQL(
        """
            INSERT INTO {table}({fields}) 
            VALUES ({placeholders})
            ON CONFLICT DO NOTHING
        """
    ).format(
        table=sql.Identifier("public", "variables"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields)),
        placeholders=sql.SQL(", ").join(sql.Placeholder() * len(fields)),
    )
    execute_sql_statement(
        pg_connection_dict,
        sql_statement,
        (grib_info["short_name"], grib_info["long_name"], grib_info["units"]),
    )
    variable_id = get_variable_id(pg_connection_dict, grib_info["short_name"])

    # Populate forecasts
    print("\tPopulating forecasts table", end="\t")
    fields = ["model", "forecast_reference_time", "forecast_step"]
    sql_statement = sql.SQL(
        """
            INSERT INTO {table}({fields}) 
            VALUES ({placeholders})
            ON CONFLICT DO NOTHING
        """
    ).format(
        table=sql.Identifier("public", "forecasts"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields)),
        placeholders=sql.SQL(", ").join(sql.Placeholder() * len(fields)),
    )
    forecast_id = execute_sql_statement(
        pg_connection_dict,
        sql_statement,
        (model, grib_info["forecast_reference_time"], grib_info["forecast_step"]),
    )
    forecast_id = get_forecast_id(
        pg_connection_dict,
        model,
        grib_info["forecast_reference_time"],
        grib_info["forecast_step"],
    )

    # Populate predictions table
    print("\tPreparing predications data")
    df = grib_info["data"]
    df = pd.DataFrame.merge(
        df,
        right=df_coords,
        on=["latitude", "longitude"],
        how="inner",
        validate="1:1",
    )
    df["variable_id"] = variable_id
    df["forecast_id"] = forecast_id

    fields = ["forecast_id", "variable_id", "coord_id", "value"]
    write_query = sql.SQL("COPY {table} ({fields}) FROM STDIN").format(
        table=sql.Identifier("public", "predictions"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields)),
    )
    print("\tPopulating predictions table", end="\t")
    execute_copy_statement_using_df(pg_connection_dict, write_query, df.loc[:, fields])

Creating fresh tables	Rows impacted: -1
Populating coordinates table	Rows impacted: -1
Rows impacted: 3750656
00: ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P000-00.grib2
	Reading GRIB meta data
	Populating variables table	Rows impacted: 1
Rows impacted: 1
	Populating forecasts table	Rows impacted: 1
Rows impacted: 1
	Preparing predications data
	Populating predictions table	Rows impacted: 3750656
01: ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P001-00.grib2
	Reading GRIB meta data
	Populating variables table	Rows impacted: 0
Rows impacted: 1
	Populating forecasts table	Rows impacted: 1
Rows impacted: 1
	Preparing predications data
	Populating predictions table	Rows impacted: 3750656
02: ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P002-00.grib2
	Reading GRIB meta data
	Populating variables table	Rows impacted: 0
Rows impacted: 1
	Populating forecasts table	Rows impacted: 1
Rows impacted: 1
	Preparing predications data
	Populating predict

In [None]:
"""INSERT INTO predictions
            (forecast_id,
             variable_id,
             coord_id,
             value)
SELECT *
FROM   (WITH f
             AS (SELECT forecast_id
                 FROM   public.forecasts
                 WHERE  model = 'hrdps'
                        AND forecast_reference_time =
                            '2022-12-21 07:00:00.000 -0500'
                        AND forecast_step = '00:00:00'),
             v
             AS (SELECT variable_id
                 FROM   public.variables
                 WHERE  short_name = 'sde'),
             c
             AS (SELECT coord_id,
                        latitude,
                        longitude
                 FROM   coordinates)
        SELECT forecast_id,
               variable_id,
               coord_id,
               t.value
         FROM   "temp" t
                cross join f
                cross join v
                inner join c
                        ON t.latitude = c.latitude
                           AND t.longitude = c.longitude)"""

In [136]:
sql_statement = sql.SQL(
    """
        SELECT
            c.latitude,
            c.longitude,
            f.forecast_reference_time + f.forecast_step AS forecast_time,
            value AS snow_depth
        FROM predictions p
        inner join coordinates c on p.coord_id = c.coord_id and c.latitude = '35.758587' and c.longitude = '-127.552296'
        inner join forecasts f on p.forecast_id = f.forecast_id
        inner join variables v on p.variable_id = v.variable_id
        ORDER BY forecast_time ASC
    """)

df = execute_sql_as_dataframe(pg_connection_dict, sql_statement)

Rows impacted: 49


In [153]:
# Test writing to table using arrays
# Assume all dimension tables have already been populated

# Get coordinates
print("Getting coordinates", time.strftime("%H:%M:%S +0000", time.gmtime()))
df_coords = get_coordinates(pg_connection_dict)
model = "hrdps"

for idx, path in enumerate(paths):
    print(f"{idx:02}: {path}")

    # Get grib meta data from first GRIB file
    print("\tReading GRIB meta data", time.strftime("%H:%M:%S +0000", time.gmtime()))
    grib_info = read_grib_to_df(path)

    # Get variable
    print("\tGet forecast_id", time.strftime("%H:%M:%S +0000", time.gmtime()), end="\t")
    variable_id = get_variable_id(pg_connection_dict, grib_info["short_name"])

    # Get forecast
    print("\tGet variable_id", time.strftime("%H:%M:%S +0000", time.gmtime()), end="\t")
    forecast_id = get_forecast_id(
        pg_connection_dict,
        model,
        grib_info["forecast_reference_time"],
        grib_info["forecast_step"],
    )

    # Populate predictions table
    print("\tPreparing predications data", time.strftime("%H:%M:%S +0000", time.gmtime()))
    df = grib_info["data"]
    df = pd.DataFrame.merge(
        df,
        right=df_coords,
        on=["latitude", "longitude"],
        how="inner",
        validate="1:1",
    )
    df["variable_id"] = variable_id
    df["forecast_id"] = forecast_id
    params = [row._asdict() for row in df.loc[:, ["forecast_id", "variable_id", "coord_id", "value"]].itertuples(index=False)]

    fields = ["forecast_id", "variable_id", "coord_id", f"value {idx:02}"]
    write_query = sql.SQL(
        """
        INSERT INTO {table} ({fields}, {value_field})
        VALUES ({forecast_id}, {variable_id}, {coord_id}, {value})
        ON CONFLICT ({fields})
        DO UPDATE SET {value_field} = {value};
        """).format(
        table=sql.Identifier("public", "test"),
        fields=sql.SQL(", ").join(map(sql.Identifier, fields[:-1])),
        value_field=sql.Identifier(f"value {idx:02}"),
        forecast_id=sql.Placeholder("forecast_id"),
        variable_id=sql.Placeholder("variable_id"),
        coord_id=sql.Placeholder("coord_id"),
        value=sql.Placeholder("value")
    )
    print("\tPopulating predictions table", time.strftime("%H:%M:%S +0000", time.gmtime()), end="\t")
    execute_many_statement(pg_connection_dict, write_query, params)

Getting coordinates 14:50:50 +0000
Rows impacted: 3750656
00: ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P000-00.grib2
	Reading GRIB meta data 14:50:57 +0000
	Get forecast_id 14:50:59 +0000	Rows impacted: 1
	Get variable_id 14:50:59 +0000	Rows impacted: 1
	Preparing predications data 14:50:59 +0000
	Populating predictions table 14:51:18 +0000	
        INSERT INTO "public"."test" ("forecast_id", "variable_id", "coord_id", "value 00")
        VALUES (%(forecast_id)s, %(variable_id)s, %(coord_id)s, %(value)s)
        ON CONFLICT ("forecast_id", "variable_id", "coord_id")
        DO UPDATE SET "value 00" = %(value)s;
        
Rows impacted: 3750656
01: ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122112_P001-00.grib2
	Reading GRIB meta data 14:54:47 +0000
	Get forecast_id 14:54:49 +0000	Rows impacted: 1
	Get variable_id 14:54:49 +0000	Rows impacted: 1
	Preparing predications data 14:54:49 +0000


KeyboardInterrupt: 