In [10]:
# Standard libraries
import os

# Third-party libraries
import pandas as pd
import numpy as np
import xarray as xr

# Local libraries
from utilities import netcdf


In [11]:
# Paths, resolved against the directory the notebook kernel is running in.
_CWD = os.getcwd()
DATA_DIR = os.path.join(_CWD, "data/intermediate/hadukgrid")  # input NetCDF files
OUT_DIR = os.path.join(_CWD, "data/processed/time")  # destination of time.csv


In [12]:
# Collect the input files found under DATA_DIR using the project-local helper.
# NOTE(review): `path=True` presumably makes the helper return full paths rather
# than bare file names (each entry is later handed to xr.open_dataarray) — confirm.
NC_FILES = netcdf.list_files(DATA_DIR, path=True)


In [13]:
# Scan every input file for the earliest and latest value of its "time"
# coordinate, then reduce to the overall span covered by the collection.
startTimes, endTimes = [], []
for path in NC_FILES:
    # The context manager closes the file as soon as its bounds are read.
    with xr.open_dataarray(path, decode_coords="all", chunks="auto") as da:
        time_coord = da.coords["time"]
        startTimes.append(time_coord.min().values)
        endTimes.append(time_coord.max().values)
# Overall first/last timestamp across all files (raises ValueError if no
# files were listed, because min() receives an empty list).
firstTime, lastTime = min(startTimes), max(endTimes)


In [14]:
def get_season_month(datetime):
    """Return the season year of a timestamp.

    Despite the name, the result is a year: December is assigned to the
    following calendar year, every other month keeps its own year.
    NOTE(review): presumably so that December groups with the January and
    February that follow it (one winter season) — confirm.

    Args:
        datetime: Object exposing integer ``year`` and ``month`` attributes
            (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).

    Returns:
        int: ``year + 1`` when the month is 12, otherwise ``year``.
    """
    calendar_year = datetime.year
    return calendar_year + 1 if datetime.month == 12 else calendar_year


def get_time_id(datetime):
    """Build the ``YYYYMM`` identifier of the month a timestamp falls in.

    Args:
        datetime: Object with a ``date()`` method returning a calendar date
            (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).

    Returns:
        str: Four-digit year followed by the zero-padded month, e.g. "183601".
    """
    # Drop the time-of-day part first; only the calendar date is formatted.
    calendar_day = datetime.date()
    return calendar_day.strftime("%Y%m")


def get_month(datetime):
    """Return the month number of a timestamp.

    Args:
        datetime: Object exposing a ``month`` attribute
            (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).

    Returns:
        The value of ``datetime.month`` (1 for January ... 12 for December
        for standard date types).
    """
    month_number = datetime.month
    return month_number


# Build the monthly time table: one row per month between the earliest and
# latest timestamps found in the input files, both ends included.
df = pd.DataFrame(
    {
        "date": pd.date_range(
            start=firstTime,
            end=lastTime,
            freq=pd.DateOffset(months=1),
            inclusive="both",
        )
    }
)
# Every derived column depends only on "date", so map the helpers over that
# single column instead of running a row-wise DataFrame.apply(axis=1), which
# materialises a Series for every row on each of the four passes.
dates = df["date"]
df["month_number"] = dates.map(get_month)
df["year"] = dates.map(lambda ts: ts.year)
df["season_year"] = dates.map(get_season_month)  # December -> following year
df["id"] = dates.map(get_time_id)  # "YYYYMM" string key
df

0      1836-01-16
1      1836-02-16
2      1836-03-16
3      1836-04-16
4      1836-05-16
          ...    
2227   2021-08-16
2228   2021-09-16
2229   2021-10-16
2230   2021-11-16
2231   2021-12-16
Name: date, Length: 2232, dtype: datetime64[ns]

In [15]:
# Create the output directory if it is missing: to_csv does not create parent
# directories and would otherwise fail on a fresh checkout.
os.makedirs(OUT_DIR, exist_ok=True)
# Write the time table without the positional index column.
df.to_csv(os.path.join(OUT_DIR, "time.csv"), index=False)
