This script walks through a forecast for SMR using the NN-operational model developed in the NASA-NW repo, driven by daily summaries of the 3h GEFS data. In the rollout, we use forecasted met data from NOAA GEFS (0.25 degree resolution) and the forecasted water temperature; all other data are observed.

# Import Modules

In [93]:
#high level modules
import os
import imp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [94]:
# custom modules
this_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/NN-operational/arNN_summary/"

# Load universal_functions.py from this_dir under the module name "universals"
# so that its helpers can be imported by that name on the next line.
# NOTE(review): the `imp` module is deprecated and was removed in Python 3.12;
# migrating this cell (and the `import imp` above) to importlib would be needed
# to run on newer interpreters.
universals_path = os.path.join(this_dir, "universal_functions.py")
imp.load_source("universals", universals_path)
from universals import load_pickle_file, calculate_vals


# Import models

In [95]:
model_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/models/leaky_5_summary_t2023/"

# Load the nine pickled models (model_1.pkl ... model_9.pkl) from model_dir.
# They stay bound to the individual names model_1 ... model_9 because the
# prediction helpers further down refer to them by those names.
(
    model_1,
    model_2,
    model_3,
    model_4,
    model_5,
    model_6,
    model_7,
    model_8,
    model_9,
) = (load_pickle_file(f"model_{k}.pkl", model_dir) for k in range(1, 10))


# Import data

In [None]:
data_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/met_summary/"

# Read the standardized summary table ("test") and normalise its date column
# to "YYYY-MM-DD" strings (values are parsed as UTC before formatting).
test = pd.read_csv(
    os.path.join(data_dir, "t2023_standardized_summary_v2024-11-29.csv")
).assign(
    date=lambda df: pd.to_datetime(df["date"], utc=True).dt.strftime("%Y-%m-%d"),
)

# Same treatment for the forecast table, which carries two date columns:
# "date" and "valid_date".
forecast = pd.read_csv(
    os.path.join(data_dir, "t2023_forecast_std_v2024-12-01.csv")
).assign(
    date=lambda df: pd.to_datetime(df["date"], utc=True).dt.strftime("%Y-%m-%d"),
    valid_date=lambda df: pd.to_datetime(df["valid_date"], utc=True).dt.strftime("%Y-%m-%d"),
)

# Column layout of the test table; the assembled forecast inputs are re-ordered
# to match it later on.
forecast_cols = test.columns

# The same layout without the two observed water-temperature columns.
forecast_cols_less = forecast_cols.drop(["mean_1m_temp_degC", "mean_0_5m_temp_degC"])

0    2023-05-18
1    2023-05-19
2    2023-05-20
3    2023-05-21
4    2023-05-22
Name: date, dtype: object 0    2023-06-01
1    2023-06-01
2    2023-06-01
3    2023-06-01
4    2023-06-01
Name: date, dtype: object 0    2023-06-01
1    2023-06-02
2    2023-06-03
3    2023-06-04
4    2023-06-05
Name: valid_date, dtype: object


In [100]:
# Names of the met columns in the forecast table, as a plain list: every column
# whose name contains "air_temp", "sol_rad", "rh" or "wind".
# NOTE(review): this is a substring match, so any other column that happens to
# contain "rh" or "wind" would be picked up as well - confirm against the schema.
met_cols = forecast.columns[
    forecast.columns.str.contains("air_temp|sol_rad|rh|wind")
].tolist()

def make_met_cols(d):
    """Return the met column names plus their lagged variants up to lag ``d``.

    For ``d == 0`` the module-level ``met_cols`` list itself is returned.
    Otherwise the result is a new list: ``met_cols`` followed by
    ``"<col>_m1"`` for every met column, then ``"<col>_m2"``, and so on up to
    ``"<col>_m<d>"``.
    """
    if d == 0:
        return met_cols

    lagged_names = []
    for lag in range(1, d + 1):
        # one full pass over the met columns per lag keeps the names grouped by lag
        lagged_names.extend(f"{name}_m{lag}" for name in met_cols)
    return met_cols + lagged_names

def pred_9_models(features):
    """Run ``features`` through all nine models.

    Returns a list with the ``.predict(features)`` result of ``model_1`` through
    ``model_9``, in that order.
    """
    ensemble = (
        model_1,
        model_2,
        model_3,
        model_4,
        model_5,
        model_6,
        model_7,
        model_8,
        model_9,
    )
    return [member.predict(features) for member in ensemble]

def make_forecasted_temp(pred_1, pred_2, pred_3, pred_4, pred_5, pred_6, pred_7, pred_8, pred_9, obs_fore, forecast_date):
    """Stack the nine models' predictions into one long data frame.

    Parameters
    ----------
    pred_1 ... pred_9 :
        Predictions of model 1 through 9. Each is iterated row by row; element 0
        of a row is stored as ``mean_1m_temp_degC`` and element 1 as
        ``mean_0_5m_temp_degC``.
    obs_fore :
        Data frame whose index supplies the ``perturbation`` value of each row.
        Every prediction must have one row per entry of this index.
    forecast_date :
        Value written to the ``date`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``perturbation``, ``model``, ``mean_1m_temp_degC`` and
        ``mean_0_5m_temp_degC``, with one block of rows per model (model numbers
        1-9, in order). Each block keeps its own 0..n-1 row index, so index
        labels repeat across models.
    """
    predictions = (pred_1, pred_2, pred_3, pred_4, pred_5, pred_6, pred_7, pred_8, pred_9)

    # Build every per-model frame in one go and concatenate once, rather than
    # probing locals() for an accumulator and re-concatenating on each pass.
    per_model_frames = [
        pd.DataFrame(
            {
                "date": forecast_date,
                "perturbation": obs_fore.index,
                "model": model_number,
                "mean_1m_temp_degC": [row[0] for row in pred],
                "mean_0_5m_temp_degC": [row[1] for row in pred],
            }
        )
        for model_number, pred in enumerate(predictions, start=1)
    ]
    return pd.concat(per_model_frames)

def make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date):
    """Predict with every model on its own slice of ``forecast_features``.

    Parameters
    ----------
    forecast_features :
        Feature frame with a two-level index: level 0 is the model number
        (1-9) and level 1 is the perturbation. The rows whose level-0 value is
        ``m`` are passed to ``model_m.predict``.
    forecasted_temp :
        Template frame; only its column names/order are used to lay out the
        result. It is no longer modified in place (callers in this file pass a
        throw-away empty frame).
    forecast_date :
        Value written to the ``date`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One block of rows per model with the template's columns, where
        ``mean_1m_temp_degC`` / ``mean_0_5m_temp_degC`` are elements 0 / 1 of
        each prediction row. Each block keeps its own 0..n-1 row index.
    """
    # Reference the models directly instead of eval()-ing their names.
    ensemble = (
        model_1,
        model_2,
        model_3,
        model_4,
        model_5,
        model_6,
        model_7,
        model_8,
        model_9,
    )

    # Output layout: the template's columns first, then any produced column the
    # template lacks (in the order the values are produced).
    produced_cols = ["perturbation", "model", "mean_1m_temp_degC", "mean_0_5m_temp_degC", "date"]
    out_cols = list(forecasted_temp.columns) + [
        col for col in produced_cols if col not in forecasted_temp.columns
    ]

    model_level = forecast_features.index.get_level_values(0)

    per_model_frames = []
    for m, model in enumerate(ensemble, start=1):
        # rows belonging to this model only
        model_forecast_features = forecast_features[model_level == m]
        pred = model.predict(model_forecast_features)

        # A fresh frame per model: re-using one frame across iterations fails as
        # soon as two models have a different number of rows.
        model_frame = pd.DataFrame(
            {
                "perturbation": model_forecast_features.index.get_level_values(1),
                "model": m,
                "mean_1m_temp_degC": [row[0] for row in pred],
                "mean_0_5m_temp_degC": [row[1] for row in pred],
                "date": forecast_date,
            }
        )
        per_model_frames.append(model_frame.reindex(columns=out_cols))

    # single concatenation instead of a locals()-guarded accumulator
    return pd.concat(per_model_frames)

def prep_data_for_forecast_0(d, fore, obs, forecast_cols):
    """Swap the observed met columns of ``obs`` for the forecast at offset ``d``.

    The columns named by ``make_met_cols(d)`` are dropped from ``obs``; the rows
    of ``fore`` whose ``offset`` equals ``d`` are then joined onto it by
    ``date``. The result is indexed by the forecast's ``number`` column and its
    columns are selected/ordered according to ``forecast_cols``.
    """
    # observed data without the met columns that the forecast will supply
    obs_without_met = obs.drop(columns=make_met_cols(d))

    # forecast rows for this offset, keyed by date for the join
    offset_forecast = fore[fore["offset"] == d].copy().set_index("date")

    combined = (
        obs_without_met.join(offset_forecast, on="date")
        .drop(columns=["offset"])
        .set_index("number")
    )

    # match the column order expected as model input
    return combined[forecast_cols]


# Create function to roll out forecast

In [114]:
def make_seven_day_forecast(date):
        
    print(f"Beginning forecast for: {date}")
    
    # get the forecast data from a specific date
    fore = forecast[forecast["date"] == date].copy()

    # earliest date is noon met for forecast. Calculate the difference in dates between the reported date and the forecast date and add as a column called "offset"
    fore["offset"] = (pd.to_datetime(fore["valid_date"]) - pd.to_datetime(fore["date"])).dt.days
    # remove the forecast date column
    fore = fore.drop(columns=["valid_date"])

    # we'll run a forecast for each day, since the following day's forecast will be based on the previous day's forecast
    for d in range(0, 7):
        # Setup for the iteration
        print("Forecasting day: ", d+1)
        
        # set the forecast date
        forecast_date = pd.to_datetime(date) + pd.DateOffset(days=d)
        forecast_date = forecast_date.strftime("%Y-%m-%d")
        
        obs = test[test["date"] == forecast_date].copy()
        
        if d == 0:
            # prep data for the forecast
            obs_fore = prep_data_for_forecast_0(d, fore, obs, forecast_cols)
            features = obs_fore[forecast_cols_less].drop(columns = "date")

            # make the forecast for each pertmodelurbation
            pred_1, pred_2, pred_3, pred_4, pred_5, pred_6, pred_7, pred_8, pred_9 = pred_9_models(features)

            # make the forecasted temp data frame across all predictions
            all_forecasts = make_forecasted_temp(pred_1, pred_2, pred_3, pred_4, pred_5, pred_6, pred_7, pred_8, pred_9, obs_fore, forecast_date)

            
        elif d == 1:
            # remove the met data and the observed temperature data from yesterday and today (we'll replaced these with forecasted data)
            met_cols_d = make_met_cols(d)
            # drop the column name if it's not in the dataframe (not all variables have the same lag)
            met_cols_d = [col for col in met_cols_d if col in obs.columns]
            obs = obs.drop(columns=met_cols_d)

            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            print(fore_select.head)
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            print(fore_select.head)

            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)
            m1_select["date"] = m1_select["date"].dt.strftime("%Y-%m-%d")
            print(m1_select.head)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            print(obs_fore.head)
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            
            # join the observational forecast data with the forecasted data from the previous day by date and perturbation
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == pd.to_datetime(forecast_date) - pd.DateOffset(days=1)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=1)
            to_forecast["date"] = to_forecast["date"].dt.strftime("%Y-%m-%d")
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m1")
            to_forecast = to_forecast.set_index(["date", "perturbation"])
            print(to_forecast.head, obs_fore.head)
            to_forecast = to_forecast.join(obs_fore, on=["date", "perturbation"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            forecasted_date = make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date)
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
        
            # remove forecasted_date from memory
            del forecasted_date
            
        elif d == 2:
            # remove the met data and the observed temperature data to be replaced with forecasted data
            met_cols_d = make_met_cols(d)
            # drop the column name if it's not in the dataframe (not all variables have the same lag)
            met_cols_d = [col for col in met_cols_d if col in obs.columns]
            obs = obs.drop(columns=met_cols_d)
            
            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"]) 
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index(["date"]), on=["date"])
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == pd.to_datetime(forecast_date) - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=2)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m2")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")
            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            forecasted_date = make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date)
                        
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])

            # remove forecasted_date from memory
            del forecasted_date

        elif d == 3:
            
            # remove the met data and the observed temperature data to be replaced with forecasted data
            met_cols_d = make_met_cols(d)
            # drop the column name if it's not in the dataframe (not all variables have the same lag)
            met_cols_d = [col for col in met_cols_d if col in obs.columns]
            obs = obs.drop(columns=met_cols_d)
            
            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            forecasted_date = make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date)
                        
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
            
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 4:
        
            # remove the met data and the observed temperature data to be replaced with forecasted data
            met_cols_d = make_met_cols(d)
            # drop the column name if it's not in the dataframe (not all variables have the same lag)
            met_cols_d = [col for col in met_cols_d if col in obs.columns]
            obs = obs.drop(columns=met_cols_d)
            
            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)

            # and four days prior
            m4_select = fore[fore["offset"] == d-4].copy()
            m4_select = m4_select.rename(columns={"number": "perturbation"})
            m4_select = m4_select.drop(columns=["offset"])
            m4_select["date"] = pd.to_datetime(m4_select["date"]) + pd.DateOffset(days=4)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            obs_fore = obs_fore.join(m4_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m4")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            forecasted_date = make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date)
                        
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
            
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 5:
            
            # remove the met data and the observed temperature data to be replaced with forecasted data
            met_cols_d = make_met_cols(d)
            # drop the column name if it's not in the dataframe (not all variables have the same lag)
            met_cols_d = [col for col in met_cols_d if col in obs.columns]
            obs = obs.drop(columns=met_cols_d)
            
            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)

            # and four days prior
            m4_select = fore[fore["offset"] == d-4].copy()
            m4_select = m4_select.rename(columns={"number": "perturbation"})
            m4_select = m4_select.drop(columns=["offset"])
            m4_select["date"] = pd.to_datetime(m4_select["date"]) + pd.DateOffset(days=4)

            # and five days prior
            m5_select = fore[fore["offset"] == d-5].copy()
            m5_select = m5_select.rename(columns={"number": "perturbation"})
            m5_select = m5_select.drop(columns=["offset"])
            m5_select["date"] = pd.to_datetime(m5_select["date"]) + pd.DateOffset(days=5)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on=["date"])
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            obs_fore = obs_fore.join(m4_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m4")
            obs_fore = obs_fore.join(m5_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m5")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            forecasted_date = make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date)
                        
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
            
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 6:

            # remove the met data and the observed temperature data to be replaced with forecasted data
            met_cols_d = make_met_cols(d)
            # drop the column name if it's not in the dataframe (not all variables have the same lag)
            met_cols_d = [col for col in met_cols_d if col in obs.columns]
            obs = obs.drop(columns=met_cols_d)
            
            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)

            # and four days prior
            m4_select = fore[fore["offset"] == d-4].copy()
            m4_select = m4_select.rename(columns={"number": "perturbation"})
            m4_select = m4_select.drop(columns=["offset"])
            m4_select["date"] = pd.to_datetime(m4_select["date"]) + pd.DateOffset(days=4)

            # and five days prior
            m5_select = fore[fore["offset"] == d-5].copy()
            m5_select = m5_select.rename(columns={"number": "perturbation"})
            m5_select = m5_select.drop(columns=["offset"])
            m5_select["date"] = pd.to_datetime(m5_select["date"]) + pd.DateOffset(days=5)

            # and six days prior
            m6_select = fore[fore["offset"] == d-6].copy()
            m6_select = m6_select.rename(columns={"number": "perturbation"})
            m6_select = m6_select.drop(columns=["offset"])
            m6_select["date"] = pd.to_datetime(m6_select["date"]) + pd.DateOffset(days=6)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index(["date"]), on=["date"])
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            obs_fore = obs_fore.join(m4_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m4")
            obs_fore = obs_fore.join(m5_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m5")
            obs_fore = obs_fore.join(m6_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m6")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]
            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")
            
            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            forecasted_date = make_forecasted_temp_not0(forecast_features, forecasted_temp, forecast_date)
                        
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])

            # remove forecasted_date from memory
            del forecasted_date
        
    return all_forecasts

In [115]:
# trial run: build the seven-day forecast for a single start date; the returned dataframe is not stored
make_seven_day_forecast("2023-07-15")

Beginning forecast for: 2023-07-15
Forecasting day:  1
Forecasting day:  2
<bound method NDFrame.head of             date  number  air_temp_min_3h  air_temp_mean_3h  air_temp_max_3h  \
9549  2023-07-15       0         0.397907          0.649096         0.808427   
9556  2023-07-15       1         0.490930          1.031661         1.336444   
9563  2023-07-15       2         0.524307          0.644409         0.563961   
9570  2023-07-15       3         0.266432          0.476470         0.619324   
9577  2023-07-15       4         0.525775          0.704011         0.631026   
9584  2023-07-15       5         0.280733          0.599499         0.937195   
9591  2023-07-15       6         0.491944          0.771642         1.101193   
9598  2023-07-15       7         0.495143          0.851070         1.009862   
9605  2023-07-15       8         0.652141          0.993864         1.018543   
9612  2023-07-15       9         0.449091          0.630982         0.808427   
9619  2023-07-1

IndexError: cannot do a non-empty take from an empty axes.

# Make forecasts

In [29]:
# Run the seven-day forecast for every start date in the window and write one csv per start date.
datesequence = pd.date_range(start="2022-06-01", end="2022-09-20", freq="1D")

# table of "mean" and "std" values, one row per variable; the row labels come from the
# csv column that pandas read in as "Unnamed: 0"
mean_std = pd.read_csv(
    os.path.join(data_dir, "mean_std_train_val_t2022_v2024-10-28.csv")
).set_index("Unnamed: 0")

for d in datesequence:
    date = d.strftime("%Y-%m-%d")

    # forecast rollout for this start date
    forecast_data_to_save = make_seven_day_forecast(date)

    # calculate the temperature values back from the transformed values, one column at a time
    # NOTE(review): presumably calculate_vals undoes the standardization using the mean and std
    # passed in -- confirm against universal_functions.py
    for temp_col in ("mean_1m_temp_degC", "mean_0_5m_temp_degC"):
        col_mean = mean_std.loc[temp_col, "mean"]
        col_std = mean_std.loc[temp_col, "std"]
        forecast_data_to_save[temp_col] = calculate_vals(forecast_data_to_save[temp_col], col_mean, col_std)

    # one output file per start date, written without the dataframe index
    out_path = f"~/Documents/GitHub/ats-data-driven-forecasting/run-operational/output/forecasted_met/{date}_seven_day_forecasted_met.csv"
    forecast_data_to_save.to_csv(out_path, index=False)

Beginning forecast for: 2022-06-01
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
Beginning forecast for: 2022-06-02
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
Beginning forecast for: 2022-06-03
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
Beginning forecast for: 2022-06-04
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
Beginning forecast for: 2022-06-05
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
Beginning forecast for: 2022-06-06
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecastin