This script walks through a forecast for SMR using the NN-operational model developed in the NASA-NW repo. 

# Import Modules

In [1]:
#high level modules
import os
import imp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

  import imp


In [2]:
# custom modules
this_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/NN-operational/arNN/"

imp.load_source("universals", os.path.join(this_dir, "universal_functions.py"))
from universals import load_pickle_file, calculate_vals


# Import models

In [3]:
model_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/models/leaky_basic_5/"

model_1 = load_pickle_file("model_1.pkl", model_dir)
model_2 = load_pickle_file("model_2.pkl", model_dir)
model_3 = load_pickle_file("model_3.pkl", model_dir)
model_4 = load_pickle_file("model_4.pkl", model_dir)
model_5 = load_pickle_file("model_5.pkl", model_dir)
model_6 = load_pickle_file("model_6.pkl", model_dir)
model_7 = load_pickle_file("model_7.pkl", model_dir)
model_8 = load_pickle_file("model_8.pkl", model_dir)


# Import data

In [5]:
data_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/"

test = pd.read_csv(os.path.join(data_dir, "t2022_standardized_v2024-11-25.csv"))
forecast = pd.read_csv(os.path.join(data_dir, "t2022_forecast_std_v2024-10-28.csv"))

test["date"] = pd.to_datetime(test["date"])
forecast["date"] = pd.to_datetime(forecast["date"])
forecast["forecast_date"] = pd.to_datetime(forecast["forecast_date"])

# we need the test columns to be the same as the forecast columns at the end of this, so grab the names for now
forecast_cols = test.columns

# and let's drop the observed temp data from the forecast columns, too
forecast_cols_less = forecast_cols.drop(["mean_1m_temp_degC", "mean_0_5m_temp_degC", "date"])

# Create function to roll out forecast

This implementation just checks to see if, with observed data, we have any accuracy on a 7-day rollout.

In [9]:
def make_forecast(features, models, valid_date):
    predictions = pd.DataFrame(columns=['mean_1m_temp_degC', 'mean_0_5m_temp_degC', 'model', 'valid_date'])
    for i, model in enumerate(models, start=1):
        preds = {}
        preds = model.predict(features)
        temp_df = pd.DataFrame(preds, columns=['mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
        for col in temp_df.columns:
            temp_df[col] = np.sqrt(temp_df[col])
        temp_df['model'] = i
        temp_df['valid_date'] = valid_date
        predictions = pd.concat([predictions, temp_df])
    return predictions

def make_forecast_by_model(features, models, valid_date):
    predictions = pd.DataFrame(columns=['mean_1m_temp_degC', 'mean_0_5m_temp_degC', 'model', 'valid_date'])
    for i, model in enumerate(models, start=1):
        preds = {}
        model_features = features.iloc[[i-1]]
        preds = model.predict(model_features)
        temp_df = pd.DataFrame(preds, columns=['mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
        temp_df['model'] = i
        temp_df['valid_date'] = valid_date
        predictions = pd.concat([predictions, temp_df])
    return predictions

In [10]:

def make_seven_day_forecast_control(date):
    
    print(f"Beginning forecast for {date}")
    date = pd.to_datetime(date)
    
    # create a dataframe to store the forecasted data
    all_forecasts = pd.DataFrame(columns=['valid_date', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])

    # we'll run a forecast for each day, since the following day's forecast will be based on the previous day's forecast
    for d in range(0, 7):
        
        # Setup for the iteration
        print("Forecasting day: ", d+1)
        
        # set the forecast date
        valid_date = pd.to_datetime(date) + pd.DateOffset(days=d)
        obs = test[test["date"] == valid_date].copy()
        
        # the first day will be a bit different from subsequent days
        if d == 0:

            # preprocess the data into labels and features
            features = obs.drop(columns = ["date", "mean_1m_temp_degC", "mean_0_5m_temp_degC"])
            
            forecasted_temp = make_forecast(features, [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8], valid_date)
            
        elif d == 1:
            # remove the labels and observed temperature data from yesterday and today (we'll replaced these with forecasted data)
            obs_less = obs.drop(columns=["mean_1m_temp_degC", "mean_0_5m_temp_degC", 
                                         "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1"])
            # this should be one row of data
            
            # join the observed data with the forecasted data from the previous day by date
            yesterday = all_forecasts.copy()
            # get yesterday's forecasted data (valid date minus 1 day)
            yesterday = yesterday[yesterday["valid_date"] == valid_date - pd.DateOffset(days=1)]
            # relabel yesterday's forecasted data as today's m1 data
            yesterday = yesterday.rename(columns={"valid_date": "date",
                                                  "mean_1m_temp_degC": "mean_1m_temp_degC_m1", 
                                                  "mean_0_5m_temp_degC": "mean_0_5m_temp_degC_m1"})
            # now, set the date to today to join
            yesterday["date"] = pd.to_datetime(yesterday["date"]) + pd.DateOffset(days=1)
            yesterday = yesterday.set_index(["date"])
            
            # join the observed data with the forecasted data from the previous day by date
            to_forecast = obs_less.join(yesterday, on=["date"])
            
            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            to_forecast = to_forecast.reset_index()
            # now change model and date to the index
            to_forecast = to_forecast.set_index(["date", "model"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now forecast on that data
            forecasted_temp = make_forecast_by_model(to_forecast, [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8], valid_date)

        elif d == 2:
            # remove the labels and observed temperature through m2 (we'll replaced these with forecasted data)
            obs_less = obs.drop(columns=["mean_1m_temp_degC", "mean_0_5m_temp_degC", 
                                         "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1",
                                         "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2"])
            # this should be one row of data
            
            # join the observed data with the forecasted data from the previous day by date
            yesterday = all_forecasts.copy()
            # get yesterday's forecasted data (valid date minus 1 day)
            yesterday = yesterday[yesterday["valid_date"] == valid_date - pd.DateOffset(days=1)]
            # relabel yesterday's forecasted data as today's m1 data
            yesterday = yesterday.rename(columns={"valid_date": "date",
                                                  "mean_1m_temp_degC": "mean_1m_temp_degC_m1", 
                                                  "mean_0_5m_temp_degC": "mean_0_5m_temp_degC_m1"})
            # now, set the date to today to join
            yesterday["date"] = pd.to_datetime(yesterday["date"]) + pd.DateOffset(days=1)
            yesterday = yesterday.set_index(["date"])

            # and from the day before
            m2 = all_forecasts.copy()
            m2 = m2[m2["valid_date"] == valid_date - pd.DateOffset(days=2)]
            m2 = m2.rename(columns={"valid_date": "date",
                                    "mean_1m_temp_degC": "mean_1m_temp_degC_m2", 
                                    "mean_0_5m_temp_degC": "mean_0_5m_temp_degC_m2"})
            m2["date"] = pd.to_datetime(m2["date"]) + pd.DateOffset(days=2)
            m2 = m2.set_index(["date", "model"])
                        
            # join the observed data with the forecasted data from the previous day by date
            to_forecast = obs_less.join(yesterday, on=["date"])
            to_forecast = to_forecast.join(m2, on=["date", "model"])
            
            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            to_forecast = to_forecast.reset_index()
            # now change model and date to the index
            to_forecast = to_forecast.set_index(["date", "model"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now forecast on that data
            forecasted_temp = make_forecast_by_model(to_forecast, [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8], valid_date)
        
        elif d >= 3:
            # remove the labels and observed temperature through m3 (we'll replaced these with forecasted data)
            obs_less = obs.drop(columns=["mean_1m_temp_degC", "mean_0_5m_temp_degC", 
                                         "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1",
                                         "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2",
                                         "mean_1m_temp_degC_m3", "mean_0_5m_temp_degC_m3"])
            # this should be one row of data
            
            # join the observed data with the forecasted data from the previous day by date
            yesterday = all_forecasts.copy()
            # get yesterday's forecasted data (valid date minus 1 day)
            yesterday = yesterday[yesterday["valid_date"] == valid_date - pd.DateOffset(days=1)]
            # relabel yesterday's forecasted data as today's m1 data
            yesterday = yesterday.rename(columns={"valid_date": "date",
                                                  "mean_1m_temp_degC": "mean_1m_temp_degC_m1", 
                                                  "mean_0_5m_temp_degC": "mean_0_5m_temp_degC_m1"})
            # now, set the date to today to join
            yesterday["date"] = pd.to_datetime(yesterday["date"]) + pd.DateOffset(days=1)
            yesterday = yesterday.set_index(["date"])

            # and from the day before
            m2 = all_forecasts.copy()
            m2 = m2[m2["valid_date"] == valid_date - pd.DateOffset(days=2)]
            m2 = m2.rename(columns={"valid_date": "date",
                                    "mean_1m_temp_degC": "mean_1m_temp_degC_m2", 
                                    "mean_0_5m_temp_degC": "mean_0_5m_temp_degC_m2"})
            m2["date"] = pd.to_datetime(m2["date"]) + pd.DateOffset(days=2)
            m2 = m2.set_index(["date", "model"])

            # and from the day before
            m3 = all_forecasts.copy()
            m3 = m3[m3["valid_date"] == valid_date - pd.DateOffset(days=3)]
            m3 = m3.rename(columns={"valid_date": "date",
                                    "mean_1m_temp_degC": "mean_1m_temp_degC_m3", 
                                    "mean_0_5m_temp_degC": "mean_0_5m_temp_degC_m3"})
            m3["date"] = pd.to_datetime(m3["date"]) + pd.DateOffset(days=3)
            m3 = m3.set_index(["date", "model"])
                                    
            # join the observed data with the forecasted data from the previous day by date
            to_forecast = obs_less.join(yesterday, on=["date"])
            to_forecast = to_forecast.join(m2, on=["date", "model"])
            to_forecast = to_forecast.join(m3, on=["date", "model"])
            
            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            to_forecast = to_forecast.reset_index()
            # now change model and date to the index
            to_forecast = to_forecast.set_index(["date", "model"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now forecast on that data
            forecasted_temp = make_forecast_by_model(to_forecast, [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8], valid_date)

        # append the forecasted data to the all_forecasts dataframe
        all_forecasts = pd.concat([all_forecasts, forecasted_temp])
        
    return all_forecasts


# Make forecasts

In [11]:
datesequence = pd.date_range(start="2022-06-01", end="2022-10-01", freq="1D")
mean_std = pd.read_csv(os.path.join(data_dir, "mean_std_train_val_t2022_v2024-11-25.csv"))
mean_std = mean_std.set_index("Unnamed: 0")

for d in datesequence:
    date = d.strftime("%Y-%m-%d")
    # make the forecast
    forecast = make_seven_day_forecast_control(d)
    # calculate the values back from the transformed values
    forecast["mean_1m_temp_degC"] = calculate_vals(forecast["mean_1m_temp_degC"], mean_std.loc["mean_1m_temp_degC", "mean"], mean_std.loc["mean_1m_temp_degC", "std"])
    forecast["mean_0_5m_temp_degC"] = calculate_vals(forecast["mean_0_5m_temp_degC"], mean_std.loc["mean_0_5m_temp_degC", "mean"], mean_std.loc["mean_0_5m_temp_degC", "std"])
    # save the forecast
    forecast.to_csv(f"~/Documents/GitHub/ats-data-driven-forecasting/run-operational/output/control/{date}_seven_day_control.csv", index=False)
    

Beginning forecast for 2022-06-01 00:00:00
Forecasting day:  1


ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.