This script walks through a forecast for SMR using the NN-operational model developed in the NASA-NW repo. 

# Import Modules

In [32]:
#high level modules
import os
import imp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [33]:
# custom modules
this_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/NN-operational/arNN/"

imp.load_source("universals", os.path.join(this_dir, "universal_functions.py"))
from universals import load_pickle_file, twotemp_labels_features_test, predict_2_values_test


# Import models

In [34]:
model_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/models/leaky_basic_5/"

model_1 = load_pickle_file("model_1.pkl", model_dir)
model_2 = load_pickle_file("model_2.pkl", model_dir)
model_3 = load_pickle_file("model_3.pkl", model_dir)
model_4 = load_pickle_file("model_4.pkl", model_dir)
model_5 = load_pickle_file("model_5.pkl", model_dir)
model_6 = load_pickle_file("model_6.pkl", model_dir)
model_7 = load_pickle_file("model_7.pkl", model_dir)
model_8 = load_pickle_file("model_8.pkl", model_dir)


# Import data

In [50]:
data_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/"

test = pd.read_csv(os.path.join(data_dir, "t2022_standardized_v2024-10-28.csv"))
forecast = pd.read_csv(os.path.join(data_dir, "t2022_forecast_std_v2024-10-28.csv"))

test["date"] = pd.to_datetime(test["date"])
forecast["date"] = pd.to_datetime(forecast["date"])
forecast["forecast_date"] = pd.to_datetime(forecast["forecast_date"])

# we need the test columns to be the same as the forecast columns at the end of this, so grab the names for now
forecast_cols = test.columns

# Create function to roll out forecast

In [65]:
def make_forecast(date, n_days):
    
    print(date)
    date = pd.to_datetime(date)
    
    # get the forecast data from a specific date
    fore = forecast[forecast["forecast_date"] == date].copy()
    # earliest date is noon met for forecast. Calculatet the differencein dates and add as a column called "offset"
    fore["offset"] = (fore["date"] - fore["forecast_date"]).dt.days
    # remove the date column and rename forecast_date to date
    fore = fore.drop(columns=["forecast_date"])
    
    # we'll run a forecast for each day, since the following day's forecast will be based on the previous day's forecast
    for d in range(n_days):
        # Setup for the iteration
        print("Forecasting day: ", d+1)
        # set the forecast date
        forecast_date = pd.to_datetime(date) + pd.DateOffset(days=d)
        obs = test[test["date"] == forecast_date].copy()

        # the first day will be a bit different from subsequent days
        if d == 0:
            # remove the noon met data
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad"])
            # grab the forecast data for the offset date
            fore_select = fore[fore["offset"] == d].copy()
            # join fore_select with obs, drop offset columns, and use the number column as index
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.drop(columns=["offset"])
            obs_fore = obs_fore.set_index("number")
            # now reorganize the columns to match the input columns
            obs_fore = obs_fore[forecast_cols]
            
            # preprocess the data into labels and features
            features, labels = twotemp_labels_features_test(obs_fore)
            
            # make the forecast for each perturbation
            pred_1 = model_1.predict(features)
            pred_2 = model_2.predict(features)
            pred_3 = model_3.predict(features)
            pred_4 = model_4.predict(features)
            pred_5 = model_5.predict(features)
            pred_6 = model_6.predict(features)
            pred_7 = model_7.predict(features)
            pred_8 = model_8.predict(features)

            # and now we need to create the dataframe for the next iteration
            # first, create a dataframe with the forecast data
            for i, pred in enumerate([pred_1, pred_2, pred_3, pred_4, pred_5, pred_6, pred_7, pred_8], start=1):
                forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
                forecasted_temp["perturbation"] = obs_fore.index
                forecasted_temp['model'] = i
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date
                # Append to the main dataframe
                if 'all_forecasts' in locals():
                    all_forecasts = pd.concat([all_forecasts, forecasted_temp])
                else:
                    all_forecasts = forecasted_temp.copy()
        
        # for all other days, we need to:
        #   - use the forecast met data for d-offset for noon met data
        #   - drop the _m7 met data columns
        #   - rename the _m1, ..., _m6 columns to _m2, ..., _m7 for the met data columns
        #   - drop the _m3 column of the mean_1m_temp_degC and mean_0_5m_temp_degC
        #   - rename the _m1, _m2, _m3 columns to _m2, _m3
        #   - use the forecasted data from the previous day for mean_1m_temp_degC_m1 and mean_0_5m_temp_degC_m1
        #   - use the test data for all other columns (flow, chipmunk, north_fork)

        else:
            # remove the noon met data
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad"])
            # grab the forecast data for the offset date
            fore_select = fore[fore["offset"] == d].copy()
            # join fore_select with obs, drop offset columns, and use the number column as index
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.drop(columns=["offset"])
            obs_fore = obs_fore.set_index("number")
            # remove all columns ending in _m7
            obs_fore = obs_fore.loc[:, ~obs_fore.columns.str.endswith("_m7")]
            # rename all columns ending in _m1, ..., _m6 to _m2, ..., _m7
            obs_fore.columns = obs_fore.columns.str.replace("_m1", "_m2")
            obs_fore.columns = obs_fore.columns.str.replace("_m2", "_m3")
            obs_fore.columns = obs_fore.columns.str.replace("_m3", "_m4")
            obs_fore.columns = obs_fore.columns.str.replace("_m4", "_m5")
            obs_fore.columns = obs_fore.columns.str.replace("_m5", "_m6")
            obs_fore.columns = obs_fore.columns.str.replace("_m6", "_m7")
            # drop the _m3 column of the mean_1m_temp_degC and mean_0_5m_temp_degC (this is now named _m4 from the previous step)
            obs_fore = obs_fore.drop(columns=["mean_1m_temp_degC_m4", "mean_0_5m_temp_degC_m4"])
            # use the forecasted data from the previous day for mean_1m_temp_degC_m1 and mean_0_5m_temp_degC_m1
            obs_fore["mean_1m_temp_degC_m1"] = all_forecasts[all_forecasts["date"] == forecast_date - pd.DateOffset(days=1)]["mean_1m_temp_degC"]
        
            # duplicate this forecast data to match with the outputs from the previous day
            
            # grab the forecast data for the offset date
            fore_select = fore[fore["offset"] == d].copy()
            # join fore_select with obs, drop offset columns, and use the number column as index
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.drop(columns=["offset"])
            obs_fore = obs_fore.set_index("number")
            # now reorganize the columns to match the input columns
            obs_fore = obs_fore[forecast_cols]all_forecasts


    

# create an empty dataframe to store the forecast data
# forecasted_data = pd.DataFrame(columns=["date", ])

make_forecast("2022-08-20", 7)

2022-08-20
Forecasting day:  1
         date  perturbation  model  mean_1m_temp_degC  mean_0_5m_temp_degC
0  2022-08-20             0      1           0.361165            -0.189793
1  2022-08-20             1      1           0.310787            -0.178734
2  2022-08-20             2      1           0.256008            -0.171581
3  2022-08-20             3      1           0.339003            -0.176153
4  2022-08-20             4      1           0.325255            -0.176620
..        ...           ...    ...                ...                  ...
26 2022-08-20            26      8           0.184289            -0.243250
27 2022-08-20            27      8           0.127128            -0.301402
28 2022-08-20            28      8           0.209748            -0.282130
29 2022-08-20            29      8           0.112261            -0.258826
30 2022-08-20            30      8           0.127622            -0.418501

[248 rows x 5 columns]


Index(['date', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC',
       'mean_1m_temp_degC_m1', 'mean_0_5m_temp_degC_m1',
       'mean_1m_temp_degC_m2', 'mean_0_5m_temp_degC_m2',
       'mean_1m_temp_degC_m3', 'mean_0_5m_temp_degC_m3', 'pump_q_m1',
       'pump_q_m2', 'sum_pump_q_p2', 'max_pump_q_p2', 'sum_pump_q_p7',
       'max_pump_q_p7', 'ave_chip_q_m1', 'max_chip_q_m1', 'ave_chip_q_m2',
       'max_chip_q_m2', 'mean_chip_q_p7', 'max_chip_q_p7', 'noon_air_temp_m1',
       'noon_air_temp_m2', 'noon_air_temp_m3', 'noon_air_temp_m4',
       'noon_air_temp_m5', 'noon_air_temp_m6', 'noon_air_temp_m7',
       'noon_air_temp', 'noon_ave_wind_m1', 'noon_ave_wind_m2',
       'noon_ave_wind_m3', 'noon_ave_wind_m4', 'noon_ave_wind_m5',
       'noon_ave_wind_m6', 'noon_ave_wind_m7', 'noon_ave_wind',
       'noon_solar_rad_m1', 'noon_solar_rad_m2', 'noon_solar_rad_m3',
       'noon_solar_rad_m4', 'noon_solar_rad_m5', 'noon_solar_rad_m6',
       'noon_solar_rad_m7', 'noon_solar_rad', 'min_chip_q_m1', 