This script walks through a forecast for SMR using the NN-operational model developed in the NASA-NW repo. 

# Import Modules

In [2]:
#high level modules
import os
import imp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

  import imp


In [3]:
# custom modules
# The `imp` module is deprecated and removed in Python 3.12, so the helper file
# is loaded with importlib instead of `imp.load_source`.
# NOTE(review): once this cell is in place, the `import imp` in the imports cell
# is no longer needed by this cell.
import importlib.util
import sys

this_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/NN-operational/arNN/"

# build a module spec for universal_functions.py and register the module under
# the name "universals" so the `from universals import ...` below can resolve it
_universals_spec = importlib.util.spec_from_file_location(
    "universals", os.path.join(this_dir, "universal_functions.py")
)
_universals_module = importlib.util.module_from_spec(_universals_spec)
sys.modules["universals"] = _universals_module
# execute the file's top-level code inside the freshly created module object
_universals_spec.loader.exec_module(_universals_module)
from universals import load_pickle_file, calculate_vals


# Import models

In [4]:
# directory that holds the model_N.pkl files used for this forecast
model_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/models/leaky_basic_5/"

# file names, listed in the order the models are numbered
model_files = (
    "model_1.pkl",
    "model_2.pkl",
    "model_3.pkl",
    "model_4.pkl",
    "model_5.pkl",
    "model_6.pkl",
    "model_7.pkl",
    "model_8.pkl",
)

# load each file in order and bind the results to model_1 ... model_8
(
    model_1,
    model_2,
    model_3,
    model_4,
    model_5,
    model_6,
    model_7,
    model_8,
) = [load_pickle_file(file_name, model_dir) for file_name in model_files]


# Import data

In [5]:
# directory that holds the standardized test data and the forecast data
data_dir = "/Users/steeleb/Documents/GitHub/ats-data-driven-forecasting/data/NN_train_val_test/SMR_forecast/"

test = pd.read_csv(os.path.join(data_dir, "t2022_standardized_v2024-10-28.csv"))
forecast = pd.read_csv(os.path.join(data_dir, "t2022_forecast_std_v2024-10-28.csv"))

# convert the date columns from text to datetimes
test = test.assign(date=pd.to_datetime(test["date"]))
forecast = forecast.assign(
    date=pd.to_datetime(forecast["date"]),
    forecast_date=pd.to_datetime(forecast["forecast_date"]),
)

# the model input built from the forecast has to end up with the same columns as
# the test data, so keep the test column names for later
forecast_cols = test.columns

# same column list, minus the observed temperature columns
forecast_cols_less = test.columns.drop(["mean_1m_temp_degC", "mean_0_5m_temp_degC"])

# Create function to roll out forecast

In [47]:
def make_seven_day_forecast(date):
    
    print(date)
    date = pd.to_datetime(date)
    
    # get the forecast data from a specific date
    fore = forecast[forecast["forecast_date"] == date].copy()
    # earliest date is noon met for forecast. Calculate the difference in dates and add it as a column called "offset"
    fore["offset"] = (fore["date"] - fore["forecast_date"]).dt.days
    # drop the forecast_date column now that the offset has been calculated
    fore = fore.drop(columns=["forecast_date"])
    
    # we'll run a forecast for each day, since the following day's forecast will be based on the previous day's forecast
    for d in range(0, 7):
        # Setup for the iteration
        print("Forecasting day: ", d+1)
        # set the forecast date
        forecast_date = pd.to_datetime(date) + pd.DateOffset(days=d)
        obs = test[test["date"] == forecast_date].copy()
        
        # the first day will be a bit different from subsequent days
        if d == 0:
            # remove the noon met data
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad"])
            # grab the forecast data for the offset date
            fore_select = fore[fore["offset"] == d].copy()
            # join fore_select with obs, drop offset columns, and use the number column as index
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.drop(columns=["offset"])
            obs_fore = obs_fore.set_index("number")
            # now reorganize the columns to match the input columns
            obs_fore = obs_fore[forecast_cols]
            
            # preprocess the data into labels and features
            features = obs_fore.drop(columns = ["date", "mean_1m_temp_degC", "mean_0_5m_temp_degC"])
            # make the forecast for each perturbation
            pred_1 = model_1.predict(features)
            pred_2 = model_2.predict(features)
            pred_3 = model_3.predict(features)
            pred_4 = model_4.predict(features)
            pred_5 = model_5.predict(features)
            pred_6 = model_6.predict(features)
            pred_7 = model_7.predict(features)
            pred_8 = model_8.predict(features)

            # and now we need to create the dataframe for the next iteration
            # first, create a dataframe with the forecast data
            for i, pred in enumerate([pred_1, pred_2, pred_3, pred_4, pred_5, pred_6, pred_7, pred_8], start=1):
                forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
                forecasted_temp["perturbation"] = obs_fore.index
                forecasted_temp['model'] = i
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date
                
                # Append to the main dataframe (or create it if it doesn't exist)
                if 'all_forecasts' in locals():
                    all_forecasts = pd.concat([all_forecasts, forecasted_temp])
                else:
                    all_forecasts = forecasted_temp.copy()
            
            # for all other days, we need to:
            #   - use the forecast met data for d-offset for noon met data
            #   - drop the _m7 met data columns
            #   - rename the _m1, ..., _m6 columns to _m2, ..., _m7 for the met data columns
            #   - drop the _m3 column of the mean_1m_temp_degC and mean_0_5m_temp_degC
            #   - rename the _m1, _m2, _m3 columns to _m2, _m3
            #   - use the forecasted data from the previous day for mean_1m_temp_degC_m1 and mean_0_5m_temp_degC_m1
            #   - use the test data for all other columns (flow, chipmunk, north_fork)
        
        elif d == 1:
            # remove the noon met data and the observed temperature data from yesterday and today (we'll replace these with forecasted data)
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad",
                                    "noon_air_temp_m1", "noon_ave_wind_m1", "noon_solar_rad_m1",
                                    "mean_1m_temp_degC", "mean_0_5m_temp_degC",
                                    "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1"])
            
            # grab the forecast data for the offset date, then merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            # join the observational forecast data with the forecasted data from the previous day by date and perturbation
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=1)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m1")
            to_forecast = to_forecast.set_index(["date", "perturbation"])
            to_forecast = to_forecast.join(obs_fore, on=["date", "perturbation"])

            # now we need to reorganize the columns to match the input columns, plus the model and perturbation columns
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            # loop through the models
            for m in range(1, 9):
                # filter the forecast features for the model and drop the model column
                model_forecast_features = forecast_features[forecast_features.index.get_level_values(0) == m].copy()
                # make the forecast for each perturbation
                pred = eval(f"model_{m}").predict(model_forecast_features)
                forecasted_temp["perturbation"] = model_forecast_features.index.get_level_values(1)
                forecasted_temp['model'] = m
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date

                # Append to a dataframe (or create it if it doesn't exist)
                if 'forecasted_date' in locals():
                    forecasted_date = pd.concat([forecasted_date, forecasted_temp])
                else:
                    forecasted_date = forecasted_temp.copy()
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
        
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 2:
            # remove the noon met data and the observed temperature data from today and the previous two days (we'll replace these with forecasted data)
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad",
                                    "noon_air_temp_m1", "noon_ave_wind_m1", "noon_solar_rad_m1",
                                    "noon_air_temp_m2", "noon_ave_wind_m2", "noon_solar_rad_m2",
                                    "mean_1m_temp_degC", "mean_0_5m_temp_degC",
                                    "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1", 
                                    "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2"])
            
            # grab the forecast data for the offset date, then merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"]) 
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index(["date"]), on=["date"])
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=2)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m2")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and perturbation columns
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")
            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            # loop through the models
            for m in range(1, 9):
                # filter the forecast features for the model and drop the model column
                model_forecast_features = forecast_features[forecast_features.index.get_level_values(0) == m].copy()
                # make the forecast for each perturbation
                pred = eval(f"model_{m}").predict(model_forecast_features)
                forecasted_temp["perturbation"] = model_forecast_features.index.get_level_values(1)
                forecasted_temp['model'] = m
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date

                # Append to a dataframe (or create it if it doesn't exist)
                if 'forecasted_date' in locals():
                    forecasted_date = pd.concat([forecasted_date, forecasted_temp])
                else:
                    forecasted_date = forecasted_temp.copy()
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])

            # remove forecasted_date from memory
            del forecasted_date

        elif d == 3:
            
            # remove the noon met data and the observed temperature data from today and the previous three days (we'll replace these with forecasted data)
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad",
                                    "noon_air_temp_m1", "noon_ave_wind_m1", "noon_solar_rad_m1",
                                    "noon_air_temp_m2", "noon_ave_wind_m2", "noon_solar_rad_m2",
                                    "noon_air_temp_m3", "noon_ave_wind_m3", "noon_solar_rad_m3",
                                    "mean_1m_temp_degC", "mean_0_5m_temp_degC",
                                    "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1", 
                                    "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2",
                                    "mean_1m_temp_degC_m3", "mean_0_5m_temp_degC_m3"])
            
            # grab the forecast data for the offset date, then merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and perturbation columns
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            # loop through the models
            for m in range(1, 9):
                # filter the forecast features for the model and drop the model column
                model_forecast_features = forecast_features[forecast_features.index.get_level_values(0) == m].copy()
                # make the forecast for each perturbation
                pred = eval(f"model_{m}").predict(model_forecast_features)
                forecasted_temp["perturbation"] = model_forecast_features.index.get_level_values(1)
                forecasted_temp['model'] = m
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date

                # Append to a dataframe (or create it if it doesn't exist)
                if 'forecasted_date' in locals():
                    forecasted_date = pd.concat([forecasted_date, forecasted_temp])
                else:
                    forecasted_date = forecasted_temp.copy()
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
            
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 4:
       
            # remove the noon met data (today and the previous four days) and the observed temperature data (today and the previous three days); we'll replace these with forecasted data
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad",
                                    "noon_air_temp_m1", "noon_ave_wind_m1", "noon_solar_rad_m1",
                                    "noon_air_temp_m2", "noon_ave_wind_m2", "noon_solar_rad_m2",
                                    "noon_air_temp_m3", "noon_ave_wind_m3", "noon_solar_rad_m3",
                                    "noon_air_temp_m4", "noon_ave_wind_m4", "noon_solar_rad_m4",
                                    "mean_1m_temp_degC", "mean_0_5m_temp_degC",
                                    "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1", 
                                    "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2",
                                    "mean_1m_temp_degC_m3", "mean_0_5m_temp_degC_m3"])
            
            # grab the forecast data for the offset date, then merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)

            # and four days prior
            m4_select = fore[fore["offset"] == d-4].copy()
            m4_select = m4_select.rename(columns={"number": "perturbation"})
            m4_select = m4_select.drop(columns=["offset"])
            m4_select["date"] = pd.to_datetime(m4_select["date"]) + pd.DateOffset(days=4)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on="date")
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            obs_fore = obs_fore.join(m4_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m4")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and perturbation columns
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            # loop through the models
            for m in range(1, 9):
                # filter the forecast features for the model and drop the model column
                model_forecast_features = forecast_features[forecast_features.index.get_level_values(0) == m].copy()
                # make the forecast for each perturbation
                pred = eval(f"model_{m}").predict(model_forecast_features)
                forecasted_temp["perturbation"] = model_forecast_features.index.get_level_values(1)
                forecasted_temp['model'] = m
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date

                # Append to a dataframe (or create it if it doesn't exist)
                if 'forecasted_date' in locals():
                    forecasted_date = pd.concat([forecasted_date, forecasted_temp])
                else:
                    forecasted_date = forecasted_temp.copy()
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
            
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 5:
           
            # remove the noon met data (today and the previous five days) and the observed temperature data (today and the previous three days); we'll replace these with forecasted data
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad",
                                    "noon_air_temp_m1", "noon_ave_wind_m1", "noon_solar_rad_m1",
                                    "noon_air_temp_m2", "noon_ave_wind_m2", "noon_solar_rad_m2",
                                    "noon_air_temp_m3", "noon_ave_wind_m3", "noon_solar_rad_m3",
                                    "noon_air_temp_m4", "noon_ave_wind_m4", "noon_solar_rad_m4",
                                    "noon_air_temp_m5", "noon_ave_wind_m5", "noon_solar_rad_m5",
                                    "mean_1m_temp_degC", "mean_0_5m_temp_degC",
                                    "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1", 
                                    "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2",
                                    "mean_1m_temp_degC_m3", "mean_0_5m_temp_degC_m3"])
            
            # grab the forecast data for the offset date, then merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)

            # and four days prior
            m4_select = fore[fore["offset"] == d-4].copy()
            m4_select = m4_select.rename(columns={"number": "perturbation"})
            m4_select = m4_select.drop(columns=["offset"])
            m4_select["date"] = pd.to_datetime(m4_select["date"]) + pd.DateOffset(days=4)

            # and five days prior
            m5_select = fore[fore["offset"] == d-5].copy()
            m5_select = m5_select.rename(columns={"number": "perturbation"})
            m5_select = m5_select.drop(columns=["offset"])
            m5_select["date"] = pd.to_datetime(m5_select["date"]) + pd.DateOffset(days=5)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index("date"), on=["date"])
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            obs_fore = obs_fore.join(m4_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m4")
            obs_fore = obs_fore.join(m5_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m5")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]

            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")

            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            # loop through the models
            for m in range(1, 9):
                # filter the forecast features for the model and drop the model column
                model_forecast_features = forecast_features[forecast_features.index.get_level_values(0) == m].copy()
                # make the forecast for each perturbation
                pred = eval(f"model_{m}").predict(model_forecast_features)
                forecasted_temp["perturbation"] = model_forecast_features.index.get_level_values(1)
                forecasted_temp['model'] = m
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date

                # Append to a dataframe (or create it if it doesn't exist)
                if 'forecasted_date' in locals():
                    forecasted_date = pd.concat([forecasted_date, forecasted_temp])
                else:
                    forecasted_date = forecasted_temp.copy()
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])
            
            # remove forecasted_date from memory
            del forecasted_date

        elif d == 6:

            # remove the noon met data and the observed temperature data from yesterday and today (we'll replaced these with forecasted data)
            obs = obs.drop(columns=["noon_air_temp", "noon_ave_wind", "noon_solar_rad",
                                    "noon_air_temp_m1", "noon_ave_wind_m1", "noon_solar_rad_m1",
                                    "noon_air_temp_m2", "noon_ave_wind_m2", "noon_solar_rad_m2",
                                    "noon_air_temp_m3", "noon_ave_wind_m3", "noon_solar_rad_m3",
                                    "noon_air_temp_m4", "noon_ave_wind_m4", "noon_solar_rad_m4",
                                    "noon_air_temp_m5", "noon_ave_wind_m5", "noon_solar_rad_m5",
                                    "noon_air_temp_m6", "noon_ave_wind_m6", "noon_solar_rad_m6",
                                    "mean_1m_temp_degC", "mean_0_5m_temp_degC",
                                    "mean_1m_temp_degC_m1", "mean_0_5m_temp_degC_m1", 
                                    "mean_1m_temp_degC_m2", "mean_0_5m_temp_degC_m2",
                                    "mean_1m_temp_degC_m3", "mean_0_5m_temp_degC_m3"])
            
            # grab the forecast data for the offset date the merge with the observed data
            fore_select = fore[fore["offset"] == d].copy()
            fore_select = fore_select.rename(columns={"number": "perturbation"})
            fore_select = fore_select.drop(columns=["offset"])
            
            # and the previous day
            m1_select = fore[fore["offset"] == d-1].copy()
            m1_select = m1_select.rename(columns={"number": "perturbation"})
            m1_select = m1_select.drop(columns=["offset"])
            m1_select["date"] = pd.to_datetime(m1_select["date"]) + pd.DateOffset(days=1)

            # and two days prior
            m2_select = fore[fore["offset"] == d-2].copy()
            m2_select = m2_select.rename(columns={"number": "perturbation"})
            m2_select = m2_select.drop(columns=["offset"])
            m2_select["date"] = pd.to_datetime(m2_select["date"]) + pd.DateOffset(days=2)

            # and three days prior
            m3_select = fore[fore["offset"] == d-3].copy()
            m3_select = m3_select.rename(columns={"number": "perturbation"})
            m3_select = m3_select.drop(columns=["offset"])
            m3_select["date"] = pd.to_datetime(m3_select["date"]) + pd.DateOffset(days=3)

            # and four days prior
            m4_select = fore[fore["offset"] == d-4].copy()
            m4_select = m4_select.rename(columns={"number": "perturbation"})
            m4_select = m4_select.drop(columns=["offset"])
            m4_select["date"] = pd.to_datetime(m4_select["date"]) + pd.DateOffset(days=4)

            # and five days prior
            m5_select = fore[fore["offset"] == d-5].copy()
            m5_select = m5_select.rename(columns={"number": "perturbation"})
            m5_select = m5_select.drop(columns=["offset"])
            m5_select["date"] = pd.to_datetime(m5_select["date"]) + pd.DateOffset(days=5)

            # and six days prior
            m6_select = fore[fore["offset"] == d-6].copy()
            m6_select = m6_select.rename(columns={"number": "perturbation"})
            m6_select = m6_select.drop(columns=["offset"])
            m6_select["date"] = pd.to_datetime(m6_select["date"]) + pd.DateOffset(days=6)
            
            # join obs (one row) with fore_select (many rows) on the date column
            obs_fore = obs.join(fore_select.set_index(["date"]), on=["date"])
            obs_fore = obs_fore.reset_index()
            obs_fore = obs_fore.set_index(["date", "perturbation"])
            obs_fore = obs_fore.join(m1_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m1")
            obs_fore = obs_fore.join(m2_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m2")
            obs_fore = obs_fore.join(m3_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m3")
            obs_fore = obs_fore.join(m4_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m4")
            obs_fore = obs_fore.join(m5_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m5")
            obs_fore = obs_fore.join(m6_select.set_index(["date", "perturbation"]), on=["date", "perturbation"], rsuffix="_m6")
            
            # add yesterday's temp forecast to the observational forecast data by date and perturbation
            m1_forecast = all_forecasts.copy()
            m1_forecast = m1_forecast[m1_forecast["date"] == forecast_date - pd.DateOffset(days=1)]
            m1_forecast["date"] = pd.to_datetime(m1_forecast["date"]) + pd.DateOffset(days=1)
            m1_forecast.columns = m1_forecast.columns.str.replace("degC", "degC_m1")
            m1_forecast = m1_forecast.set_index(["date", "perturbation"])
            m1_forecast = m1_forecast.join(obs_fore, on=["date", "perturbation"])
            m1_forecast = m1_forecast.reset_index()
            m1_forecast = m1_forecast.set_index(["date", "perturbation", "model"])

            # and two days prior
            m2_forecast = all_forecasts.copy()
            m2_forecast = m2_forecast[m2_forecast["date"] == forecast_date - pd.DateOffset(days=2)]
            m2_forecast["date"] = pd.to_datetime(m2_forecast["date"]) + pd.DateOffset(days=2)
            m2_forecast.columns = m2_forecast.columns.str.replace("degC", "degC_m2")
            m2_forecast = m2_forecast.set_index(["date", "perturbation", "model"])
            m2_forecast = m2_forecast.join(m1_forecast, on=["date", "perturbation", "model"])

            # and three days prior
            to_forecast = all_forecasts.copy()
            to_forecast = to_forecast[to_forecast["date"] == forecast_date - pd.DateOffset(days=3)]
            to_forecast["date"] = pd.to_datetime(to_forecast["date"]) + pd.DateOffset(days=3)
            to_forecast.columns = to_forecast.columns.str.replace("degC", "degC_m3")
            to_forecast = to_forecast.set_index(["date", "perturbation", "model"])
            to_forecast = to_forecast.join(m2_forecast, on=["date", "perturbation", "model"])

            # now we need to reorganize the columns to match the input columns, plus the model and peturbation colums
            # first, move the date and perturbation from the index to columns
            to_forecast = to_forecast.reset_index()
            # now change model and perturbation to the index
            to_forecast = to_forecast.set_index(["model", "perturbation"])
            # and now reorganize the columns to match the input columns
            to_forecast = to_forecast[forecast_cols_less]
            # and now we need to preprocess the data into features
            forecast_features = to_forecast.drop(columns = "date")
            
            # for each model, make the forecast and store the results
            # make a dataframe to store the forecasted data
            forecasted_temp = pd.DataFrame(columns=['date', 'perturbation', 'model', 'mean_1m_temp_degC', 'mean_0_5m_temp_degC'])
            
            # loop through the models
            for m in range(1, 9):
                # filter the forecast features for the model and drop the model column
                model_forecast_features = forecast_features[forecast_features.index.get_level_values(0) == m].copy()
                # make the forecast for each perturbation
                pred = eval(f"model_{m}").predict(model_forecast_features)
                forecasted_temp["perturbation"] = model_forecast_features.index.get_level_values(1)
                forecasted_temp['model'] = m
                forecasted_temp["mean_1m_temp_degC"] = [p[0] for p in pred]
                forecasted_temp["mean_0_5m_temp_degC"] = [p[1] for p in pred]
                forecasted_temp["date"] = forecast_date

                # Append to a dataframe (or create it if it doesn't exist)
                if 'forecasted_date' in locals():
                    forecasted_date = pd.concat([forecasted_date, forecasted_temp])
                else:
                    forecasted_date = forecasted_temp.copy()
            
            # Append to the main dataframe
            all_forecasts = pd.concat([all_forecasts, forecasted_date])

            # remove forecasted_date from memory
            del forecasted_date
        
    return all_forecasts


# Make a few forecasts

In [48]:
# roll out a forecast from three different 2022 start dates; each call prints
# its start date and a progress line per forecast day
jul07_seven_day = make_seven_day_forecast(date="2022-07-07")
aug20_seven_day = make_seven_day_forecast(date="2022-08-20")
sept11_seven_day = make_seven_day_forecast(date="2022-09-11")

2022-07-07
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
2022-08-20
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7
2022-09-11
Forecasting day:  1
Forecasting day:  2
Forecasting day:  3
Forecasting day:  4
Forecasting day:  5
Forecasting day:  6
Forecasting day:  7


# Calculate actual values from the forecasted data

I'm not sure why this creates duplicates, but it does; I'll have to figure that out later.

In [49]:
# display the July 7 forecast as returned by make_seven_day_forecast,
# i.e. before calculate_vals is applied to the temperature columns below
jul07_seven_day

Unnamed: 0,date,perturbation,model,mean_1m_temp_degC,mean_0_5m_temp_degC
0,2022-07-07,0,1,1.441043,1.349815
1,2022-07-07,1,1,1.353076,1.366558
2,2022-07-07,2,1,1.428462,1.380403
3,2022-07-07,3,1,1.288299,1.339410
4,2022-07-07,4,1,1.414040,1.342410
...,...,...,...,...,...
26,2022-07-13,26,8,2.289272,1.392042
27,2022-07-13,27,8,2.602299,1.246265
28,2022-07-13,28,8,2.555099,1.410753
29,2022-07-13,29,8,2.450350,1.319690


In [50]:
# load the mean/std table and index it by its first (unnamed) CSV column,
# which holds the variable names used for the .loc lookups below
mean_std = pd.read_csv(
    os.path.join(data_dir, "mean_std_train_val_t2022_v2024-10-28.csv")
).set_index("Unnamed: 0")

# pass both temperature columns of every forecast through calculate_vals,
# using that column's own mean and std
# NOTE: the columns are overwritten in place, so re-running this cell without
# regenerating the forecasts applies calculate_vals a second time
for seven_day in (jul07_seven_day, aug20_seven_day, sept11_seven_day):
    for temp_col in ("mean_1m_temp_degC", "mean_0_5m_temp_degC"):
        col_mean = mean_std.loc[temp_col, "mean"]
        col_std = mean_std.loc[temp_col, "std"]
        seven_day[temp_col] = calculate_vals(seven_day[temp_col], col_mean, col_std)


In [51]:
# write each forecast to its own csv in the run-operational output folder,
# leaving the dataframe index out of the file
output_dir = "~/Documents/GitHub/ats-data-driven-forecasting/run-operational/output/"
for csv_name, seven_day in (
    ("jul07_seven_day.csv", jul07_seven_day),
    ("aug20_seven_day.csv", aug20_seven_day),
    ("sept11_seven_day.csv", sept11_seven_day),
):
    seven_day.to_csv(output_dir + csv_name, index=False)