## Solar Energy Forecasting with Flow Forecast

In this notebook we will walk through using Flow Forecast to forecast solar energy generation at several power plants in India.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
!pip install flood_forecast
from flood_forecast.trainer import train_function

## Data Preprocessing

To get the data into a usable format, we need to join the plant's generation data to its weather-sensor data on their shared timestamp column.

In [None]:
import pandas as pd

# Plant 1 inputs, read from the solar-power-generation-data input directory:
# weather_data holds the weather-sensor readings, solar_data the generation records.
weather_data = pd.read_csv(
    "../input/solar-power-generation-data/Plant_1_Weather_Sensor_Data.csv"
)
solar_data = pd.read_csv(
    "../input/solar-power-generation-data/Plant_1_Generation_Data.csv"
)

In [None]:
# Non-null entry counts of every column, grouped by SOURCE_KEY.
solar_data.groupby(by="SOURCE_KEY").count()

In [None]:
# Preview the first five generation records.
solar_data.head(n=5)

In [None]:
# Keep only the rows for SOURCE_KEY 1BY6WEcLGh8j5v7, then count the non-null
# entries of every column for each distinct DC_POWER value.
(
    solar_data
    .loc[lambda frame: frame["SOURCE_KEY"] == "1BY6WEcLGh8j5v7"]
    .groupby("DC_POWER")
    .count()
)

In [None]:
# Preview the first five weather-sensor records.
weather_data.head(n=5)

In [None]:
# Second look at the first five generation records, just before DATE_TIME is parsed.
solar_data.head(n=5)

In [None]:
# Convert both DATE_TIME columns to datetimes so the two frames can be joined on
# matching timestamps.
weather_data["DATE_TIME"] = pd.to_datetime(weather_data["DATE_TIME"])
# NOTE(review): Plant_1_Generation_Data.csv appears to store day-first strings such as
# "15-05-2020 00:00" - confirm against the CSV. Without dayfirst=True, ambiguous values
# like "01-06-2020" can be read month-first, which would silently break the join on
# DATE_TIME. dayfirst=True is a parsing preference, not a strict format.
solar_data["DATE_TIME"] = pd.to_datetime(solar_data["DATE_TIME"], dayfirst=True)

In [None]:
# Left join: keep every generation row and attach the weather readings that share its
# DATE_TIME. Other columns present in both frames receive pandas' _x / _y suffixes.
mrged_df = solar_data.merge(
    right=weather_data,
    how="left",
    left_on="DATE_TIME",
    right_on="DATE_TIME",
)
mrged_df

In [None]:
# Persist the joined table. mrged_df already holds this exact left join of solar_data
# and weather_data on DATE_TIME, so write it out instead of recomputing the merge.
mrged_df.to_csv("merged_file.csv")


In [None]:
# Training table for a single source: rows of SOURCE_KEY_x 1BY6WEcLGh8j5v7 with any
# row containing a missing value removed, plus a "datetime" column that duplicates
# DATE_TIME. The result is written to example.csv.
dropped = (
    mrged_df
    .loc[mrged_df["SOURCE_KEY_x"] == "1BY6WEcLGh8j5v7"]
    .dropna()
    .assign(datetime=lambda frame: frame["DATE_TIME"])
)
dropped.to_csv("example.csv")

## Simple Forecast with Flow Forecast

In [None]:
def make_config_file(flow_file_path, gage_id, station_id, weight_path=None, pretrained=[]):
    """Start a wandb run and build the DARNN configuration for one sweep trial.

    The hyper-parameters ``forecast_history``, ``hidden_encoder``,
    ``hidden_decoder``, ``dropout``, ``batch_size``, ``forecast_length`` and
    ``lr`` are read from the config of the run returned by ``wandb.init``.
    Everything else is fixed in this function.

    Args:
        flow_file_path: Path to the CSV; used as the training, validation, test
            and inference file.
        gage_id: Accepted but not used by this function.
        station_id: Accepted but not used by this function.
        weight_path: If truthy, stored in the config under ``"weight_path"``.
        pretrained: Accepted but not used by this function.

    Returns:
        The configuration dict, which is also passed to ``wandb.config.update``.
    """
    run = wandb.init(project="solar_tutorial")
    sweep_values = run.config
    print(sweep_values)

    # Read each swept value once, in the order the config first needs it.
    history = sweep_values["forecast_history"]
    encoder_width = sweep_values["hidden_encoder"]
    decoder_width = sweep_values["hidden_decoder"]
    dropout_rate = sweep_values["dropout"]
    batch_size = sweep_values["batch_size"]
    horizon = sweep_values["forecast_length"]
    learning_rate = sweep_values["lr"]

    def series_columns():
        # Fresh list per call so the two dataset sections never share one object.
        return ["DC_POWER", "AMBIENT_TEMPERATURE", "MODULE_TEMPERATURE", "IRRADIATION"]

    def calendar_features():
        # Fresh dict per call, for the same reason as series_columns().
        return {
            "datetime_params": {
                "hour": "numerical",
                "day_of_week": "numerical",
                "month": "numerical",
            }
        }

    model_params = {
        # presumably the 4 relevant columns plus 3 datetime features - TODO confirm
        "n_time_series": 7,
        "forecast_history": history,
        "hidden_size_encoder": encoder_width,
        "decoder_hidden_size": decoder_width,
        "out_feats": 1,
        "dropout": dropout_rate,
        "gru_lstm": False,
    }

    dataset_params = {
        "class": "default",
        "num_workers": 2,
        "pin_memory": True,
        "training_path": flow_file_path,
        "validation_path": flow_file_path,
        "test_path": flow_file_path,
        "batch_size": batch_size,
        # The data sections use one step less history than the model section.
        "forecast_history": history - 1,
        "forecast_length": horizon,
        "scaler": "StandardScaler",
        "interpolate": False,
        "train_start": 0,
        "train_end": 1500,
        "valid_start": 1501,
        "valid_end": 1750,
        "sort_column": "DATE_TIME",
        "test_start": 1751,
        "test_end": 2099,
        "target_col": ["DC_POWER"],
        "relevant_cols": series_columns(),
        # Spelled "feature_param" here but "feature_params" in the inference section.
        "feature_param": calendar_features(),
    }

    training_params = {
        "criterion": "MSE",
        "optimizer": "SGD",
        "optim_params": {"lr": learning_rate},
        "epochs": 12,
        "batch_size": batch_size,
    }

    inference_params = {
        "datetime_start": "2020-06-17",
        "hours_to_forecast": 30,
        "num_prediction_samples": 20,
        "test_csv_path": flow_file_path,
        "decoder_params": {
            "decoder_function": "simple_decode",
            "unsqueeze_dim": 1,
        },
        "dataset_params": {
            "file_path": flow_file_path,
            "interpolate_param": False,
            "sort_column": "DATE_TIME",
            "scaling": "StandardScaler",
            "forecast_history": history - 1,
            "forecast_length": horizon,
            "relevant_cols": series_columns(),
            "target_col": ["DC_POWER"],
            "feature_params": calendar_features(),
        },
    }

    the_config4 = {
        "model_name": "DARNN",
        "model_type": "PyTorch",
        "model_params": model_params,
        "dataset_params": dataset_params,
        "training_params": training_params,
        "early_stopping": {"patience": 3},
        "GCS": False,
        "sweep": True,
        "wandb": False,
        "forward_params": {},
        "metrics": ["MSE", "MAPE"],
        "inference_params": inference_params,
    }

    if weight_path:
        the_config4["weight_path"] = weight_path
    wandb.config.update(the_config4)
    print("config made")
    return the_config4
  
# Grid-search definition handed to wandb.sweep: every combination of the listed values
# is one trial. "out_seq_length" is part of the grid, but make_config_file in this
# notebook never reads it.
wandb_sweep_config_full = {
    "name": "Default sweep",
    "method": "grid",
    "parameters": {
        hyperparameter: {"values": choices}
        for hyperparameter, choices in [
            ("forecast_length", [1]),
            ("batch_size", [10, 20, 40]),
            ("lr", [0.001, 0.0001, .01]),
            ("forecast_history", [14, 20, 24, 48]),
            ("out_seq_length", [1]),
            ("hidden_encoder", [32, 64, 128, 256]),
            ("hidden_decoder", [32, 64, 128, 256]),
            ("dropout", [0.1, 0.3, 0.5, 0.8]),
        ]
    },
}

In [None]:
import os

import wandb
from kaggle_secrets import UserSecretsClient

# Fetch the W&B API key from Kaggle's secret store and expose it through the
# WANDB_API_KEY environment variable, so the key never appears in the notebook.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("WANDB_KEY")
os.environ["WANDB_API_KEY"] = secret_value_0

file_path = "example.csv"
sweep_id = wandb.sweep(wandb_sweep_config_full, project="solar_tutorial")


def _run_sweep_trial():
    """Build the config for the current sweep trial and train a PyTorch model with it."""
    return train_function("PyTorch", make_config_file(file_path, "1BY6WEcLGh8j5v7", "s", None))


wandb.agent(sweep_id, _run_sweep_trial)

## Debugging Config

In [None]:
def make_config_file(flow_file_path, gage_id, station_id, weight_path=None, pretrained=[]):
    """Start a wandb run and return a fixed DARNN configuration for debugging.

    This redefines the earlier ``make_config_file`` in this notebook with the same
    signature. Every hyper-parameter is hard-coded; the run's config is only printed.

    Args:
        flow_file_path: Path to the CSV; used as the training, validation, test
            and inference file.
        gage_id: Accepted but not used by this function.
        station_id: Accepted but not used by this function.
        weight_path: If truthy, stored in the config under ``"weight_path"``.
        pretrained: Accepted but not used by this function.

    Returns:
        The configuration dict, which is also passed to ``wandb.config.update``.
    """
    run = wandb.init(project="solar_tutorial")
    print(run.config)

    history = 14
    horizon = 1

    def series_columns():
        # Fresh list per call so the two dataset sections never share one object.
        return ["DC_POWER", "AMBIENT_TEMPERATURE", "MODULE_TEMPERATURE", "IRRADIATION"]

    def calendar_features():
        # Fresh dict per call, for the same reason as series_columns().
        return {
            "datetime_params": {
                "hour": "numerical",
                "day_of_week": "numerical",
                "month": "numerical",
            }
        }

    the_config4 = {
        "model_name": "DARNN",
        "model_type": "PyTorch",
        "model_params": {
            # presumably the 4 relevant columns plus 3 datetime features - TODO confirm
            "n_time_series": 7,
            "forecast_history": history,
            "hidden_size_encoder": 32,
            "decoder_hidden_size": 32,
            "out_feats": 1,
            "dropout": 0.1,
            "gru_lstm": False,
        },
        "dataset_params": {
            "class": "default",
            "num_workers": 2,
            "pin_memory": True,
            "training_path": flow_file_path,
            "validation_path": flow_file_path,
            "test_path": flow_file_path,
            # NOTE(review): 20 here but 10 under training_params - confirm which is intended.
            "batch_size": 20,
            # The data sections use one step less history than the model section.
            "forecast_history": history - 1,
            "forecast_length": horizon,
            "scaler": "StandardScaler",
            "train_start": 0,
            "train_end": 1500,
            "valid_start": 1501,
            "valid_end": 1750,
            "sort_column": "DATE_TIME",
            "test_start": 1751,
            "test_end": 2099,
            "target_col": ["DC_POWER"],
            "relevant_cols": series_columns(),
            "interpolate": False,
            # Spelled "feature_param" here but "feature_params" in the inference section.
            "feature_param": calendar_features(),
        },
        "training_params": {
            "criterion": "MSE",
            "optimizer": "SGD",
            "optim_params": {"lr": 0.01},
            "epochs": 12,
            "batch_size": 10,
        },
        "early_stopping": {"patience": 3},
        # NOTE(review): the sweep version of this config sets GCS to False - confirm
        # that True is wanted for debugging.
        "GCS": True,
        "sweep": True,
        "wandb": False,
        "forward_params": {},
        "metrics": ["MSE", "MAPE"],
        "inference_params": {
            "datetime_start": "2020-06-17 08:00:00",
            "hours_to_forecast": 30,
            "num_prediction_samples": 20,
            "test_csv_path": flow_file_path,
            "decoder_params": {
                "decoder_function": "simple_decode",
                "unsqueeze_dim": 1,
            },
            "dataset_params": {
                "file_path": flow_file_path,
                "sort_column": "DATE_TIME",
                "scaling": "StandardScaler",
                "forecast_history": history - 1,
                "forecast_length": horizon,
                "relevant_cols": series_columns(),
                "target_col": ["DC_POWER"],
                "interpolate": False,
                "feature_params": calendar_features(),
            },
        },
    }

    if weight_path:
        the_config4["weight_path"] = weight_path
    wandb.config.update(the_config4)
    print("config made")
    return the_config4

In [None]:
# Single training run outside the sweep agent: build one config and train on it directly.
train_function(
    "PyTorch",
    make_config_file(
        flow_file_path=file_path,
        gage_id="1BY6WEcLGh8j5v7",
        station_id="s",
        weight_path=None,
    ),
)