In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from transform_output_format import get_4D_output, get_2D_output
from sklearn.base import clone
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import SGDRegressor
from utils import load_data_input
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

In [2]:
# Folder holding the challenge files. Change this single constant to run the
# notebook on another machine instead of editing every path below.
DATA_DIR = "/home/jambe/solar-forecasting"

# Training inputs: four arrays plus the matching dates, in the order returned
# by load_data_input.
GHI, CLS, SZA, SAA, dates = load_data_input(f"{DATA_DIR}/X_train_copernicus.npz")

# Training targets: read the CSV as shipped, then convert it with
# get_4D_output so it can be split and flattened alongside the inputs.
y_train_csv = pd.read_csv(f"{DATA_DIR}/y_train_zRvpCeO_nQsYtKN.csv")
y_train_4D = get_4D_output(y_train_csv)

In [3]:

# Stack the four input arrays along axis 1 so every sample carries all of its
# input frames in a single array.
X = np.concatenate((GHI, CLS, SZA, SAA), axis=1)

# Hold out 33% of the samples for validation; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_train_4D,
    random_state=42,
    test_size=0.33,
)

In [4]:
def prepare_data(sequence):
    """Flatten a batch of image stacks into one row per pixel.

    Args:
        sequence: array of shape (nb_examples, nb_img, height, width).

    Returns:
        Array of shape (nb_examples * height * width, nb_img). Rows are ordered
        by example, then image row, then image column; each row holds the
        nb_img values found at that pixel.
    """
    n_examples, n_channels, height, width = sequence.shape
    # Put the image axis last so the reshape keeps each pixel's values together.
    channels_last = sequence.transpose(0, 2, 3, 1)
    n_rows = n_examples * height * width
    return channels_last.reshape((n_rows, n_channels))


In [4]:
# Sanity check: prepare_data unpacks exactly four axes from its input.
np.shape(X_train)

(1236, 28, 81, 81)

In [15]:
def create_model_BT(n_estimators=50, lr=0.1, max_depth=4, subsample=0.5, min_samples_split = 0.05, max_features=0.5, n_jobs=-1, verbose=0, random_state=None):
    """Build an unfitted multi-output gradient-boosted-trees regressor.

    A GradientBoostingRegressor is configured once and wrapped in a
    MultiOutputRegressor, which fits one independent copy per target column.

    Args:
        n_estimators: number of boosting stages for each target.
        lr: learning rate (shrinkage applied to every tree).
        max_depth: maximum depth of the individual trees.
        subsample: fraction of the training rows drawn for each tree.
        min_samples_split: minimum samples needed to split a node; a float is
            interpreted by scikit-learn as a fraction of the training rows.
        max_features: features considered at each split; a float is interpreted
            by scikit-learn as a fraction of the features.
        n_jobs: number of per-target fits run in parallel (-1 uses all cores).
        verbose: verbosity of each underlying GradientBoostingRegressor.
        random_state: seed forwarded to the boosted trees. With subsample and
            max_features below 1 the fit is stochastic; pass an int for
            reproducible models. None keeps the previous unseeded behaviour.

    Returns:
        The unfitted MultiOutputRegressor.
    """
    # The loss is intentionally left at its default, which is least squares in
    # every scikit-learn release. Spelling it out breaks one side or the other:
    # "ls" was removed in scikit-learn 1.2, "squared_error" is unknown before 1.0.
    base_regressor = GradientBoostingRegressor(
        learning_rate=lr,
        n_estimators=n_estimators,
        max_depth=max_depth,
        subsample=subsample,
        min_samples_split=min_samples_split,
        max_features=max_features,
        verbose=verbose,
        random_state=random_state,
    )
    return MultiOutputRegressor(base_regressor, n_jobs=n_jobs)


In [6]:
# Central 51x51 window (rows/cols 15..65) of the input images. The flattened
# targets have 1236 * 51 * 51 rows, i.e. they already cover exactly that window,
# so only the inputs need cropping.
CENTER_CROP = slice(15, 66)

model = create_model_BT(n_estimators=50, max_depth=10, subsample=0.5, verbose=1)

# One row per pixel: inputs become (n_pixels, 28), targets (n_pixels, 4).
X_train_reshape = prepare_data(X_train[:, :, CENTER_CROP, CENTER_CROP])
y_train_reshape = prepare_data(y_train)
X_test_reshape = prepare_data(X_test[:, :, CENTER_CROP, CENTER_CROP])
y_test_reshape = prepare_data(y_test)
y_train_reshape.shape

(3214836, 4)

In [8]:
# Long-running cell: the progress log below estimates roughly ten minutes per
# target. With verbose=1 every parallel per-target fit prints its own table,
# which is why the rows appear interleaved.
model.fit(X=X_train_reshape, y=y_train_reshape)

      Iter       Train Loss      OOB Improve   Remaining Time 
      Iter       Train Loss      OOB Improve   Remaining Time 
      Iter       Train Loss      OOB Improve   Remaining Time 
      Iter       Train Loss      OOB Improve   Remaining Time 
         1       57881.3071       12351.1569           10.31m
         1       54369.8593       12249.6014           10.46m
         1       54780.3195       12637.6317           10.50m
         1       55449.4122       12170.4253           10.65m
         2       44500.1108        9907.6035            9.96m
         2       47782.0482       10039.9006            9.96m
         2       44532.0218       10247.0468           10.06m
         2       45569.1834        9878.8914           10.11m
         3       36406.3410        8078.3637            9.48m
         3       39638.1227        8161.0281            9.48m
         3       36257.3031        8313.4722            9.57m
         3       37523.4794        8027.6567            9.62m
    

MultiOutputRegressor(estimator=GradientBoostingRegressor(loss='ls', max_depth=5,
                                                         n_estimators=25,
                                                         subsample=0.5,
                                                         verbose=1),
                     n_jobs=-1)

In [9]:
# Coefficient of determination (R^2) on the held-out validation pixels.
model.score(X=X_test_reshape, y=y_test_reshape)

0.9666777406834202

In [10]:
# Persist the fitted model. The context manager flushes and closes the file
# even if pickling fails, which a bare open() call does not guarantee.
filename = 'boosted_trees_0.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [2]:
# Reload the model saved by this notebook. pickle can run arbitrary code while
# loading, so only point this at a file you produced yourself.
with open('boosted_trees_0.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [5]:
# Load the challenge test inputs. This call used to be commented out, which
# left GHI_test, CLS_test, SZA_test and SAA_test undefined on a fresh kernel.
GHI_test,CLS_test,SZA_test,SAA_test,dates_test = load_data_input("/home/jambe/solar-forecasting/X_test_copernicus.npz")

# NOTE: from here on X_test / X_test_reshape refer to the challenge test set,
# not to the validation split produced by train_test_split.
X_test = np.concatenate([GHI_test,CLS_test,SZA_test,SAA_test], axis=1)
# Same central 15:66 crop as used for training, then one row per pixel.
X_test_reshape = prepare_data(X_test[:,:,15:66,15:66])


In [6]:
# Expect (n_samples * 51 * 51, 28): one row per pixel, one column per input frame.
np.shape(X_test_reshape)

(4788441, 28)

In [9]:
# Per-pixel predictions, one row per pixel in the row order produced by
# prepare_data. Do NOT reshape this straight to (1841, 4, 51, 51): the four
# outputs sit on the last axis of the flat array, so that reshape would mix
# pixels and outputs.
y_predict = loaded_model.predict(X=X_test_reshape)

In [14]:
print(y_predict.shape)

# Side length of the cropped images fed to the model (rows/cols 15:66).
PATCH_SIZE = 51

# Restore the image layout only while y_predict is still the flat
# (pixels, outputs) array. Without this guard, re-running the cell would reshape
# the already 4-D result a second time and silently scramble the predictions.
if y_predict.ndim == 2:
    nb_outputs = y_predict.shape[1]
    # Undo prepare_data's flattening: rows are ordered (sample, row, col).
    # -1 lets numpy infer the sample count instead of hard-coding it.
    y_predict = y_predict.reshape(-1, PATCH_SIZE, PATCH_SIZE, nb_outputs)
    # Move the outputs to axis 1 -> (samples, outputs, rows, cols).
    y_predict = y_predict.transpose(0, 3, 1, 2)

y_preds_2D = get_2D_output(y_predict)
y_preds_2D.to_csv('boosted_trees.csv', index=False)

(4788441, 4)
