In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from transform_output_format import get_4D_output, get_2D_output
from sklearn.base import clone
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import SGDRegressor
from utils import load_data_input
import pickle
from sklearn.model_selection import train_test_split

In [2]:
# Load the training inputs: four stacked image fields plus their dates.
GHI, CLS, SZA, SAA, dates = load_data_input("X_train_copernicus.npz")

# Read the target table and convert it with get_4D_output so it can be
# split alongside the stacked inputs.
y_train_csv = pd.read_csv('y_train_zRvpCeO_nQsYtKN.csv')
y_train_4D = get_4D_output(y_train_csv)

In [3]:

# Stack the four input fields along axis 1 (the image axis), giving one
# array with every input image per sample.
X = np.concatenate((GHI, CLS, SZA, SAA), axis=1)

# Hold out a third of the samples for validation; the fixed seed keeps the
# split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_train_4D,
    test_size=0.33,
    random_state=42,
)

def prepare_data(sequence):
    """Flatten a stack of images into a per-pixel sample matrix.

    The image axis is moved to the end so that every pixel of every sample
    becomes one row, and every image of the stack becomes one column.

    Args:
        sequence: 4-D array shaped (nb_samples, nb_img, size1, size2).

    Returns:
        2-D array shaped (nb_samples * size1 * size2, nb_img). Rows are
        ordered by sample, then image row, then image column.
    """
    nb_samples, nb_img, size1, size2 = sequence.shape
    # (samples, img, rows, cols) -> (samples, rows, cols, img)
    channels_last = sequence.transpose(0, 2, 3, 1)
    n_rows = nb_samples * size1 * size2
    return channels_last.reshape((n_rows, nb_img))


In [4]:
# Sanity check: display the dimensions of the training split.
X_train.shape

(1236, 28, 81, 81)

In [30]:
# Elastic-net regressor with tol=1e-2 and l1_ratio=0.5.
model = ElasticNet(l1_ratio=0.5, tol=1e-2)

# Crop the inputs to the 15:66 window (51 x 51 pixels) on both spatial axes,
# then flatten both arrays to one row per pixel.
X_train_reshape = prepare_data(X_train[:, :, 15:66, 15:66])
y_train_reshape = prepare_data(y_train)

# Display the flattened target shape as a sanity check.
y_train_reshape.shape

(3214836, 4)

In [31]:
# Fit the estimator on the flattened per-pixel training arrays.
model.fit(X_train_reshape,y_train_reshape)

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


ElasticNet(tol=0.01)

In [34]:
# Show the shapes of the fitted coefficient matrix and intercept vector,
# one per line.
print(model.coef_.shape, model.intercept_.shape, sep="\n")

(4, 28)
(4,)


In [35]:
# Apply the same 15:66 crop and per-pixel flattening to the hold-out split
# that was used for the training split.
X_test_reshape = prepare_data(X_test[:, :, 15:66, 15:66])
y_test_reshape = prepare_data(y_test)

# Score of the fitted model on the hold-out pixels (displayed as cell output).
model.score(X_test_reshape, y_test_reshape)

0.972836658471628

In [None]:
# NOTE(review): bare literal in a cell that was never executed - presumably a
# score noted from an earlier run; confirm its origin and move it to markdown.
0.9758750168814646

In [5]:
# %%
# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed even if pickling fails.
# NOTE(review): the file name says "linearreg" but whatever `model` currently
# holds is what gets saved - confirm the name is still accurate.
filename = 'linearreg_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
# %%
# Reload the model to check the round trip. pickle.load can execute arbitrary
# code, so only ever point this at files produced by this notebook.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Load the test inputs, unpacked in the same five-way layout as the
# training-input call.
GHI_test, CLS_test, SZA_test, SAA_test, dates_test = load_data_input("X_test_copernicus.npz")

In [8]:
# Build the inference matrix with the SAME columns the model was fitted on:
# GHI, CLS, SZA and SAA, in that order. Using only GHI and CLS yields fewer
# columns than the fitted coefficient matrix expects and makes predict() fail.
# Flattening each field and concatenating on axis 1 gives the same column
# layout as stacking the fields first and flattening afterwards.
GHI_test_r = prepare_data(GHI_test[:, :, 15:66, 15:66])
CLS_test_r = prepare_data(CLS_test[:, :, 15:66, 15:66])
SZA_test_r = prepare_data(SZA_test[:, :, 15:66, 15:66])
SAA_test_r = prepare_data(SAA_test[:, :, 15:66, 15:66])
# NOTE(review): this rebinds X_test, which earlier held the hold-out split from
# train_test_split; re-running the hold-out scoring cell after this one will
# not work without re-running the split first.
X_test = np.concatenate([GHI_test_r, CLS_test_r, SZA_test_r, SAA_test_r], axis=1)
# %%
y_predict = model.predict(X_test)
# prepare_data lays rows out as (sample, row, col) with the image axis last, so
# undo it in that order and then move the target axis back to position 1.
# Reshaping straight to (n, 4, 51, 51) would scramble pixels and targets.
y_preds = y_predict.reshape(
    GHI_test.shape[0], 51, 51, y_predict.shape[1]
).transpose(0, 3, 1, 2)

In [36]:
def get_models(models=None):
    """Register the candidate linear regressors under short keys.

    Args:
        models: Optional dict to populate in place. When omitted, a fresh
            dict is created on every call (a mutable default would be shared
            between calls and leak entries from one call into the next).

    Returns:
        The populated dict, mapping short names ('lr', 'sgd', 'en', 'huber',
        'llars', 'pa') to unfitted estimator instances. Existing entries
        under those keys are overwritten.
    """
    if models is None:
        models = {}
    # linear models
    models['lr'] = LinearRegression()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
 
# %%
# Build the registry of candidate regressors (get_models also prints how many were defined).
models = get_models()
# %%
def fit_model(model, X, y):
    """Fit a fresh copy of ``model`` on ``(X, y)`` and return that copy.

    The estimator passed in is only used as a template: it is cloned first,
    so it is never fitted itself.
    """
    fresh_estimator = clone(model)
    fresh_estimator.fit(X, y)
    return fresh_estimator
 
def fit_models(model, train):
    """Fit one copy of ``model`` for every ``train[i, j]`` slice.

    Per the original note, axis 0 of ``train`` enumerates variables and
    axis 1 enumerates forecast lead times. In each ``train[i, j]`` table the
    last column is the target and all preceding columns are the features.

    Returns:
        Nested lists indexed ``[var][time][model]``; each innermost list
        holds the single fitted model for that slice.
    """
    n_vars, n_leads = train.shape[0], train.shape[1]
    fitted = []
    for var_idx in range(n_vars):
        per_lead = []
        for lead_idx in range(n_leads):
            table = train[var_idx, lead_idx]
            features, target = table[:, :-1], table[:, -1]
            per_lead.append([fit_model(model, features, target)])
        fitted.append(per_lead)
    return fitted

Defined 6 models


In [39]:
def train_models(models_dict: dict, X_train, y_train, X_test, y_test, target_idx: int = 3):
    """Fit every candidate on one target column and print its test score.

    Args:
        models_dict: Mapping of display name -> unfitted estimator template.
        X_train: Training feature matrix passed to ``fit_model``.
        y_train: 2-D training targets; only column ``target_idx`` is used.
        X_test: Evaluation feature matrix passed to ``score``.
        y_test: 2-D evaluation targets; only column ``target_idx`` is used.
        target_idx: Index of the target column to fit and score. Defaults to
            3, the column that was previously hard-coded.

    Returns:
        None. One ``"<name> : <score>"`` line is printed per model, in the
        iteration order of ``models_dict``.
    """
    for model_name, model in models_dict.items():
        fitted_model = fit_model(model, X_train, y_train[:, target_idx])
        print(f'{model_name} : {fitted_model.score(X_test, y_test[:, target_idx])}')

In [40]:
# Fit and score every candidate regressor on the flattened per-pixel arrays.
train_models(
    models_dict=models,
    X_train=X_train_reshape,
    y_train=y_train_reshape,
    X_test=X_test_reshape,
    y_test=y_test_reshape,
)

lr : 0.9543696372527282
