In [3]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin

In [23]:
### Set path to save data folder
fol = '../../offline_data/mlwwcoast_prep_data'

# Select buoy
bb = 3

# Select with variable to predict
myvar = 'u' 

# Data load parameters
back = 24 # Select how many hours to go back in time
forward = 24 # Select forecast hour to predict

# Select file
fload = '{:s}/input_{:s}_buoy{:d}_back{:d}_for{:d}.npz'.format(fol,myvar,bb,back,forward)
D = np.load(fload)

# Keys in D
# X -> History Matrix (time,hr-back) where hr-back = 0,1,2,...
# Y -> Labels, (time,hr-forward) where hr_forward = 1,2,3... (pred we are trying to make)
# F -> Weather Forecast, (time,hr-forward)
# t -> time (time,)
# avg -> daily average, smoothed (time,1)

# Regression Settings
hr_for = 3
hr_back = 23

# Make input
X = D['X'][:,1:hr_back]
f = D['F'][:,hr_for]
f = np.expand_dims(f,1)
X = np.concatenate((X,D['avg']),axis=1)
X = np.concatenate((X,f),axis=1)

# Make labels
y = D['Y'][:,hr_for]

# Train/Test Set
x_tr, x_te, y_tr, y_te = train_test_split(X, y, test_size = 0.25, random_state = 42)

# Creating an object of LinearRegression class
LR = LinearRegression()

# fitting the training data
LR.fit(x_tr,y_tr)

# Make predictions
y_p = np.squeeze(LR.predict(x_te))
y = np.squeeze(y_te)

# Evaluate error
r2 = r2_score(y.T,y_p)
mae = np.mean(np.abs(y.T-y_p))
mae_baseline = np.mean(np.abs(np.squeeze(f)-D['Y'][:,hr_for]))

print(mae,mae_baseline)

1.073423912141214 1.256025449752339


    Custom Transformer

In [24]:
class SelectHours(BaseEstimator, TransformerMixin):
    def __init__(self, hr_back=24):        
        self.hr_back = hr_back
    def fit(self, X, y=None):
        return self
    def transform(self,X):
        return X[:,hr_back-1]
        

In [30]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

my_pipeline = Pipeline([
    ('selecthours',SelectHours()),
    #('std_scaler',StandardScaler()),
])

X_prepared = my_pipeline.fit_transform(X)