Data Challenge : Can you predict the tide ?
Author : Oumeima EL GHARBI
Date : November 2022

# PART 1 : Preprocessing

### Importing libraries

In [1]:
%reset -f

from functions import *
from surge_prediction_metric import *

from sklearn.model_selection import train_test_split
from sklearn.neighbors import BallTree
from sklearn.multioutput import MultiOutputRegressor
from sklearn.kernel_ridge import KernelRidge
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor


import warnings
warnings.filterwarnings('ignore')
#warnings.filterwarnings(action="once")

%matplotlib inline
%autosave 300

  from pandas import MultiIndex, Int64Index


Autosaving every 300 seconds


In [2]:
# Starting time
t0 = time()

seed = 42 #42
baseline_error = 0.7720

#### Loading dataset

In [3]:
# load raw dataset
input_path = "./dataset/source/"
output_path = "./dataset/output/"


X_train_filename = "X_train_surge_new.npz"
Y_train_filename = "Y_train_surge.csv"
X_test_filename = "X_test_surge_new.npz"

X_train_file = "{}{}".format(input_path, X_train_filename)
Y_train_file = "{}{}".format(input_path, Y_train_filename)
X_test_file = "{}{}".format(input_path, X_test_filename)

In [4]:
X_train = np.load(X_train_file)
Y_train = pd.read_csv(Y_train_file)
X_test = np.load(X_test_file)

In [5]:
X_train

<numpy.lib.npyio.NpzFile at 0x2767bdc9640>

In [6]:
surge_train = np.array(Y_train)[:,1:]

In [7]:
surge_train

array([[ 0.58693592,  1.06958024,  0.76792754, ..., -0.42270688,
        -0.45623606, -0.82505705],
       [ 0.76792754, -0.1001619 ,  0.07077463, ..., -0.82505705,
        -0.99270295, -0.99270295],
       [ 0.07077463, -0.24428486, -0.35489084, ..., -0.99270295,
        -0.32211934, -0.88373311],
       ...,
       [ 0.48303332,  0.53330877,  1.44832196, ...,  1.22022298,
         1.44654495,  2.67036005],
       [ 1.44832196,  1.82036029,  1.66283054, ...,  2.67036005,
         2.62006628,  2.67874235],
       [ 1.66283054,  1.52876268,  1.28408882, ...,  2.67874235,
         1.22022298,  0.72566756]])

In [8]:
np.array(X_train)

array(['id_sequence', 't_slp', 'slp', 't_surge1_input', 'surge1_input',
       't_surge2_input', 'surge2_input', 't_surge1_output',
       't_surge2_output'], dtype='<U15')

In [9]:
pd.DataFrame(X_train)#.shape

Unnamed: 0,0
0,id_sequence
1,t_slp
2,slp
3,t_surge1_input
4,surge1_input
5,t_surge2_input
6,surge2_input
7,t_surge1_output
8,t_surge2_output


In [10]:
X_train['slp'].shape

(5599, 40, 41, 41)

#### Making X or slp_train for X_train and X_test

##### Preparing X_train to train

In [11]:
slp_train = generate_X_train_test(X_train)

slp_train

array([[100849.72, 100797.72, 100754.72, ..., 101970.81, 101981.81,
        101990.81],
       [100940.37, 100881.37, 100823.37, ..., 101515.72, 101562.72,
        101611.72],
       [101862.91, 101879.91, 101894.91, ..., 101005.37, 101038.37,
        101076.37],
       ...,
       [102048.13, 102080.13, 102106.13, ..., 100781.81, 100724.81,
        100664.81],
       [102111.95, 102134.95, 102149.95, ..., 101352.13, 101376.13,
        101392.13],
       [101972.53, 101984.53, 101993.53, ..., 100412.28, 100450.28,
        100483.28]], dtype=float32)

##### Preparing X_test for the submission

In [12]:
slp_test = generate_X_train_test(X_test)
X_test['id_sequence']

array([5600, 5601, 5602, 5603, 5604, 5605, 5606, 5607, 5608, 5609, 5610,
       5611, 5612, 5613, 5614, 5615, 5616, 5617, 5618, 5619, 5620, 5621,
       5622, 5623, 5624, 5625, 5626, 5627, 5628, 5629, 5630, 5631, 5632,
       5633, 5634, 5635, 5636, 5637, 5638, 5639, 5640, 5641, 5642, 5643,
       5644, 5645, 5646, 5647, 5648, 5649, 5650, 5651, 5652, 5653, 5654,
       5655, 5656, 5657, 5658, 5659, 5660, 5661, 5662, 5663, 5664, 5665,
       5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676,
       5677, 5678, 5679, 5680, 5681, 5682, 5683, 5684, 5685, 5686, 5687,
       5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 5696, 5697, 5698,
       5699, 5700, 5701, 5702, 5703, 5704, 5705, 5706, 5707, 5708, 5709,
       5710, 5711, 5712, 5713, 5714, 5715, 5716, 5717, 5718, 5719, 5720,
       5721, 5722, 5723, 5724, 5725, 5726, 5727, 5728, 5729, 5730, 5731,
       5732, 5733, 5734, 5735, 5736, 5737, 5738, 5739, 5740, 5741, 5742,
       5743, 5744, 5745, 5746, 5747, 5748, 5749, 57

#### Making y or surge_train / Y_train

In [13]:
surge_train = generate_Y_train(Y_train)
surge_train

array([[ 0.58693592,  1.06958024,  0.76792754, ..., -0.42270688,
        -0.45623606, -0.82505705],
       [ 0.76792754, -0.1001619 ,  0.07077463, ..., -0.82505705,
        -0.99270295, -0.99270295],
       [ 0.07077463, -0.24428486, -0.35489084, ..., -0.99270295,
        -0.32211934, -0.88373311],
       ...,
       [ 0.48303332,  0.53330877,  1.44832196, ...,  1.22022298,
         1.44654495,  2.67036005],
       [ 1.44832196,  1.82036029,  1.66283054, ...,  2.67036005,
         2.62006628,  2.67874235],
       [ 1.66283054,  1.52876268,  1.28408882, ...,  2.67874235,
         1.22022298,  0.72566756]])

In [14]:
surge_train_df = Y_train_to_dataframe(surge_train, X_train)
surge_train_df

Unnamed: 0,surge1_t0,surge1_t1,surge1_t2,surge1_t3,surge1_t4,surge1_t5,surge1_t6,surge1_t7,surge1_t8,surge1_t9,surge2_t0,surge2_t1,surge2_t2,surge2_t3,surge2_t4,surge2_t5,surge2_t6,surge2_t7,surge2_t8,surge2_t9
1,0.586936,1.069580,0.767928,-0.100162,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.053886,0.356847,0.348464,0.264641,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057
2,0.767928,-0.100162,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.348464,0.264641,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703
3,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733
4,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,-0.576103,1.160076,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733,-0.473001,-0.422707
5,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,-0.576103,1.160076,0.442813,0.305393,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733,-0.473001,-0.422707,-0.473001,-0.531677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5595,1.451674,1.220407,0.556771,1.709754,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,0.759197,0.884931,0.298171,-0.397560,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700
5596,0.556771,1.709754,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.298171,-0.397560,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223
5597,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.888589,1.545521,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223,1.446545,2.670360
5598,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.888589,1.545521,1.166779,0.653970,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223,1.446545,2.670360,2.620066,2.678742


#### Splitting train / test set using X_train and Y_train

In [15]:
X_train_surge, X_val_surge, y_train_surge, y_val_surge = train_test_split(slp_train, surge_train_df, test_size=0.3, random_state=seed)

In [16]:
slp_train.shape

(5599, 3362)

In [17]:
surge_train.shape

(5599, 20)

In [18]:
X_train_surge.shape

(3919, 3362)

In [19]:
y_val_surge.shape

(1680, 20)

In [20]:
X_train_surge

array([[102176.35, 102156.35, 102133.35, ..., 101640.01, 101663.01,
        101685.01],
       [101805.57, 101868.57, 101921.57, ..., 100350.2 , 100314.2 ,
        100281.2 ],
       [102114.1 , 102082.1 , 102049.1 , ..., 101215.24, 101180.24,
        101145.24],
       ...,
       [101786.12, 101687.12, 101576.12, ..., 100236.8 , 100318.8 ,
        100393.8 ],
       [102499.41, 102499.41, 102497.41, ..., 100472.49, 100492.49,
        100510.49],
       [101569.21, 101633.21, 101687.21, ...,  98750.56,  98741.56,
         98742.56]], dtype=float32)

In [21]:
y_val_surge

Unnamed: 0,surge1_t0,surge1_t1,surge1_t2,surge1_t3,surge1_t4,surge1_t5,surge1_t6,surge1_t7,surge1_t8,surge1_t9,surge2_t0,surge2_t1,surge2_t2,surge2_t3,surge2_t4,surge2_t5,surge2_t6,surge2_t7,surge2_t8,surge2_t9
4441,-0.167196,-0.234230,-0.110217,-0.029776,0.047313,-0.046535,-0.354891,-0.405166,-0.314670,-0.690061,-0.104180,-0.162856,-0.104180,-0.548441,-0.783146,-0.481383,-0.774763,-0.422707,-0.296972,0.398758
2472,0.238359,2.105255,0.888589,1.062877,1.103097,-1.638591,-1.145891,-0.321374,-0.579455,-0.666599,-1.336377,-0.967556,-0.892115,-0.011974,-0.095797,0.105378,-0.095797,-0.162856,0.113760,0.331700
3317,-0.096810,-0.100162,-0.103514,0.265173,-0.244285,-0.321374,-0.552641,-0.579455,-0.646488,-0.110217,1.002283,0.960372,0.893313,0.943607,0.700521,0.457434,0.365229,0.608315,0.289788,0.390376
81,1.331013,0.553419,1.220407,0.637211,-0.013018,-0.150437,0.141160,0.198139,0.060720,-0.371649,-0.925645,-1.403435,-1.252554,-1.420200,-1.026232,-0.833439,-0.045504,-0.028739,-0.347266,-0.975938
4806,1.776788,3.053785,3.516319,2.557734,0.922106,2.068386,1.713106,1.153373,2.406907,1.947725,0.901696,1.236988,0.717285,0.650227,0.708903,0.356847,0.474199,0.222730,0.247877,0.055084
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1392,-1.440841,-1.135836,-0.760446,-0.747039,-0.271098,-0.499014,-0.539234,-0.696764,-0.874404,-0.901217,1.295664,1.589044,1.228605,0.273024,-0.305355,-0.355649,-0.716087,-1.051379,-0.489765,-0.188003
4351,-0.116920,-0.328077,-0.371649,-0.455442,-0.455442,-0.499014,-0.143734,-0.009666,-0.096810,-0.485607,-0.364031,-0.322119,-0.489765,-0.791528,-0.556824,-0.690940,-0.422707,-0.674176,-0.196385,-0.313737
221,-1.420730,-1.561502,-1.702273,-1.826286,-1.779362,-1.283311,-1.249794,-0.921328,-0.991713,-0.777205,-0.380795,0.843020,-0.028739,-0.120944,-0.540059,-0.313737,-1.219025,-1.235790,-1.587846,-1.638140
1746,-0.163844,-0.458793,-0.006314,-0.090107,-0.250988,-0.368298,-0.214120,0.208194,0.074126,-0.056590,0.423905,0.524492,0.750814,0.532875,1.060959,0.641845,0.708903,0.775961,0.616698,0.474199


In [22]:
pd.DataFrame(X_train["id_sequence"])

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5
...,...
5594,5595
5595,5596
5596,5597
5597,5598


In [23]:
pd.DataFrame(Y_train)

Unnamed: 0,id_sequence,surge1_t0,surge1_t1,surge1_t2,surge1_t3,surge1_t4,surge1_t5,surge1_t6,surge1_t7,surge1_t8,...,surge2_t0,surge2_t1,surge2_t2,surge2_t3,surge2_t4,surge2_t5,surge2_t6,surge2_t7,surge2_t8,surge2_t9
0,1,0.586936,1.069580,0.767928,-0.100162,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,...,-0.053886,0.356847,0.348464,0.264641,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057
1,2,0.767928,-0.100162,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,...,0.348464,0.264641,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703
2,3,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,...,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733
3,4,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,-0.576103,...,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733,-0.473001,-0.422707
4,5,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,-0.576103,1.160076,0.442813,...,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733,-0.473001,-0.422707,-0.473001,-0.531677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5594,5595,1.451674,1.220407,0.556771,1.709754,0.483033,0.533309,1.448322,1.820360,1.662831,...,0.759197,0.884931,0.298171,-0.397560,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700
5595,5596,0.556771,1.709754,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,1.284089,...,0.298171,-0.397560,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223
5596,5597,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.888589,...,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223,1.446545,2.670360
5597,5598,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.888589,1.545521,1.166779,...,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223,1.446545,2.670360,2.620066,2.678742


In [24]:
X_train_surge.shape

(3919, 3362)

In [25]:
y_train_surge.shape

(3919, 20)

#### Saving testing set

In [26]:
y_val_surge.to_csv(output_path + "Y_val_true.csv", index_label='id_sequence', sep=',')

# Part II : Predictive modeling

In [27]:
X_train_surge

array([[102176.35, 102156.35, 102133.35, ..., 101640.01, 101663.01,
        101685.01],
       [101805.57, 101868.57, 101921.57, ..., 100350.2 , 100314.2 ,
        100281.2 ],
       [102114.1 , 102082.1 , 102049.1 , ..., 101215.24, 101180.24,
        101145.24],
       ...,
       [101786.12, 101687.12, 101576.12, ..., 100236.8 , 100318.8 ,
        100393.8 ],
       [102499.41, 102499.41, 102497.41, ..., 100472.49, 100492.49,
        100510.49],
       [101569.21, 101633.21, 101687.21, ...,  98750.56,  98741.56,
         98742.56]], dtype=float32)

In [28]:
y_train_surge

Unnamed: 0,surge1_t0,surge1_t1,surge1_t2,surge1_t3,surge1_t4,surge1_t5,surge1_t6,surge1_t7,surge1_t8,surge1_t9,surge2_t0,surge2_t1,surge2_t2,surge2_t3,surge2_t4,surge2_t5,surge2_t6,surge2_t7,surge2_t8,surge2_t9
5569,0.013796,0.000389,0.315448,0.107643,-0.086755,0.010444,0.271876,0.375779,0.281932,0.271876,0.432287,0.516110,0.105378,0.130525,0.440670,0.415523,0.138907,0.298171,0.122142,0.340082
2837,0.845017,0.556771,1.857229,0.372427,-0.914624,-0.304615,-0.411870,-0.750391,-0.626378,-0.559344,-1.302848,-1.042997,-1.084908,-0.783146,-0.992703,-1.571081,-1.403435,-1.017850,-0.313737,-0.598735
84,0.141160,0.198139,0.060720,-0.371649,0.265173,1.297496,1.860581,1.284089,1.274034,-0.133679,-0.045504,-0.028739,-0.347266,-0.975938,-0.908880,-1.219025,-1.260936,-1.068144,-0.732852,-0.196385
3659,-0.854294,-0.904569,-1.008472,-1.078857,-1.072154,-0.669950,-0.562696,-0.006314,0.245063,0.332207,-5.762229,-4.194740,-5.720317,-4.286945,-4.999440,-4.949146,-4.270180,-4.714442,-3.993565,-3.054748
791,-0.814073,-0.807370,-0.961548,-1.162650,-1.380510,-1.112374,-0.626378,-0.123624,0.214898,0.000389,-1.135202,-1.026232,-0.959174,-0.607117,-0.841822,-0.355649,-0.514912,-0.196385,-0.003592,-0.548441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3773,1.072932,1.049470,0.741114,0.707597,0.848368,0.499792,0.335559,1.485191,1.565631,1.213703,0.465816,-0.540059,-0.674176,-0.975938,-0.489765,-0.372413,0.340082,0.599933,0.348464,0.314935
5192,-0.187306,0.070775,0.241711,0.402593,0.945567,1.448322,0.868478,0.781334,0.204842,-0.039831,0.155671,0.063466,0.390376,0.507728,0.306553,0.365229,0.432287,0.088613,0.080231,-0.179620
5227,-0.180603,-0.086755,0.754521,0.054016,0.975733,0.228304,0.224953,-0.190658,-0.328077,-0.485607,-0.070650,0.281406,-0.020357,0.197583,0.004790,0.490963,0.071849,0.843020,0.457434,0.558022
5391,0.395889,0.808148,0.590288,0.348965,0.077478,0.238359,0.188084,0.543364,1.062877,0.295338,0.901696,0.901696,0.331700,0.692138,-0.196385,-0.296972,-0.850204,-0.598735,-0.892115,-0.598735


#### Baseline / Benchmark : Ball Tree

#### 1) Kernel Ridge

##### Hyperparameters tuning

#### 2) XGBoost

#### 3) SVR : 0.72 without tuning

##### Tuning

#### 4) MLRegressor : supports multioutput

##### Hyperparameters tuning

#### 5) Neural Network

In [29]:
X_train_surge.shape[1]

3362

In [None]:
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

In [30]:
#NN_model.fit(X_train_surge, y_train_surge, epochs=500, batch_size=32, validation_split = 0.2, callbacks=callbacks_list)


## Neural Networks : https://www.nature.com/articles/s41598-021-96674-0
1) Artificial Neural Network (ANN)
2) a Long Term Short Term Neural Network (LSTM)
3) a Convolutional Neural Network (CNN)
4) a Convolutional LSTM (ConvLSTM), which is a combination of the latter two. In

In [31]:
x, y = slp_train, surge_train_df
print(x.shape)
print(y.shape)

(5599, 3362)
(5599, 20)


In [32]:
#The LSTM model input dimension requires the third dimension that will be the number of the single input row. We'll reshape the x data.

x = x.reshape(x.shape[0], x.shape[1], 1)
print("x:", x.shape, "y:", y.shape)

x: (5599, 3362, 1) y: (5599, 20)


In [36]:
in_dim = (x.shape[1], x.shape[2])
out_dim = y.shape[1]

print(in_dim)
print(out_dim)

(3362, 1)
20


In [37]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=seed)


In [38]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error

We'll start by defining the sequential model. The sequential model contains LSTM layers with ReLU activations, Dense output layer,  and Adam optimizer with MSE loss function. We'll set the input dimension in the first layer and output dimension in the last layer of the model.

In [39]:
model = Sequential()
model.add(LSTM(64, input_shape=in_dim, activation="relu"))
model.add(Dense(out_dim))
model.compile(loss="mse", optimizer="adam")

In [40]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                16896     
                                                                 
 dense (Dense)               (None, 20)                1300      
                                                                 
Total params: 18,196
Trainable params: 18,196
Non-trainable params: 0
_________________________________________________________________


In [41]:
model.fit(xtrain, ytrain, epochs=100, batch_size=12, verbose=0)


KeyboardInterrupt



In [None]:
# 2) save model
path_model = './model/'
dict_models = {'LSTM': model}
for name, model in dict_models.items():
    filename = path_model + name + '.sav'
    # save the model to disk
    pickle.dump(model, open(filename, 'wb'))

In [42]:
ypred = model.predict(xtest)

print("y1 MSE:%.4f" % mean_squared_error(ytest[:,0], ypred[:,0]))
print("y2 MSE:%.4f" % mean_squared_error(ytest[:,1], ypred[:,1]))




InvalidIndexError: (slice(None, None, None), 0)

In [None]:
# 4) save
save_y_pred("LSTM", output_path, ypred, y_val_surge.index, submit=False)

# 5) evaluate
evaluate_surge(y_pred_filename="Y_val_pred_LSTM.csv", y_true_filename="Y_val_true.csv", path_output=output_path)

In [43]:
ypred

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)

In [None]:
x_ax = range(len(xtest))
plt.title("LSTM multi-output prediction")
plt.scatter(x_ax, ytest[:,0],  s=6, label="y1-test")
plt.plot(x_ax, ypred[:,0], label="y1-pred")
plt.scatter(x_ax, ytest[:,1],  s=6, label="y2-test")
plt.plot(x_ax, ypred[:,1], label="y2-pred")
plt.legend()
plt.show()

#### Saving models

# Part III : Submission prediction

#### Predicting Y_test for each model using models trained using split X_train

#### Predicting Y_test for the best model using all of X_train to train : no gain for XGBoost / SVR ?

In [33]:
slp_train

array([[100849.72, 100797.72, 100754.72, ..., 101970.81, 101981.81,
        101990.81],
       [100940.37, 100881.37, 100823.37, ..., 101515.72, 101562.72,
        101611.72],
       [101862.91, 101879.91, 101894.91, ..., 101005.37, 101038.37,
        101076.37],
       ...,
       [102048.13, 102080.13, 102106.13, ..., 100781.81, 100724.81,
        100664.81],
       [102111.95, 102134.95, 102149.95, ..., 101352.13, 101376.13,
        101392.13],
       [101972.53, 101984.53, 101993.53, ..., 100412.28, 100450.28,
        100483.28]], dtype=float32)

In [34]:
surge_train_df

Unnamed: 0,surge1_t0,surge1_t1,surge1_t2,surge1_t3,surge1_t4,surge1_t5,surge1_t6,surge1_t7,surge1_t8,surge1_t9,surge2_t0,surge2_t1,surge2_t2,surge2_t3,surge2_t4,surge2_t5,surge2_t6,surge2_t7,surge2_t8,surge2_t9
1,0.586936,1.069580,0.767928,-0.100162,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.053886,0.356847,0.348464,0.264641,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057
2,0.767928,-0.100162,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.348464,0.264641,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703
3,0.070775,-0.244285,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,0.901696,0.449052,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733
4,-0.354891,-0.928031,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,-0.576103,1.160076,0.113760,-0.422707,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733,-0.473001,-0.422707
5,-0.773853,-0.375001,-0.361594,-0.210768,0.288635,-0.726929,-0.576103,1.160076,0.442813,0.305393,-0.456236,-0.825057,-0.992703,-0.992703,-0.322119,-0.883733,-0.473001,-0.422707,-0.473001,-0.531677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5595,1.451674,1.220407,0.556771,1.709754,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,0.759197,0.884931,0.298171,-0.397560,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700
5596,0.556771,1.709754,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.298171,-0.397560,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223
5597,0.483033,0.533309,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.888589,1.545521,-0.540059,-0.498148,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223,1.446545,2.670360
5598,1.448322,1.820360,1.662831,1.528763,1.284089,0.070775,0.888589,1.545521,1.166779,0.653970,-0.498148,-0.405942,0.164054,0.331700,1.052577,1.220223,1.446545,2.670360,2.620066,2.678742


###  End of notebook

In [None]:
# End of pipeline time
t1 = time()
print("computing time : {:8.6f} sec".format(t1 - t0))
print("computing time : " + strftime('%H:%M:%S', gmtime(t1 - t0)))