In [94]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import math

dataset = pd.read_csv('CSIRO_Recons_gmsl_yr_2019.csv', header=0)
dataset.head()

dataset["Time"] = [math.floor(x) for x in dataset["Time"]]
dataset

Unnamed: 0,Time,GMSL (mm),GMSL uncertainty (mm)
0,1880,-30.3,24.2
1,1881,-24.7,24.2
2,1882,-41.5,23.0
3,1883,-36.2,22.8
4,1884,-15.3,22.2
...,...,...,...
135,2015,207.8,7.1
136,2016,208.1,7.2
137,2017,215.1,7.5
138,2018,216.6,7.9


In [95]:
import pandas as pd

slid_window = pd.DataFrame({
    "t_2": dataset.head(21)["GMSL (mm)"].shift(2), 
    "t_1": dataset.head(21)["GMSL (mm)"].shift(), 
    "GMSL data": dataset.head(21)["GMSL (mm)"]
})

slid_window

Unnamed: 0,t_2,t_1,GMSL data
0,,,-30.3
1,,-30.3,-24.7
2,-30.3,-24.7,-41.5
3,-24.7,-41.5,-36.2
4,-41.5,-36.2,-15.3
5,-36.2,-15.3,-16.8
6,-15.3,-16.8,-19.2
7,-16.8,-19.2,-24.8
8,-19.2,-24.8,-22.7
9,-24.8,-22.7,-21.1


In [96]:
! pip install tsextract



In [97]:
from tsextract.feature_extraction.extract import build_features

features_request = {
    "window":[10]
}

features = build_features(dataset["GMSL (mm)"], features_request, include_tzero=False)
features

Unnamed: 0_level_0,T-10,T-9,T-8,T-7,T-6,T-5,T-4,T-3,T-2,T-1,Target_Tplus3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,-30.3,-24.7,-41.5,-36.2,-15.3,-16.8,-19.2,-24.8,-22.7,-21.1,-12.9
11,-24.7,-41.5,-36.2,-15.3,-16.8,-19.2,-24.8,-22.7,-21.1,-19.1,-22.6
12,-41.5,-36.2,-15.3,-16.8,-19.2,-24.8,-22.7,-21.1,-19.1,-20.8,-10.8
13,-36.2,-15.3,-16.8,-19.2,-24.8,-22.7,-21.1,-19.1,-20.8,-17.6,-18.4
14,-15.3,-16.8,-19.2,-24.8,-22.7,-21.1,-19.1,-20.8,-17.6,-12.9,-13.2
...,...,...,...,...,...,...,...,...,...,...,...
132,159.6,168.1,166.8,167.2,171.3,172.7,181.1,187.5,194.2,195.4,207.8
133,168.1,166.8,167.2,171.3,172.7,181.1,187.5,194.2,195.4,202.4,208.1
134,166.8,167.2,171.3,172.7,181.1,187.5,194.2,195.4,202.4,200.2,215.1
135,167.2,171.3,172.7,181.1,187.5,194.2,195.4,202.4,200.2,210.9,216.6


In [98]:
from tsextract.feature_extraction.extract import build_features
from tsextract.domain.statistics import median, mean, skew, kurtosis
from tsextract.domain.temporal import abs_energy

features_request = {
    "window":[2], 
    "window_statistic":[24, median], 
    "difference":[12, 10],
    "difference_statistic":[15, 10, abs_energy], 
}

features_request

{'window': [2],
 'window_statistic': [24, <function tsextract.domain.statistics.median(data)>],
 'difference': [12, 10],
 'difference_statistic': [15,
  10,
  <function tsextract.domain.temporal.abs_energy(data)>]}

In [99]:
features_1 = build_features(dataset["GMSL (mm)"], features_request, include_tzero=False)
features_1

Unnamed: 0_level_0,T-2,T-1,window_statistic_24_median,difference_12_10-2,difference_12_10-1,difference_statistic_15_10_abs_energy,Target_Tplus3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
24,2.5,10.5,-18.00,23.4,22.8,2123.86,0.1
25,10.5,0.2,-17.20,22.8,5.5,1851.35,-2.4
26,0.2,-5.3,-16.05,5.5,19.9,1897.67,2.1
27,-5.3,1.5,-14.25,19.9,13.3,1670.55,2.0
28,1.5,0.1,-13.05,13.3,1.4,1124.95,10.3
...,...,...,...,...,...,...,...
132,194.2,195.4,150.15,38.7,42.8,8645.43,207.8
133,195.4,202.4,153.95,42.8,32.1,7800.95,208.1
134,202.4,200.2,158.15,32.1,44.1,8153.75,215.1
135,200.2,210.9,163.20,44.1,40.6,7953.11,216.6


In [100]:
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import GradientBoostingRegressor

scaler_features = StandardScaler().fit(features_1[features_1.columns.values[:-1]])
scaled_features = scaler_features.transform(features_1[features_1.columns.values[:-1]])

scaler_label = StandardScaler().fit(np.array(features_1[features_1.columns.values[-1]]).reshape(-1, 1))
scaled_label = scaler_label.transform(np.array(features_1[features_1.columns.values[-1]]).reshape(-1, 1))


### Split data using train proportion of 0.7
train_size = int(scaled_features[:, :-1].shape[0] * 0.7)

X_train, y_train = scaled_features[:train_size, :-1], scaled_label[:train_size, :]
X_test, y_test = scaled_features[train_size:, :-1], scaled_label[train_size:, :]

X_test

array([[ 0.78010404,  0.63927754,  0.60584761, -0.18345867,  1.09800092],
       [ 0.67945437,  0.78310022,  0.66834657,  1.12184936, -0.04859393],
       [ 0.82522286,  0.76769065,  0.75721229, -0.03842444, -0.82792012],
       [ 0.8096048 ,  0.59133665,  0.83142981, -0.82704805, -0.68459577],
       [ 0.63086488,  0.6016097 ,  0.86170274, -0.68201382, -0.478567  ],
       [ 0.64127691,  0.61188275,  0.87830403, -0.47352712, -0.63084913],
       [ 0.65168895,  0.69235496,  0.89002259, -0.62762599,  0.21118146],
       [ 0.73324988,  0.76940282,  0.91541279,  0.22445009, -0.20087606],
       [ 0.81134014,  0.79337327,  0.951545  , -0.19252331, -1.03394889],
       [ 0.83563489,  0.84645068,  0.98279449, -1.03553475, -0.26358047],
       [ 0.8894304 ,  0.89439157,  1.03064525, -0.25597578, -1.41913309],
       [ 0.9380199 ,  0.81734371,  1.07361329, -1.42531423, -1.03394889],
       [ 0.85992964,  0.87555765,  1.08533185, -1.03553475,  0.30075919],
       [ 0.91893117,  0.95431769,  1.1

In [101]:
from keras.models import Sequential
from keras.layers import Dense, Dropout


model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, kernel_initializer='normal'))

# Compile model
model.compile(loss='mean_squared_error', optimizer='adam')

history = model.fit(x=X_train, y=y_train, batch_size=32, epochs=100, verbose=1)

ModuleNotFoundError: No module named 'tensorflow'