In [None]:
pip install scalecast --upgrade

In [None]:
import tensorflow as tf
from scalecast.Forecaster import Forecaster
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import seaborn as sns

# Read the extent time-series workbook and index its rows by the parsed DATE values.
# The DATE column itself stays in the frame as a regular column.
df = pd.read_excel('time_series_north_extent.xlsx')
parsed_dates = pd.to_datetime(df['DATE'])
df.index = parsed_dates

# Preview the first rows as a sanity check
df.head()

In [None]:
# Build the scalecast Forecaster from the observed values (y) and their timestamps
# (current_dates), then print its text summary.
f = Forecaster(y=df['extent'], current_dates=df['DATE'])
print(f)

In [None]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: shuffle=False keeps the row order, so the validation frame is
# the final 20% of the rows and the training frame is everything before it.
train_lstm, val_lstm = train_test_split(df, shuffle=False, test_size=0.2)

In [None]:
# Hyper-parameter trial "lstm_test6": 20 lags, lstm_layer_sizes=(60,), no early stopping
from tensorflow.keras.callbacks import EarlyStopping

holdout_size = len(val_lstm)
f.set_test_length(holdout_size)        # 1. hold out as many observations as the validation frame
f.generate_future_dates(holdout_size)  # 2. future points to forecast (same count)
f.set_estimator('lstm')                # 3. later manual_forecast calls use the LSTM estimator

f.manual_forecast(
    call_me='lstm_test6',
    # input window and network shape
    lags=20,
    lstm_layer_sizes=(60,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop
    epochs=22,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)


In [None]:
# Tabulate every evaluated model with its test-set metrics; the best model is
# determined by TestSetMAE.
summary_columns = [
    'ModelNickname',
    'TestSetMAPE',
    'TestSetRMSE',
    'TestSetR2',
    'TestSetMAE',
    'best_model',
]
f.export('model_summaries', determine_best_by='TestSetMAE')[summary_columns]

In [None]:
# Partial autocorrelation function (PACF) plot over 26 lags: for each lag it shows how
# strongly the y variable (here, ice extent) is correlated with its own past value once the
# shorter lags are accounted for, i.e. how far back a statistically significant
# relationship extends.

f.plot_pacf(lags=26)
plt.show()

In [None]:
# Split the series into trend, seasonal and residual components and draw them
decomposition = f.seasonal_decompose()
decomposition.plot()
plt.show()

In [None]:
# Augmented Dickey-Fuller stationarity check. The ADF null hypothesis is that the series
# has a unit root (is non-stationary): only a p-value below 0.05 supports calling the
# series stationary; a p-value of 0.05 or more means non-stationarity cannot be rejected.
# full_res=True returns the full result tuple; only the statistic and p-value are used here.
stat, pval, _, _, _, _ = f.adf_test(full_res=True)
print("stat:", stat)
print("pval:", pval)

In [None]:
# Hyper-parameter trial "lstm_test1": 36 lags, lstm_layer_sizes=(32, 32), early stopping
from tensorflow.keras.callbacks import EarlyStopping

holdout_size = len(val_lstm)
f.set_test_length(holdout_size)        # 1. hold out as many observations as the validation frame
f.generate_future_dates(holdout_size)  # 2. future points to forecast (same count)
f.set_estimator('lstm')                # 3. later manual_forecast calls use the LSTM estimator

f.manual_forecast(
    call_me='lstm_test1',
    # input window and network shape
    lags=36,
    lstm_layer_sizes=(32, 32),
    dropout=(0, 0),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test2": 24 lags, lstm_layer_sizes=(16,), early stopping

f.manual_forecast(
    call_me='lstm_test2',
    # input window and network shape
    lags=24,
    lstm_layer_sizes=(16,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=50,
    batch_size=16,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test3": 48 lags, lstm_layer_sizes=(24,), early stopping

f.manual_forecast(
    call_me='lstm_test3',
    # input window and network shape
    lags=48,
    lstm_layer_sizes=(24,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=45,
    batch_size=24,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test4": 16 lags, lstm_layer_sizes=(64,), no early stopping

f.manual_forecast(
    call_me='lstm_test4',
    # input window and network shape
    lags=16,
    lstm_layer_sizes=(64,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop
    epochs=15,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test5": 28 lags, lstm_layer_sizes=(128,), no early stopping

f.manual_forecast(
    call_me='lstm_test5',
    # input window and network shape
    lags=28,
    lstm_layer_sizes=(128,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop
    epochs=12,
    batch_size=28,
    validation_split=0.2,
    shuffle=True,
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test7": same settings as lstm_test4 plus early stopping

f.manual_forecast(
    call_me='lstm_test7',
    # input window and network shape
    lags=16,
    lstm_layer_sizes=(64,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=15,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test8": 16 lags, lstm_layer_sizes=(128,), learning rate 0.1,
# early stopping

f.manual_forecast(
    call_me='lstm_test8',
    # input window and network shape
    lags=16,
    lstm_layer_sizes=(128,),
    dropout=(0,),
    activation='tanh',
    # optimiser (much larger step size than the other trials)
    optimizer='Adam',
    learning_rate=0.1,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=15,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test9": 30 lags, lstm_layer_sizes=(64,), learning rate 0.0001,
# early stopping

f.manual_forecast(
    call_me='lstm_test9',
    # input window and network shape
    lags=30,
    lstm_layer_sizes=(64,),
    dropout=(0,),
    activation='tanh',
    # optimiser (smaller step size than the earlier trials)
    optimizer='Adam',
    learning_rate=0.0001,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=20,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

In [None]:
# Hyper-parameter trial "lstm_test10": 16 lags, lstm_layer_sizes=(64,), learning rate 0.0001,
# up to 100 epochs with early stopping

f.manual_forecast(
    call_me='lstm_test10',
    # input window and network shape
    lags=16,
    lstm_layer_sizes=(64,),
    dropout=(0,),
    activation='tanh',
    # optimiser (smaller step size than the earlier trials)
    optimizer='Adam',
    learning_rate=0.0001,
    # training loop; early stopping watches val_loss with a patience of 5 epochs
    epochs=100,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    callbacks=EarlyStopping(patience=5, monitor='val_loss'),
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

Normalization

In [None]:
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from scalecast.Forecaster import Forecaster
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
from sklearn.model_selection import train_test_split

# Load a fresh copy of the series for the min-max scaled experiment
df_norm = pd.read_excel('time_series_north_extent.xlsx')

# Chronological split of the raw rows (same 80/20, unshuffled split as used below).
# Only the training rows are used to fit the scaler.
train_raw = train_test_split(df_norm, test_size=0.2, shuffle=False)[0]

# Fit the scaler on the training rows only, so the hold-out rows that later produce the
# test-set metrics do not leak their min/max into the transform. The whole series is then
# scaled with those training statistics, so hold-out values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(train_raw['extent'].values.reshape(-1, 1))

# Scale the full 'extent' column (the scaler expects a 2-D array, hence the reshape)
data_norm = df_norm['extent'].values.reshape(-1, 1)
normalized_data = scaler.transform(data_norm)
normalized_data_df = pd.DataFrame(normalized_data, columns=['extent'])
df_norm['extent'] = normalized_data_df['extent']

# Set index to datetime
df_norm.index = pd.to_datetime(df_norm['DATE'])

# Build the Forecaster on the scaled series (y) and its timestamps (current_dates)
f_norm = Forecaster(
    y=df_norm['extent'],
    current_dates=df_norm['DATE'],
)

# Training and validation frames of the scaled data (chronological, last 20% held out)
train_lstm_norm, val_lstm_norm = train_test_split(df_norm, test_size=0.2,
                                                  shuffle=False)
# Re-run the "test 6" configuration on the scaled series
holdout_size_norm = len(val_lstm_norm)
f_norm.set_test_length(holdout_size_norm)        # hold out as many observations as the validation frame
f_norm.generate_future_dates(holdout_size_norm)  # future points to forecast (same count)
f_norm.set_estimator('lstm')                     # later manual_forecast calls use the LSTM estimator

f_norm.manual_forecast(
    call_me='lstm_test6_norm',
    # input window and network shape
    lags=20,
    lstm_layer_sizes=(60,),
    dropout=(0,),
    activation='tanh',
    # optimiser
    optimizer='Adam',
    learning_rate=0.001,
    # training loop
    epochs=22,
    batch_size=12,
    validation_split=0.2,
    shuffle=True,
    plot_loss=True,
)

# Test-set plot of up to the three best models ranked by TestSetMAE, with intervals (ci=True)
f_norm.plot_test_set(models='top_3', order_by='TestSetMAE', ci=True)

# Model summaries for the scaled experiment. f_norm was built from the min-max scaled
# series, so its error metrics are expressed in scaled units.
norm_summary_columns = [
    'ModelNickname',
    'TestSetMAPE',
    'TestSetRMSE',
    'TestSetR2',
    'TestSetMAE',
    'best_model',
]
f_norm.export('model_summaries', determine_best_by='TestSetMAE')[norm_summary_columns]

In [None]:
# Show the scaled experiment's summary table again: one row per evaluated model, with the
# best model determined by TestSetMAE.
norm_summary_columns = [
    'ModelNickname',
    'TestSetMAPE',
    'TestSetRMSE',
    'TestSetR2',
    'TestSetMAE',
    'best_model',
]
f_norm.export('model_summaries', determine_best_by='TestSetMAE')[norm_summary_columns]