In [None]:
import io
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import seaborn as sns
import tensorflow as tf
import tensorflow.keras as keras

# from sklearn.linear_model import LinearRegression
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras import Sequential

from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import Dense, Input, Lambda, LSTM

In [None]:
# Production table read from CSV, indexed by its DATEPRD column.
# NOTE(review): the index is not parsed as dates here, so it may be plain
# strings — confirm against the file.
df = pd.read_csv(
    "../datasets/consolidated_data/oil_data.csv",
    index_col="DATEPRD",
)

# Keep the index handy as the x axis for every plot below.
date = df.index
df.head()

In [None]:
#function to plot a series
def plot_series(x, y, ylabel=None, format="-", start=None, end=None, label=None,
                new_figure=True, show=True):
    """Plot ``y`` against ``x`` as a single line with a "Date" x-axis.

    Args:
        x: Sequence of x values (sliced with ``start:end``).
        y: Sequence of y values (sliced with ``start:end``).
        ylabel: Quantity name; the y-axis is labelled ``"<ylabel> Volume"``.
            When omitted, the y-axis label is left untouched instead of
            being set to the literal text "None Volume".
        format: Matplotlib format string for the line.
        start: First index to plot (``None`` = from the beginning).
        end: Index to stop before (``None`` = to the end).
        label: Legend entry; a legend is drawn only when this is truthy.
        new_figure: Open a fresh 10x6 figure first (default). Pass ``False``
            to draw onto the current figure, e.g. to overlay several series.
        show: Call ``plt.show()`` when done (default). Pass ``False`` to keep
            the figure open for further drawing.
    """
    if new_figure:
        plt.figure(figsize=(10, 6))
    plt.plot(x[start:end], y[start:end], format, label=label)
    plt.xlabel("Date")
    # Only label the axis when a name was supplied.
    if ylabel is not None:
        plt.ylabel(f"{ylabel} Volume")
    if label:
        plt.legend(fontsize=14)
    plt.grid(True)
    if show:
        plt.show()


# Reset Keras state and reseed the random generators.
def refresh():
    """Clear the Keras backend session, then seed TensorFlow and NumPy with 42."""
    seed = 42
    keras.backend.clear_session()
    tf.random.set_seed(seed)
    np.random.seed(seed)

class ResetStatesCallback(Callback):
    """Callback that calls ``reset_states()`` on the model at the start of each epoch."""

    def on_epoch_begin(self, epoch, logs=None):
        """Reset the model's states before the epoch runs.

        Args:
            epoch: Index of the epoch about to start (unused).
            logs: Metrics dict passed by Keras (unused). Defaults to ``None``
                to match the base ``Callback.on_epoch_begin`` signature.
        """
        self.model.reset_states()

In [None]:
# Unscaled oil volumes as a NumPy array.
oil_data = df["BORE_OIL_VOL"].to_numpy()

plot_series(date, oil_data, ylabel="Oil", label="Daily Oil Production")

oil_data

In [None]:
# Unscaled gas volumes as a NumPy array.
gas_data = df.BORE_GAS_VOL.to_numpy()

# plot_series() opens, draws and shows its own figure, so wrapping it in
# plt.figure()/plt.show() only produced an extra empty figure.
plot_series(date, gas_data, "Gas", label="Daily Gas Production")

gas_data

In [None]:
# Rescale every column of df into [0, 1].
# NOTE(review): the scaler is fitted on all rows, including the ones later
# held out for validation — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df)
scaled = scaler.transform(df)

# Re-attach the original index and column names to the scaled values.
scaled_df = pd.DataFrame(data=scaled, index=df.index, columns=df.columns)
scaled_df.head()

In [None]:
# Scaled oil volumes as a NumPy array; the gas counterpart is commented out below.
scaled_oil_data = scaled_df["BORE_OIL_VOL"].to_numpy()

# scaled_gas_data = scaled_df.BORE_GAS_VOL.to_numpy()

In [None]:
# Split the time series into training and validation sets.
# Despite its name, split_date is a positional row index, not a calendar date.
split_date = 2140

date_train = date[:split_date]
date_valid = date[split_date:]

oil_train = scaled_oil_data[:split_date]
oil_valid = scaled_oil_data[split_date:]

# The gas cells further down use scaled_gas_data, gas_train and gas_valid.
# These were commented out, so a fresh Restart & Run All failed with a
# NameError. Define all three here so the gas sections run.
scaled_gas_data = scaled_df.BORE_GAS_VOL.to_numpy()
gas_train = scaled_gas_data[:split_date]
gas_valid = scaled_gas_data[split_date:]

print(oil_train.shape, oil_valid.shape, date_train.shape, date_valid.shape)

# Naive Forecast

In [None]:
# Naive forecast: each validation step is predicted by the previous value.
naive_oil_forecast = scaled_oil_data[split_date - 1:-1]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, oil_valid, "-", label="Actual")
plt.plot(date_valid, naive_oil_forecast, "-", label="Naive Forecast")
plt.xlabel("Date")
plt.ylabel("Oil Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(oil_valid, naive_oil_forecast).numpy()
mse = keras.metrics.mean_squared_error(oil_valid, naive_oil_forecast).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

In [None]:
# Naive forecast: each validation step is predicted by the previous value.
naive_gas_forecast = scaled_gas_data[split_date - 1:-1]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, gas_valid, "-", label="Actual")
plt.plot(date_valid, naive_gas_forecast, "-", label="Naive Forecast")
plt.xlabel("Date")
plt.ylabel("Gas Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(gas_valid, naive_gas_forecast).numpy()
mse = keras.metrics.mean_squared_error(gas_valid, naive_gas_forecast).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

# Moving Average

In [None]:
def moving_average_forecast(series, window_size):
    """Forecast each step as the mean of the ``window_size`` values before it.

    The result has ``len(series) - window_size`` entries: entry ``i`` is the
    mean of ``series[i:i + window_size]``. With ``window_size=1`` this is the
    naive forecast (the previous value).
    """
    running = np.cumsum(series)

    # Prepend a zero so every window sum is a plain difference of two
    # cumulative totals, with no special case for the first window.
    padded = np.concatenate((np.zeros(1, dtype=running.dtype), running))
    window_sums = padded[window_size:] - padded[:-window_size]

    # The last window has no following value to forecast, so drop it.
    return window_sums[:-1] / window_size

In [None]:
# 30-day moving-average forecast, trimmed to the validation period.
oil_moving_avg = moving_average_forecast(scaled_oil_data, 30)[split_date - 30:]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, oil_valid, "-", label="Actual")
plt.plot(date_valid, oil_moving_avg, "-", label="30-day Moving average")
plt.xlabel("Date")
plt.ylabel("Oil Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(oil_valid, oil_moving_avg).numpy()
mse = keras.metrics.mean_squared_error(oil_valid, oil_moving_avg).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

In [None]:
# 30-day moving-average forecast, trimmed to the validation period.
gas_moving_avg = moving_average_forecast(scaled_gas_data, 30)[split_date - 30:]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, gas_valid, "-", label="Actual")
plt.plot(date_valid, gas_moving_avg, "-", label="30-day Moving average")
plt.xlabel("Date")
plt.ylabel("Gas Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(gas_valid, gas_moving_avg).numpy()
mse = keras.metrics.mean_squared_error(gas_valid, gas_moving_avg).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

# Linear Model

Oil

In [None]:
def window_dataset(series, window_size, batch_size=30, shuffle_buffer=100):
    """Build a shuffled, batched dataset of (window, next value) pairs.

    Every run of ``window_size + 1`` consecutive values (shift 1, incomplete
    trailing runs dropped) is split into its first ``window_size`` values
    (features) and its final value (target). Examples are shuffled with a
    buffer of ``shuffle_buffer``, grouped into batches of ``batch_size`` and
    prefetched two batches ahead.
    """
    span = window_size + 1

    return (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        # Each window is itself a dataset; batch it into a single tensor.
        .flat_map(lambda chunk: chunk.batch(span))
        .shuffle(shuffle_buffer)
        .map(lambda chunk: (chunk[:-1], chunk[-1]))
        .batch(batch_size)
        .prefetch(2)
    )


def model_forecast(model, series, window_size, batch_size=30):
    """Run ``model.predict`` over every sliding window of ``series``.

    Args:
        model: Object exposing ``predict(dataset)``.
        series: Values to slice into windows.
        window_size: Number of consecutive values per window (shift 1,
            incomplete trailing windows dropped).
        batch_size: Number of windows per prediction batch. Defaults to 30,
            the value that used to be hard-coded.

    Returns:
        Whatever ``model.predict`` returns for the windowed dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size, shift=1, drop_remainder=True)
    # Each window is itself a dataset; batch it into a single tensor.
    dataset = dataset.flat_map(lambda w: w.batch(window_size))
    dataset = dataset.batch(batch_size).prefetch(2)

    return model.predict(dataset)

In [None]:
refresh()

window_size = 30

oil_train_set = window_dataset(oil_train, window_size)

# One Dense unit over the window: a linear model of the last 30 values.
model = Sequential([Dense(1, input_shape=[window_size])])

# Learning-rate sweep: start at 1e-4 and grow tenfold every 30 epochs.
lr_schedule = LearningRateScheduler(lambda epoch: 1e-4 * 10**(epoch / 30))

optimizer = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9)

model.compile(
    loss=keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

history = model.fit(oil_train_set, epochs=100, callbacks=[lr_schedule])

In [None]:
# Loss against learning rate from the sweep, on a log-scaled x axis.
# NOTE(review): some Keras versions record this under "learning_rate"
# rather than "lr" — confirm against the installed version.
plt.semilogx(history.history["lr"], history.history["loss"])
plt.axis((5e-5, 5e-1, 0, 0.3))  # (xmin, xmax, ymin, ymax)
plt.show()

# Largest loss recorded during the sweep.
max(history.history["loss"])

In [None]:
# Same reset as the other training cells: clear the Keras session and reseed
# TensorFlow/NumPy via the shared helper instead of repeating its body.
refresh()

window_size = 30

oil_train_set = window_dataset(oil_train, window_size)
oil_valid_set = window_dataset(oil_valid, window_size)

# One Dense unit over the window: a linear model of the last 30 values.
model = Sequential([Dense(1, input_shape=[window_size])])

optimizer = keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)

model.compile(loss=keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# Stop after 10 epochs without validation improvement and roll back to the
# best weights; otherwise the model evaluated below would be the one from the
# last (already degraded) epoch.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

model.fit(
    oil_train_set,
    epochs=500,
    validation_data=oil_valid_set,
    callbacks=[early_stopping],
)


In [None]:
# One prediction per validation step, each from the 30 values preceding it.
linear_oil_forecast = model_forecast(model, scaled_oil_data[split_date - window_size:-1], window_size)[:, 0]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, oil_valid, "-", label="Actual")
plt.plot(date_valid, linear_oil_forecast, "-", label="Linear Model")
plt.xlabel("Date")
plt.ylabel("Oil Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(oil_valid, linear_oil_forecast).numpy()
mse = keras.metrics.mean_squared_error(oil_valid, linear_oil_forecast).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

In [None]:
#model.summary()
#print(model.trainable_variables)

Gas

In [None]:
refresh()

window_size = 30

gas_train_set = window_dataset(gas_train, window_size)

# One Dense unit over the window: a linear model of the last 30 values.
model = Sequential([Dense(1, input_shape=[window_size])])

# Learning-rate sweep: start at 1e-4 and grow tenfold every 30 epochs.
lr_schedule = LearningRateScheduler(lambda epoch: 1e-4 * 10**(epoch / 30))

optimizer = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9)

model.compile(
    loss=keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

history = model.fit(gas_train_set, epochs=100, callbacks=[lr_schedule])

In [None]:
# Loss against learning rate from the sweep, on a log-scaled x axis.
# NOTE(review): some Keras versions record this under "learning_rate"
# rather than "lr" — confirm against the installed version.
plt.semilogx(history.history["lr"], history.history["loss"])
plt.axis((5e-5, 5e-1, 0, 0.3))  # (xmin, xmax, ymin, ymax)
plt.show()

# Largest loss recorded during the sweep.
max(history.history["loss"])

In [None]:
# Same reset as the other training cells: clear the Keras session and reseed
# TensorFlow/NumPy via the shared helper instead of repeating its body.
refresh()

window_size = 30

gas_train_set = window_dataset(gas_train, window_size)
gas_valid_set = window_dataset(gas_valid, window_size)

# One Dense unit over the window: a linear model of the last 30 values.
model = Sequential([Dense(1, input_shape=[window_size])])

optimizer = keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)

model.compile(loss=keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# Stop after 10 epochs without validation improvement and roll back to the
# best weights; otherwise the model evaluated below would be the one from the
# last (already degraded) epoch.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

model.fit(
    gas_train_set,
    epochs=500,
    validation_data=gas_valid_set,
    callbacks=[early_stopping],
)


In [None]:
# One prediction per validation step, each from the 30 values preceding it.
linear_gas_forecast = model_forecast(model, scaled_gas_data[split_date - window_size:-1], window_size)[:, 0]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, gas_valid, "-", label="Actual")
plt.plot(date_valid, linear_gas_forecast, "-", label="Linear Model")
plt.xlabel("Date")
plt.ylabel("Gas Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(gas_valid, linear_gas_forecast).numpy()
mse = keras.metrics.mean_squared_error(gas_valid, linear_gas_forecast).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

In [None]:
#model.summary()
#print(model.trainable_variables)

# Feed-Forward Neural Network (FF-NN)

Oil

In [None]:
refresh()

window_size = 30

oil_train_set = window_dataset(oil_train, window_size)

# Two hidden ReLU layers of 10 units each, then a single linear output.
model = Sequential(
    [
        Dense(10, activation="relu", input_shape=[window_size]),
        Dense(10, activation="relu"),
        Dense(1),
    ]
)

# Learning-rate sweep: start at 1e-5 and grow tenfold every 20 epochs.
lr_schedule = LearningRateScheduler(
    lambda epoch: 1e-5 * 10**(epoch / 20)
)

optimizer = keras.optimizers.SGD(learning_rate=1e-5, momentum=0.9)

model.compile(
    loss=keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

history = model.fit(oil_train_set, epochs=100, callbacks=[lr_schedule])

In [None]:
# Loss against learning rate from the sweep, on a log-scaled x axis.
# NOTE(review): some Keras versions record this under "learning_rate"
# rather than "lr" — confirm against the installed version.
plt.semilogx(history.history["lr"], history.history["loss"])
plt.axis((1e-5, 1, 0, 0.12))  # (xmin, xmax, ymin, ymax)
plt.show()

# Highest learning rate reached by the sweep.
max(history.history["lr"])

In [None]:
refresh()

window_size = 30

oil_train_set = window_dataset(oil_train, window_size)
oil_valid_set = window_dataset(oil_valid, window_size)

# Two hidden ReLU layers of 10 units each, then a single linear output.
model = Sequential([
            Dense(10, activation="relu", input_shape=[window_size]),
            Dense(10, activation="relu"),
            Dense(1)
        ])

optimizer = keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)

model.compile(loss=keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# Stop after 10 epochs without validation improvement and roll back to the
# best weights; otherwise the model evaluated below would be the one from the
# last (already degraded) epoch.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

model.fit(oil_train_set,
          epochs=500,
          validation_data=oil_valid_set,
          callbacks=[early_stopping])

In [None]:
# One prediction per validation step, each from the 30 values preceding it.
ffnn_oil_forecast = model_forecast(
    model,
    scaled_oil_data[split_date - window_size:-1],
    window_size,
)[:, 0]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, oil_valid, "-", label="Actual")
plt.plot(date_valid, ffnn_oil_forecast, "-", label="FF-NN Model")
plt.xlabel("Date")
plt.ylabel("Oil Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(oil_valid, ffnn_oil_forecast).numpy()
mse = keras.metrics.mean_squared_error(oil_valid, ffnn_oil_forecast).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')

# RNN Model - LSTM

In [None]:
def sequential_window_dataset(series, window_size):
    """Build an unshuffled dataset of consecutive sequence-to-sequence windows.

    A trailing axis of size 1 is added to ``series``. It is then cut into runs
    of ``window_size + 1`` values, each starting ``window_size`` after the
    previous one (incomplete trailing runs dropped). Each run yields inputs
    ``run[:-1]`` and targets ``run[1:]``, i.e. the same sequence shifted one
    step ahead. Windows are emitted in order, one per batch.
    """
    span = window_size + 1
    column = tf.expand_dims(series, axis=-1)

    return (
        tf.data.Dataset.from_tensor_slices(column)
        .window(span, shift=window_size, drop_remainder=True)
        # Each window is itself a dataset; batch it into a single tensor.
        .flat_map(lambda chunk: chunk.batch(span))
        .map(lambda chunk: (chunk[:-1], chunk[1:]))
        .batch(1)
        .prefetch(1)
    )

Oil

In [None]:
refresh()

window_size = 30

oil_train_set = sequential_window_dataset(oil_train, window_size)

# Two stacked stateful LSTM layers emitting a value at every time step.
# batch_input_shape fixes the batch size at 1.
model = Sequential([
        LSTM(100, return_sequences=True, stateful=True, batch_input_shape=[1, None, 1]),
        LSTM(100, return_sequences=True, stateful=True),
        Dense(1)
        ])

# Learning-rate sweep: start at 1e-5 and grow tenfold every 20 epochs.
learning_rate_schedule = LearningRateScheduler(lambda epoch: 1e-5 * 10**(epoch / 20))
reset_states = ResetStatesCallback()

# Initial rate now matches the schedule's starting value (it was 1e-4). The
# scheduler sets the rate at the start of every epoch, so the sweep itself is
# unchanged; this only removes the inconsistency with the other sweep cells.
optimizer = keras.optimizers.SGD(learning_rate=1e-5, momentum=0.9)

model.compile(loss=keras.losses.Huber(), optimizer=optimizer, metrics=["mae"])

history = model.fit(oil_train_set, epochs=100, callbacks=[learning_rate_schedule, reset_states])

In [None]:
# Loss against learning rate from the sweep, on a log-scaled x axis.
# NOTE(review): some Keras versions record this under "learning_rate"
# rather than "lr" — confirm against the installed version.
plt.semilogx(history.history["lr"], history.history["loss"])
plt.axis((1e-5, 10, 0.009, 0.12))  # (xmin, xmax, ymin, ymax)
plt.show()

# Highest learning rate reached by the sweep.
max(history.history["lr"])

In [None]:
refresh()

window_size = 30

oil_train_set = sequential_window_dataset(oil_train, window_size)
oil_valid_set = sequential_window_dataset(oil_valid, window_size)

# Two stacked stateful LSTM layers emitting a value at every time step.
# batch_input_shape fixes the batch size at 1.
model = Sequential(
    [
        LSTM(100, return_sequences=True, stateful=True, batch_input_shape=[1, None, 1]),
        LSTM(100, return_sequences=True, stateful=True),
        Dense(1),
    ]
)

optimizer = keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)

model.compile(
    loss=keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

# Callbacks: reset states each epoch, keep only the best checkpoint on disk,
# and stop once 50 epochs pass without improvement.
reset_states = ResetStatesCallback()
model_checkpoint = ModelCheckpoint("LSTM_oil_checkpoint.h5", save_best_only=True)
early_stopping = EarlyStopping(patience=50)

model.fit(
    oil_train_set,
    epochs=500,
    validation_data=oil_valid_set,
    callbacks=[early_stopping, model_checkpoint, reset_states],
)

In [None]:
# Replace the in-memory model with the checkpoint saved to disk during training.
model = keras.models.load_model(filepath="LSTM_oil_checkpoint.h5")

In [None]:
# Forecast: feed the whole scaled series as one batch of shape (1, time, 1),
# then keep the outputs at positions split_date - 1 .. -2, one per validation
# step.
LSTM_oil_forecast = model.predict(scaled_oil_data[np.newaxis, :, np.newaxis])
LSTM_oil_forecast = LSTM_oil_forecast[0, split_date - 1:-1, 0]

# Draw actual and forecast on ONE set of axes. plot_series() creates and shows
# a separate figure per call, so the previous plt.figure() + two plot_series()
# calls produced an empty figure and two unrelated plots, the first of which
# was labelled "None Volume".
plt.figure(figsize=(10, 6))
plt.plot(date_valid, oil_valid, "-", label="Actual")
plt.plot(date_valid, LSTM_oil_forecast, "-", label="LSTM Model")
plt.xlabel("Date")
plt.ylabel("Oil Volume")
plt.legend(fontsize=14)
plt.grid(True)
plt.show()

# Error metrics on the scaled values.
mae = keras.metrics.mean_absolute_error(oil_valid, LSTM_oil_forecast).numpy()
mse = keras.metrics.mean_squared_error(oil_valid, LSTM_oil_forecast).numpy()
rmse = math.sqrt(mse)

print(f'mae = {mae}, \nmse = {mse}, \nrmse = {rmse}')