In [21]:
import pandas as pd
spot = pd.read_csv('./data/spot/clarkson_data.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)
pmx_forw = pd.read_csv('./data/ffa/PMAX_FFA.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)
csz_forw = pd.read_csv('./data/ffa/CSZ_FFA.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)
smx_forw = pd.read_csv('./data/ffa/SMX_FFA.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)


In [22]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Merge data frames on the Date column
data_combined = pd.merge(spot, smx_forw, on='Date')
s_col = "SMX"
f_col = "1Q"

# Remove rows with NA or 0 in specific columns (assuming 'SMX' and '1Q' are column names in 'data_combined')
data_combined = data_combined[(data_combined[s_col].notna() & data_combined[s_col] != 0) & (data_combined[f_col].notna() & data_combined[f_col] != 0)]

# Transform data to log levels
data_log_levels = pd.DataFrame()
data_log_levels["spot"] = np.log(data_combined[s_col])
data_log_levels["forwp"] = np.log(data_combined[f_col])
data_log_levels.index = data_combined["Date"]


# Split into train and test sets
split_index = round(len(data_log_levels) * 0.8)
hor = 30
train = data_log_levels.iloc[:split_index]
test = data_log_levels.iloc[split_index:split_index+hor]
#train.head()
data_log_levels.head()
train.head()
#test.head()

# Normalize features
scaler = MinMaxScaler()
train_scal = scaler.fit_transform(train)
print(train_scal)


TypeError: 'int' object is not callable

In [None]:
# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=10, is_test=False):
    X, Y = [], []
    if is_test:  # for test data, we just need the last entry for 1-step ahead forecast
        X = dataset[-1:,:,]
        return X, None
    else:
        for i in range(look_back, len(dataset)- hor + 1):
            X.append(dataset[i-look_back:i])
            Y.append(dataset[i:i+hor])
    return np.array(X), np.array(Y)

look_back = 20  # Adjust based on your temporal structure
trainX, trainY = create_dataset(train_scal, look_back)
print(trainX)

In [None]:
# Create and fit the MLP model
from keras.layers import Dense
from keras import Sequential
from sklearn.metrics import mean_squared_error
trainX_flat = trainX.reshape(trainX.shape[0], -1)
trainY_flat = trainY.reshape(trainY.shape[0], -1)

model_mlp = Sequential()
model_mlp.add(Dense(32, input_dim=trainX_flat.shape[1], activation='relu'))
model_mlp.add(Dense(trainY_flat.shape[1], activation="linear"))
model_mlp.compile(loss='mean_squared_error', optimizer='adam')
model_mlp.fit(trainX_flat, trainY_flat, epochs=1000, batch_size=2, verbose=2)



In [None]:
# Make predictions
trainPredict_scal_flat = model_mlp.predict(trainX_flat)


def create_even_odd_array(arr):
    """
    Returns an array where the first column contains values from even positions
    and the second column contains values from odd positions of the original array.
    """
    return arr.reshape(-1, 2)



testX, _ = create_dataset(trainX, look_back=look_back, is_test=True)
testX_flat = testX.reshape(testX.shape[0], -1)
testPredict_scal_flat = model_mlp.predict(testX_flat)
testPredict_scal = create_even_odd_array(testPredict_scal_flat)


# Invert predictions
#trainPredict = scaler.inverse_transform(trainPredict_scal)
testPredict = scaler.inverse_transform(testPredict_scal)
print(testPredict[:, 0])

# Calculate mean squared error
testScore = mean_squared_error(test["spot"], testPredict[:,0])
testScoreForw = mean_squared_error(test["forwp"], testPredict[:,1])
print('Test Score spot: %.5f MSE' % (testScore))
print('Test Score forw: %.5f MSE' % (testScoreForw))




def random_walk_predictions(training_data, testing_data):
    """
    Generates Random Walk predictions where the next value is assumed to be the last observed value.
    
    Parameters:
    - training_data: DataFrame containing the training data.
    - testing_data: DataFrame containing the test data.
    
    Returns:
    - predictions: Numpy array containing Random Walk predictions for the test set.
    """
    # Last observed values from the training set
    last_observed_spot = training_data['spot'].iloc[-1]
    last_observed_forwp = training_data['forwp'].iloc[-1]
    
    # Create an array of predictions, each one equal to the last observed values
    predictions = np.tile([last_observed_spot, last_observed_forwp], (len(testing_data), 1))
    
    return predictions

# Generate Random Walk predictions for the test set
rw_predictions = random_walk_predictions(train, test)

# Benchmark Random Walk model by calculating the MSE
rw_testScore_spot = mean_squared_error(test["spot"].values, rw_predictions[:, 0])
rw_testScore_forw = mean_squared_error(test["forwp"].values, rw_predictions[:, 1])

print('Random Walk Test Score spot: %.5f MSE' % (rw_testScore_spot))
print('Random Walk Test Score forw: %.5f MSE' % (rw_testScore_forw))

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Define the LSTM model
model_lstm = Sequential()
model_lstm.add(LSTM(units=50, return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2])))
model_lstm.add(LSTM(units=50))
model_lstm.add(Dense(trainY_flat.shape[1], activation='linear'))  # Assuming multi-step forecasting

model_lstm.compile(loss='mean_squared_error', optimizer='adam')
model_lstm.fit(trainX, trainY_flat, epochs=100, batch_size=32, verbose=2)



In [None]:
# Prepare the last sequence from the training set as the input for the first prediction
testX_last_sequence = train_scal[-look_back:].reshape(1, look_back, train_scal.shape[1])

# Make predictions
testPredict_scal_flat = model_lstm.predict(testX_last_sequence)

# Since you're predicting `hor` steps ahead, you might need to adjust the code to generate
# multiple steps if your LSTM model is set up for single-step predictions.
# For simplicity, this example directly uses the LSTM output for multi-step predictions.

# Invert scaling
testPredict_scal = create_even_odd_array(testPredict_scal_flat)
testPredict = scaler.inverse_transform(testPredict_scal)


# Calculate and print MSE for each target
testScore_spot_lstm = mean_squared_error(test["spot"].iloc[:hor].values, testPredict[:,0])
testScore_forw_lstm = mean_squared_error(test["forwp"].iloc[:hor].values, testPredict[:,1])
print('LSTM Test Score spot: %.5f MSE' % (testScore_spot_lstm))
print('LSTM Test Score forw: %.5f MSE' % (testScore_forw_lstm))



In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Number of rounds based on the test set size and forecast horizon
num_rounds =  30  # Adjusted to ensure we don't exceed the test set
look_back = 10  # Adjust based on your temporal structure
hor = 5

# Initialize dictionary to store MSE results for each model
mse_results = {
    'MLP_spot': [],
    'MLP_forwp': [],
    'LSTM_spot': [],
    'LSTM_forwp': [],
    'RW_spot': [],
    'RW_forwp': [],
}


def create_even_odd_array(arr):
    """
    Returns an array where the first column contains values from even positions
    and the second column contains values from odd positions of the original array.
    """
    return arr.reshape(-1, 2)


# Adjust train and test sets for each forecast round
for round in range(1, num_rounds + 1):
    print("Round", round)
    # Define new split point for each round
    split_index = split_index + (round - 1) * hor
    
    # Update train and test sets
    train = data_log_levels.iloc[:split_index]
    test = data_log_levels.iloc[split_index:split_index+hor]

    #Scale train set
    train_scal = scaler.fit_transform(train)

    trainX, trainY = create_dataset(train_scal, look_back)
    # Create and fit the MLP model

    trainX_flat = trainX.reshape(trainX.shape[0], -1)
    trainY_flat = trainY.reshape(trainY.shape[0], -1)

    model_mlp = Sequential()
    model_mlp.add(Dense(32, input_dim=trainX_flat.shape[1], activation='relu'))
    model_mlp.add(Dense(trainY_flat.shape[1], activation="linear"))
    model_mlp.compile(loss='mean_squared_error', optimizer='adam')
    model_mlp.fit(trainX_flat, trainY_flat, epochs=10, batch_size=2, verbose=1)


    # Make predictions
    trainPredict_scal_flat = model_mlp.predict(trainX_flat)
    testX = train_scal[-look_back:].reshape(1, look_back, train_scal.shape[1])
    #testX, _ = create_dataset(trainX, look_back=look_back, is_test=True)
    testX_flat = testX.reshape(testX.shape[0], -1)
    testPredict_scal_flat = model_mlp.predict(testX_flat)
    testPredict_scal = create_even_odd_array(testPredict_scal_flat)

    # Invert predictions
    testPredict_mlp = scaler.inverse_transform(testPredict_scal)

    # Calculate mean squared error
    testScore = mean_squared_error(test["spot"], testPredict_mlp[:,0])
    testScoreForw = mean_squared_error(test["forwp"], testPredict_mlp[:,1])
    print('Test Score spot MLP: %.5f MSE' % (testScore))
    print('Test Score forw MLP: %.5f MSE' % (testScoreForw))



    # Define the LSTM model
    model_lstm = Sequential()
    model_lstm.add(LSTM(units=50, return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2])))
    model_lstm.add(LSTM(units=50))
    model_lstm.add(Dense(trainY_flat.shape[1], activation='linear'))  # Assuming multi-step forecasting

    model_lstm.compile(loss='mean_squared_error', optimizer='adam')
    model_lstm.fit(trainX, trainY_flat, epochs=10, batch_size=32, verbose=1)

    # Prepare the last sequence from the training set as the input for the first prediction
    #testX_last_sequence = train_scal[-look_back:].reshape(1, look_back, train_scal.shape[1])

    # Make predictions
    testPredict_scal_flat = model_lstm.predict(testX)

    # Since you're predicting `hor` steps ahead, you might need to adjust the code to generate
    # multiple steps if your LSTM model is set up for single-step predictions.
    # For simplicity, this example directly uses the LSTM output for multi-step predictions.

    # Invert scaling
    testPredict_scal = create_even_odd_array(testPredict_scal_flat)
    testPredict_lstm = scaler.inverse_transform(testPredict_scal)


    # Calculate and print MSE for each target
    testScore_spot_lstm = mean_squared_error(test["spot"].iloc[:hor].values, testPredict_lstm[:,0])
    testScore_forw_lstm = mean_squared_error(test["forwp"].iloc[:hor].values, testPredict_lstm[:,1])
    print('LSTM Test Score spot: %.5f MSE' % (testScore_spot_lstm))
    print('LSTM Test Score forw: %.5f MSE' % (testScore_forw_lstm))


    # Random Walk Predictions for comparison
    rw_predictions = random_walk_predictions(train, test)
    
    # Calculate and append MSE for each model for this round
    mse_results['MLP_spot'].append(mean_squared_error(test["spot"], testPredict_mlp[:, 0]))
    mse_results['MLP_forwp'].append(mean_squared_error(test["forwp"], testPredict_mlp[:, 1]))

    mse_results['LSTM_spot'].append(mean_squared_error(test["spot"], testPredict_lstm[:, 0]))
    mse_results['LSTM_forwp'].append(mean_squared_error(test["forwp"], testPredict_lstm[:, 1]))

    mse_results['RW_spot'].append(mean_squared_error(test["spot"], rw_predictions[:, 0]))
    mse_results['RW_forwp'].append(mean_squared_error(test["forwp"], rw_predictions[:, 1]))




In [None]:
for key, values in mse_results.items():
    mean = sum(values) / len(values) * 100
    print(f"Mean for {key}: {mean}")