In [69]:
import pandas as pd
spot = pd.read_csv('./data/spot/clarkson_data.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)
pmx_forw = pd.read_csv('./data/ffa/PMAX_FFA.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)
csz_forw = pd.read_csv('./data/ffa/CSZ_FFA.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)
smx_forw = pd.read_csv('./data/ffa/SMX_FFA.csv', delimiter=';', parse_dates=['Date'], dayfirst=True)


In [70]:
import numpy as np
# Merge data frames on the Date column
data_combined = pd.merge(spot, smx_forw, on='Date')
s_col = "SMX"
f_col = "1Q"

# Remove rows with NA or 0 in specific columns (assuming 'SMX' and '1Q' are column names in 'data_combined')
data_combined = data_combined[(data_combined[s_col].notna() & data_combined[s_col] != 0) & (data_combined[f_col].notna() & data_combined[f_col] != 0)]

# Transform data to log levels
data_log_levels = pd.DataFrame()
data_log_levels["spot"] = np.log(data_combined[s_col])
data_log_levels["forwp"] = np.log(data_combined[f_col])
data_log_levels.index = data_combined["Date"]


# Split into train and test sets
split_index = round(len(data_log_levels) * 0.8)
hor = 1
train = data_log_levels.iloc[:split_index]
test = data_log_levels.iloc[split_index:split_index+hor]
#train.head()
data_log_levels.head()
train.head()
#test.head()

Unnamed: 0_level_0,spot,forwp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-01-04,9.815312,9.675645
2006-01-10,9.806095,9.678154
2006-01-17,9.73932,9.595263
2006-01-18,9.724361,9.587063
2006-01-19,9.710085,9.583627


In [78]:
# Assuming data_combined is your final DataFrame after preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Normalize features
scaler = MinMaxScaler()
train_scal = scaler.fit_transform(train)

# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=10, is_test=False):
    X, Y = [], []
    if is_test:  # for test data, we just need the last entry for 1-step ahead forecast
        X.append(dataset[-look_back:])
        return np.array(X), None
    else:
        for i in range(look_back, len(dataset)):
            X.append(dataset[i-look_back:i])
            Y.append(dataset[i])
    return np.array(X), np.array(Y)

look_back = 10  # Adjust based on your temporal structure
trainX, trainY = create_dataset(train_scal, look_back)
print(trainX)

[[[0.58835698 0.46232689]
  [0.58560421 0.46323447]
  [0.56566024 0.43324572]
  ...
  [0.52697214 0.47832482]
  [0.53726921 0.49426967]
  [0.55932869 0.48987549]]

 [[0.58560421 0.46323447]
  [0.56566024 0.43324572]
  [0.56119228 0.43027925]
  ...
  [0.53726921 0.49426967]
  [0.55932869 0.48987549]
  [0.57578165 0.47653813]]

 [[0.56566024 0.43324572]
  [0.56119228 0.43027925]
  [0.55692839 0.42903601]
  ...
  [0.55932869 0.48987549]
  [0.57578165 0.47653813]
  [0.57593472 0.47914982]]

 ...

 [[0.23767389 0.24156131]
  [0.26081647 0.25566866]
  [0.29532163 0.27835084]
  ...
  [0.33022471 0.28575427]
  [0.35905714 0.29308177]
  [0.38224381 0.29257365]]

 [[0.26081647 0.25566866]
  [0.29532163 0.27835084]
  [0.30663397 0.27782158]
  ...
  [0.35905714 0.29308177]
  [0.38224381 0.29257365]
  [0.39615883 0.29126376]]

 [[0.29532163 0.27835084]
  [0.30663397 0.27782158]
  [0.31079384 0.27713997]
  ...
  [0.38224381 0.29257365]
  [0.39615883 0.29126376]
  [0.40421298 0.28693668]]]


In [79]:

# Create and fit the MLP model
from keras.layers import Dense
from keras import Sequential
from sklearn.metrics import mean_squared_error
trainX_flat = trainX.reshape(trainX.shape[0], -1)
f = trainX_flat.shape[1]

model_mlp = Sequential()
model_mlp.add(Dense(32, input_dim=trainX_flat.shape[1], activation='relu'))
model_mlp.add(Dense(16, activation="relu"))
model_mlp.add(Dense(2, activation="linear"))
model_mlp.compile(loss='mean_squared_error', optimizer='adam')
model_mlp.fit(trainX_flat, trainY, epochs=10, batch_size=2, verbose=2)

# Make predictions
trainPredict_scal = model_mlp.predict(trainX_flat)
testX = create_dataset(trainX[-look_back:])
testX_flat = testX.reshape(trainX.shape[0], -1)
testPredict_scal = model_mlp.predict(testX_flat)

# Invert predictions
trainPredict = scaler.inverse_transform(trainPredict_scal)
testPredict = scaler.inverse_transform(testPredict_scal)

# Calculate mean squared error
trainScore = mean_squared_error(trainY, trainPredict[:,0])
testScore = mean_squared_error(testY, testPredict[:,0])
print('Train Score: %.2f MSE' % (trainScore))
print('Test Score: %.2f MSE' % (testScore))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
897/897 - 2s - 2ms/step - loss: 0.0073
Epoch 2/10
897/897 - 1s - 963us/step - loss: 0.0011
Epoch 3/10
897/897 - 1s - 951us/step - loss: 6.2820e-04
Epoch 4/10
897/897 - 1s - 965us/step - loss: 5.3665e-04
Epoch 5/10
897/897 - 1s - 945us/step - loss: 4.7060e-04
Epoch 6/10
897/897 - 1s - 945us/step - loss: 4.8689e-04
Epoch 7/10
897/897 - 1s - 944us/step - loss: 4.3160e-04
Epoch 8/10
897/897 - 1s - 945us/step - loss: 4.0039e-04
Epoch 9/10
897/897 - 1s - 945us/step - loss: 3.8869e-04
Epoch 10/10
897/897 - 1s - 945us/step - loss: 4.2340e-04
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [None]:
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
print(trainX)