In [None]:
import sys
import plotly.graph_objects as go
import pandas as pd

import numpy as np
import os
import random
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import median_absolute_error
from sklearn.preprocessing import StandardScaler
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.plots import plot_histogram, plot_objective_2D
from skopt.utils import use_named_args

sys.path.insert(0, os.path.abspath('../..'))

from python_scripts.api_calls import fetch_item_to_df, fetch_items, get_cookie_from_blob

# Seed every random-number source used in this notebook (Python's `random`,
# NumPy and TensorFlow) with one shared value so repeated runs are comparable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

Get original DataFrame

In [None]:
# Fetch what the project helpers need to download one item's history.
# NOTE(review): presumably `dailyCookie` is an auth/session cookie - confirm
# against python_scripts.api_calls.
dailyCookie = get_cookie_from_blob()
items = fetch_items()

# Position (within `items`) of the item this notebook models.
ITEM_INDEX = 4
current_item = fetch_item_to_df(items[ITEM_INDEX], dailyCookie)

# `df` is the working frame used by every later cell; it is the same object
# as `current_item`, not a copy.
df = current_item

os.getcwd()

In [None]:
# Forward-fill missing values: each gap takes the last observed value.
# `DataFrame.ffill()` replaces `fillna(method='ffill')`, whose `method`
# argument is deprecated in recent pandas releases.
df = df.ffill()

# Scale the USD price into [0, 1]. The fitted `scaler` is kept so predictions
# can be mapped back to USD later with `scaler.inverse_transform`.
# NOTE(review): the scaler is fitted on the full series before the
# train/validation/test split, so test-set min/max leak into training.
scaler = MinMaxScaler(feature_range=(0, 1))
# fit_transform returns an (n, 1) array; flatten it to a plain column.
df['scaled_price'] = scaler.fit_transform(df[['price_usd']]).ravel()

In [None]:
seq_length = 2  # number of past observations fed to the model
y_shift = -1    # negative shift => the target lies in the future (t+1)

# One lag column per step of the input window: 't-1', 't-2', ...
lag_steps = range(1, seq_length + 1)
for lag in lag_steps:
    df[f't-{lag}'] = df['scaled_price'].shift(lag)

# Number of lag columns that were created.
counter = len(lag_steps)

# Target column: the scaled price `-y_shift` step(s) ahead of the current row.
df['y_shift'] = df['scaled_price'].shift(y_shift)

# Shifting introduces NaNs at both ends of the frame; drop those rows in place.
df.dropna(inplace=True)
print(df)
print(counter)

In [None]:
# Feature columns are the lagged prices; the target is the shifted price.
X_labels = [f't-{i}' for i in range(1, seq_length + 1)]
y_label = 'y_shift'  # (t+1) with the current y_shift

X = df[X_labels]
y = df[y_label]

# Chronological split (no shuffling): 70% train, 15% validation, 15% test.
split_train = int(0.7 * len(X))
split_val = int(0.85 * len(X))

X_train = X[:split_train]
y_train = y[:split_train]

X_val = X[split_train:split_val]
y_val = y[split_train:split_val]

X_test = X[split_val:]
y_test = y[split_val:]

# The models in this notebook are built with input_shape=(seq_length, 1), so
# Keras expects 3-D input shaped (samples, seq_length, 1).
# NOTE(review): columns are ordered t-1, t-2, ..., so the timestep axis runs
# from most recent to oldest - confirm this is the intended order.
lstm_shape = (-1, seq_length, 1)

# Plain NumPy copies of the validation split (2-D features, 1-D targets).
X_valnp = X_val.to_numpy()
y_valnp = y_val.to_numpy()

# Validation tuple in the 3-D layout Keras needs for `fit(validation_data=...)`.
validation_data = (X_valnp.reshape(lstm_shape), y_valnp)

# Convert to NumPy *before* reshaping: np.reshape on a DataFrame can fail when
# NumPy tries to wrap the 3-D result back into a DataFrame.
X_train_reshaped = X_train.to_numpy().reshape(lstm_shape)
X_test_reshaped = X_test.to_numpy().reshape(lstm_shape)



Create and fit the LSTM network

In [None]:

def create_old_model():
    """Build, compile and fit the baseline LSTM model.

    The network is one LSTM layer with 4 units followed by a single-unit
    Dense output, trained with MSE loss and the default Adam optimizer for
    2 epochs with batch size 1 on the module-level `X_train` / `y_train`.

    Returns:
        The fitted Keras `Sequential` model.
    """
    # The LSTM layer is declared with input_shape=(seq_length, 1), so the 2-D
    # feature table must be reshaped to (samples, seq_length, 1) before fitting.
    train_sequences = np.asarray(X_train).reshape((-1, seq_length, 1))
    train_targets = np.asarray(y_train)

    model = Sequential()
    model.add(LSTM(4, input_shape=(seq_length, 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(train_sequences, train_targets, epochs=2, batch_size=1, verbose=2)
    return model

def create_model(learning_rate=1e-5, num_dense_layers=1,
                 num_dense_nodes=16, activation='sigmoid'):
    """Build and compile (but do not train) the tunable LSTM regressor.

    Architecture: LSTM(4) -> `num_dense_layers` x Dense(`num_dense_nodes`,
    `activation`) -> Dense(1) linear output.

    Hyper-parameters:
    learning_rate:     Learning-rate for the Adam optimizer.
    num_dense_layers:  Number of hidden dense layers after the LSTM.
    num_dense_nodes:   Number of nodes in each hidden dense layer.
    activation:        Activation function for the hidden dense layers.

    Returns:
        A compiled Keras `Sequential` model using MSE as loss and metric.
    """
    model = Sequential()
    model.add(LSTM(4, input_shape=(seq_length, 1)))

    # Hidden fully-connected layers. Their count, width and activation are
    # the hyper-parameters being searched, so they must actually shape the
    # network; otherwise the search compares identical architectures.
    # int() guards against NumPy integer types supplied by the optimizer.
    for _ in range(int(num_dense_layers)):
        model.add(Dense(int(num_dense_nodes), activation=activation))

    # Single linear output unit: this is a regression on the scaled price.
    model.add(Dense(1))

    # Adam optimizer with the learning rate under search.
    optimizer = Adam(learning_rate=float(learning_rate))

    # The model has to be compiled before it can be trained.
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])

    return model


Hyperparameter Tuning

In [None]:
def log_dir_name(learning_rate, num_dense_layers,
                 num_dense_nodes, activation):
    """Return the TensorBoard log directory for one hyper-parameter set.

    The learning rate is rendered in scientific notation with no decimals
    (e.g. ``1e-05``); the other values are inserted as-is.
    """
    return (
        f"./19_logs/lr_{learning_rate:.0e}"
        f"_layers_{num_dense_layers}"
        f"_nodes_{num_dense_nodes}"
        f"_{activation}/"
    )

# Search space for the hyper-parameter optimisation. Each dimension's `name`
# matches a keyword argument of `fitness` / `create_model`.
dim_learning_rate = Real(
    low=1e-6, high=1e-2, prior='log-uniform', name='learning_rate')
dim_num_dense_layers = Integer(low=1, high=5, name='num_dense_layers')
dim_num_dense_nodes = Integer(low=5, high=512, name='num_dense_nodes')
dim_activation = Categorical(
    categories=['relu', 'sigmoid'], name='activation')

# Order matters: values in a parameter list are matched to dimensions by position.
dimensions = [
    dim_learning_rate,
    dim_num_dense_layers,
    dim_num_dense_nodes,
    dim_activation,
]

# Starting point, in the same order as `dimensions`.
default_parameters = [1e-5, 1, 16, 'relu']

# Show which skopt dimension class backs each hyper-parameter.
for dim in (dim_activation, dim_learning_rate,
            dim_num_dense_layers, dim_num_dense_nodes):
    print(type(dim))


# NOTE(review): not referenced by `fitness`, which saves to its own
# hard-coded path below - consider unifying the two.
path_best_model = 'C:/Users/Fadow/Desktop/LSTM logs.h5'

# Lowest validation MSE seen so far. Starts at +inf so the first evaluated
# model always counts as an improvement (an initial 0.0 can never be beaten,
# because an MSE is never negative).
best_mse = float('inf')


@use_named_args(dimensions=dimensions)
def fitness(learning_rate, num_dense_layers,
            num_dense_nodes, activation):
    """Objective function for the hyper-parameter search.

    Builds a model with the given hyper-parameters, trains it for 2 epochs
    on the module-level training split and scores it on the validation split.

    Hyper-parameters:
    learning_rate:     Learning-rate for the optimizer.
    num_dense_layers:  Number of dense layers.
    num_dense_nodes:   Number of nodes in each dense layer.
    activation:        Activation function for all layers.

    Side effects:
        Writes TensorBoard logs to the directory given by `log_dir_name`,
        saves the model to disk when it beats the global `best_mse`
        (and updates that global), and clears the Keras session.

    Returns:
        The validation mean squared error after the last epoch
        (lower is better, which is what scikit-optimize minimises).
    """

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(learning_rate))
    print('num_dense_layers:', num_dense_layers)
    print('num_dense_nodes:', num_dense_nodes)
    print('activation:', activation)
    print()

    # Create the neural network with these hyper-parameters.
    model = create_model(learning_rate=learning_rate,
                         num_dense_layers=num_dense_layers,
                         num_dense_nodes=num_dense_nodes,
                         activation=activation)

    # Dir-name for the TensorBoard log-files.
    log_dir = log_dir_name(learning_rate, num_dense_layers,
                           num_dense_nodes, activation)

    # Callback that writes TensorBoard logs after each epoch.
    # histogram_freq=0 disables weight histograms.
    callback_log = TensorBoard(
        log_dir=log_dir,
        histogram_freq=0,
        write_graph=True,
        write_images=False)

    # The LSTM is declared with input_shape=(seq_length, 1), so the inputs
    # must be 3-D: (samples, seq_length, 1). Reshaping here also accepts
    # data that already is 3-D with the same number of elements per sample.
    lstm_shape = (-1, seq_length, 1)
    train_x = np.asarray(X_train).reshape(lstm_shape)
    train_y = np.asarray(y_train)
    val_x = np.asarray(validation_data[0]).reshape(lstm_shape)
    val_y = np.asarray(validation_data[1])

    # Use Keras to train the model.
    history = model.fit(x=train_x,
                        y=train_y,
                        epochs=2,
                        batch_size=1,
                        validation_data=(val_x, val_y),
                        callbacks=[callback_log])

    # Score on the validation set after the last epoch. The loss is MSE, so
    # 'val_loss' is the validation MSE; the un-prefixed metric key would be
    # the training MSE, which is not a fair basis for choosing hyper-parameters.
    mse = float(history.history['val_loss'][-1])

    # Print the validation error.
    print()
    print("Mean Squared Error: {0:.6f}".format(mse))
    print()

    # Save the model if it improves on the best-found performance.
    # `global` is needed because the module-level variable is reassigned.
    global best_mse
    if mse < best_mse:
        # Save the new model to harddisk.
        model.save('B:/PRICE PREDICTOR/game_price_prediction/data/LSTM_model/best_lstm_model.keras')

        # Remember the new best validation error.
        best_mse = mse

    # Delete the Keras model with these hyper-parameters from memory.
    del model

    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    K.clear_session()

    # Scikit-optimize minimises the returned value; MSE is already
    # "lower is better", so it is returned without negation.
    return mse


In [None]:
# Smoke-test the objective once with the default hyper-parameters; the
# decorated function takes the parameter list as its single argument.
fitness(default_parameters)

In [None]:
# Hyper-parameters for the evaluation model, in the order
# [learning_rate, num_dense_layers, num_dense_nodes, activation].
default_parameters = [1e-5, 1, 16, 'relu']
model = create_model(learning_rate=default_parameters[0],
                     num_dense_layers=default_parameters[1],
                     num_dense_nodes=default_parameters[2],
                     activation=default_parameters[3])

# The model is declared with input_shape=(seq_length, 1): feed it 3-D arrays.
lstm_shape = (-1, seq_length, 1)

# create_model() only builds and compiles the network. Train it before
# predicting; otherwise the test metrics describe random initial weights.
model.fit(np.asarray(X_train).reshape(lstm_shape),
          np.asarray(y_train),
          epochs=2,
          batch_size=1,
          validation_data=(np.asarray(X_val).reshape(lstm_shape),
                           np.asarray(y_val)),
          verbose=2)

testPredict = model.predict(np.asarray(X_test).reshape(lstm_shape))

# Invert the MinMax scaling so values are back in USD. The scaler was fitted
# on a single column, so it is given (n, 1) input; the targets are then laid
# out as a (1, n) row because later cells index them with `[0]`.
unscaledtrainY = scaler.inverse_transform(
    np.asarray(y_train).reshape(-1, 1)).reshape(1, -1)
unscaledtestPredict = scaler.inverse_transform(testPredict)
unscaledtestY = scaler.inverse_transform(
    np.asarray(y_test).reshape(-1, 1)).reshape(1, -1)

# Column-shaped copy of the true test prices, used for plotting.
rowunscaledtestY = unscaledtestY.reshape(-1, 1)

In [None]:
# 1-D views of the true and predicted test prices (unscaled).
y_true = unscaledtestY[0]
y_pred = unscaledtestPredict[:, 0]

# Root mean squared error.
testScore = np.sqrt(mean_squared_error(y_true, y_pred))
# R2: proportion of the variance in the target that the predictions explain.
r2 = r2_score(y_true, y_pred)
# Explained variance score: proportion of the target's dispersion accounted for.
explained_variance = explained_variance_score(y_true, y_pred)
medae = median_absolute_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
# Mean absolute percentage error, expressed in percent.
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

print(f'Test Score: {testScore:.2f} RMSE')
print(f'R2 Score: {r2:.2f}')
print(f'Explained Variance: {explained_variance:.2f}')
print(f'Median Absolute Error: {medae:.2f}')
print(f'Mean Absolute Error: {mae:.2f}')
print(f'Mean Absolute Percentage Error: {mape:.2f}%')

In [None]:
# Overlay predicted and actual test prices on a single set of axes.
fig, ax = plt.subplots()
ax.grid()
ax.set_title("LSTM - Predicted vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
ax.plot(unscaledtestPredict, label="Predicted")  # model output
ax.plot(rowunscaledtestY, label="Actual")        # ground truth values
ax.legend(loc="upper left")
plt.show()
print(testScore)