In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout
from keras import optimizers, regularizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
import seaborn as sns
import plotly.graph_objects as go
from datetime import date, datetime, timedelta
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import os
import locale
import time
import requests
from bs4 import BeautifulSoup
from csv import writer
import matplotlib 
matplotlib.use('WebAgg')
import matplotlib.pyplot as plt

print("Gold Price Forecast")

df = pd.read_csv('data/harga_emas_new2.csv')
df['Tanggal'] = pd.to_datetime(df['Tanggal'])
df.set_index('Tanggal', inplace=True)

df['Harga'] = df['Price1'].astype(str).str.replace('.', '').astype(float)
df.drop(['Price1', 'Price2', 'Price3', 'Price5', 'Price10', 'Price25', 'Price50', 'Price100'], axis=1, inplace=True)

# Prepare data for LSTM
data = df['Harga'].values
data = data.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

print(f"train_data: {len(train_data)}")
print(f"test_data: {len(test_data)}")

def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

look_back = 30
trainX, trainY = create_dataset(train_data, look_back)
testX, testY = create_dataset(test_data, look_back)

trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

model_path = 'saved_test_model/model.h5'

if os.path.exists(model_path) == False:
    model = Sequential()
    model.add(LSTM(300, return_sequences=True, input_shape=(look_back, 1)))
    # model.add(Dropout(0.1))
    model.add(LSTM(150, return_sequences=False))
    # model.add(Dropout(0.1))
    model.add(Dense(25))
    model.add(Dense(1))

    learning_rate = 0.001
    batch_size = 10
    epochs = 40
    dropout_rate = 0.2

    adam = optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=adam, loss='mean_squared_error')
    model_test = model.fit(x=trainX, y=trainY, batch_size=batch_size, epochs=epochs, shuffle=True, validation_split=0.1)

    model.save(model_path)
    print("Model trained and saved")
    plt.close('all')
    plt.plot(model_test.history['loss'])
    plt.plot(model_test.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper right')
    plt.show()
    
    # Calculate MAPE for train and test data
    train_mape = np.mean(np.abs((trainY - model.predict(trainX).flatten()) / trainY)) * 100
    test_mape = np.mean(np.abs((testY - model.predict(testX).flatten()) / testY)) * 100

    print(f"Train MAPE: {train_mape:.2f}%")
    print(f"Test MAPE: {test_mape:.2f}%")
else:
    model = load_model(model_path)
    # plt.close('all')
    # plt.plot(model.history['loss'])
    # plt.plot(model.history['val_loss'])
    # plt.title('model loss')
    # plt.ylabel('loss')
    # plt.xlabel('epoch')
    # plt.legend(['train', 'validation'], loc='upper right')
    # plt.show()
    print("Loaded model from disk")
    
    


Gold Price Forecast
train_data: 3055
test_data: 764
Epoch 1/40


  super().__init__(**kwargs)


[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 35ms/step - loss: 0.0041 - val_loss: 0.0011
Epoch 2/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 30ms/step - loss: 5.7735e-04 - val_loss: 6.0316e-05
Epoch 3/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 30ms/step - loss: 4.0746e-04 - val_loss: 9.6531e-05
Epoch 4/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 30ms/step - loss: 4.3098e-04 - val_loss: 5.2690e-05
Epoch 5/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 30ms/step - loss: 4.6350e-04 - val_loss: 3.9358e-05
Epoch 6/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 31ms/step - loss: 4.0567e-04 - val_loss: 4.0034e-05
Epoch 7/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 32ms/step - loss: 2.9712e-04 - val_loss: 3.6056e-05
Epoch 8/40
[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 30ms/step - loss: 1.3226e-04 - val_los



Model trained and saved
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step
[1m 4/23[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step

  train_mape = np.mean(np.abs((trainY - model.predict(trainX).flatten()) / trainY)) * 100


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Train MAPE: inf%
Test MAPE: 1.51%


In [4]:
# Predictions
train_predict = model.predict(trainX)
test_predict = model.predict(testX)

train_predict = scaler.inverse_transform(train_predict)
trainY = scaler.inverse_transform(trainY.reshape(-1, 1))
test_predict = scaler.inverse_transform(test_predict)
testY = scaler.inverse_transform(testY.reshape(-1, 1))
    
# Calculate accuracy metrics
train_mse = mean_squared_error(trainY, train_predict)
train_mae = mean_absolute_error(trainY, train_predict)
train_mape = mean_absolute_percentage_error(trainY, train_predict)
test_mse = mean_squared_error(testY, test_predict)
test_mae = mean_absolute_error(testY, test_predict)
test_mape = mean_absolute_percentage_error(testY, test_predict)

print("Train MSE:", train_mse)
print("Train MAE:", train_mae)
print(f"Train MAPE: {train_mape * 100}%")
print("Test MSE:", test_mse)
print("Test MAE:", test_mae)
print(f"Test MAPE: {test_mape * 100}%")

# Plotting
print('Actual vs Predicted Prices')
trace1 = go.Scatter(
    x=df.index[:train_size],
    y=trainY.flatten(),
    mode='lines',
    name='Actual Train Prices'
)
trace2 = go.Scatter(
    x=df.index[:train_size],
    y=train_predict.flatten(),
    mode='lines',
    name='Predicted Train Prices'
)
trace3 = go.Scatter(
    x=df.index[train_size:],
    y=testY.flatten(),
    mode='lines',
    name='Actual Test Prices'
)
trace4 = go.Scatter(
    x=df.index[train_size:],
    y=test_predict.flatten(),
    mode='lines',
    name='Predicted Test Prices'
)

data = [trace1, trace2, trace3, trace4]
layout = go.Layout(
    title='Gold Prices: Actual vs Predicted',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Price'},
    hovermode='closest'
)
fig = go.Figure(data=data, layout=layout)
# plt.show()

[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Train MSE: 193877927.84375775
Train MAE: 9208.062293320108
Train MAPE: 1.408095354054501%
Test MSE: 298809002.15057975
Test MAE: 8996.284276944065
Test MAPE: 0.7994399978315537%
Actual vs Predicted Prices


In [5]:
# Make predictions
predicted_prices = model.predict(testX).flatten()

# Define bins for the gold price ranges
bins = np.linspace(min(testY.min(), predicted_prices.min()), max(testY.max(), predicted_prices.max()), 10)
actual_bins = np.digitize(testY, bins)
predicted_bins = np.digitize(predicted_prices, bins)

# Create the confusion matrix
conf_matrix = confusion_matrix(actual_bins, predicted_bins)

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=bins, yticklabels=bins)
plt.xlabel('Predicted Price Bin')
plt.ylabel('Actual Price Bin')
plt.title('Confusion Matrix for Gold Price Prediction')
plt.show()

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step


In [6]:
# Weighted Moving Average (WMA) Calculation
def wma(values, period):
    weights = np.arange(1, period + 1)
    return np.convolve(values, weights[::-1], 'valid') / weights.sum()
    
period = 7
wma_values = wma(df['Harga'].values, period)
    
# Adding NaN values to align the WMA with the original data length
wma_full = np.empty(len(df['Harga']))
wma_full[:period-1] = np.nan
wma_full[period-1:] = wma_values
    
df['WMA'] = wma_full

train_wma = df['WMA'].iloc[:train_size].dropna()
test_wma = df['WMA'].iloc[train_size:].dropna()
    
# Ensure the lengths of train_wma and trainY match
valid_length = min(len(train_wma), len(trainY))
train_wma = train_wma[-valid_length:]
trainY_wma = trainY[-valid_length:]
    
# Ensure the lengths of test_wma and testY match
valid_length = min(len(test_wma), len(testY))
test_wma = test_wma[-valid_length:]
testY_wma = testY[-valid_length:]
    
train_wma_mse = mean_squared_error(trainY_wma, train_wma)
train_wma_mae = mean_absolute_error(trainY_wma, train_wma)
train_wma_mape = mean_absolute_percentage_error(trainY_wma, train_wma)
test_wma_mse = mean_squared_error(testY_wma, test_wma)
test_wma_mae = mean_absolute_error(testY_wma, test_wma)
test_wma_mape = mean_absolute_percentage_error(testY_wma, test_wma)

print("Train WMA MSE:", train_wma_mse)
print("Train WMA MAE:", train_wma_mae)
print("Train WMA MAPE:", train_wma_mape)
print("Test WMA MSE:", test_wma_mse)
print("Test WMA MAE:", test_wma_mae)
print("Test WMA MAPE:", test_wma_mape)

print('Actual vs WMA Prices')
trace1 = go.Scatter(
    x=df.index[:train_size][-len(train_wma):],
    y=trainY_wma.flatten(),
    mode='lines',
    name='Actual Train Prices'
)
trace2 = go.Scatter(
    x=df.index[:train_size][-len(train_wma):],
    y=train_wma,
    mode='lines',
    name='WMA Train Prices'
)
trace3 = go.Scatter(
    x=df.index[train_size:][-len(test_wma):],
    y=testY_wma.flatten(),
    mode='lines',
    name='Actual Test Prices'
)
trace4 = go.Scatter(
    x=df.index[train_size:][-len(test_wma):],
    y=test_wma,
    mode='lines',
    name='WMA Test Prices'
)

data = [trace1, trace2, trace3, trace4]
layout = go.Layout(
    title='Gold Prices: Actual vs WMA',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Price'},
    hovermode='closest'
)
fig = go.Figure(data=data, layout=layout)
plt.show(fig)

Train WMA MSE: 74819557.29057504
Train WMA MAE: 3107.896305744521
Train WMA MAPE: 0.004311294564672284
Test WMA MSE: 128723711.96090986
Test WMA MAE: 5092.233482751899
Test WMA MAPE: 0.004650845125704301
Actual vs WMA Prices


TypeError: FigureManagerWebAgg.pyplot_show() takes 1 positional argument but 2 were given