In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import torch
import torch.nn as nn
import time
import optuna

from sklearn.preprocessing import MinMaxScaler
from copy import deepcopy as dc
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

# 1. PENGUMPULAN DATA

In [2]:
# Membaca data dari file CSV
data = pd.read_csv('AKRA.JK.csv')

data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2005-09-29,53.200001,53.200001,52.799999,53.200001,26.296928,7425000
1,2005-09-30,54.000000,54.000000,53.200001,54.000000,26.692368,9337500
2,2005-10-03,54.000000,54.000000,53.599998,54.000000,26.692368,6300000
3,2005-10-04,54.000000,54.000000,53.599998,54.000000,26.692368,16937500
4,2005-10-05,54.799999,54.799999,54.000000,54.799999,27.087812,35300000
...,...,...,...,...,...,...,...
4586,2024-04-24,1805.000000,1810.000000,1785.000000,1790.000000,1790.000000,9951800
4587,2024-04-25,1785.000000,1785.000000,1700.000000,1735.000000,1735.000000,29677200
4588,2024-04-26,1730.000000,1730.000000,1570.000000,1600.000000,1600.000000,51436000
4589,2024-04-29,1630.000000,1655.000000,1580.000000,1650.000000,1650.000000,49231200


In [3]:
# Melihat baris dengan indeks dari 10 hingga 20 pada DataFrame
# untuk melihat data yang kosong
rows = data.iloc[10:21]
print(rows)

          Date       Open       High        Low      Close  Adj Close  \
10  2005-10-13  60.799999  61.599998  58.799999  60.799999  30.053625   
11  2005-10-14  62.000000  62.799999  61.200001  62.000000  30.646786   
12  2005-10-17  62.000000  62.000000  62.000000  62.000000  30.646786   
13  2005-10-18  62.799999  63.200001  61.599998  62.799999  31.042233   
14  2005-10-19  62.400002  62.799999  62.000000  62.400002  30.844517   
15  2005-10-20  62.400002  62.400002  62.400002  62.400002  30.844517   
16  2005-10-21  62.000000  62.400002  62.000000  62.000000  30.646786   
17  2005-10-24  62.000000  62.000000  61.200001  62.000000  30.646786   
18  2005-10-25  62.000000  62.000000  62.000000  62.000000  30.646786   
19  2005-10-26  61.599998  61.599998  58.799999  61.599998  30.449068   
20  2005-10-27  61.599998  61.599998  60.000000  61.599998  30.449068   

      Volume  
10  46937500  
11  35975000  
12         0  
13  22637500  
14  22937500  
15         0  
16  12600000  
17 

# 2. PREPROCESSING DATA: 
## - Mengisi kekosongan data, menormalisasi data, dan membagi data menjadi set latih dan set uji.

In [4]:
# Mengisi nilai 0 pada kolom 'Volume' dengan nilai rata-rata dari nilai sebelumnya
data['Volume'] = data['Volume'].replace(0, np.nan)
# Mengisi nilai NaN dengan nilai rata-rata dari nilai sebelumnya menggunakan ffill() (forward fill).
data['Volume'] = data['Volume'].ffill()

data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2005-09-29,53.200001,53.200001,52.799999,53.200001,26.296928,7425000.0
1,2005-09-30,54.000000,54.000000,53.200001,54.000000,26.692368,9337500.0
2,2005-10-03,54.000000,54.000000,53.599998,54.000000,26.692368,6300000.0
3,2005-10-04,54.000000,54.000000,53.599998,54.000000,26.692368,16937500.0
4,2005-10-05,54.799999,54.799999,54.000000,54.799999,27.087812,35300000.0
...,...,...,...,...,...,...,...
4586,2024-04-24,1805.000000,1810.000000,1785.000000,1790.000000,1790.000000,9951800.0
4587,2024-04-25,1785.000000,1785.000000,1700.000000,1735.000000,1735.000000,29677200.0
4588,2024-04-26,1730.000000,1730.000000,1570.000000,1600.000000,1600.000000,51436000.0
4589,2024-04-29,1630.000000,1655.000000,1580.000000,1650.000000,1650.000000,49231200.0


In [5]:
# Melihat baris dengan indeks dari 10 hingga 20 pada DataFrame
# untuk melihat data yang sudah diisi dengan nilai rata-rata sebelumnya
rows = data.iloc[10:21]
print(rows)

          Date       Open       High        Low      Close  Adj Close  \
10  2005-10-13  60.799999  61.599998  58.799999  60.799999  30.053625   
11  2005-10-14  62.000000  62.799999  61.200001  62.000000  30.646786   
12  2005-10-17  62.000000  62.000000  62.000000  62.000000  30.646786   
13  2005-10-18  62.799999  63.200001  61.599998  62.799999  31.042233   
14  2005-10-19  62.400002  62.799999  62.000000  62.400002  30.844517   
15  2005-10-20  62.400002  62.400002  62.400002  62.400002  30.844517   
16  2005-10-21  62.000000  62.400002  62.000000  62.000000  30.646786   
17  2005-10-24  62.000000  62.000000  61.200001  62.000000  30.646786   
18  2005-10-25  62.000000  62.000000  62.000000  62.000000  30.646786   
19  2005-10-26  61.599998  61.599998  58.799999  61.599998  30.449068   
20  2005-10-27  61.599998  61.599998  60.000000  61.599998  30.449068   

        Volume  
10  46937500.0  
11  35975000.0  
12  35975000.0  
13  22637500.0  
14  22937500.0  
15  22937500.0  
16  

In [6]:
data = data[['Date', 'Close']]

data

Unnamed: 0,Date,Close
0,2005-09-29,53.200001
1,2005-09-30,54.000000
2,2005-10-03,54.000000
3,2005-10-04,54.000000
4,2005-10-05,54.799999
...,...,...
4586,2024-04-24,1790.000000
4587,2024-04-25,1735.000000
4588,2024-04-26,1600.000000
4589,2024-04-29,1650.000000


In [7]:
# Memeriksa ketersediaan GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

device

'cpu'

In [8]:
# Memvisualisasikan data

fig = go.Figure(data=go.Scatter(x=data['Date'], y=data['Close'], mode='lines'))

fig.update_layout(
    height = 500, 
    width = 1000,
    xaxis_title='Date',
    yaxis_title='Close Price',
    xaxis_range=['2005', '2024'],
    yaxis_range=[0, 2000]
)

fig.show()

In [9]:
# Fungsi untuk mempersiapkan dataframe untuk model LSTM
def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)
    df.set_index('Date', inplace=True)
    for i in range(1, n_steps + 1):
        df[f'Close(t-{i})'] = df['Close'].shift(i)
    df.dropna(inplace=True)
    return df

# Menyiapkan dataframe dengan lookback 7 hari
lookback = 7
shifted_df = prepare_dataframe_for_lstm(data, lookback)
shifted_df

Unnamed: 0_level_0,Close,Close(t-1),Close(t-2),Close(t-3),Close(t-4),Close(t-5),Close(t-6),Close(t-7)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2005-10-10,56.400002,54.799999,54.799999,54.799999,54.000000,54.000000,54.000000,53.200001
2005-10-11,57.599998,56.400002,54.799999,54.799999,54.799999,54.000000,54.000000,54.000000
2005-10-12,58.799999,57.599998,56.400002,54.799999,54.799999,54.799999,54.000000,54.000000
2005-10-13,60.799999,58.799999,57.599998,56.400002,54.799999,54.799999,54.799999,54.000000
2005-10-14,62.000000,60.799999,58.799999,57.599998,56.400002,54.799999,54.799999,54.799999
...,...,...,...,...,...,...,...,...
2024-04-24,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000,1795.000000,1815.000000,1735.000000
2024-04-25,1735.000000,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000,1795.000000,1815.000000
2024-04-26,1600.000000,1735.000000,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000,1795.000000
2024-04-29,1650.000000,1600.000000,1735.000000,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000


In [10]:
# Mengubah dataframe menjadi array numpy
shifted_df_as_np = shifted_df.to_numpy()

shifted_df_as_np

array([[  56.400002,   54.799999,   54.799999, ...,   54.      ,
          54.      ,   53.200001],
       [  57.599998,   56.400002,   54.799999, ...,   54.      ,
          54.      ,   54.      ],
       [  58.799999,   57.599998,   56.400002, ...,   54.799999,
          54.      ,   54.      ],
       ...,
       [1600.      , 1735.      , 1790.      , ..., 1845.      ,
        1850.      , 1795.      ],
       [1650.      , 1600.      , 1735.      , ..., 1795.      ,
        1845.      , 1850.      ],
       [1675.      , 1650.      , 1600.      , ..., 1800.      ,
        1795.      , 1845.      ]])

In [11]:
# Menormalisasi data menggunakan MinMaxScaler
# MinMaxScaler mengubah skala data menjadi rentang antara -1 dan 1
scaler = MinMaxScaler(feature_range=(-1, 1))
shifted_df_as_np = scaler.fit_transform(shifted_df_as_np)

shifted_df_as_np

array([[-1.        , -1.        , -1.        , ..., -1.        ,
        -1.        , -1.        ],
       [-0.99866191, -0.99821747, -1.        , ..., -1.        ,
        -1.        , -0.99910953],
       [-0.99732382, -0.99688057, -0.99821747, ..., -0.99910913,
        -1.        , -0.99910953],
       ...,
       [ 0.72123104,  0.87188057,  0.93315508, ...,  0.99443207,
         1.        ,  0.93878005],
       [ 0.77698483,  0.7214795 ,  0.87188057, ...,  0.93875278,
         0.99443207,  1.        ],
       [ 0.80486173,  0.7771836 ,  0.7214795 , ...,  0.94432071,
         0.93875278,  0.99443455]])

In [12]:
# Memisahkan data menjadi fitur (X) dan target (y)
# Fitur diambil dari kolom 1 hingga kolom terakhir, sedangkan target diambil dari kolom pertama (indeks 0).
X = shifted_df_as_np[:, 1:]
y = shifted_df_as_np[:, 0]

X.shape, y.shape

((4584, 7), (4584,))

In [13]:
# Membalik urutan fitur
X = dc(np.flip(X, axis=1))

X

array([[-1.        , -1.        , -1.        , ..., -1.        ,
        -1.        , -1.        ],
       [-0.99910953, -1.        , -1.        , ..., -1.        ,
        -1.        , -0.99821747],
       [-0.99910953, -1.        , -0.99910913, ..., -1.        ,
        -0.99821747, -0.99688057],
       ...,
       [ 0.93878005,  1.        ,  0.99443207, ...,  0.9442959 ,
         0.93315508,  0.87188057],
       [ 1.        ,  0.99443207,  0.93875278, ...,  0.93315508,
         0.87188057,  0.7214795 ],
       [ 0.99443455,  0.93875278,  0.94432071, ...,  0.87188057,
         0.7214795 ,  0.7771836 ]])

In [14]:
# Membagi data menjadi data latih (80%) dan data uji (20%)
split_index = int(len(X) * 0.80)
split_test = int(len(X) * 0.20)
split_index, split_test


(3667, 916)

In [15]:
# Indeks pemisahan ditentukan dengan split_index yang dihitung sebagai 80% dari panjang array X. 
# Data latih diambil dari awal hingga split_index, sedangkan data uji diambil dari split_index hingga akhir.
X_train = X[:split_index]
X_test = X[split_index:]

y_train = y[:split_index]
y_test = y[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((3667, 7), (917, 7), (3667,), (917,))

In [16]:
# Mengubah bentuk data latih dan data uji untuk model LSTM (batch_size, sequence_length, input_size)
# Dimensi data diubah menjadi (batch_size, sequence_length, input_size) untuk X_train dan X_test,
# serta (batch_size, 1) untuk y_train dan y_test.
X_train = X_train.reshape((-1, lookback, 1))
X_test = X_test.reshape((-1, lookback, 1))

y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((3667, 7, 1), (917, 7, 1), (3667, 1), (917, 1))

In [17]:
# Mengubah data menjadi tensor PyTorch
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([3667, 7, 1]),
 torch.Size([917, 7, 1]),
 torch.Size([3667, 1]),
 torch.Size([917, 1]))

# PENGEMBANGAN MODEL: 
## Membangun model LSTM berdasarkan data yang telah diproses dan melakukan tuning hyperparameter untuk mengoptimalkan kinerja model.

In [18]:
# Mendefinisikan kelas TimeSeriesDataset
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
    
# Membuat objek DataLoader untuk data latih dan data uji    
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

In [19]:
# Mendefinisikan model LSTM menggunakan PyTorch.
class LSTM(nn.Module):
    # Model ini terdiri dari lapisan LSTM (nn.LSTM) dengan jumlah lapisan yang ditumpuk (num_stacked_layers) 
    # dan ukuran hidden state (hidden_size), serta lapisan fully connected (nn.Linear) untuk menghasilkan output. 
    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    # Metode forward() mendefinisikan alur forward pass dari model, 
    # di mana input x dilewatkan melalui lapisan LSTM dan lapisan fully connected untuk menghasilkan output.
    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [20]:
# Fungsi untuk melatih model selama satu epoch menggunakan data latih
def train_one_epoch(model, train_loader, loss_function, optimizer, batch_size):
    # Model diatur ke mode pelatihan menggunakan model.train(True)
    model.train(True)
    running_loss = 0.0

    for batch_index, batch in enumerate(train_loader):
        # Kemudian, untuk setiap batch dalam data loader, input batch (x_batch) 
        # dan target batch (y_batch) diambil dan dipindahkan ke perangkat (GPU jika tersedia). 
        # Output model dihitung menggunakan model(x_batch),
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        # loss dihitung menggunakan fungsi loss.
        loss = loss_function(output, y_batch)
        running_loss += loss.item()

        # Gradien di-reset menggunakan optimizer.zero_grad()
        optimizer.zero_grad()
        # Backpropagation dilakukan menggunakan loss.backward(),
        loss.backward()
        # dan bobot model diperbarui menggunakan optimizer.step().
        optimizer.step()

    return running_loss / len(train_loader)

In [21]:
# Fungsi untuk mengevaluasi model pada data validasi selama satu epoch
def validate_one_epoch(model, test_loader, loss_function):
    # model diatur ke mode evaluasi menggunakan model.train(False)
    model.train(False)
    running_loss = 0.0

    for batch_index, batch in enumerate(test_loader):
        # untuk setiap batch dalam data loader, input batch (x_batch) 
        # dan target batch (y_batch) diambil dan dipindahkan ke perangkat (GPU jika tersedia)
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            # Output model dihitung menggunakan model(x_batch) tanpa melakukan perhitungan gradien menggunakan torch.no_grad()
            output = model(x_batch)
            loss = loss_function(output, y_batch)
            running_loss += loss.item()

    return running_loss / len(test_loader)

In [22]:
# Fungsi objektif untuk hyperparameter tuning
# menemukan kombinasi hyperparameter yang menghasilkan nilai val_loss terendah.
def objective(trial):
    hidden_size = trial.suggest_int('hidden_size', 2, 128) # 2, 128
    num_stacked_layers = trial.suggest_int('num_stacked_layers', 1, 5) # 1, 5
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1) # 1e-5 (0.00001), 1e-1 (0.1)
    batch_size = trial.suggest_int('batch_size', 8, 128) # 8, 128

    model = LSTM(1, hidden_size, num_stacked_layers)
    model.to(device)

    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs): # type: ignore
        train_loss = train_one_epoch(model, train_loader, loss_function, optimizer, batch_size)
        val_loss = validate_one_epoch(model, test_loader, loss_function)

    return val_loss

In [23]:
# Hyperparameter tuning menggunakan Optuna
num_epochs = 100
start_time = time.time()

# Mencari nilai terkecil dari fungsi objective.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=5)

end_time = time.time()  # Mencatat waktu akhir
tuning_time = end_time - start_time  # Menghitung durasi tuning

# Menampilkan waktu tuning
print(f"Waktu yang dibutuhkan untuk tuning hyperparameter: {tuning_time:.2f} detik")

[I 2024-06-20 22:06:25,183] A new study created in memory with name: no-name-e9cb58dc-455b-4f68-a775-99b56bd737f2

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.

[I 2024-06-20 22:07:18,567] Trial 0 finished with value: 0.0034094195370562375 and parameters: {'hidden_size': 31, 'num_stacked_layers': 4, 'learning_rate': 9.84940168660733e-05, 'batch_size': 37}. Best is trial 0 with value: 0.0034094195370562375.

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.

[I 2024-06-20 22:08:26,692] Trial 1 finished with value: 0.0011654371892291238 and parameters: {'hidden_size': 46, 'num_stacked_layers': 5, 'learning_rate': 0.001991033121783432, 'batch_size': 58}. Best is trial 1 with value: 0.0011654371892291238.
[I 2

Waktu yang dibutuhkan untuk tuning hyperparameter: 247.99 detik


In [24]:
best_params = study.best_params
print("Hyperparameters terbaik:", best_params)

Hyperparameters terbaik: {'hidden_size': 68, 'num_stacked_layers': 4, 'learning_rate': 0.0009631577743539275, 'batch_size': 104}


In [25]:
# Latih model dengan hyperparameter terbaik
# Mengambil nilai hyperparameter terbaik yang diperoleh dari Optuna dan menggunakannya untuk melatih model
hidden_size = best_params['hidden_size']
num_stacked_layers = best_params['num_stacked_layers']
learning_rate = best_params['learning_rate']
batch_size = best_params['batch_size']


In [26]:
# Model LSTM dibuat dengan ukuran hidden state (hidden_size),
# jumlah lapisan yang ditumpuk (num_stacked_layers), dan dipindahkan ke perangkat (GPU jika tersedia). 
model = LSTM(1, hidden_size, num_stacked_layers)
model.to(device)

model

LSTM(
  (lstm): LSTM(1, 68, num_layers=4, batch_first=True)
  (fc): Linear(in_features=68, out_features=1, bias=True)
)

In [27]:
# Menginisialisasi fungsi MSE Loss untuk mengukur perbedaan antara prediksi dan nilai sebenarnya, 
# serta optimizer Adam untuk memperbarui parameter model berdasarkan fungsi gradien kerugian dengan learning rate yang ditentukan.

loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [28]:
# Data loader untuk data latih (train_loader) dan data uji (test_loader) dibuat dengan ukuran batch (batch_size) yang sesuai.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [29]:
start_time = time.time()

# Pada setiap epoch, fungsi train_one_epoch() dipanggil untuk melatih model pada data latih, 
# dan fungsi validate_one_epoch() dipanggil untuk mengevaluasi model pada data validasi.
for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, loss_function, optimizer, batch_size)
    val_loss = validate_one_epoch(model, test_loader, loss_function)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
end_time = time.time()
execution_time = end_time - start_time

print(f"Waktu kompilasi: {execution_time:.2f} detik")

Epoch [1/100], Train Loss: 0.1783, Val Loss: 0.1057
Epoch [2/100], Train Loss: 0.0126, Val Loss: 0.0156
Epoch [3/100], Train Loss: 0.0031, Val Loss: 0.0043
Epoch [4/100], Train Loss: 0.0024, Val Loss: 0.0053
Epoch [5/100], Train Loss: 0.0023, Val Loss: 0.0033
Epoch [6/100], Train Loss: 0.0022, Val Loss: 0.0034
Epoch [7/100], Train Loss: 0.0023, Val Loss: 0.0039
Epoch [8/100], Train Loss: 0.0022, Val Loss: 0.0040
Epoch [9/100], Train Loss: 0.0022, Val Loss: 0.0034
Epoch [10/100], Train Loss: 0.0022, Val Loss: 0.0035
Epoch [11/100], Train Loss: 0.0022, Val Loss: 0.0032
Epoch [12/100], Train Loss: 0.0022, Val Loss: 0.0035
Epoch [13/100], Train Loss: 0.0022, Val Loss: 0.0040
Epoch [14/100], Train Loss: 0.0023, Val Loss: 0.0039
Epoch [15/100], Train Loss: 0.0022, Val Loss: 0.0031
Epoch [16/100], Train Loss: 0.0022, Val Loss: 0.0032
Epoch [17/100], Train Loss: 0.0021, Val Loss: 0.0033
Epoch [18/100], Train Loss: 0.0021, Val Loss: 0.0035
Epoch [19/100], Train Loss: 0.0021, Val Loss: 0.0029
Ep

In [30]:
# Melakukan prediksi pada data latih
with torch.no_grad():
    # Data latih (X_train) dipindahkan ke perangkat (GPU jika tersedia), 
    # dilewatkan ke model untuk mendapatkan output, kemudian dipindahkan kembali ke CPU dan diubah menjadi array numpy.
    predicted = model(X_train.to(device)).to('cpu').numpy()

In [31]:
# Memvisualisasikan hasil prediksi pada data latih
# Nilai-nilai pada sumbu y yang berkisar dari -1 hingga 1 menunjukkan rentang nilai hasil normalisasi dari data Close Price asli. 
# MinMaxScaler mengubah skala data menjadi rentang antara -1 dan 1.
# Sumbu y mewakili nilai Close Price (harga penutupan) yang telah dinormalisasi menggunakan MinMaxScaler.
# nilai-nilai tersebut tidak mewakili harga penutupan sebenarnya, 
# melainkan nilai yang telah diubah skalanya ke rentang tertentu (dalam hal ini, antara -1 dan 1).
fig = go.Figure(data=[
    go.Scatter(y=y_train.flatten(), mode='lines', name='Normalized Actual Close Price'),
    go.Scatter(y=predicted.flatten(), mode='lines', name='Normalized Predicted Close Price')
])


fig.update_layout(
    height = 500, 
    width = 1000,
    title= "Hasil Prediksi Data",
    xaxis_title='Day',
    yaxis_title='Normalized Close Price',
    xaxis_range=[0,3700],
)

fig.show()

In [32]:
fig = go.Figure(data=[
    go.Scatter(x=data['Date'], y=y_train.flatten(), mode='lines', name='Normalized Actual Close Price'),
    go.Scatter(x=data['Date'], y=predicted.flatten(), mode='lines', name='Normalized Predicted Close Price')
])


fig.update_layout(
    title= "Hasil Prediksi Data Latih",
    xaxis_title='Day',
    yaxis_title='Normalized Close Price',
    

)

fig.show()

In [33]:
# Memvisualisasikan hasil prediksi pada data latih dengan waktu 7 hari
fig = go.Figure(data=[
    go.Scatter(y=y_train.flatten(), name='Normalized Actual Close Price'),
    go.Scatter(y=predicted.flatten(), name='Normalized Predicted Close Price')
])

fig.update_layout(
    height = 500, 
    width = 1000,
    title= "Data Latih 7 Hari",
    xaxis_title='Day',
    yaxis_title='Normalized Close Price',
    xaxis_range=[3000,3700]
)

fig.show()

In [34]:
# Untuk mendapatkan nilai harga penutupan sebenarnya, 
# perlu dilakukan inverse scaling menggunakan MinMaxScaler untuk mengubah kembali nilai-nilai yang dinormalisasi ke skala aslinya.

# Melakukan inverse scaling pada hasil prediksi data latih
train_predictions = predicted.flatten()
dummies = np.zeros((X_train.shape[0], lookback+1))
dummies[:, 0] = train_predictions
dummies = scaler.inverse_transform(dummies)
train_predictions = dc(dummies[:, 0])

In [35]:
# Melakukan inverse scaling pada nilai aktual data latih
dummies = np.zeros((X_train.shape[0], lookback+1))
dummies[:, 0] = y_train.flatten()
dummies = scaler.inverse_transform(dummies)
new_y_train = dc(dummies[:, 0])

In [36]:
# Melakukan prediksi pada data uji
test_predictions = model(X_test.to(device)).detach().cpu().numpy().flatten()

In [37]:
# Melakukan inverse scaling pada hasil prediksi data uji
dummies = np.zeros((X_test.shape[0], lookback+1))
dummies[:, 0] = test_predictions
dummies = scaler.inverse_transform(dummies)
test_predictions = dc(dummies[:, 0])

In [38]:
# Melakukan inverse scaling pada nilai aktual data uji
dummies = np.zeros((X_test.shape[0], lookback+1))
dummies[:, 0] = y_test.flatten()
dummies = scaler.inverse_transform(dummies)
new_y_test = dc(dummies[:, 0])

# EVALUASI MODEL: 
## Menggunakan metrik standar untuk mengevaluasi kinerja model dan membandingkan hasil prediksi dengan data aktual.

In [39]:
print("==============================================================")
# Menghitung metrik evaluasi pada data latih
mse_train = mean_squared_error(new_y_train, train_predictions)
mae_train = mean_absolute_error(new_y_train, train_predictions)
mape_train = mean_absolute_percentage_error(new_y_train, train_predictions)
rmse_train = np.sqrt(mean_squared_error(new_y_train, train_predictions))

print("Metrik Evaluasi pada Data Latih:")
print("Mean Squared Error (MSE):", mse_train)
print("Mean Absolute Error (MAE):", mae_train)
print("Mean Absolute Percentage Error (MAPE):", mape_train)
print("Root Mean Squared Error (RMSE):", rmse_train)

# Menghitung akurasi (R^2) pada data latih
train_r2 = r2_score(new_y_train, train_predictions)
print(f"Akurasi (R^2) pada Data Latih: {train_r2:.4f}")

print("==============================================================")

# Menghitung metrik evaluasi pada data uji
mse_test = mean_squared_error(new_y_test, test_predictions)
mae_test = mean_absolute_error(new_y_test, test_predictions)
mape_test = mean_absolute_percentage_error(new_y_test, test_predictions)
rmse_test = np.sqrt(mean_squared_error(new_y_test, test_predictions))

print("Metrik Evaluasi pada Data Uji:")
print("Mean Squared Error (MSE):", mse_test)
print("Mean Absolute Error (MAE):", mae_test)
print("Mean Absolute Percentage Error (MAPE):", mape_test)
print("Root Mean Squared Error (RMSE):", rmse_test)

# Menghitung akurasi (R^2) pada data uji
test_r2 = r2_score(new_y_test, test_predictions)
print(f"Akurasi (R^2) pada Data Uji: {test_r2:.4f}")


Metrik Evaluasi pada Data Latih:
Mean Squared Error (MSE): 546.7812959628554
Mean Absolute Error (MAE): 13.792908787849033
Mean Absolute Percentage Error (MAPE): 0.033822265055411666
Root Mean Squared Error (RMSE): 23.38335510492144
Akurasi (R^2) pada Data Latih: 0.9972
Metrik Evaluasi pada Data Uji:
Mean Squared Error (MSE): 795.5904998759346
Mean Absolute Error (MAE): 19.706411710383733
Mean Absolute Percentage Error (MAPE): 0.018584690982610123
Root Mean Squared Error (RMSE): 28.206213852198147
Akurasi (R^2) pada Data Uji: 0.9946


In [40]:
# Memvisualisasikan hasil prediksi pada data test
fig = go.Figure(data=[
    go.Scatter(y=new_y_test, mode='lines', name='Actual Close Price'),
    go.Scatter(y=test_predictions, mode='lines', name='Predicted Close Price')
])
fig.update_layout(
    height = 500, 
    width = 1000,
    title= "Actual vs Predicted Stock Prices",
    xaxis_title='Day',
    yaxis_title='Close Price',
    xaxis_range=[0, 950],
    
)
fig.show()

In [41]:
fig = go.Figure(data=[
    go.Scatter(y=new_y_test, name='Actual Close Price'),
    go.Scatter(y=test_predictions, name='Predicted Close Price')
])
fig.update_layout(
    xaxis_title='Day',
    yaxis_title='Close',
    title= "Actual vs Predicted Stock Prices",
    xaxis_range=[800, 950],
    yaxis_range=[1000, 1900]
)
fig.show()

In [42]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data['Date'][-len(new_y_test):], y=new_y_test, mode='lines', name='Actual Price'))
fig.add_trace(go.Scatter(x=data['Date'][-len(test_predictions):], y=test_predictions, mode='lines', name='Predicted Price'))
fig.update_layout(title='Actual vs Predicted Stock Prices', xaxis_title='Date', yaxis_title='Price')
fig.show()

In [43]:
# Mengambil data terakhir sebanyak lookback hari dari data uji
last_data = X_test[-1].cpu().numpy()

In [44]:
# Melakukan prediksi harga selama 7 hari ke depan
future_predictions = []
for _ in range(7):
    # Reshape data menjadi bentuk yang sesuai dengan input model
    input_data = last_data.reshape(1, lookback, 1)
    input_data = torch.tensor(input_data).float().to(device)
    
    # Melakukan prediksi menggunakan model
    with torch.no_grad():
        predicted_price = model(input_data).cpu().numpy()[0][0]
    
    # Menambahkan harga prediksi ke dalam data dan menghapus data paling awal
    last_data = np.append(last_data[1:], predicted_price)
    
    # Menambahkan harga prediksi ke dalam list
    future_predictions.append(predicted_price)

In [45]:
# Melakukan inverse scaling pada hasil prediksi
# Melakukan inverse scaling pada hasil prediksi
dummies = np.zeros((len(future_predictions), lookback+1))
dummies[:, 0] = future_predictions
dummies = scaler.inverse_transform(dummies)
future_predictions = dummies[:, 0]

In [46]:
# Memvisualisasikan hasil prediksi harga selama 7 hari ke depan
fig = go.Figure(data=[
    go.Scatter(y=future_predictions, name='Predicted Close Price')
])
fig.update_layout(
    title='Prediksi Harga Selama 7 Hari ke Depan',
    xaxis_title='Day',
    yaxis_title='Close Price'
)
fig.show()

In [47]:
# Melakukan prediksi harga selama 14 hari ke depan
future_predictions_14 = []
last_data_14 = X_test[-1].cpu().numpy()
for _ in range(14):
    input_data_14 = last_data_14.reshape(1, lookback, 1)
    input_data_14 = torch.tensor(input_data_14).float().to(device)
    
    with torch.no_grad():
        predicted_price_14 = model(input_data_14).cpu().numpy()[0][0]
    
    last_data_14 = np.append(last_data_14[1:], predicted_price_14)
    future_predictions_14.append(predicted_price_14)

dummies_14 = np.zeros((len(future_predictions_14), lookback+1))
dummies_14[:, 0] = future_predictions_14
dummies_14 = scaler.inverse_transform(dummies_14)
future_predictions_14 = dummies_14[:, 0]

# Memvisualisasikan hasil prediksi harga selama 14 hari ke depan
fig_14 = go.Figure(data=[
    go.Scatter(y=future_predictions_14, name='Predicted Close Price (14 days)')
])
fig_14.update_layout(
    title='Prediksi Harga Selama 14 Hari ke Depan',
    xaxis_title='Day',
    yaxis_title='Close Price'
)
fig_14.show()


In [48]:
# Melakukan prediksi harga selama 30 hari ke depan
future_predictions_30 = []
last_data_30 = X_test[-1].cpu().numpy()
for _ in range(30):
    input_data_30 = last_data_30.reshape(1, lookback, 1)
    input_data_30 = torch.tensor(input_data_30).float().to(device)
    
    with torch.no_grad():
        predicted_price_30 = model(input_data_30).cpu().numpy()[0][0]
    
    last_data_30 = np.append(last_data_30[1:], predicted_price_30)
    future_predictions_30.append(predicted_price_30)

dummies_30 = np.zeros((len(future_predictions_30), lookback+1))
dummies_30[:, 0] = future_predictions_30
dummies_30 = scaler.inverse_transform(dummies_30)
future_predictions_30 = dummies_30[:, 0]

# Memvisualisasikan hasil prediksi harga selama 30 hari ke depan
fig_30 = go.Figure(data=[
    go.Scatter(y=future_predictions_30, name='Predicted Close Price (30 days)')
])
fig_30.update_layout(
    title='Prediksi Harga Selama 30 Hari ke Depan',
    xaxis_title='Day',
    yaxis_title='Close Price'
)
fig_30.show()

In [49]:
# Mengambil tanggal terakhir dari data
last_date = data['Date'].iloc[-1]

# Membuat rentang tanggal untuk prediksi 30 hari ke depan
future_dates = pd.date_range(start=last_date, periods=31, freq='D')[1:]

# Memvisualisasikan hasil prediksi harga 30 hari ke depan pada grafik yang diberikan
fig = go.Figure(data=[
    go.Scatter(x=data['Date'], y=data['Close'], name='Actual Close Price'),
    go.Scatter(x=future_dates, y=future_predictions_30, name='Predicted Close Price (30 days)')
])

fig.update_layout(
    height = 500, 
    width = 1000,
    title='Predicted Close Price (30 days)',
    xaxis_title='Date',
    yaxis_title='Close Price',
    xaxis_range=['2005', '2025'],
    yaxis_range=[0, 2000]
)

fig.show()

In [50]:
fig = go.Figure(data=[
    go.Scatter(x=data['Date'], y=data['Close'], name='Actual Close Price'),
    go.Scatter(x=future_dates, y=future_predictions_30, name='Predicted Close Price (30 days)')
])

fig.update_layout(
    height = 500, 
    width = 1000,
    title='Prediksi Harga Penutupan (30 hari)',
    xaxis_title='Date',
    yaxis_title='Close Price',
    xaxis_range=['2020', '2025'],
    yaxis_range=[0, 2000]
)

fig.show()

In [51]:
fig = go.Figure(data=[
    go.Scatter(x=data['Date'], y=data['Close'], name='Actual Close Price'),
    go.Scatter(x=future_dates, y=future_predictions_30, name='Predicted Close Price (30 days)')
])

fig.update_layout(
    height = 500, 
    width = 1000,
    title='Prediksi Harga Penutupan (30 hari)',
    xaxis_title='Date',
    yaxis_title='Close Price',
    xaxis_range=['2024', '2025'],
    yaxis_range=[0, 2000]
)

In [64]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Memfilter data dari Januari 2020 hingga Mei 2024
start_date = '2020-01-01'
end_date = '2024-05-31'
filtered_actual = data.loc[(data['Date'] >= start_date) & (data['Date'] <= end_date), 'Close']

# Menghitung jumlah hari dari data aktual yang tersedia
num_days = len(filtered_actual)

# Membuat Series untuk prediksi dengan index yang sama dengan data aktual
filtered_predicted = pd.Series(index=filtered_actual.index, dtype=float)

# Mengisi nilai prediksi yang tersedia
available_predictions = min(len(test_predictions), num_days)
filtered_predicted.iloc[-available_predictions:] = test_predictions[-available_predictions:]

# Mengisi nilai yang tidak tersedia dengan NaN
filtered_predicted.fillna(np.nan, inplace=True)

# Memvisualisasikan prediksi vs aktual dari Januari 2020 hingga Mei 2024
fig = go.Figure(data=[
    go.Scatter(x=filtered_actual.index, y=filtered_actual, name='Actual Close Price'),
    go.Scatter(x=filtered_predicted.index, y=filtered_predicted, name='Predicted Close Price')
])

fig.update_layout(
    height=500,
    width=1000,
    title='Actual vs Predicted Stock Prices (Jan 2020 - May 2024)',
    xaxis_title='Date',
    yaxis_title='Close Price',
    xaxis_range=[start_date, end_date],
    yaxis_range=[filtered_actual.min() * 0.9, filtered_actual.max() * 1.1]
)

fig.show()

In [None]:
# Menyimpan model yang sudah dilatih
torch.save(model.state_dict(), 'model.pth')
print('Model berhasil disimpan ke file model.pth')

Model berhasil disimpan ke file model.pth
