In [34]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
import torch
import torch.nn as nn
import time

from sklearn.preprocessing import MinMaxScaler
from copy import deepcopy as dc
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score


In [35]:
# Membaca data dari file CSV
data = pd.read_csv('AKRA.JK.csv')

data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2005-09-29,53.200001,53.200001,52.799999,53.200001,26.296928,7425000
1,2005-09-30,54.000000,54.000000,53.200001,54.000000,26.692368,9337500
2,2005-10-03,54.000000,54.000000,53.599998,54.000000,26.692368,6300000
3,2005-10-04,54.000000,54.000000,53.599998,54.000000,26.692368,16937500
4,2005-10-05,54.799999,54.799999,54.000000,54.799999,27.087812,35300000
...,...,...,...,...,...,...,...
4586,2024-04-24,1805.000000,1810.000000,1785.000000,1790.000000,1790.000000,9951800
4587,2024-04-25,1785.000000,1785.000000,1700.000000,1735.000000,1735.000000,29677200
4588,2024-04-26,1730.000000,1730.000000,1570.000000,1600.000000,1600.000000,51436000
4589,2024-04-29,1630.000000,1655.000000,1580.000000,1650.000000,1650.000000,49231200


In [36]:
# Menampilkan baris ke-10 hingga ke-20 dari data
rows = data.iloc[10:21]
print(rows)

          Date       Open       High        Low      Close  Adj Close  \
10  2005-10-13  60.799999  61.599998  58.799999  60.799999  30.053625   
11  2005-10-14  62.000000  62.799999  61.200001  62.000000  30.646786   
12  2005-10-17  62.000000  62.000000  62.000000  62.000000  30.646786   
13  2005-10-18  62.799999  63.200001  61.599998  62.799999  31.042233   
14  2005-10-19  62.400002  62.799999  62.000000  62.400002  30.844517   
15  2005-10-20  62.400002  62.400002  62.400002  62.400002  30.844517   
16  2005-10-21  62.000000  62.400002  62.000000  62.000000  30.646786   
17  2005-10-24  62.000000  62.000000  61.200001  62.000000  30.646786   
18  2005-10-25  62.000000  62.000000  62.000000  62.000000  30.646786   
19  2005-10-26  61.599998  61.599998  58.799999  61.599998  30.449068   
20  2005-10-27  61.599998  61.599998  60.000000  61.599998  30.449068   

      Volume  
10  46937500  
11  35975000  
12         0  
13  22637500  
14  22937500  
15         0  
16  12600000  
17 

In [37]:
# Mengisi nilai 0 pada kolom 'Volume' dengan nilai rata-rata dari nilai sebelumnya
data['Volume'] = data['Volume'].replace(0, np.nan)
data['Volume'] = data['Volume'].ffill()

data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2005-09-29,53.200001,53.200001,52.799999,53.200001,26.296928,7425000.0
1,2005-09-30,54.000000,54.000000,53.200001,54.000000,26.692368,9337500.0
2,2005-10-03,54.000000,54.000000,53.599998,54.000000,26.692368,6300000.0
3,2005-10-04,54.000000,54.000000,53.599998,54.000000,26.692368,16937500.0
4,2005-10-05,54.799999,54.799999,54.000000,54.799999,27.087812,35300000.0
...,...,...,...,...,...,...,...
4586,2024-04-24,1805.000000,1810.000000,1785.000000,1790.000000,1790.000000,9951800.0
4587,2024-04-25,1785.000000,1785.000000,1700.000000,1735.000000,1735.000000,29677200.0
4588,2024-04-26,1730.000000,1730.000000,1570.000000,1600.000000,1600.000000,51436000.0
4589,2024-04-29,1630.000000,1655.000000,1580.000000,1650.000000,1650.000000,49231200.0


In [38]:
# Menampilkan baris ke-10 hingga ke-20 setelah pengisian nilai 0 pada kolom 'Volume'
rows = data.iloc[10:21]
print(rows)

          Date       Open       High        Low      Close  Adj Close  \
10  2005-10-13  60.799999  61.599998  58.799999  60.799999  30.053625   
11  2005-10-14  62.000000  62.799999  61.200001  62.000000  30.646786   
12  2005-10-17  62.000000  62.000000  62.000000  62.000000  30.646786   
13  2005-10-18  62.799999  63.200001  61.599998  62.799999  31.042233   
14  2005-10-19  62.400002  62.799999  62.000000  62.400002  30.844517   
15  2005-10-20  62.400002  62.400002  62.400002  62.400002  30.844517   
16  2005-10-21  62.000000  62.400002  62.000000  62.000000  30.646786   
17  2005-10-24  62.000000  62.000000  61.200001  62.000000  30.646786   
18  2005-10-25  62.000000  62.000000  62.000000  62.000000  30.646786   
19  2005-10-26  61.599998  61.599998  58.799999  61.599998  30.449068   
20  2005-10-27  61.599998  61.599998  60.000000  61.599998  30.449068   

        Volume  
10  46937500.0  
11  35975000.0  
12  35975000.0  
13  22637500.0  
14  22937500.0  
15  22937500.0  
16  

In [39]:
# Menggunakan hanya kolom 'Date' dan 'Close'
data = data[['Date', 'Close']]

data

Unnamed: 0,Date,Close
0,2005-09-29,53.200001
1,2005-09-30,54.000000
2,2005-10-03,54.000000
3,2005-10-04,54.000000
4,2005-10-05,54.799999
...,...,...
4586,2024-04-24,1790.000000
4587,2024-04-25,1735.000000
4588,2024-04-26,1600.000000
4589,2024-04-29,1650.000000


In [40]:
# Memeriksa ketersediaan GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

device

'cpu'

In [41]:
# 1. Visualisasi Data Asli
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=data['Date'], y=data['Close'], mode='lines', name='Close Price'))
fig1.update_layout(title='Stock Close Price Over Time', xaxis_title='Date', yaxis_title='Close Price')
fig1.show()

In [42]:
# Fungsi untuk mempersiapkan dataframe untuk model LSTM
def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)
    df.set_index('Date', inplace=True)
    for i in range(1, n_steps + 1):
        df[f'Close(t-{i})'] = df['Close'].shift(i)
    df.dropna(inplace=True)
    return df

# Menyiapkan dataframe dengan lookback 7 hari
lookback = 7
shifted_df = prepare_dataframe_for_lstm(data, lookback)

shifted_df

Unnamed: 0_level_0,Close,Close(t-1),Close(t-2),Close(t-3),Close(t-4),Close(t-5),Close(t-6),Close(t-7)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2005-10-10,56.400002,54.799999,54.799999,54.799999,54.000000,54.000000,54.000000,53.200001
2005-10-11,57.599998,56.400002,54.799999,54.799999,54.799999,54.000000,54.000000,54.000000
2005-10-12,58.799999,57.599998,56.400002,54.799999,54.799999,54.799999,54.000000,54.000000
2005-10-13,60.799999,58.799999,57.599998,56.400002,54.799999,54.799999,54.799999,54.000000
2005-10-14,62.000000,60.799999,58.799999,57.599998,56.400002,54.799999,54.799999,54.799999
...,...,...,...,...,...,...,...,...
2024-04-24,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000,1795.000000,1815.000000,1735.000000
2024-04-25,1735.000000,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000,1795.000000,1815.000000
2024-04-26,1600.000000,1735.000000,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000,1795.000000
2024-04-29,1650.000000,1600.000000,1735.000000,1790.000000,1800.000000,1795.000000,1845.000000,1850.000000


In [43]:
# Normalisasi data
scaler = MinMaxScaler()
shifted_df_scaled = scaler.fit_transform(shifted_df)

In [44]:
# Membagi data menjadi training dan testing
train_size = int(len(shifted_df_scaled) * 0.8)
train, test = shifted_df_scaled[:train_size], shifted_df_scaled[train_size:]

In [45]:
# Membuat dataset dan dataloader
class StockDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx, 1:], dtype=torch.float32), torch.tensor(self.data[idx, 0], dtype=torch.float32)

train_dataset = StockDataset(train)
test_dataset = StockDataset(test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [46]:
# Definisi model LSTM
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [47]:
# Inisialisasi parameter model

# Menentukan dimensi input, 
# yang berarti hanya menggunakan harga penutupan saham sebagai input.
input_size = 1

# Menentukan jumlah unit tersembunyi dalam setiap lapisan LSTM.
# Nilai ini menentukan kapasitas model dalam mempelajari pola dan representasi dalam data.
hidden_size = 50

# Menentukan jumlah lapisan LSTM yang akan digunakan dalam model.
# Lapisan LSTM dapat disusun secara bertingkat untuk meningkatkan kemampuan model 
# dalam mempelajari pola yang lebih kompleks.
num_layers = 2

# Menentukan dimensi output
# yang berarti model akan menghasilkan prediksi harga penutupan saham untuk satu periode ke depan.
output_size = 1

In [48]:
# Inisialisasi model, loss function, dan optimizer
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [49]:
# Pelatihan model
num_epochs = 50
train_losses = []
val_losses = []

In [50]:
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch.unsqueeze(2))
        loss = criterion(outputs, y_batch.unsqueeze(1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch.unsqueeze(2))
            loss = criterion(outputs, y_batch.unsqueeze(1))
            val_loss += loss.item()
    val_loss /= len(test_loader)
    val_losses.append(val_loss)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

end_time = time.time()
execution_time = end_time - start_time

print(f"Waktu kompilasi: {execution_time:.2f} Detik")

Epoch [1/50], Train Loss: 0.0478, Val Loss: 0.0018
Epoch [2/50], Train Loss: 0.0006, Val Loss: 0.0006
Epoch [3/50], Train Loss: 0.0004, Val Loss: 0.0008
Epoch [4/50], Train Loss: 0.0004, Val Loss: 0.0010
Epoch [5/50], Train Loss: 0.0004, Val Loss: 0.0011
Epoch [6/50], Train Loss: 0.0004, Val Loss: 0.0007
Epoch [7/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [8/50], Train Loss: 0.0004, Val Loss: 0.0007
Epoch [9/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [10/50], Train Loss: 0.0004, Val Loss: 0.0008
Epoch [11/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [12/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [13/50], Train Loss: 0.0004, Val Loss: 0.0007
Epoch [14/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [15/50], Train Loss: 0.0004, Val Loss: 0.0005
Epoch [16/50], Train Loss: 0.0004, Val Loss: 0.0007
Epoch [17/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [18/50], Train Loss: 0.0004, Val Loss: 0.0006
Epoch [19/50], Train Loss: 0.0004, Val Loss: 0.0005
Epoch [20/50], Train 

In [51]:
# Menghitung metrik evaluasi pada data latih
train_predictions = []
with torch.no_grad():
    for X_batch, _ in train_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch.unsqueeze(2))
        train_predictions.extend(outputs.cpu().numpy())

train_predictions = np.array(train_predictions)

In [52]:
# Inverse scaling untuk prediksi data latih
dummies = np.zeros((len(train_predictions), lookback + 1))
dummies[:, 0] = train_predictions.flatten()
dummies = scaler.inverse_transform(dummies)
train_predictions = dummies[:, 0]

In [53]:
# Inverse scaling untuk nilai aktual data latih
dummies = np.zeros((len(train_predictions), lookback + 1))
dummies[:, 0] = train[:, 0]
dummies = scaler.inverse_transform(dummies)
train_actuals = dummies[:, 0]

In [54]:
# Prediksi pada data uji
model.eval()
test_predictions = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch.unsqueeze(2))
        test_predictions.extend(outputs.cpu().numpy())

test_predictions = np.array(test_predictions)

In [55]:
# Inverse scaling
dummies = np.zeros((len(test_predictions), lookback + 1))
dummies[:, 0] = test_predictions.flatten()
dummies = scaler.inverse_transform(dummies)
test_predictions = dummies[:, 0]

In [56]:
# Inverse scaling untuk nilai aktual
dummies = np.zeros((len(test_predictions), lookback + 1))
dummies[:, 0] = test[:, 0]
dummies = scaler.inverse_transform(dummies)
new_y_test = dummies[:, 0]

In [57]:
# Menghitung metrik evaluasi pada data latih
train_mse = mean_squared_error(train_actuals, train_predictions)
train_rmse = np.sqrt(train_mse)
train_mae = mean_absolute_error(train_actuals, train_predictions)
train_mape = mean_absolute_percentage_error(train_actuals, train_predictions)
train_r2 = r2_score(train_actuals, train_predictions)

print("Metrik Evaluasi pada Data Latih:")
print(f"MSE: {train_mse:.4f}")
print(f"RMSE: {train_rmse:.4f}")
print(f"MAE: {train_mae:.4f}")
print(f"MAPE: {train_mape:.4f}")
print(f"R2 Score: {train_r2:.4f}")

# Menghitung metrik evaluasi pada data uji
test_mse = mean_squared_error(new_y_test, test_predictions)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(new_y_test, test_predictions)
test_mape = mean_absolute_percentage_error(new_y_test, test_predictions)
test_r2 = r2_score(new_y_test, test_predictions)

print("Metrik Evaluasi pada Data Uji:")
print(f"MSE: {test_mse:.4f}")
print(f"RMSE: {test_rmse:.4f}")
print(f"MAE: {test_mae:.4f}")
print(f"MAPE: {test_mape:.4f}")
print(f"R2 Score: {test_r2:.4f}")

Metrik Evaluasi pada Data Latih:
MSE: 393423.9034
RMSE: 627.2351
MAE: 507.0270
MAPE: 1.8587
R2 Score: -1.0269
Metrik Evaluasi pada Data Uji:
MSE: 2150.0656
RMSE: 46.3688
MAE: 35.7648
MAPE: 0.0337
R2 Score: 0.9853


In [58]:
# 2. Visualisasi Hasil Prediksi vs Nilai Aktual
fig = go.Figure()
fig.add_trace(go.Scatter( y=new_y_test, mode='lines', name='Actual Price'))
fig.add_trace(go.Scatter( y=test_predictions, mode='lines', name='Predicted Price'))
fig.update_layout(
    title='Actual vs Predicted Stock Prices', 
    xaxis_title='Day', 
    yaxis_title='Price',
    xaxis_range=[0, 950],
)
fig.show()

In [59]:
fig = go.Figure()
fig.add_trace(go.Scatter( y=new_y_test, mode='lines', name='Actual Price'))
fig.add_trace(go.Scatter( y=test_predictions, mode='lines', name='Predicted Price'))
fig.update_layout(
    title='Actual vs Predicted Stock Prices', 
    xaxis_title='Day', 
    yaxis_title='Price', 
    xaxis_range=[800, 950],
    yaxis_range=[1000, 1900]
)
fig.show()

In [64]:
# Melakukan prediksi harga selama 14 hari ke depan
future_predictions_14 = []
last_data_14 = X_batch[-1].cpu().numpy()
for _ in range(14):
    input_data_14 = last_data_14.reshape(1, lookback, 1)
    input_data_14 = torch.tensor(input_data_14).float().to(device)
    
    with torch.no_grad():
        predicted_price_14 = model(input_data_14).cpu().numpy()[0][0]
    
    last_data_14 = np.append(last_data_14[1:], predicted_price_14)
    future_predictions_14.append(predicted_price_14)

dummies_14 = np.zeros((len(future_predictions_14), lookback+1))
dummies_14[:, 0] = future_predictions_14
dummies_14 = scaler.inverse_transform(dummies_14)
future_predictions_14 = dummies_14[:, 0]

# Memvisualisasikan hasil prediksi harga selama 14 hari ke depan
fig_14 = go.Figure(data=[
    go.Scatter(y=future_predictions_14, name='Predicted Close Price (14 days)')
])
fig_14.update_layout(
    title='Prediksi Harga Selama 14 Hari ke Depan',
    xaxis_title='Day',
    yaxis_title='Close Price'
)
fig_14.show()



In [65]:
# Melakukan prediksi harga selama 30 hari ke depan
future_predictions_30 = []
last_data_30 = X_batch[-1].cpu().numpy()
for _ in range(30):
    input_data_30 = last_data_30.reshape(1, lookback, 1)
    input_data_30 = torch.tensor(input_data_30).float().to(device)
    
    with torch.no_grad():
        predicted_price_30 = model(input_data_30).cpu().numpy()[0][0]
    
    last_data_30 = np.append(last_data_30[1:], predicted_price_30)
    future_predictions_30.append(predicted_price_30)

dummies_30 = np.zeros((len(future_predictions_30), lookback+1))
dummies_30[:, 0] = future_predictions_30
dummies_30 = scaler.inverse_transform(dummies_30)
future_predictions_30 = dummies_30[:, 0]



# Memvisualisasikan hasil prediksi harga selama 30 hari ke depan
fig_30 = go.Figure(data=[
    go.Scatter(y=future_predictions_30, name='Predicted Close Price (30 days)')
])
fig_30.update_layout(
    title='Prediksi Harga Selama 30 Hari ke Depan',
    xaxis_title='Day',
    yaxis_title='Close Price'
)
fig_30.show()

In [66]:
# Mengambil tanggal terakhir dari data
last_date = data['Date'].iloc[-1]

# Membuat rentang tanggal untuk prediksi 30 hari ke depan
future_dates = pd.date_range(start=last_date, periods=31, freq='D')[1:]

# Memvisualisasikan hasil prediksi harga 30 hari ke depan pada grafik yang diberikan
fig = go.Figure(data=[
    go.Scatter(x=data['Date'], y=data['Close'], name='Actual Close Price'),
    go.Scatter(x=future_dates, y=future_predictions_30, name='Predicted Close Price (30 days)')
])

fig.update_layout(
    height = 500, 
    width = 1200,
    title='Predicted Close Price (30 days)',
    xaxis_title='Date',
    yaxis_title='Close Price',
    xaxis_range=['2024', '2025'],
    yaxis_range=[0, 2000]
)

fig.show()