**IMPORT LIBRARY & DATASET**

In [None]:
import math
from math import sqrt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
df = pd.read_csv('XSSI.JK.csv')
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-03-01,79.0,99.0,79.0,99.0,99.0,1310600.0
1,2022-03-02,113.0,133.0,101.0,133.0,133.0,573100.0
2,2022-03-03,113.0,133.0,101.0,133.0,133.0,573100.0
3,2022-03-04,130.0,179.0,130.0,179.0,179.0,337100.0
4,2022-03-05,130.0,179.0,130.0,179.0,179.0,337100.0
...,...,...,...,...,...,...,...
727,2024-02-26,75.0,75.0,70.0,75.0,75.0,8600.0
728,2024-02-27,75.0,75.0,74.0,74.0,74.0,4000.0
729,2024-02-28,74.0,74.0,74.0,74.0,74.0,1000.0
730,2024-02-29,74.0,75.0,74.0,75.0,75.0,1500.0


**PREPROCESSING DATA**

In [None]:
# Set Date as index
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Menghapus kolom 'Adj Close' dan 'Volume' dari DataFrame
df = df.drop(columns=['Adj Close', 'Volume'])

In [None]:
# Check dataframe info
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 732 entries, 2022-03-01 to 2024-03-01
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    732 non-null    float64
 1   High    732 non-null    float64
 2   Low     732 non-null    float64
 3   Close   732 non-null    float64
dtypes: float64(4)
memory usage: 28.6 KB


In [None]:
# Check missing values
df.isnull().sum()

Open     0
High     0
Low      0
Close    0
dtype: int64

In [None]:
# Membuat plot dengan plotly
fig = go.Figure()

fig.add_trace(go.Scatter(x=df.index, y=df['Open'], mode='lines', name='Open'))
fig.add_trace(go.Scatter(x=df.index, y=df['High'], mode='lines', name='High'))
fig.add_trace(go.Scatter(x=df.index, y=df['Low'], mode='lines', name='Low'))
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Close'))

fig.update_layout(
    title="Stock Price\nReksadana",
    xaxis_title='Date',
    yaxis_title='Price'
)

fig.show()

In [None]:
# Ekstraksi Fitur
Average = (df['Open'] + df['High'] + df['Low'] + df['Close']) / 4

# Mencari nilai RSI
dataclose = df['Close'].diff(1).dropna()
up = dataclose.copy()
down = dataclose.copy()
up[up < 0] = 0
down[down > 0] = 0
time_period = 14
AVG_Gain = up.rolling(window=time_period).mean()
AVG_Loss = abs(down.rolling(window=time_period).mean())
RS = AVG_Gain / AVG_Loss
RSI = 100 - (100 / (1 + RS))

# Menambahkan fitur ke dataframe
dataset = df.assign(Average=Average).assign(RSI=RSI).fillna(0)
dataset

Unnamed: 0_level_0,Open,High,Low,Close,Average,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-01,79.0,99.0,79.0,99.0,89.00,0.000000
2022-03-02,113.0,133.0,101.0,133.0,120.00,0.000000
2022-03-03,113.0,133.0,101.0,133.0,120.00,0.000000
2022-03-04,130.0,179.0,130.0,179.0,154.50,0.000000
2022-03-05,130.0,179.0,130.0,179.0,154.50,0.000000
...,...,...,...,...,...,...
2024-02-26,75.0,75.0,70.0,75.0,73.75,50.000000
2024-02-27,75.0,75.0,74.0,74.0,74.50,42.857143
2024-02-28,74.0,74.0,74.0,74.0,74.00,42.857143
2024-02-29,74.0,75.0,74.0,75.0,74.50,50.000000


In [None]:
# Mengecek deskriptif dataset
deskriptif = dataset.describe()
deskriptif

Unnamed: 0,Open,High,Low,Close,Average,RSI
count,732.0,732.0,732.0,732.0,732.0,732.0
mean,79.484973,81.70765,77.364754,79.308743,79.46653,47.262398
std,10.150228,12.978855,8.982854,10.485494,10.22093,16.645525
min,55.0,72.0,55.0,56.0,64.5,0.0
25%,76.0,77.0,74.0,76.0,76.0,40.0
50%,79.0,80.0,77.0,78.0,78.25,50.0
75%,81.0,82.0,79.0,80.0,80.5,54.545455
max,238.0,241.0,167.0,179.0,203.25,100.0


In [None]:
# Ascending (mengurutkan data)
dataset2 = dataset.sort_values(by=['Date']).copy()
fitur = ['High', 'Open', 'Low', 'Close', 'Average', 'RSI']
print('MACAM-MACAM FITUR')
print([f for f in fitur])

# Membuat dataset sesuai dengan list pada fitur
data2 = pd.DataFrame(dataset2)
dataset3 = data2[fitur]

MACAM-MACAM FITUR
['High', 'Open', 'Low', 'Close', 'Average', 'RSI']


In [None]:
# Banyaknya baris
n_baris = dataset3.shape[0]

# Convert data dalam bentuk array
np_data_unscaled = np.array(dataset3)
np_data = np.reshape(np_data_unscaled, (n_baris, -1))
np_data.shape

(732, 6)

In [None]:
# Proses segmentasi
panjang_urutan = 30

def partition_dataset(panjang_urutan, data):
    x, y = [], []
    data_len = data.shape[0]
    for i in range(panjang_urutan, data_len):
        x.append(data[i-panjang_urutan:i, :])
        y.append(data[i, 0])
    x = np.array(x)
    y = np.array(y)
    return x, y

**NORMALIZED DATA**

In [None]:
# Proses Normalisasi
scaler = MinMaxScaler()
np_data_scaled = scaler.fit_transform(np_data_unscaled)

# Melihat data yang telah dinormalisasi
df_normalized = pd.DataFrame(np_data_scaled, columns=dataset3.columns)
df_normalized

Unnamed: 0,High,Open,Low,Close,Average,RSI
0,0.159763,0.131148,0.214286,0.349593,0.176577,0.000000
1,0.360947,0.316940,0.410714,0.626016,0.400000,0.000000
2,0.360947,0.316940,0.410714,0.626016,0.400000,0.000000
3,0.633136,0.409836,0.669643,1.000000,0.648649,0.000000
4,0.633136,0.409836,0.669643,1.000000,0.648649,0.000000
...,...,...,...,...,...,...
727,0.017751,0.109290,0.133929,0.154472,0.066667,0.500000
728,0.017751,0.109290,0.169643,0.146341,0.072072,0.428571
729,0.011834,0.103825,0.169643,0.146341,0.068468,0.428571
730,0.017751,0.103825,0.169643,0.154472,0.072072,0.500000


**SPLITTING DATA INTO 80:20**

In [None]:
# Split data
split_ratio = 0.8
n_datalatih = int(n_baris * split_ratio)
train_data = np_data_scaled[:n_datalatih, :]
test_data = np_data_scaled[n_datalatih - 30:, :]

# Print the number of rows in training and testing data
print(f'Total rows in training data: {train_data.shape[0]}')
print(f'Total rows in testing data: {test_data.shape[0]}')

# Segmentasi data pelatihan dan pengujian
x_train, y_train = partition_dataset(panjang_urutan, train_data)
x_test, y_test = partition_dataset(panjang_urutan, test_data)

Total rows in training data: 585
Total rows in testing data: 177


In [None]:
# Split data for visualization
train = dataset3['Close'][:n_datalatih + 1]
test = dataset3['Close'][n_datalatih:]

# Plot training and testing data
fig = go.Figure()
fig.add_trace(go.Scatter(x=train.index, y=train, mode='lines', name='Training', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test.index, y=test, mode='lines', name='Testing', line=dict(color='orange')))
fig.update_layout(title='Stock Price - Reksadana\n80:20 Training & Testing',
                  xaxis_title='Date', yaxis_title='Price', xaxis=dict(tickformat='%Y-%m-%d',
                                                                          tickmode='auto', nticks=20))
fig.show()

**BUILD LSTM MODEL**

In [None]:
def build_model(optimizer):
    model = Sequential()
    model.add(LSTM(50, activation='tanh', recurrent_activation='sigmoid', return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(LSTM(50, activation='tanh', recurrent_activation='sigmoid'))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model
model = build_model('adam')
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 50)            11400     
                                                                 
 lstm_1 (LSTM)               (None, 50)                20200     
                                                                 
 dropout (Dropout)           (None, 50)                0         
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 31651 (123.64 KB)
Trainable params: 31651 (123.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


**TRAINING MODEL**

In [None]:
batch_sizes = [4, 16, 64, 128]
epochs = [50, 100, 150, 200]

results = {}

for batch_size in batch_sizes:
    for epoch in epochs:
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        history = model.fit(x_train, y_train, epochs=epoch, batch_size=batch_size, validation_data=(x_test, y_test),
                            callbacks=[early_stopping], verbose=1)

        y_pred_scaled = model.predict(x_test)

        y_pred_scaled_copies_array = np.repeat(y_pred_scaled, 6, axis=-1)
        y_pred = scaler.inverse_transform(np.reshape(y_pred_scaled_copies_array, (len(y_pred_scaled), 6)))[:, 0]

        data_aktual_copies_array = np.repeat(y_test, 6, axis=-1)
        y_test_unscaled = scaler.inverse_transform(np.reshape(data_aktual_copies_array, (len(y_test), 6)))[:, 0]

        RMSE = sqrt(mean_squared_error(y_test_unscaled, y_pred))
        MAPE = np.mean((np.abs(np.subtract(y_test_unscaled, y_pred) / y_test_unscaled))) * 100

        results[(batch_size, epoch)] = {'RMSE': RMSE, 'MAPE': MAPE}
        print(f'Batch Size: {batch_size}, Epochs: {epoch} -> RMSE: {np.round(RMSE, 2)}, MAPE: {np.round(MAPE, 2)}%')

print("All results:", results)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Batch Size: 4, Epochs: 50 -> RMSE: 1.2, MAPE: 1.21%
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Batch Size: 4, Epochs: 100 -> RMSE: 1.06, MAPE: 1.08%
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Batch Size: 4, Epochs: 150 -> RMSE: 1.05, MAPE: 0.99%
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 

In [None]:
# Convert results to a DataFrame
results_df = pd.DataFrame.from_dict(results, orient='index')

# Display the detailed DataFrame
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
results_df

Unnamed: 0,Unnamed: 1,RMSE,MAPE
4,50,1.203263,1.212517
4,100,1.057916,1.083277
4,150,1.049173,0.991408
4,200,1.056954,1.005024
16,50,1.023384,0.97235
16,100,1.039827,1.031277
16,150,1.035858,0.996973
16,200,1.046402,0.96413
64,50,1.05853,1.032536
64,100,1.063033,1.009393


**SELECT BEST MODEL**

In [None]:
# Pilih model terbaik berdasarkan hasil evaluasi
best_params = min(results, key=lambda k: results[k]['RMSE'])
best_batch_size, best_epoch = best_params
best_RMSE = results[best_params]['RMSE']
best_MAPE = results[best_params]['MAPE']
print(f'Best params - Batch Size: {best_batch_size}, Epochs: {best_epoch}, RMSE: {best_RMSE}, MAPE: {best_MAPE}')

Best params - Batch Size: 16, Epochs: 50, RMSE: 1.023384213860634, MAPE: 0.9723504490411168


**TRAINING BEST MODEL**

In [None]:
# Melatih model terbaik dengan EarlyStopping
best_model = build_model('adam')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = best_model.fit(x_train, y_train,
                         epochs=best_epoch,
                         batch_size=best_batch_size,
                         validation_data=(x_test, y_test),
                         callbacks=[early_stopping],
                         verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


**PREDICTION**

In [None]:
# Nilai prediksi
y_pred_scaled = best_model.predict(x_test)

# Scaling prediksi
y_pred_scaled_copies_array = np.repeat(y_pred_scaled, 6, axis=-1)
y_pred = scaler.inverse_transform(np.reshape(y_pred_scaled_copies_array, (len(y_pred_scaled), 6)))[:, 0]

# Scaling data aktual
data_aktual_copies_array = np.repeat(y_test, 6, axis=-1)
y_test_unscaled = scaler.inverse_transform(np.reshape(data_aktual_copies_array, (len(y_test), 6)))[:, 0]

# Root Mean Squared Error (RMSE)
RMSE = sqrt(mean_squared_error(y_test_unscaled, y_pred))
print(f'Root Mean Squared Error (RMSE): {np.round(RMSE, 2)}')

# Mean Absolute Percentage Error (MAPE)
MAPE = np.mean((np.abs(np.subtract(y_test_unscaled, y_pred) / y_test_unscaled))) * 100
print(f'Mean Absolute Percentage Error (MAPE): {np.round(MAPE, 2)} %')

Root Mean Squared Error (RMSE): 1.21
Mean Absolute Percentage Error (MAPE): 1.24 %


In [None]:
# Buat DataFrame untuk data aktual dan prediksi
df_results = pd.DataFrame({
    'Date': df.index[-len(y_test_unscaled):],
    'Actual_Close': y_test_unscaled,
    'Predicted_Close': y_pred
})

# Fungsi untuk membuat plot
def plot_feature(feature_name, color_train, color_test, color_predicted):
    fig = go.Figure()

    # Data latih
    fig.add_trace(go.Scatter(x=df.index[:n_datalatih], y=df[feature_name][:n_datalatih],
                             mode='lines', name=f'Train Data {feature_name}', line=dict(color=color_train)))

    # Data uji
    fig.add_trace(go.Scatter(x=df.index[n_datalatih:], y=df[feature_name][n_datalatih:],
                             mode='lines', name=f'Test Data {feature_name}', line=dict(color=color_test)))

    # Hasil prediksi
    fig.add_trace(go.Scatter(x=df_results['Date'], y=df_results[f'Predicted_{feature_name}'] if f'Predicted_{feature_name}' in df_results else df_results['Predicted_Close'],
                             mode='lines', name=f'Predicted Data {feature_name}', line=dict(color=color_predicted)))

    # Layout
    fig.update_layout(
        title={'text': f'Stock Price Prediction - {feature_name}', 'x': 0.5, 'xanchor': 'center'},
        xaxis_title='Date',
        yaxis_title=f'Stock Price {feature_name}',
        legend=dict(x=0.5, y=1.1, xanchor='center', orientation='h')
    )

    fig.show()

# Plot Close
plot_feature('Close', 'blue', 'orange', 'green')

# Plot Open
plot_feature('Open', 'purple', 'pink', 'red')

# Plot High
plot_feature('High', 'brown', 'yellow', 'black')

# Plot Low
plot_feature('Low', 'cyan', 'magenta', 'grey')


In [None]:
# Membuat DataFrame untuk menampilkan nilai aktual, nilai prediksi, Absolute Difference, dan Relative Difference (%)
dates = df.index[-len(y_test_unscaled):]  # Menentukan tanggal untuk data pengujian
results_df = pd.DataFrame({
    'Date': dates,
    'Actual': y_test_unscaled,
    'Predicted': y_pred,
    'Absolute Difference': np.abs(y_test_unscaled - y_pred),
    'Relative Difference (%)': (np.abs(y_test_unscaled - y_pred) / y_test_unscaled) * 100
})

# Set index to Date
results_df.set_index('Date', inplace=True)

# Tampilkan DataFrame
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
results_df

Unnamed: 0_level_0,Actual,Predicted,Absolute Difference,Relative Difference (%)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-10-07,80.0,78.95266,1.04734,1.309175
2023-10-08,80.0,78.97403,1.02597,1.282463
2023-10-09,79.0,78.965569,0.034431,0.043584
2023-10-10,80.0,78.94651,1.05349,1.316862
2023-10-11,80.0,79.012558,0.987442,1.234303
2023-10-12,79.0,79.117508,0.117508,0.148744
2023-10-13,80.0,79.150131,0.849869,1.062336
2023-10-14,80.0,79.135368,0.864632,1.08079
2023-10-15,80.0,79.101822,0.898178,1.122723
2023-10-16,80.0,79.065414,0.934586,1.168232


**FUTURE PREDICTION**

In [None]:
# Future prediction
future_days = 120
last_30_days = np_data_scaled[-30:]
future_predictions = []

for _ in range(future_days):
    x_future = last_30_days[-30:].reshape(1, 30, x_train.shape[2])
    pred = best_model.predict(x_future)
    future_predictions.append(pred[0])
    # Update last_30_days by appending pred, ensuring dimensions match
    pred_expanded = np.zeros((1, last_30_days.shape[1]))
    pred_expanded[0, 0] = pred  # only set the first value (Close price)
    last_30_days = np.vstack([last_30_days[1:], pred_expanded])

# Scaling predictions back to original scale
future_predictions_scaled_copies_array = np.repeat(np.array(future_predictions), 6, axis=-1)
future_predictions = scaler.inverse_transform(np.reshape(future_predictions_scaled_copies_array, (len(future_predictions), 6)))[:, 0]

# Future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date, periods=future_days + 1).tolist()[1:]

# Create DataFrame for future predictions
future_df = pd.DataFrame({'Date': future_dates, 'Predicted': future_predictions})
future_df.set_index('Date', inplace=True)

# Display future predictions
future_df




Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)





Unnamed: 0_level_0,Predicted
Date,Unnamed: 1_level_1
2024-03-02,75.411186
2024-03-03,74.92807
2024-03-04,74.168259
2024-03-05,73.351807
2024-03-06,72.589523
2024-03-07,71.928482
2024-03-08,71.382088
2024-03-09,70.944527
2024-03-10,70.596367
2024-03-11,70.321426


In [None]:
# Misalkan kita ingin melihat hasil prediksi pada tanggal tertentu
tanggal_spesifik = '2024-06-26'

# Melihat hasil prediksi pada tanggal tersebut
if tanggal_spesifik in future_df.index:
    prediksi_tanggal_spesifik = future_df.loc[tanggal_spesifik]
    print(f"Prediksi untuk tanggal {tanggal_spesifik}: {prediksi_tanggal_spesifik['Predicted']}")
else:
    print(f"Tanggal {tanggal_spesifik} tidak ada dalam rentang prediksi.")


Prediksi untuk tanggal 2024-06-26: 68.6348648071289


In [None]:
# Buat DataFrame untuk data aktual dan prediksi
df_results = pd.DataFrame({
    'Date': df.index[-len(y_test_unscaled):],
    'Actual': y_test_unscaled,
    'Predicted': y_pred
})

# Plot data latih, data uji, hasil prediksi, dan prediksi masa depan
fig = go.Figure()

# Data latih
fig.add_trace(go.Scatter(x=df.index[:n_datalatih], y=df['Close'][:n_datalatih],
                         mode='lines', name='Train Data', line=dict(color='blue')))

# Data uji
fig.add_trace(go.Scatter(x=df.index[n_datalatih:], y=df['Close'][n_datalatih:],
                         mode='lines', name='Test Data', line=dict(color='orange')))

# Hasil prediksi
fig.add_trace(go.Scatter(x=df_results['Date'], y=df_results['Predicted'],
                         mode='lines', name='Predicted Data', line=dict(color='green')))

# Add future predictions to the plot
fig.add_trace(go.Scatter(x=future_df.index, y=future_df['Predicted'],
                         mode='lines', name='Future Predictions', line=dict(color='red')))

# Layout
fig.update_layout(
    title={'text': 'Stock Price Prediction', 'x': 0.5, 'xanchor': 'center'},
    xaxis_title='Date',
    yaxis_title='Stock Price',
    legend=dict(x=0.5, y=1.1, xanchor='center', orientation='h')
)

fig.show()
