In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

In [None]:
#### CALL OPTIONS ####

In [2]:
# Load the call-option training and testing sheets.
# Both sheets live in the same workbook, so the path is written once and the
# file is opened a single time: passing a list to `sheet_name` makes
# `pd.read_excel` return a dict of DataFrames keyed by sheet name.
call_data_path = "/Users/shannenwibisono/Desktop/-SKRIPSI-/Bitcoin/Bitcoin Training Testing Call.xlsx"
call_sheets = pd.read_excel(
    call_data_path,
    sheet_name=["Training Data Call", "Testing Data Call"],
)
train_data = call_sheets["Training Data Call"]
test_data = call_sheets["Testing Data Call"]

In [3]:
# LightGBM hyper-parameters used for every lag configuration.
# NOTE(review): `n_estimators` is a LightGBM alias for the number of boosting
# iterations, which the training cell also passes as `num_boost_round=2500`;
# the two values agree, but only one of them is needed.
# NOTE(review): `fair_c`, `huber_delta` and `tweedie_variance_power` look like
# settings for the fair / huber / tweedie objectives and are presumably
# ignored with objective='gamma' — confirm before relying on them.
params = dict(
    boost_from_average=True,
    colsample_bytree=0.5,
    fair_c=1,
    huber_delta=1,
    learning_rate=0.05,
    max_bin=255,
    max_delta_step=0.7,
    max_depth=-1,  # LightGBM spells "no depth limit" as -1 rather than None
    min_child_samples=10,
    min_child_weight=5,
    min_split_gain=0,
    n_estimators=2500,
    num_leaves=16,
    objective='gamma',
    reg_alpha=0,
    reg_lambda=0,
    subsample=1,
    subsample_for_bin=50000,
    subsample_freq=1,
    tweedie_variance_power=1.5,
    early_stopping_round=200,  # early stopping is configured through the params
)


# Lag configurations, evaluated one at a time: each label maps to a
# one-element list holding the lag column of the same name. Insertion order
# is the order in which the configurations are iterated.
lags = {
    column: [column]
    for column in (
        'RVlag1', 'RVlag7', 'RVlag15', 'RVlag30',
        'BVlag1', 'BVlag7', 'BVlag15', 'BVlag30',
        'SJVlag7', 'SJVlag15', 'SJVlag30',
    )
}

In [4]:
# Start a fresh in-memory workbook; its default sheet is renamed "Summary"
# and receives the header row for the per-lag metrics.
wb = Workbook()
ws_summary = wb.active
ws_summary.title = "Summary"
summary_header = ["Lag", "RMSE", "MAPE"]
ws_summary.append(summary_header)

# Where the call-option results workbook is written once all lags are done
output_file = "/Users/shannenwibisono/Desktop/-SKRIPSI-/Results LGBM/Results_LightGBM_BTCCall_All.xlsx"

In [5]:
# Train and evaluate one LightGBM model per lag configuration (call options),
# then write the per-lag predictions plus a metrics summary to the workbook.
#
# NOTE(review): the testing data is also passed as a validation set, so early
# stopping / `best_iteration` are selected on the same rows that the reported
# RMSE and MAPE are computed on — consider a separate validation split.
# NOTE(review): `wb` and `ws_summary` are created in an earlier cell; re-running
# only this cell appends further summary rows and sheets to that same workbook.

# Features shared by every configuration; the lag column is added per run.
base_features = ['Asset Price', 'Maturity', 'Strike']

# The targets do not depend on the lag, so extract them once.
y_train = train_data['Last'].values
y_test = test_data['Last'].values

for lag_name, features in lags.items():
    print(f"\nTesting with lag: {lag_name}")

    # Feature matrices for this configuration
    selected_features = base_features + features
    x_train = train_data[selected_features].values
    x_test = test_data[selected_features].values

    # LightGBM datasets; the test set reuses the training set's binning
    train_set = lgb.Dataset(x_train, label=y_train)
    test_set = lgb.Dataset(x_test, label=y_test, reference=train_set)

    # Train the model, reporting the metric on both sets
    model = lgb.train(
        params,
        train_set,
        valid_sets=[train_set, test_set],
        valid_names=["train", "test"],
        num_boost_round=2500
    )

    # Predict with the best iteration found during training
    y_pred = model.predict(x_test, num_iteration=model.best_iteration)

    # RMSE: take the square root explicitly instead of passing `squared=False`,
    # which was deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
    # MAPE in percent. The division is by the actual price, so the result is
    # inf/nan if any actual value is 0.
    mape = float(np.mean(np.abs((y_test - y_pred) / y_test)) * 100)
    print(f"Lag: {lag_name} | RMSE: {rmse:.4f} | MAPE: {mape:.2f}%")

    # Per-observation results; the scalar metrics are broadcast to every row
    results = pd.DataFrame({
        'Date': test_data['Time'],  # Adjust 'Time' to your date column name
        'Actual': y_test,
        'Predicted': y_pred,
        'RMSE': rmse,
        'MAPE': mape
    })

    # One summary row per lag
    ws_summary.append([lag_name, rmse, mape])

    # One detail sheet per lag, header row first
    ws = wb.create_sheet(title=lag_name)
    for row in dataframe_to_rows(results, index=False, header=True):
        ws.append(row)

# Save the workbook
wb.save(output_file)
print(f"All results saved to Excel file: {output_file}")


Testing with lag: RVlag1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000310 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.77743	test's gamma: 9.79769
Lag: RVlag1 | RMSE: 343.2638 | MAPE: 2.40%

Testing with lag: RVlag7
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 829
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.77746	test's gamma: 9.79788
Lag: RVlag7 | RMSE: 348.3840 | MAPE: 2.54%

Testing with lag: RVlag15
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000167 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.77745	test's gamma: 9.79763
Lag: RVlag15 | RMSE: 351.4909 | MAPE: 2.34%

Testing with lag: RVlag30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000112 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 829
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2497]	train's gamma: 9.77743	test's gamma: 9.79769
Lag: RVlag30 | RMSE: 344.4443 | MAPE: 2.34%

Testing with lag: BVlag1




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds
Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.77745	test's gamma: 9.79769
Lag: BVlag1 | RMSE: 366.2200 | MAPE: 2.34%

Testing with lag: BVlag7
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000095 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.77747	test's gamma: 9.79773
Lag: BVlag7 | RMSE: 367.3920 | MAPE: 2.45%

Testing with lag: BVlag15
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000125 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.77747	test's gamma: 9.79772
Lag: BVlag15 | RMSE: 345.6674 | MAPE: 2.43%

Testing with lag: BVlag30
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.77744	test's gamma: 9.79772
Lag: BVlag30 | RMSE: 352.0816 | MAPE: 2.36%

Testing with lag: SJVlag7
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 830
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.77743	test's gamma: 9.79767
Lag: SJVlag7 | RMSE: 365.1132 | MAPE: 2.34%

Testing with lag: SJVlag15
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 829
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.77748	test's gamma: 9.79783
Lag: SJVlag15 | RMSE: 381.2782 | MAPE: 2.47%

Testing with lag: SJVlag30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 827
[LightGBM] [Info] Number of data points in the train set: 8228, number of used features: 4
[LightGBM] [Info] Start training from score 9.399805
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.77741	test's gamma: 9.79775
Lag: SJVlag30 | RMSE: 320.9626 | MAPE: 2.25%
All results saved to Excel file: /Users/shannenwibisono/Desktop/-SKRIPSI-/Results LGBM/Results_LightGBM_BTCCall_All.xlsx


In [None]:
#### PUT OPTIONS (BY LAG) ####

In [6]:
# Load the put-option training and testing sheets.
# Both sheets live in the same workbook, so the path is written once and the
# file is opened a single time: passing a list to `sheet_name` makes
# `pd.read_excel` return a dict of DataFrames keyed by sheet name.
put_data_path = "/Users/shannenwibisono/Desktop/-SKRIPSI-/Bitcoin/Bitcoin Training Testing Put.xlsx"
put_sheets = pd.read_excel(
    put_data_path,
    sheet_name=["Training Data Put", "Testing Data Put"],
)
train_data_put = put_sheets["Training Data Put"]
test_data_put = put_sheets["Testing Data Put"]

In [7]:
# LightGBM hyper-parameters used for every put-option lag configuration.
# NOTE(review): `n_estimators` is a LightGBM alias for the number of boosting
# iterations, which the training cell also passes as `num_boost_round=2500`;
# the two values agree, but only one of them is needed.
# NOTE(review): `fair_c`, `huber_delta` and `tweedie_variance_power` look like
# settings for the fair / huber / tweedie objectives and are presumably
# ignored with objective='gamma' — confirm before relying on them.
params = dict(
    boost_from_average=True,
    colsample_bytree=0.5,
    fair_c=1,
    huber_delta=1,
    learning_rate=0.05,
    max_bin=255,
    max_delta_step=0.7,
    max_depth=-1,  # LightGBM spells "no depth limit" as -1 rather than None
    min_child_samples=10,
    min_child_weight=5,
    min_split_gain=0,
    n_estimators=2500,
    num_leaves=16,
    objective='gamma',
    reg_alpha=0,
    reg_lambda=0,
    subsample=1,
    subsample_for_bin=50000,
    subsample_freq=1,
    tweedie_variance_power=1.5,
    early_stopping_round=200,  # early stopping is configured through the params
)

# Lag configurations, evaluated one at a time: each label maps to a
# one-element list holding the lag column of the same name. Insertion order
# is the order in which the configurations are iterated.
lags = {
    column: [column]
    for column in (
        'RVlag1', 'RVlag7', 'RVlag15', 'RVlag30',
        'BVlag1', 'BVlag7', 'BVlag15', 'BVlag30',
        'SJVlag7', 'SJVlag15', 'SJVlag30',
    )
}

In [8]:
# Start a fresh in-memory workbook; its default sheet is renamed "Summary"
# and receives the header row for the per-lag metrics.
wb = Workbook()
ws_summary = wb.active
ws_summary.title = "Summary"
summary_header = ["Lag", "RMSE", "MAPE"]
ws_summary.append(summary_header)

# Where the put-option results workbook is written once all lags are done
output_file = "/Users/shannenwibisono/Desktop/-SKRIPSI-/Results LGBM/Results_LightGBM_BTCPut_All.xlsx"

In [9]:
# Train and evaluate one LightGBM model per lag configuration (put options),
# then write the per-lag predictions plus a metrics summary to the workbook.
#
# NOTE(review): the testing data is also passed as a validation set, so early
# stopping / `best_iteration` are selected on the same rows that the reported
# RMSE and MAPE are computed on — consider a separate validation split.
# NOTE(review): `wb` and `ws_summary` are created in an earlier cell; re-running
# only this cell appends further summary rows and sheets to that same workbook.

# Features shared by every configuration; the lag column is added per run.
base_features = ['Asset Price', 'Maturity', 'Strike']

# The targets do not depend on the lag, so extract them once.
y_train = train_data_put['Last'].values
y_test = test_data_put['Last'].values

for lag_name, features in lags.items():
    print(f"\nTesting with lag: {lag_name}")

    # Feature matrices for this configuration
    selected_features = base_features + features
    x_train = train_data_put[selected_features].values
    x_test = test_data_put[selected_features].values

    # LightGBM datasets; the test set reuses the training set's binning
    train_set = lgb.Dataset(x_train, label=y_train)
    test_set = lgb.Dataset(x_test, label=y_test, reference=train_set)

    # Train the model, reporting the metric on both sets
    model = lgb.train(
        params,
        train_set,
        valid_sets=[train_set, test_set],
        valid_names=["train", "test"],
        num_boost_round=2500
    )

    # Predict with the best iteration found during training
    y_pred = model.predict(x_test, num_iteration=model.best_iteration)

    # RMSE: take the square root explicitly instead of passing `squared=False`,
    # which was deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
    # MAPE in percent. The division is by the actual price, so the result is
    # inf/nan if any actual value is 0.
    mape = float(np.mean(np.abs((y_test - y_pred) / y_test)) * 100)
    print(f"Lag: {lag_name} | RMSE: {rmse:.4f} | MAPE: {mape:.2f}%")

    # Per-observation results; the scalar metrics are broadcast to every row
    results = pd.DataFrame({
        'Date': test_data_put['Time'],  # Adjust 'Time' to your date column name
        'Actual': y_test,
        'Predicted': y_pred,
        'RMSE': rmse,
        'MAPE': mape
    })

    # One summary row per lag
    ws_summary.append([lag_name, rmse, mape])

    # One detail sheet per lag, header row first
    ws = wb.create_sheet(title=lag_name)
    for row in dataframe_to_rows(results, index=False, header=True):
        ws.append(row)

# Save the workbook
wb.save(output_file)
print(f"All results saved to Excel file: {output_file}")


Testing with lag: RVlag1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.08769	test's gamma: 8.97453
Lag: RVlag1 | RMSE: 473.1196 | MAPE: 4.59%

Testing with lag: RVlag7
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000240 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.08769	test's gamma: 8.97479
Lag: RVlag7 | RMSE: 754.1602 | MAPE: 4.55%

Testing with lag: RVlag15
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.08771	test's gamma: 8.97509
Lag: RVlag15 | RMSE: 585.5914 | MAPE: 4.53%

Testing with lag: RVlag30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 831
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.08765	test's gamma: 8.97457
Lag: RVlag30 | RMSE: 854.1427 | MAPE: 4.42%

Testing with lag: BVlag1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.0877	test's gamma: 8.97455
Lag: BVlag1 | RMSE: 655.5402 | MAPE: 4.62%

Testing with lag: BVlag7
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000035 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.0877	test's gamma: 8.97444
Lag: BVlag7 | RMSE: 751.1933 | MAPE: 4.49%

Testing with lag: BVlag15




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000199 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds
Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.08767	test's gamma: 8.97471
Lag: BVlag15 | RMSE: 604.3342 | MAPE: 4.61%

Testing with lag: BVlag30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000178 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.08755	test's gamma: 8.97422
Lag: BVlag30 | RMSE: 413.8038 | MAPE: 4.13%

Testing with lag: SJVlag7
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 832
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2499]	train's gamma: 9.08759	test's gamma: 8.97428
Lag: SJVlag7 | RMSE: 466.5006 | MAPE: 4.21%

Testing with lag: SJVlag15
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 831
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.08772	test's gamma: 8.97501
Lag: SJVlag15 | RMSE: 803.4681 | MAPE: 4.48%

Testing with lag: SJVlag30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 828
[LightGBM] [Info] Number of data points in the train set: 8446, number of used features: 4
[LightGBM] [Info] Start training from score 9.242772
Training until validation scores don't improve for 200 rounds




Did not meet early stopping. Best iteration is:
[2500]	train's gamma: 9.0876	test's gamma: 8.97449
Lag: SJVlag30 | RMSE: 580.9833 | MAPE: 4.52%
All results saved to Excel file: /Users/shannenwibisono/Desktop/-SKRIPSI-/Results LGBM/Results_LightGBM_BTCPut_All.xlsx
