In [85]:
import xgboost as xgb
import onnxmltools
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [86]:

dtype_dict ={
    'volumefrom': 'float64', 
    'volumeto': 'float64',
    'open': 'float64', 
    'high': 'float64', 
    'low': 'float64', 
    'close': 'float64'}
df = pd.read_csv('btc_usd_hourly_data.csv',dtype=dtype_dict, index_col='time', parse_dates=True)

In [87]:
df = df.loc[:, (df != df.iloc[0]).any()]
df = df.dropna(axis=1, how='all')
df = df[(df.T != 0).any()]

In [88]:
print(df.head())

print(df.dtypes)
print(df.isnull().sum())

                         high       low      open  volumefrom      volumeto  \
time                                                                          
2024-03-17 04:00:00  66531.61  66249.84  66509.15      499.19  3.313809e+07   
2024-03-17 05:00:00  66501.87  65980.53  66329.12      603.61  3.996655e+07   
2024-03-17 06:00:00  66330.24  64931.39  66205.10     1665.73  1.093584e+08   
2024-03-17 07:00:00  65716.20  64529.30  64947.12     2896.62  1.887179e+08   
2024-03-17 08:00:00  66657.65  65254.03  65577.99     1561.00  1.029646e+08   

                        close  
time                           
2024-03-17 04:00:00  66329.12  
2024-03-17 05:00:00  66205.10  
2024-03-17 06:00:00  64947.12  
2024-03-17 07:00:00  65577.99  
2024-03-17 08:00:00  66456.91  
high          float64
low           float64
open          float64
volumefrom    float64
volumeto      float64
close         float64
dtype: object
high          0
low           0
open          0
volumefrom    0
volumeto    

In [89]:
df.fillna(method='ffill', inplace=True)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop('close', axis=1))

scaled_df = pd.DataFrame(scaled_features, index=df.index, columns=df.columns[:-1])
scaled_df['close'] = df['close']

In [90]:
scaled_df['ma_5'] = scaled_df['close'].rolling(window=5).mean()
scaled_df['ma_10'] = scaled_df['close'].rolling(window=10).mean()
scaled_df['volatility'] = scaled_df['close'].rolling(window=10).std()
scaled_df.dropna(inplace=True)

In [91]:
feature_names = [f'f{i}' for i in range(scaled_df.shape[1] -1)]
scaled_df.columns = feature_names + ['close']

In [92]:
from sklearn.model_selection import train_test_split
X = scaled_df.drop('close', axis=1)
y = scaled_df['close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [93]:
import xgboost as xgb
xgb_reg = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, max_depth=3, learning_rate=0.1)
xgb_reg.fit(X_train, y_train)

In [94]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

y_pred = xgb_reg.predict(X_test)
print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"MSE: {mean_squared_error(y_test, y_pred)}")
print(f"R2: {r2_score(y_test, y_pred)}")

MAE: 55.24256845012646
MSE: 41175.96052407855
R2: 0.6778526229401791


In [95]:
def convert_to_onnx(model, onnx_file_path, input_size):
    initial_type = [('float_input', FloatTensorType([None, input_size]))]
    onnx_model = onnxmltools.convert_xgboost(model, initial_types=initial_type)
    onnxmltools.utils.save_model(onnx_model, onnx_file_path)
    print(f"Model has been converted to ONNX and saved as {onnx_file_path}")

# Defining the ONNX file path and input size
onnx_file_path = "xgb_model.onnx"
input_size = X_train.shape[1]

# Converting the model
convert_to_onnx(xgb_reg, onnx_file_path, input_size)

Model has been converted to ONNX and saved as xgb_model.onnx
