In [28]:
# importing libararies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings

warnings.filterwarnings('ignore')

In [29]:
# Load cleaned data
df = pd.read_csv('../data/cleaned_cryptocurrency_data.csv')

In [30]:
# Drop non-numeric or unneeded features
df = df.drop(['coin', 'symbol', 'date'], axis=1)

In [31]:
# Define features and target
X = df.drop('liquidity_ratio', axis=1)
y = df['liquidity_ratio']

In [32]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [33]:
# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [34]:
# XGBoost Model + Hyperparameter Tuning

In [40]:
X_train_scaled.shape,X_test_scaled.shape

((793, 6), (199, 6))

In [35]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

xgb = XGBRegressor(objective='reg:squarederror', random_state=42)

param_grid = {
    'n_estimators': [300, 500],
    'max_depth': [4, 6, 8],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.7, 0.9],
    'colsample_bytree': [0.7, 0.9]
}

grid_search = GridSearchCV(estimator=xgb, param_grid=param_grid, cv=5, scoring='r2', n_jobs=-1, verbose=1)
grid_search.fit(X_train_scaled, y_train)

best_xgb = grid_search.best_estimator_
print("Best XGBoost Parameters:", grid_search.best_params_)


Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best XGBoost Parameters: {'colsample_bytree': 0.9, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 500, 'subsample': 0.9}


In [36]:
# Bagging with XGBoost
from sklearn.ensemble import BaggingRegressor

bagging_xgb = BaggingRegressor(estimator=best_xgb, n_estimators=10, random_state=42, n_jobs=-1)
bagging_xgb.fit(X_train_scaled, y_train)

In [37]:
# Evaluation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

y_pred = bagging_xgb.predict(X_test_scaled)

print("Final Model Evaluation")
print("R² Score:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("MAE:", mean_absolute_error(y_test, y_pred))

Final Model Evaluation
R² Score: 0.7317255194455199
RMSE: 0.06334605442153628
MAE: 0.01838998357599074


In [38]:
import joblib

joblib.dump(bagging_xgb, '../models/trained_model.pkl')
joblib.dump(scaler, '../models/xgb_scaler.pkl')

['../models/xgb_scaler.pkl']

In [27]:
X

Unnamed: 0,price,1h,24h,7d,24h_volume,mkt_cap
0,40859.460000,0.022,0.030,0.055,3.539076e+10,7.709915e+11
1,2744.410000,0.024,0.034,0.065,1.974870e+10,3.271044e+11
2,1.000000,-0.001,-0.001,0.000,5.793497e+10,7.996516e+10
3,383.430000,0.018,0.028,0.004,1.395854e+09,6.404382e+10
4,0.999874,-0.001,0.000,-0.000,3.872274e+09,5.222214e+10
...,...,...,...,...,...,...
987,0.055426,0.016,-0.003,-0.088,2.976839e+06,6.809024e+07
988,0.037961,0.002,-0.012,-0.054,3.667870e+05,6.782627e+07
989,0.069003,-0.000,0.008,-0.037,1.363376e+07,6.776284e+07
990,0.464613,-0.003,0.014,0.019,9.398219e+06,6.738822e+07
