In [None]:
# Standard library
import warnings
from pathlib import Path

# Basic Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Time Series & ML
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
import xgboost as xgb

# Deep Learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler

# Ignore warnings
warnings.filterwarnings('ignore')


In [None]:
# Load & Explore Data
# Source CSV locations (replace with actual file paths)
price_csv_path = '../data/btc_eth_minute_data.csv'  # Example path
news_csv_path = '../data/crypto_news.csv'

# Read the price data first, then the news data
btc_df = pd.read_csv(price_csv_path)
news_df = pd.read_csv(news_csv_path)

# Preview the first rows of each frame, prices before news
for frame in (btc_df, news_df):
    print(frame.head())


In [None]:
# Preprocess Data
# The whole step is skipped once 'Timestamp' is already the index, so re-running
# this cell neither raises KeyError (the column is gone after the first run) nor
# trims another batch of warm-up rows from the already-processed frame.
if btc_df.index.name != 'Timestamp':
    # Convert timestamps and use them as the index
    btc_df = (
        btc_df
        .assign(Timestamp=pd.to_datetime(btc_df['Timestamp']))
        .set_index('Timestamp')
    )

    # Create new features from the close price
    close_price = btc_df['Close']
    btc_df = btc_df.assign(**{
        'Price Change %': close_price.pct_change(),
        'Rolling Volatility': close_price.rolling(window=7).std(),
        '10MA': close_price.rolling(window=10).mean(),
        '30MA': close_price.rolling(window=30).mean(),
    })

    # Drop rows with missing values (this includes the rolling-window warm-up rows)
    btc_df = btc_df.dropna()


In [None]:
# Train-Test Split for LSTM
LOOKBACK = 60          # number of past time steps fed to the LSTM per sample
TRAIN_FRACTION = 0.8   # chronological share of the windows used for training

close_frame = btc_df[['Close']]
n_windows = len(close_frame) - LOOKBACK
n_train = int(TRAIN_FRACTION * n_windows)
if n_train < 1:
    raise ValueError(
        f"Need more than {LOOKBACK} rows of 'Close' to build LSTM windows, "
        f"got {len(close_frame)}"
    )

# Fit the scaler only on rows that training windows can see (inputs and targets
# of the first n_train windows end at row LOOKBACK + n_train - 1). Fitting on the
# full series would leak the test period's min/max into training.
scaler = MinMaxScaler()
scaler.fit(close_frame.iloc[:LOOKBACK + n_train])
scaled_close = scaler.transform(close_frame)

# Each sample is LOOKBACK consecutive scaled closes; its target is the next one.
X = np.array([scaled_close[i - LOOKBACK:i] for i in range(LOOKBACK, len(scaled_close))])
y = scaled_close[LOOKBACK:]

# Split chronologically (no shuffling)
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]


In [None]:
# LSTM Model
# Two stacked 50-unit LSTM layers with dropout, followed by a single-output dense layer.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Stop when validation loss has not improved for 5 epochs and keep the best weights
# rather than those of the last epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Validate on the tail of the training data instead of the test set, so the test
# set stays untouched until evaluation. Keras takes validation_split from the
# last samples before shuffling, i.e. the most recent training windows.
history = model.fit(X_train, y_train, validation_split=0.1,
                    epochs=20, batch_size=64, callbacks=[early_stop])


In [None]:
# XGBoost Model
# Feature Engineering for XGBoost
features = ['Price Change %', 'Rolling Volatility', '10MA', '30MA']
target = 'Close'

# Lag the features by one step: each of them is computed from the same row's
# Close, so using them unshifted would let the model see the value it predicts.
xgb_frame = btc_df[features].shift(1)
xgb_frame[target] = btc_df[target].to_numpy()

# Drop missing rows on a copy restricted to the modelling columns. An in-place
# dropna over all of btc_df would discard nearly every row if this cell is re-run
# after the ARIMA cell has added its mostly-NaN 'forecast_arima' column.
xgb_frame = xgb_frame.dropna()
X = xgb_frame[features]
y = xgb_frame[target]

# Chronological split (shuffle=False keeps the last 20% as the test period)
X_train_xgb, X_test_xgb, y_train_xgb, y_test_xgb = train_test_split(X, y, test_size=0.2, shuffle=False)

xgb_model = xgb.XGBRegressor(objective='reg:squarederror')
xgb_model.fit(X_train_xgb, y_train_xgb)

y_pred_xgb = xgb_model.predict(X_test_xgb)


In [None]:
# ARIMA Model
close_series = btc_df['Close']

# Test stationarity (Augmented Dickey-Fuller): element 0 is the test statistic,
# element 1 the p-value
adf_result = adfuller(close_series)
print('ADF Statistic:', adf_result[0])
print('p-value:', adf_result[1])

# Train ARIMA
model_arima = ARIMA(close_series, order=(5,1,0))
result_arima = model_arima.fit()

# Predict positions 200..300 of the fitted series. `typ='levels'` was dropped:
# it belonged to the removed statsmodels.tsa.arima_model API and is not a
# parameter of this model's predict(), which already returns values on the
# original (undifferenced) scale. The assignment aligns on the index, so rows
# not covered by the prediction are NaN.
btc_df['forecast_arima'] = result_arima.predict(start=200, end=300)


In [None]:
# Evaluation
# LSTM: predict on the held-out windows and map both predictions and targets
# back to price units with the fitted scaler
predicted_prices = scaler.inverse_transform(model.predict(X_test))
actual_prices = scaler.inverse_transform(y_test)

# Report RMSE and MAE for each model, LSTM first and XGBoost second
for rmse_label, mae_label, truth, estimate in (
    ("LSTM RMSE:", "LSTM MAE:", actual_prices, predicted_prices),
    ("XGBoost RMSE:", "XGBoost MAE:", y_test_xgb, y_pred_xgb),
):
    print(rmse_label, np.sqrt(mean_squared_error(truth, estimate)))
    print(mae_label, mean_absolute_error(truth, estimate))


In [None]:
# Save Figures
# LSTM Loss Curve: training vs. validation loss per epoch
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='val')
ax.set_title("LSTM Loss Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()

# Create the output directory first; savefig raises FileNotFoundError if it is missing.
loss_curve_path = Path('../graph/lstm_loss_curve.png')
loss_curve_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(loss_curve_path)
