In [1]:
import logging
import os
from datetime import datetime, timedelta

import numpy as np
import optuna
import pandas as pd
import requests
import tensorflow as tf
from coinbase.rest import RESTClient
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Initialize Coinbase client
# SECURITY: the API key name and EC private key used to be hardcoded in this cell.
# Any key that was ever saved in (or shared with) this notebook must be treated as
# compromised: revoke it and issue a new one. Credentials are now read from the
# environment so they never live in the notebook file or its history.
api_key = os.environ.get("COINBASE_API_KEY")
api_secret = os.environ.get("COINBASE_API_SECRET")
if api_secret:
    # PEM keys stored in an env var often carry literal "\n" sequences; turn them
    # back into real newlines. A value that already has real newlines is unchanged.
    api_secret = api_secret.replace("\\n", "\n")

if api_key and api_secret:
    client = RESTClient(api_key=api_key, api_secret=api_secret)
else:
    # The training pipeline defined in this cell only calls the Exchange candles URL
    # without any auth headers, so missing credentials are not fatal here.
    client = None
    logging.warning(
        "COINBASE_API_KEY / COINBASE_API_SECRET are not set; "
        "the authenticated Coinbase client is unavailable."
    )

# Parameters
trade_coin = "SHIB-USD"
lookback_window = 30  # rows per LSTM input window and rolling-feature window
checkpoint_file = "checkpoints/shib_model.keras"
granularity = 900  # 15-minute candles
scaler = MinMaxScaler()

# Fetch historical data
def get_product_candles_all(product_id, start, end, granularity):
    """Download candles for ``product_id`` between ``start`` and ``end``.

    The range is walked in consecutive windows of ``granularity * 300`` seconds
    (300 data points max per request), issuing one GET per window against the
    Coinbase Exchange candles URL.

    Args:
        product_id: Product identifier placed in the URL path (e.g. "SHIB-USD").
        start: Range start as a string accepted by ``datetime.fromisoformat``.
        end: Range end as a string accepted by ``datetime.fromisoformat``.
        granularity: Candle width in seconds; must be positive.

    Returns:
        A DataFrame indexed by ``time`` (parsed from epoch seconds) with columns
        ``low, high, open, close, volume``, sorted ascending with duplicate
        timestamps removed. An empty DataFrame when nothing was fetched.

    Raises:
        ValueError: If ``granularity`` is not positive (the window would never
            advance).

    Note:
        A request failure is logged and stops pagination; rows fetched before the
        failure are still returned.
    """
    if granularity <= 0:
        raise ValueError(f"granularity must be positive, got {granularity!r}")

    logging.info(f"Fetching historical data for {product_id} from {start} to {end} with granularity {granularity}.")
    url = f"https://api.exchange.coinbase.com/products/{product_id}/candles"
    headers = {"Content-Type": "application/json"}
    request_timeout_seconds = 30  # never let a stalled connection hang the cell
    window = timedelta(seconds=granularity * 300)  # 300 data points max
    range_end = datetime.fromisoformat(end)  # parsed once instead of on every iteration
    all_candles = []

    current_start = datetime.fromisoformat(start)
    while current_start < range_end:
        # Ensure current_end does not exceed the requested end time
        current_end = min(current_start + window, range_end)

        params = {"start": current_start.isoformat(), "end": current_end.isoformat(), "granularity": granularity}
        try:
            logging.info(f"Requesting data from {current_start.isoformat()} to {current_end.isoformat()}.")
            response = requests.get(url, headers=headers, params=params, timeout=request_timeout_seconds)
            response.raise_for_status()
            candles = response.json()
        except requests.exceptions.RequestException as e:
            logging.error(f"Error fetching product candles: {e}")
            break

        if not candles:
            logging.warning(f"No data returned for {current_start.isoformat()} to {current_end.isoformat()}.")
        all_candles.extend(candles)
        current_start = current_end

    if not all_candles:
        logging.error("No data fetched. Exiting historical data fetch.")
        return pd.DataFrame()

    df = pd.DataFrame(all_candles, columns=["time", "low", "high", "open", "close", "volume"])
    df["time"] = pd.to_datetime(df["time"], unit="s")
    # Adjacent windows share their boundary timestamp, so the same candle can come
    # back twice; pages are also appended in request order whatever the order inside
    # each page. Downstream pct_change/rolling features need a unique, ascending
    # time index, so enforce it here.
    df = df.drop_duplicates(subset="time").set_index("time").sort_index()
    logging.info(f"Historical data fetch complete. Total rows fetched: {len(df)}.")
    return df

# Feature engineering
def feature_engineering(df):
    """Add return, volatility and moving-average columns derived from ``close``.

    The frame is modified in place: ``returns`` (percent change of ``close``),
    ``volatility`` (rolling standard deviation) and ``moving_avg`` (rolling mean)
    are added, then every row containing a NaN is dropped. The rolling window
    length is the module-level ``lookback_window``.

    Args:
        df: DataFrame with a ``close`` column.

    Returns:
        The same ``df`` object, after mutation.
    """
    logging.info("Starting feature engineering.")
    close_prices = df["close"]
    rolling_close = close_prices.rolling(window=lookback_window)

    df["returns"] = close_prices.pct_change()
    df["volatility"] = rolling_close.std()
    df["moving_avg"] = rolling_close.mean()

    # The first rows have no complete window (and no previous close), so they are NaN.
    df.dropna(inplace=True)
    logging.info(f"Feature engineering complete. Final dataset shape: {df.shape}.")
    return df

# Prepare data for LSTM
def prepare_data(df):
    """Scale ``df`` and cut it into sliding windows for sequence training.

    The module-level ``scaler`` is fitted on all of ``df.values`` and used to
    transform it. Each sample is ``lookback_window`` consecutive scaled rows; its
    target is the scaled value in column 0 of the row right after the window.

    Args:
        df: DataFrame whose values are passed to the scaler.

    Returns:
        Tuple ``(x, y)`` of numpy arrays built from the windows and their targets.
    """
    logging.info("Preparing data for LSTM.")
    # NOTE(review): the scaler is fitted on the whole dataset before any
    # train/validation split happens downstream - TODO confirm this is acceptable.
    scaled = scaler.fit_transform(df.values)
    target_rows = range(lookback_window, len(scaled))

    windows = [scaled[row - lookback_window:row] for row in target_rows]
    # NOTE(review): column 0 is whichever column comes first in df; for frames built
    # by get_product_candles_all that is "low", not "close" - confirm intended target.
    targets = [scaled[row, 0] for row in target_rows]

    logging.info(f"Data preparation complete. Total samples: {len(windows)}.")
    return np.array(windows), np.array(targets)

# Build LSTM model
def build_lstm_model(input_shape, units=50, dropout=0.2):
    """Create and compile a two-layer LSTM regressor.

    Architecture: LSTM (returns sequences) -> Dropout -> LSTM -> Dropout ->
    Dense(1, linear). The model is compiled with a default-configured Adam
    optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape passed to the first LSTM layer as ``input_shape``.
        units: Number of units in each LSTM layer.
        dropout: Rate used by both Dropout layers.

    Returns:
        The compiled Sequential model.
    """
    logging.info(f"Building LSTM model with input shape {input_shape}, units={units}, dropout={dropout}.")
    layer_stack = [
        # First recurrent layer keeps the full sequence so the second LSTM can consume it.
        LSTM(units, return_sequences=True, input_shape=input_shape),
        Dropout(dropout),
        LSTM(units, return_sequences=False),
        Dropout(dropout),
        Dense(1, activation="linear"),
    ]
    model = Sequential(layer_stack)

    optimizer = tf.keras.optimizers.Adam()
    model.compile(optimizer=optimizer, loss="mean_squared_error")
    logging.info("LSTM model build complete.")
    return model

# Optuna objective function for hyperparameter tuning
def objective(trial):
    """Train a short-run LSTM for one Optuna trial and return its validation loss.

    Reads the module-level ``x`` and ``y`` arrays (set by ``train_model``), samples
    ``units``, ``dropout`` and ``learning_rate`` from the trial, fits for 5 epochs
    on the first 80% of the samples and evaluates on the last 20%.

    Args:
        trial: Optuna trial used to sample hyperparameters.

    Returns:
        The mean-squared-error loss on the held-out tail of the data.
    """
    logging.info("Starting hyperparameter tuning with Optuna.")
    units = trial.suggest_int("units", 30, 100)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)

    # Release Keras global state left behind by earlier trials so memory does not
    # keep growing across the study.
    tf.keras.backend.clear_session()

    # Chronological hold-out. Neighbouring samples are windows that overlap in all
    # but one row, so a shuffled split would put near-copies of validation samples
    # in the training set. shuffle=False keeps the last 20% as validation, matching
    # how Keras' validation_split (used for the final fit) takes the tail.
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, shuffle=False)

    model = build_lstm_model((x_train.shape[1], x_train.shape[2]), units=units, dropout=dropout)
    # build_lstm_model compiles with a default Adam; recompile to apply the sampled rate.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss="mean_squared_error")
    model.fit(x_train, y_train, epochs=5, batch_size=32, verbose=0)

    val_loss = model.evaluate(x_val, y_val, verbose=0)
    logging.info(f"Validation loss: {val_loss}.")
    return val_loss

# Training pipeline
def train_model():
    """Fetch a year of candles, tune hyperparameters and train the final model.

    Steps: download the last 365 days of ``trade_coin`` candles, build features,
    window the data into the module-level ``x``/``y`` arrays (read by
    ``objective``), run a 20-trial Optuna study, then train a model with the best
    parameters for 50 epochs, checkpointing the best one to ``checkpoint_file``.

    Returns:
        None. Returns early (after logging an error) when no data is available or
        too few rows remain to form a single training window.
    """
    logging.info("Starting model training pipeline.")
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(days=365)
    df = get_product_candles_all(trade_coin, start_time.isoformat(), end_time.isoformat(), granularity)
    if df.empty:
        logging.error("No historical data available. Exiting training.")
        return

    df = feature_engineering(df)
    # objective() has a fixed (trial) signature, so the arrays are shared via globals.
    global x, y
    x, y = prepare_data(df)
    if len(x) == 0:
        # Without this guard the later x.shape[1] lookups fail with an IndexError.
        logging.error("Not enough rows to build a single training window. Exiting training.")
        return

    # Hyperparameter tuning
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)
    best_params = study.best_params
    logging.info(f"Best parameters: {best_params}.")

    # Train final model
    model = build_lstm_model((x.shape[1], x.shape[2]), units=best_params["units"], dropout=best_params["dropout"])
    # build_lstm_model compiles with a default Adam; recompile so the final fit uses
    # the learning rate the study selected rather than silently discarding it.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(best_params["learning_rate"]),
        loss="mean_squared_error",
    )
    checkpoint = ModelCheckpoint(filepath=checkpoint_file, save_best_only=True, save_weights_only=False, verbose=1)
    model.fit(x, y, epochs=50, batch_size=32, validation_split=0.1, callbacks=[checkpoint], verbose=1)
    logging.info(f"Model training complete. Checkpoint saved to {checkpoint_file}.")

# Entry point: run the full training pipeline when this code executes as the main
# module (the log output captured for this cell shows it running in the notebook).
if __name__ == "__main__":
    train_model()


2025-01-01 18:28:34,009 - INFO - Starting model training pipeline.
2025-01-01 18:28:34,025 - INFO - Fetching historical data for SHIB-USD from 2024-01-03T00:28:34.025262 to 2025-01-02T00:28:34.025262 with granularity 900.
2025-01-01 18:28:34,025 - INFO - Requesting data from 2024-01-03T00:28:34.025262 to 2024-01-06T03:28:34.025262.
2025-01-01 18:28:34,197 - INFO - Requesting data from 2024-01-06T03:28:34.025262 to 2024-01-09T06:28:34.025262.
2025-01-01 18:28:34,699 - INFO - Requesting data from 2024-01-09T06:28:34.025262 to 2024-01-12T09:28:34.025262.
2025-01-01 18:28:34,997 - INFO - Requesting data from 2024-01-12T09:28:34.025262 to 2024-01-15T12:28:34.025262.
2025-01-01 18:28:35,122 - INFO - Requesting data from 2024-01-15T12:28:34.025262 to 2024-01-18T15:28:34.025262.
2025-01-01 18:28:35,607 - INFO - Requesting data from 2024-01-18T15:28:34.025262 to 2024-01-21T18:28:34.025262.
2025-01-01 18:28:36,124 - INFO - Requesting data from 2024-01-21T18:28:34.025262 to 2024-01-24T21:28:34.02

Epoch 1/50
[1m984/984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.0044  
Epoch 1: val_loss improved from inf to 0.00024, saving model to checkpoints/shib_model.keras
[1m984/984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 80ms/step - loss: 0.0044 - val_loss: 2.4187e-04
Epoch 2/50
[1m984/984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 7.8821e-04  
Epoch 2: val_loss improved from 0.00024 to 0.00016, saving model to checkpoints/shib_model.keras
[1m984/984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 79ms/step - loss: 7.8807e-04 - val_loss: 1.5899e-04
Epoch 3/50
[1m984/984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 4.6787e-04  
Epoch 3: val_loss did not improve from 0.00016
[1m984/984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 71ms/step - loss: 4.6787e-04 - val_loss: 1.6928e-04
Epoch 4/50
[1m983/984[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 74ms/step - l

2025-01-01 21:18:08,192 - INFO - Model training complete. Checkpoint saved to checkpoints/shib_model.keras.


In [2]:
from tensorflow.keras.models import load_model

# Sanity check: confirm the saved checkpoint can be deserialized.
# The path is spelled out so this cell can run without re-running training.
try:
    model = load_model("checkpoints/shib_model.keras")
except Exception as exc:
    # A load failure is reported rather than raised; `model` is left unset.
    print(f"Error loading model: {exc}")
else:
    print("Model loaded successfully.")


Model loaded successfully.
