In [1]:
!pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=afa0b5a64992f57ce9659c37f541005551acdfb3eebb0909b0dedb40f2ee4b4c
  Stored in directory: /root/.cache/pip/wheels/5f/67/4f/8a9f252836e053e532c6587a3230bc72a4deb16b03a829610b
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [2]:
import pandas as pd
import numpy as np
import warnings
import ta
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
import xgboost as xgb
from concurrent.futures import ThreadPoolExecutor
import gc

warnings.filterwarnings('ignore')

class TimeSeriesModel:
   def __init__(self):
       self.sarimax_model = None
       self.xgb_model = None
       self.scaler = StandardScaler()

   def _calculate_momentum(self, df):
       result = pd.DataFrame(index=df.index)
       result['rsi'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()
       result['macd'] = ta.trend.MACD(df['close'], window_fast=12).macd()
       result['atr'] = ta.volatility.AverageTrueRange(
           df['high'], df['low'], df['close'], window=14
       ).average_true_range()
       return result

   def _calculate_volume(self, df):
       result = pd.DataFrame(index=df.index)
       result['volume_ma_10'] = df['volume'].rolling(window=10, min_periods=1).mean()
       result['volume_ratio'] = df['volume'] / result['volume_ma_10']
       return result

   def calculate_technical_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
       df = df.copy()

       # Shift base data
       base_cols = ['close', 'high', 'low', 'volume']
       df[base_cols] = df[base_cols].shift(1)

       # Returns calculations
       df['returns'] = df['close'].pct_change()

       # Parallel indicator calculation
       with ThreadPoolExecutor(max_workers=2) as executor:
           futures = {
               executor.submit(self._calculate_momentum, df): 'momentum',
               executor.submit(self._calculate_volume, df): 'volume'
           }

           for future in futures:
               result = future.result()
               df = pd.concat([df, result], axis=1)

       # Time features
       timestamp = pd.to_datetime(df['timestamp'], unit='s')
       df['hour'] = timestamp.dt.hour
       df['day_of_week'] = timestamp.dt.dayofweek

       return df.fillna(0)

   def prepare_features(self, df: pd.DataFrame) -> np.ndarray:
       exog_columns = [
           'returns',  # price movement
           'volume_ratio',  # volume trend
           'rsi',  # momentum
           'macd',  # trend
           'atr',  # volatility
           'hour', 'day_of_week'  # time features
       ]

       exog = df[exog_columns]
       return self.scaler.fit_transform(exog) if self.sarimax_model is None else self.scaler.transform(exog)

   def train(self, train_data: pd.DataFrame):
       df = self.calculate_technical_indicators(train_data)

       # Sample last 50K rows for SARIMAX
       sarimax_df = df.iloc[-50000:].copy()
       sarimax_exog = self.prepare_features(sarimax_df)

       self.sarimax_model = SARIMAX(
           sarimax_df['close'],
           exog=sarimax_exog,
           order=(1, 1, 0),
           seasonal_order=(0, 1, 1, 12)
       ).fit(disp=False, method='powell')

       # Full data for XGBoost
       X = self.prepare_features(df)
       y = (df['close'].diff() > 0).astype(int)

       self.xgb_model = xgb.XGBClassifier(
           n_estimators=200,
           learning_rate=0.05,
           max_depth=6,
           tree_method='gpu_hist',
           predictor='gpu_predictor',
           grow_policy='lossguide',
           max_leaves=32,
           n_jobs=-1,
           subsample=0.9,
           colsample_bytree=0.9,
           min_child_weight=3,
           gamma=1,
           eval_metric='logloss',
           random_state=42
       )
       self.xgb_model.fit(X, y)

       gc.collect()

   def predict(self, future_df: pd.DataFrame) -> np.ndarray:
       df = self.calculate_technical_indicators(future_df)
       exog = self.prepare_features(df)

       sarimax_forecast = self.sarimax_model.forecast(steps=len(df), exog=exog)
       xgb_pred = self.xgb_model.predict_proba(exog)[:, 1]

       return ((sarimax_forecast.diff() > 0).astype(float) * 0.4 + xgb_pred * 0.6 > 0.5).astype(int)


def main():
   try:
       chunk_size = 500000
       chunks = []

       for chunk in pd.read_csv('/kaggle/input/directional-forecasting-cryptocurrencies/train.csv',
                                usecols=['timestamp', 'close', 'high', 'low', 'volume'],
                                chunksize=chunk_size):
           chunks.append(chunk)

       train_df = pd.concat(chunks)
       chunks = None
       gc.collect()

       test_df = pd.read_csv('/kaggle/input/directional-forecasting-cryptocurrencies/test.csv',
                             usecols=['timestamp', 'close', 'high', 'low', 'volume'])

       print("Training model...")
       model = TimeSeriesModel()
       model.train(train_df)

       print("Generating predictions...")
       predictions = model.predict(test_df)

       pd.DataFrame({
           'row_id': range(len(predictions)),
           'target': predictions
       }).to_csv('submission.csv', index=False)

       print(f"Predictions generated: {len(predictions)}")

   except Exception as e:
       print(f"Error: {str(e)}")


if __name__ == "__main__":
   main()

Training model...
Generating predictions...
Predictions generated: 909617


In [None]:
0.49855