In [None]:
import os

# Enumerate every file below the Kaggle input directory and print its full path.
input_files = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, r2_score
from scipy.stats import pearsonr, spearmanr
import polars as pl
import numpy as np
from tqdm import tqdm
from datetime import datetime

# 1 Data

In [None]:
# Load the training set and order it chronologically (ascending is the
# default sort direction, so no explicit flag is needed).
train_path = "/kaggle/input/drw-crypto-market-prediction/train.parquet"
data = pl.read_parquet(train_path).sort("timestamp")
data

# 2 Pre-Processing / Feature Engineering

In [None]:
# Training window; both bounds are inclusive.
start = datetime(2023, 3, 1, 0, 0)
end   = datetime(2024, 2, 29, 23, 59, 59)

# Columns used as model inputs.
features = ['X35', 'X96', 'X113', 'X126', 'X261', 'X539', 'X666', 'X690', 'X696']

# Filter once and derive both the features and the label from the same frame,
# so the two are guaranteed to be row-aligned (the predicate used to be
# copy-pasted and evaluated twice).
train_window = data.filter(
    pl.col("timestamp").is_between(start, end, closed="both")
)
assert train_window.height > 0, "no training rows fall inside [start, end]"

X_train = train_window.select(features)
y_train = train_window["label"]

# NumPy views of the training data, consumed by the model cell.
X_train_np = X_train.to_numpy()
y_train_np = y_train.to_numpy()

X_train

In [None]:
# Display the label series selected for the training window.
y_train

# 3 Model

In [None]:
from xgboost import XGBRegressor

# XGBoost regressor trained on the GPU.
# XGBoost 2.0 deprecated tree_method="gpu_hist" and removed the `predictor`
# parameter; the supported way to request GPU training and prediction is
# tree_method="hist" together with device="cuda".
# NOTE(review): max_depth=50 allows very deep trees — confirm this is intended.
model = XGBRegressor(
    n_estimators=100,
    max_depth=50,
    min_child_weight=10,
    random_state=42,
    tree_method="hist",
    device="cuda",
    n_jobs=-1,
    verbosity=0,
)
model.fit(X_train_np, y_train_np)

# 4 Prediction

## 4.1 Preprocess Test

In [None]:
# Read the test set and attach a 1-based "ID" row index in a single expression.
test = (
    pl.read_parquet("/kaggle/input/drw-crypto-market-prediction/test.parquet")
    .with_row_index("ID", offset=1)
)

# Keep only the columns the model was trained on.
X_test = test.select(features)
X_test

## 4.2 Predict

In [None]:
# Score the test features with the fitted model.
test_features_np = X_test.to_numpy()
y_pred = model.predict(test_features_np)

# Pair each prediction with the ID of its test row.
submission = pl.DataFrame({"ID": test["ID"], "prediction": y_pred})
submission

# 5 Submission

In [None]:
# Write the submission frame as CSV into the Kaggle working directory.
submission_path = "/kaggle/working/submission.csv"
submission.write_csv(submission_path)