## Rennes Data Challenge 2023 

Mathis Derenne

---

### SARIMAX Model

We will train a SARIMAX model that uses past values of the series and exogenous variables to predict Close_BTC values.

We will implement a preprocessing pipeline as follows:
- Impute missing values
- Scale the data using RobustScaler
- Apply PCA to reduce the dimensionality


In [21]:
import pandas as pd
import numpy as np
from utils import load_data, validation_split
import warnings
# Install a global "ignore" filter: every Python warning is suppressed from here on,
# presumably to keep the output of the cells below readable.
# NOTE(review): this blanket filter would also hide any warnings raised while the
# model is being fitted (e.g. convergence problems) — consider narrowing it to
# specific categories/modules.
warnings.filterwarnings('ignore')

In [22]:
# Load the exogenous feature table (X) and the target series (y).
X, y = load_data()

# Chronological hold-out: observations indexed strictly before `validation_split`
# form the training set, observations at or after it form the test set.
X_train, X_test = X[X.index < validation_split], X[X.index >= validation_split]
y_train, y_test = y[y.index < validation_split], y[y.index >= validation_split]

# Display the shape of each partition as a quick sanity check.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((1313, 199), (1313,), (148, 199), (148,))

### Preprocessing

#### Numeric preprocessing

In [23]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA

# Names of all numeric columns of X; only these are routed through the pipeline below.
numeric_features = X.select_dtypes(include=[np.number]).columns.to_list()

# Numeric processing chain, applied in this order:
#   1. fill missing values from the 5 nearest neighbours,
#   2. scale with RobustScaler,
#   3. project onto principal components. With a float `n_components` and
#      svd_solver='full', PCA keeps as many components as are needed to explain
#      that fraction of the variance (0.6 here).
# NOTE(review): the KNN distances are computed on the unscaled features since
# imputation runs before scaling — confirm this ordering is intended.
numeric_transformer = Pipeline(
    [
        ('imputer', KNNImputer(n_neighbors=5)),
        ('scaler', RobustScaler()),
        ('pca', PCA(n_components=0.6, svd_solver='full')),
    ]
)

# Wrap the chain in a ColumnTransformer so it is applied to the numeric columns only.
preprocessor = ColumnTransformer(
    [
        ('num', numeric_transformer, numeric_features),
    ]
)

### SARIMAX Model

In [24]:
from skforecast.ForecasterSarimax import ForecasterSarimax
from skforecast.model_selection_sarimax import backtesting_sarimax
from skforecast.Sarimax import Sarimax
from skforecast.model_selection import backtesting_forecaster

# SARIMAX regressor with order (2, 1, 2) and an all-zero seasonal order, i.e. no
# seasonal terms. Exogenous features go through `preprocessor` (imputation,
# scaling, PCA) before they reach the regressor.
forecaster = ForecasterSarimax(
    regressor = Sarimax(order=(2, 1, 2), seasonal_order=(0, 0, 0, 0)),
    transformer_exog = preprocessor
)

# Backtest over the hold-out period:
#   - the first `y_train.size` observations are the initial training window,
#   - predictions are made one step at a time (steps=1),
#   - the forecaster is refitted at every fold (refit=True) on a window that is
#     not held at a fixed size (fixed_train_size=False).
# Returns the error for the requested metric and the backtest predictions.
metric, y_pred = backtesting_sarimax(
    forecaster = forecaster,
    y = y,
    exog = X,
    steps = 1,
    metric = 'mean_squared_error',
    initial_train_size = y_train.size,
    fixed_train_size = False,
    refit = True,
)

# Show the backtest error as the cell output; previously it was computed but
# never displayed.
metric

  0%|          | 0/148 [00:00<?, ?it/s]

### Save the model's predictions

In [None]:
from pathlib import Path

# Destination of the exported predictions. The parent directory is created first
# because `to_csv` does not create missing directories and would raise OSError
# on a fresh checkout.
output_path = Path('prediction/SARIMAX_pred.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

# Collapse the backtest output to a Series and label it for export:
# the values column is named 'Close_BTC' and the index column 'date'.
y_pred = y_pred.squeeze()
y_pred.name = 'Close_BTC'
y_pred.index.name = 'date'
y_pred.to_csv(output_path)