# Считаем метрики по ETS, ARIMA, RF и их.

In [13]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import os

from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
RANDOM_STATE = 42
HORIZON = 13

In [3]:
# --- Метрики ---
def smape(y_true, y_pred):
    return 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))

def evaluate(y_true, y_pred):
    return {
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "MAE": mean_absolute_error(y_true, y_pred),
        "SMAPE": smape(y_true, y_pred)
    }

### ETS

In [4]:
def forecast_ets(series, h=HORIZON):
    best_aic = np.inf
    best_res = None
    configs = [
        {'trend': 'add', 'damped_trend': True},
        {'trend': 'add', 'damped_trend': False},
        {'trend': None, 'damped_trend': False},
    ]
    for cfg in configs:
        try:
            model = ExponentialSmoothing(series,
                                         trend=cfg['trend'],
                                         damped_trend=cfg['damped_trend'],
                                         seasonal=None,
                                         initialization_method="estimated")
            res = model.fit(optimized=True)
            aic = res.aic
            if aic < best_aic:
                best_aic = aic
                best_res = res
        except:
            continue
    if best_res is None:
        return np.repeat(series.iloc[-1], h).astype(float)
    return best_res.forecast(h).values

### ARIMA

In [5]:
def forecast_arima(series, h=HORIZON):
    best_aic = np.inf
    best_model = None
    s = series.astype(float)
    if len(s) < 10:
        return np.repeat(s.iloc[-1], h).astype(float)
    for p in range(0, 3):
        for d in range(0, 2):
            for q in range(0, 3):
                try:
                    model = ARIMA(s, order=(p,d,q))
                    res = model.fit()
                    if res.aic < best_aic:
                        best_aic = res.aic
                        best_model = res
                except:
                    continue
    if best_model is None:
        return np.repeat(s.iloc[-1], h).astype(float)
    return best_model.forecast(steps=h).values

### RandomForest recursive

In [6]:
def make_lag_features(df_close, max_lag=7):
    df = pd.DataFrame({'close': df_close})
    for lag in range(1, max_lag+1):
        df[f'lag_{lag}'] = df['close'].shift(lag)
    df['roll_mean_3'] = df['close'].shift(1).rolling(3, min_periods=1).mean()
    df['roll_std_3']  = df['close'].shift(1).rolling(3, min_periods=1).std(ddof=0).fillna(0)
    df['roll_mean_7'] = df['close'].shift(1).rolling(7, min_periods=1).mean()
    return df.dropna().reset_index(drop=True)

def forecast_rf_recursive(series, h=HORIZON, max_lag=7):
    s = pd.Series(series).reset_index(drop=True)
    df_all = make_lag_features(s, max_lag=max_lag)
    if df_all.shape[0] < 5:
        return np.repeat(s.iloc[-1], h).astype(float)
    X = df_all.drop(columns=['close']).values
    y = df_all['close'].values
    model = Ridge(alpha=1.0) if len(y) < 30 else RandomForestRegressor(
        n_estimators=400, max_depth=6, random_state=RANDOM_STATE, n_jobs=-1
    )
    model.fit(X, y)

    history = s.tolist()
    preds = []
    for _ in range(h):
        row = {}
        for lag in range(1, max_lag+1):
            row[f'lag_{lag}'] = history[-lag] if len(history) >= lag else history[0]
        temp = pd.Series(history)
        row['roll_mean_3'] = temp.shift(1).rolling(3, min_periods=1).mean().iloc[-1]
        row['roll_std_3']  = temp.shift(1).rolling(3, min_periods=1).std(ddof=0).iloc[-1]
        row['roll_mean_7'] = temp.shift(1).rolling(7, min_periods=1).mean().iloc[-1]
        feat_order = [f'lag_{i}' for i in range(1, max_lag+1)] + ['roll_mean_3','roll_std_3','roll_mean_7']
        x_row = np.array([row[k] for k in feat_order]).reshape(1, -1)
        yhat = model.predict(x_row)[0]
        preds.append(float(yhat))
        history.append(float(yhat))
    return np.array(preds)

In [7]:
# --- Validation procedure ---

def run_validation(hist, horizon=HORIZON):
    train_part = hist[:-horizon]
    valid_part = hist[-horizon:]

    print(f"Train length: {len(train_part)}, Validation length: {len(valid_part)}")

    ets_pred   = forecast_ets(train_part, h=horizon)
    arima_pred = forecast_arima(train_part, h=horizon)
    rf_pred    = forecast_rf_recursive(train_part, h=horizon)

    ensemble = np.nanmean(np.vstack([ets_pred, arima_pred, rf_pred]), axis=0)

    results = {
        'ETS': evaluate(valid_part.values, ets_pred),
        'ARIMA': evaluate(valid_part.values, arima_pred),
        'RF': evaluate(valid_part.values, rf_pred),
        'ENSEMBLE': evaluate(valid_part.values, ensemble)
    }

    # сохраняем предсказания
    df_preds = pd.DataFrame({
        'ID': np.arange(1, horizon+1),
        'actual': valid_part.values,
        'ets': ets_pred,
        'arima': arima_pred,
        'rf': rf_pred,
        'ensemble': ensemble
    })
    df_preds.to_csv("validation_preds.csv", index=False)

    return results, df_preds

In [14]:
def main(train_path="train.csv", test_path="test.csv"):
    train = pd.read_csv(train_path, parse_dates=['dt'])
    test  = pd.read_csv(test_path, parse_dates=['dt'])
    hist  = pd.concat([train, test], ignore_index=True).sort_values('dt').reset_index(drop=True)
    close = hist['close'].astype(float)

    results, preds = run_validation(close, horizon=HORIZON)

    print("\nValidation metrics:")
    for model, mets in results.items():
        print(model, ":", ", ".join([f"{k}={v:.4f}" for k,v in mets.items()]))

    print("\nSaved predictions to validation_preds.csv")

if __name__ == "__main__":
    main()

Train length: 79, Validation length: 13

Validation metrics:
ETS : RMSE=0.2738, MAE=0.2443, SMAPE=6.6586
ARIMA : RMSE=0.5109, MAE=0.4600, SMAPE=12.9800
RF : RMSE=0.7164, MAE=0.6684, SMAPE=19.5315
ENSEMBLE : RMSE=0.4988, MAE=0.4576, SMAPE=12.9114

Saved predictions to validation_preds.csv
