## Experiments :: Statistical Models

In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path

import warnings
warnings.filterwarnings("ignore")

from etna.datasets.tsdataset import TSDataset
from etna.models import HoltWintersModel
from etna.pipeline import Pipeline
import pandas as pd 

## Constants

In [2]:
# ``__file__`` is not defined inside a notebook kernel, so the project root is derived
# from the current working directory: it is the parent of the directory the notebook
# runs in. (The previous ``Path("__file__")`` was a plain string literal that resolved
# relative to the working directory and produced the same location.)
PROJECT_ROOT = Path.cwd().resolve().parent

DATA_DPATH = PROJECT_ROOT / "data"
# Fail early, with the offending path in the message, if the kernel was started elsewhere.
assert DATA_DPATH.exists(), f"Data directory not found: {DATA_DPATH}"

# Number of steps to forecast; passed to the pipeline as its horizon.
HORIZON = 30

## Data Loading

In [None]:
# Locations of the two splits inside the data directory.
train_fpath = DATA_DPATH / "datasets" / "train.csv"
test_fpath = DATA_DPATH / "datasets" / "test.csv"

# Read each split using its first CSV column as the index, then convert the
# "timestamp" column to datetimes.
train_df = pd.read_csv(train_fpath, index_col=0).assign(
    timestamp=lambda frame: pd.to_datetime(frame["timestamp"])
)
test_df = pd.read_csv(test_fpath, index_col=0).assign(
    timestamp=lambda frame: pd.to_datetime(frame["timestamp"])
)

# Quick sanity check of the loaded sizes.
(train_df.shape, test_df.shape)

## TS Dataset Preparation

In [4]:
# Segment mock: the data holds only one segment, so every row of both splits gets the
# same constant segment id. ``assign`` returns a new frame, leaving the loaded frames
# untouched.
train_ts_df = TSDataset(train_df.assign(segment=10), freq="D")
test_ts_df = TSDataset(test_df.assign(segment=10), freq="D")

## Model Training

In [None]:
# Holt-Winters configuration: additive trend, additive seasonality, and a seasonal
# period of 7 steps (the datasets are built with freq="D").
holt_winters_params = {
    "trend": "add",
    "seasonal": "add",
    "seasonal_periods": 7,
}
model = HoltWintersModel(**holt_winters_params)

# Model-only pipeline (no transforms are passed) with the notebook-wide horizon.
pipeline = Pipeline(model=model, horizon=HORIZON)
pipeline.fit(train_ts_df)

# Forecast with the fitted pipeline.
ts_forecast_df = pipeline.forecast()

## Predictions

In [None]:
# Flatten both datasets to plain DataFrames and give each "target" column a distinct
# name so actuals and forecasts can sit side by side.
# The flattened test frame gets its own name instead of rebinding ``test_df``: the raw
# frame from the data-loading cell stays intact, so the dataset-preparation cell can be
# re-run after this one without hitting a frame that no longer has a "target" column.
test_flat_df = test_ts_df.to_pandas(flatten=True).rename(columns={"target": "y_test"})
forecast_df = ts_forecast_df.to_pandas(flatten=True).rename(columns={"target": "y_pred"})

# Outer join keeps timestamps that appear in only one of the two frames; the mock
# segment column is dropped once it has served as a join key.
preds_df = (
    test_flat_df
    .merge(forecast_df, on=["timestamp", "segment"], how="outer")
    .drop(columns=["segment"])
)

preds_df.head()

In [7]:
# Make sure the output directory exists (creating parents if needed; no error when it
# is already there), then write the predictions table as CSV.
pred_dpath = DATA_DPATH / "predictions"
pred_dpath.mkdir(parents=True, exist_ok=True)

pred_fpath = pred_dpath / "holt_winters_predictions.csv"
preds_df.to_csv(pred_fpath)