## Import Libraries

In [35]:
import pandas as pd

from sklearn.model_selection import TimeSeriesSplit

## Data

Create a dummy daily time series: an empty DataFrame indexed by every calendar day from 2010-01-01 to 2021-12-31.

In [36]:
# Column-less frame whose index holds one entry per calendar day in the range;
# only the DatetimeIndex is needed to demonstrate the splits below.
data = pd.DataFrame(
    index=pd.date_range(start="2010-01-01", end="2021-12-31", freq="D"),
)
data

2010-01-01
2010-01-02
2010-01-03
2010-01-04
2010-01-05
...
2021-12-27
2021-12-28
2021-12-29
2021-12-30
2021-12-31


## Cross-validation methodology

### Expanding Window

In [41]:
# Expanding window: no cap on the training size, so every fold trains on all
# rows that precede its 365-row test block.
n_splits = 5

tscv = TimeSeriesSplit(test_size=365, n_splits=n_splits)

# Walk through the folds in the order the splitter yields them
for k_fold, (train_index, test_index) in enumerate(tscv.split(data)):
    # The splitter yields positional indices, hence .iloc
    train_data, test_data = data.iloc[train_index], data.iloc[test_index]

    # train and evaluate your model on each fold
    # ...

    # First and last timestamp covered by each subset
    train_start, train_end = train_data.index[0], train_data.index[-1]
    test_start, test_end = test_data.index[0], test_data.index[-1]

    print("--------------------")
    # enumerate() is 0-based; report folds starting at 1
    print("Fold: ", k_fold + 1)
    # Sizes of the training and testing subsets
    print("Training data shape:", train_data.shape)
    print("Testing data shape:", test_data.shape)
    # Date span of the training and testing subsets
    print("Training date:", train_start, "-", train_end)
    print("Testing date:", test_start, "-", test_end)

--------------------
Fold:  1
Training data shape: (2558, 0)
Testing data shape: (365, 0)
Training date: 2010-01-01 00:00:00 - 2017-01-01 00:00:00
Testing date: 2017-01-02 00:00:00 - 2018-01-01 00:00:00
--------------------
Fold:  2
Training data shape: (2923, 0)
Testing data shape: (365, 0)
Training date: 2010-01-01 00:00:00 - 2018-01-01 00:00:00
Testing date: 2018-01-02 00:00:00 - 2019-01-01 00:00:00
--------------------
Fold:  3
Training data shape: (3288, 0)
Testing data shape: (365, 0)
Training date: 2010-01-01 00:00:00 - 2019-01-01 00:00:00
Testing date: 2019-01-02 00:00:00 - 2020-01-01 00:00:00
--------------------
Fold:  4
Training data shape: (3653, 0)
Testing data shape: (365, 0)
Training date: 2010-01-01 00:00:00 - 2020-01-01 00:00:00
Testing date: 2020-01-02 00:00:00 - 2020-12-31 00:00:00
--------------------
Fold:  5
Training data shape: (4018, 0)
Testing data shape: (365, 0)
Training date: 2010-01-01 00:00:00 - 2020-12-31 00:00:00
Testing date: 2021-01-01 00:00:00 - 2021-12-31 00:00:00

### Sliding Window

In [46]:
# Sliding window: cap the training set at a fixed number of rows so each fold
# trains only on the most recent `max_train_size` rows before its test block.
n_splits = 5
max_train_size = len(data) // n_splits

tscv = TimeSeriesSplit(
    n_splits=n_splits,
    test_size=365,
    max_train_size=max_train_size,
)

# Walk through the folds in the order the splitter yields them
for k_fold, (train_index, test_index) in enumerate(tscv.split(data)):
    # The splitter yields positional indices, hence .iloc
    train_data, test_data = data.iloc[train_index], data.iloc[test_index]

    # train and evaluate your model on each fold
    # ...

    # First and last timestamp covered by each subset
    train_start, train_end = train_data.index[0], train_data.index[-1]
    test_start, test_end = test_data.index[0], test_data.index[-1]

    print("--------------------")
    # enumerate() is 0-based; report folds starting at 1
    print("Fold: ", k_fold + 1)
    # Sizes of the training and testing subsets
    print("Training data shape:", train_data.shape)
    print("Testing data shape:", test_data.shape)
    # Date span of the training and testing subsets
    print("Training date:", train_start, "-", train_end)
    print("Testing date:", test_start, "-", test_end)

--------------------
Fold:  1
Training data shape: (876, 0)
Testing data shape: (365, 0)
Training date: 2014-08-10 00:00:00 - 2017-01-01 00:00:00
Testing date: 2017-01-02 00:00:00 - 2018-01-01 00:00:00
--------------------
Fold:  2
Training data shape: (876, 0)
Testing data shape: (365, 0)
Training date: 2015-08-10 00:00:00 - 2018-01-01 00:00:00
Testing date: 2018-01-02 00:00:00 - 2019-01-01 00:00:00
--------------------
Fold:  3
Training data shape: (876, 0)
Testing data shape: (365, 0)
Training date: 2016-08-09 00:00:00 - 2019-01-01 00:00:00
Testing date: 2019-01-02 00:00:00 - 2020-01-01 00:00:00
--------------------
Fold:  4
Training data shape: (876, 0)
Testing data shape: (365, 0)
Training date: 2017-08-09 00:00:00 - 2020-01-01 00:00:00
Testing date: 2020-01-02 00:00:00 - 2020-12-31 00:00:00
--------------------
Fold:  5
Training data shape: (876, 0)
Testing data shape: (365, 0)
Training date: 2018-08-09 00:00:00 - 2020-12-31 00:00:00
Testing date: 2021-01-01 00:00:00 - 2021-12-31 00:00:00