# Time series validation

In [7]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm

In [13]:
# Toy data set: six samples in time order, two features each.
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4, 5, 6])

# n_splits is spelled out so the folds match the five Train/Test lines
# recorded below this cell regardless of the library's default value.
tscv = TimeSeriesSplit(n_splits=5)

# Each fold trains on a growing prefix of the series and tests on the
# sample(s) that immediately follow it.
for train_index, test_index in tscv.split(X):
    print('Train: ', train_index, '\t\tTest: ', test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

Train:  [0] 		Test:  [1]
Train:  [0 1] 		Test:  [2]
Train:  [0 1 2] 		Test:  [3]
Train:  [0 1 2 3] 		Test:  [4]
Train:  [0 1 2 3 4] 		Test:  [5]


In [None]:
# Expanding-window cross-validation of an ARMA(2, 2) model.
# `cross_validation` is not defined in the cells shown here; it is indexed
# with .iloc, so presumably it is a pandas Series/DataFrame -- TODO confirm.
tscv = TimeSeriesSplit(n_splits=4)

rmse = []
for train_index, test_index in tscv.split(cross_validation):
    cv_train = cross_validation.iloc[train_index]
    cv_test = cross_validation.iloc[test_index]

    # NOTE(review): sm.tsa.ARMA appears to have been removed in newer
    # statsmodels releases in favour of sm.tsa.ARIMA(..., order=(2, 0, 2));
    # confirm against the installed version before running.
    arma = sm.tsa.ARMA(cv_train, (2, 2)).fit(disp=False)

    # Forecast over the span covered by the test fold's index labels.
    predictions = arma.predict(cv_test.index.values[0], cv_test.index.values[-1])
    true_values = cv_test.values
    # np.sqrt instead of the bare `sqrt`, which was never imported.
    rmse.append(np.sqrt(mean_squared_error(true_values, predictions)))

# Report the RMSE averaged over all folds.
print("RMSE: {}".format(np.mean(rmse)))

In [16]:
class BlockingTimeSeriesSplit():
    """Cross-validation splitter that works on consecutive, disjoint blocks.

    The samples are cut into ``n_splits`` equally sized consecutive blocks
    of ``len(X) // n_splits`` samples each. Inside every block the first 80%
    of the samples form the training indices and the remainder the test
    indices, so no fold shares samples with another fold. Samples left over
    after the integer division (at the end of the data) are not used.

    Parameters
    ----------
    n_splits : int
        Number of blocks (folds) to generate.
    margin : int, default=0
        Number of samples skipped between the end of the training part and
        the start of the test part of each block.
    """

    def __init__(self, n_splits, margin=0):
        self.n_splits = n_splits
        self.margin = margin

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the number of folds; all arguments are ignored."""
        return self.n_splits

    def split(self, X, y=None, groups=None):
        """Yield ``(train_indices, test_indices)`` arrays, one pair per block.

        Only ``len(X)`` is used; ``y`` and ``groups`` are ignored.
        """
        n_samples = len(X)
        k_fold_size = n_samples // self.n_splits
        indices = np.arange(n_samples)

        for i in range(self.n_splits):
            start = i * k_fold_size
            stop = start + k_fold_size
            # First 80% of the block is used for training.
            mid = int(0.8 * (stop - start)) + start
            yield indices[start:mid], indices[mid + self.margin:stop]

In [None]:
# cross_val_score is not brought in by the import cell, so import it here.
from sklearn.model_selection import cross_val_score

# NOTE(review): `model` and `r2` are not defined in the cells shown here.
# `r2` is presumably a scorer object; if it is not defined anywhere,
# the string 'r2' may be what was intended -- confirm before running.
btss = BlockingTimeSeriesSplit(n_splits=3)
scores = cross_val_score(model, X_train, y_train, cv=btss, scoring=r2)

### Credits & Links

https://medium.com/@soumyachess1496/cross-validation-in-time-series-566ae4981ce4  
https://hub.packtpub.com/cross-validation-strategies-for-time-series-forecasting-tutorial/