In [13]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import cross_val_score

# Cross Validation methods

from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit 
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import TimeSeriesSplit


# Load time series data into a Pandas dataframe
df = pd.read_csv('data-sets/Alcohol_Sales.csv', index_col=0, parse_dates=True)

# Define the number of lagged values
n_lags = 3

# Create the feature matrix X and target vector y
X = pd.DataFrame(index=df.index)
for lag in range(1, n_lags+1):
    X[f'lag_{lag}'] = df.shift(lag)
X.dropna(inplace=True)
y = df.loc[X.index]

print(X, y)

# Define the number of rolling windows and the window size
n_windows = 5
window_size = len(X) // n_windows

# Initialize the cross-validators
shuffles_split_cv = ShuffleSplit(n_splits=n_windows)
k_fold_cv = KFold(n_splits=n_windows)
stratified_k_fold_cv = StratifiedKFold(n_splits=n_windows)
stratified_shuffle_split_cv = StratifiedShuffleSplit(n_splits=n_windows)
group_k_fold_cv = GroupKFold(n_splits=n_windows)
stratified_group_k_fold_cv = StratifiedGroupKFold(n_splits=n_windows)
group_shuffle_split_cv = GroupShuffleSplit(n_splits=n_windows)
timeSeries_split_cv = TimeSeriesSplit(n_splits=n_windows)

cv_array = [shuffles_split_cv, k_fold_cv, stratified_k_fold_cv, stratified_shuffle_split_cv,
            group_k_fold_cv, stratified_group_k_fold_cv, group_shuffle_split_cv, timeSeries_split_cv]



# Initialize the Support Vector Regression model
svr = SVR(kernel='rbf', C=500, gamma=0.1, epsilon=.1)

# Iterate over the rolling windows and train/test the model
for k in cv_array:
    for i, (train_index, test_index) in enumerate(tscv.split(X)):
        # Split the data into training and testing sets
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]

        # Preprocess the data by scaling it
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Train the Support Vector Regression model on the training data
        svr.fit(X_train, y_train)

        # Make predictions on the testing data
        y_pred = svr.predict(X_test)

        # Evaluate the performance of the model using the Mean Absolute Percentage Error
        mape = mean_absolute_percentage_error(y_test, y_pred)
        print(k, "\n")
        print(f'Window {i+1} MAPE: {mape*100:.2f}%')



              lag_1    lag_2    lag_3
date                                 
1992-04-01   4002.0   3458.0   3459.0
1992-05-01   4564.0   4002.0   3458.0
1992-06-01   4221.0   4564.0   4002.0
1992-07-01   4529.0   4221.0   4564.0
1992-08-01   4466.0   4529.0   4221.0
...             ...      ...      ...
2018-09-01  14257.0  12640.0  14583.0
2018-10-01  12396.0  14257.0  12640.0
2018-11-01  13914.0  12396.0  14257.0
2018-12-01  14174.0  13914.0  12396.0
2019-01-01  15504.0  14174.0  13914.0

[322 rows x 3 columns]             S4248SM144NCEN
date                      
1992-04-01            4564
1992-05-01            4221
1992-06-01            4529
1992-07-01            4466
1992-08-01            4137
...                    ...
2018-09-01           12396
2018-10-01           13914
2018-11-01           14174
2018-12-01           15504
2019-01-01           10718

[322 rows x 1 columns]


  y = column_or_1d(y, warn=True)


AttributeError: 'ShuffleSplit' object has no attribute 'name'