In [3]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import cross_val_score

# Cross Validation methods

from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit 
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import TimeSeriesSplit


# Load time series data into a Pandas dataframe
df = pd.read_csv('data-sets/Alcohol_Sales.csv', index_col=0, parse_dates=True)

# Define the number of lagged values
n_lags = 3

# Create the feature matrix X and target vector y
X = pd.DataFrame(index=df.index)
for lag in range(1, n_lags+1):
    X[f'lag_{lag}'] = df.shift(lag)
X.dropna(inplace=True)
y = df.loc[X.index]


# Define the number of rolling windows and the window size
n_windows = 5
window_size = len(X) // n_windows

# Initialize the cross-validators
k_fold_cv = KFold(n_splits=n_windows)
shuffles_split_cv = ShuffleSplit(n_splits=n_windows)
stratified_k_fold_cv = StratifiedKFold(n_splits=n_windows)
stratified_shuffle_split_cv = StratifiedShuffleSplit(n_splits=n_windows)
group_k_fold_cv = GroupKFold(n_splits=n_windows)
stratified_group_k_fold_cv = StratifiedGroupKFold(n_splits=n_windows)
group_shuffle_split_cv = GroupShuffleSplit(n_splits=n_windows)
timeSeries_split_cv = TimeSeriesSplit(n_splits=n_windows)

cv_array = [shuffles_split_cv, k_fold_cv, timeSeries_split_cv]



# Initialize the Support Vector Regression model
svr = SVR(kernel='rbf', C=500, gamma=0.1, epsilon=.1)

# Iterate over the rolling windows and train/test the model
for cv in cv_array:
    sum_of_mape = 0
    for i, (train_index, test_index) in enumerate(cv.split(X)):
        # Split the data into training and testing sets
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]

        # Preprocess the data by scaling it
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        

        # Train the Support Vector Regression model on the training data
        svr.fit(X_train, y_train)

        # Make predictions on the testing data
        y_pred = svr.predict(X_test)

        # Evaluate the performance of the model using the Mean Absolute Percentage Error
        mape = mean_absolute_percentage_error(y_test, y_pred)
        
        sum_of_mape += mape
    
    average_mape = sum_of_mape / cv.get_n_splits()
    print(f'Average MAPE for {cv}: {average_mape * 100:.2f}%\n')



Average MAPE for ShuffleSplit(n_splits=5, random_state=None, test_size=None, train_size=None): 10.49%

Average MAPE for KFold(n_splits=5, random_state=None, shuffle=False): 15.13%

Average MAPE for TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None): 19.20%



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [4]:
y_pred

array([10194.7897746 , 10223.36305977, 10358.63507591, 10034.97552867,
       10180.94195344,  8905.43268199,  9500.44957584,  9975.06490085,
       10198.93336882, 10323.18865861, 10256.79751278, 10020.09342947,
        9987.79146362, 10265.9536427 , 10220.1468679 , 10128.61903559,
        9927.94531805,  8858.58484886,  9641.94725206, 10256.5796151 ,
       10173.29653285, 10215.95404286, 10018.77760238,  9651.8212026 ,
        9909.08274036, 10104.97780211, 10008.97672224, 10078.29972995,
        9676.25481937,  8891.4704737 ,  9635.16110851, 10298.39468858,
       10110.64856389, 10003.82913756,  9525.25136377,  9241.365745  ,
        9510.83748776,  9902.54970158,  9899.3011594 ,  9962.88358715,
        9565.1216914 ,  9064.05215714,  9697.24725991, 10331.62109613,
       10103.23101626,  9801.28962131,  9392.75077128,  9201.82385374,
        9283.20658879,  9553.83639424,  9462.28308472,  9484.65725955,
        8902.20006874])