In [4]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Toy regression problem: six samples with a single feature (1..6) stored
# as a column vector, and targets that sit close to the feature values.
X = np.arange(1, 7).reshape(-1, 1)
y = np.asarray([1.1, 1.9, 3.0, 4.1, 5.1, 6.2])


def cross_validation(X, y, k=None, *, random_state=None, verbose=True):
    """Estimate linear-regression error with shuffled k-fold cross-validation.

    The rows are shuffled once, cut into ``k`` contiguous folds, and for each
    fold a fresh ``LinearRegression`` is fitted on the remaining rows and
    scored on the held-out fold with ``mean_absolute_error``.

    Parameters
    ----------
    X : array-like
        Feature rows; indexed along the first axis. Anything accepted by
        ``np.asarray`` works (the model itself expects a 2-D layout).
    y : array-like
        Targets, one entry per row of ``X``.
    k : int, optional
        Number of folds. ``None`` (the default) uses one fold per sample,
        i.e. leave-one-out cross-validation.
    random_state : int or other ``np.random.default_rng`` seed, optional
        Seed for a private generator used for the shuffle. When ``None`` the
        shuffle draws from NumPy's global RNG, so ``np.random.seed`` keeps
        working as before.
    verbose : bool, default True
        Print the fold boundaries, the train/test split and the running
        error list for every fold. Pass ``False`` to silence the trace.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-fold mean absolute errors, computed with
        ``np.mean`` and ``np.std``.

    Raises
    ------
    TypeError
        If ``k`` is not an integer.
    ValueError
        If ``X`` and ``y`` differ in length, or ``k`` is outside ``[2, n]``
        (fewer than 2 folds leaves no training data; more than ``n`` folds
        leaves some folds empty).
    """
    # Accept plain lists as well as arrays; fancy indexing below needs arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    n = len(y)  # Total number of data points
    if len(X) != n:
        raise ValueError(
            f"X and y must have the same number of rows, got {len(X)} and {n}"
        )

    if k is None:
        k = n  # One fold per sample: leave-one-out cross-validation
    if not isinstance(k, (int, np.integer)):
        raise TypeError(f"k must be an integer, got {type(k).__name__}")
    if not 2 <= k <= n:
        raise ValueError(
            f"k must satisfy 2 <= k <= number of samples ({n}), got {k}"
        )

    # Shuffle once so that contiguous slices form random folds.
    if random_state is None:
        order = np.random.permutation(n)
    else:
        order = np.random.default_rng(random_state).permutation(n)
    X = X[order]
    y = y[order]

    # Spread the remainder over the first folds so sizes differ by at most
    # one, instead of piling every leftover row into the last fold.
    base_size, remainder = divmod(n, int(k))

    errors = []  # Mean absolute error of each fold
    start = 0
    for i in range(int(k)):
        fold_size = base_size + (1 if i < remainder else 0)
        end = start + fold_size
        if verbose:
            print('start', start)
            print('end', end, 'fold_size', fold_size, 'i', i)

        # Held-out rows are [start, end); everything else is training data.
        X_train = np.concatenate((X[:start], X[end:]), axis=0)
        y_train = np.concatenate((y[:start], y[end:]), axis=0)
        X_test = X[start:end]
        y_test = y[start:end]

        if verbose:
            print('X_train', X_train)
            print('y_train', y_train)
            print('X_test', X_test)
            print('y_test', y_test)

        # A new model per fold so no fitted state leaks between folds.
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        errors.append(mean_absolute_error(y_test, y_pred))
        if verbose:
            print('errors', errors)

        start = end

    return np.mean(errors), np.std(errors)

# Evaluate the model with three folds and report the aggregate scores.
fold_count = 3
mean_error, std_error = cross_validation(X, y, k=fold_count)
report = (
    ("Mean Absolute Error:", mean_error),
    ("Standard Deviation of Error:", std_error),
)
print("3-Fold Cross-Validation Results:")
for label, value in report:
    print(label, value)

# Slicing demo: a slice that starts at the array's length (6) is simply
# empty, which is why the training split of the last fold needs no guard.
first_rows, past_end = X[:3], X[6:]
print(first_rows, past_end)


start 0
end 2 fold_size 2 i 0
X_train [[3]
 [5]
 [6]
 [4]]
y_train [3.  5.1 6.2 4.1]
X_test [[1]
 [2]]
y_test [1.1 1.9]
errors [0.13]
start 2
end 4 fold_size 2 i 1
X_train [[1]
 [2]
 [6]
 [4]]
y_train [1.1 1.9 6.2 4.1]
X_test [[3]
 [5]]
y_test [3.  5.1]
errors [0.13, 0.051694915254237195]
start 4
end 6 fold_size 2 i 2
X_train [[1]
 [2]
 [3]
 [5]]
y_train [1.1 1.9 3.  5.1]
X_test [[6]
 [4]]
y_test [6.2 4.1]
errors [0.13, 0.051694915254237195, 0.09285714285714386]
3-Fold Cross-Validation Results:
Mean Absolute Error: 0.09151735270379369
Standard Deviation of Error: 0.031981951707189836
[[1]
 [2]
 [3]] []
