In [12]:
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error

# Evaluate a RidgeCV regressor on pre-computed per-fold feature dumps and report
# the test mean absolute error (MAE) for each fold and averaged over folds.

# Configuration: fold count and location of the per-fold .npy feature arrays
num_folds = 5
par_folder = 'features/94873a83-d/'
train_files = [f'{par_folder}train_finetuned_{i}.npy' for i in range(1, num_folds + 1)]
test_files = [f'{par_folder}test_finetuned_{i}.npy' for i in range(1, num_folds + 1)]

# Each array is sliced as [features | target]: the first `num_features` columns
# are the model inputs and the column immediately after them is the target.
num_features = 1024

# Candidate regularisation strengths. They are the same for every fold, so the
# grid is built once here instead of on every loop iteration.
alphas = np.logspace(-6, 6, 15)

# Per-fold results: test MAE, and the alpha RidgeCV selected on that fold.
# `model` is overwritten on every iteration, so without `fold_alphas` only the
# last fold's choice would remain inspectable after the loop.
test_scores = []
fold_alphas = []

for fold, (train_file, test_file) in enumerate(zip(train_files, test_files)):
    # Load training and test data for the current fold
    train_data = np.load(train_file)
    test_data = np.load(test_file)

    # Split features and targets
    X_train, y_train = train_data[:, :num_features], train_data[:, num_features]
    X_test, y_test = test_data[:, :num_features], test_data[:, num_features]

    # Fit RidgeCV; alpha is chosen by its built-in cross-validation on the
    # training split only, so the test split stays untouched until scoring.
    model = RidgeCV(alphas=alphas)
    model.fit(X_train, y_train)
    fold_alphas.append(float(model.alpha_))

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Calculate and store the mean absolute error for the current fold
    # (this is MAE, not MSE: the metric in use is mean_absolute_error)
    mae = mean_absolute_error(y_test, y_pred)
    test_scores.append(mae)

# Calculate average test score (MAE) across all folds
average_test_score = np.mean(test_scores)

print("Test scores for each fold:", test_scores)
print("Average test score across all folds:", average_test_score)

Test scores for each fold: [0.20089749311422922, 0.20050954389394657, 0.20436646844858505, 0.2072502961518725, 0.20511387654151716]
Average test score across all folds: 0.2036275356300301


In [13]:
# Report the regularisation strength selected by RidgeCV.
# NOTE: `model` is not created in this cell; it is the estimator left over from
# the final iteration of the fold loop in the previous cell. The value shown is
# therefore the alpha chosen on the LAST fold only, not a summary across folds,
# and this cell fails on a fresh kernel unless the previous cell has been run.
best_alpha = model.alpha_
print("Best alpha:", best_alpha)

Best alpha: 51.79474679231202
