In [12]:
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error

# Define file paths and parameters
num_folds = 5
par_folder = 'features/94873a83-d/'
train_files = [f'{par_folder}train_finetuned_{i}.npy' for i in range(1, num_folds + 1)]
test_files = [f'{par_folder}test_finetuned_{i}.npy' for i in range(1, num_folds + 1)]

# Each array row holds `n_features` feature columns followed by the target column.
n_features = 1024

# Candidate regularisation strengths for RidgeCV. The grid is identical for
# every fold, so it is built once instead of on each iteration.
alphas = np.logspace(-6, 6, 15)

# Per-fold mean absolute error (MAE) on the held-out test file
test_scores = []

for fold in range(num_folds):
    # Load training and test data for the current fold
    train_data = np.load(train_files[fold])
    test_data = np.load(test_files[fold])

    # Split features and targets
    X_train, y_train = train_data[:, :n_features], train_data[:, n_features]
    X_test, y_test = test_data[:, :n_features], test_data[:, n_features]

    # Fit a ridge regression whose alpha is picked from `alphas` by RidgeCV's
    # built-in cross-validation on the training data only.
    model = RidgeCV(alphas=alphas)
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Score with mean absolute error. The metric was previously labelled
    # "mean squared error" / `mse`, which did not match the function called.
    mae = mean_absolute_error(y_test, y_pred)
    test_scores.append(mae)

# Average MAE across all folds
average_test_score = np.mean(test_scores)

print("Test scores for each fold:", test_scores)
print("Average test score across all folds:", average_test_score)

Test scores for each fold: [0.20089749311422922, 0.20050954389394657, 0.20436646844858505, 0.2072502961518725, 0.20511387654151716]
Average test score across all folds: 0.2036275356300301


In [13]:
# NOTE: `model` is whichever RidgeCV instance was fitted last in this kernel
# (the evaluation loop rebinds it on every iteration), so this reports the
# alpha chosen for that final fit only, not a value aggregated across fits.
best_alpha = model.alpha_
print("Best alpha:", best_alpha)

Best alpha: 51.79474679231202


In [1]:
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error
par_folder = 'features/330b6657-1/'

# Two feature variants for split 0: fine-tuned features first, raw features
# second. The split index is a constant, so it is written literally rather
# than as a `{0}` placeholder inside the f-string.
train_files = [f'{par_folder}train_finetuned_0.npy', f'{par_folder}train_raw_0.npy']
test_files = [f'{par_folder}test_finetuned_0.npy', f'{par_folder}test_raw_0.npy']

# Each array row holds `n_features` feature columns followed by the target column.
n_features = 1024

# Candidate regularisation strengths for RidgeCV; the same grid is used for
# both variants, so it is built once.
alphas = np.logspace(-6, 6, 10)

# Mean absolute error (MAE) per feature variant, in the order listed above
test_scores = []

for train_path, test_path in zip(train_files, test_files):
    # Load training and test data for the current feature variant
    train_data = np.load(train_path)
    test_data = np.load(test_path)

    # Split features and targets
    X_train, y_train = train_data[:, :n_features], train_data[:, n_features]
    X_test, y_test = test_data[:, :n_features], test_data[:, n_features]

    # Fit a ridge regression whose alpha is picked from `alphas` by RidgeCV
    model = RidgeCV(alphas=alphas)
    model.fit(X_train, y_train)

    # Predict on the test set and clamp predictions into [0, 1]
    # (presumably the valid range of the target -- TODO confirm).
    y_pred = model.predict(X_test).clip(0, 1)

    # Score with mean absolute error. The metric was previously labelled
    # "mean squared error" / `mse`, which did not match the function called.
    mae = mean_absolute_error(y_test, y_pred)
    test_scores.append(mae)

# NOTE(review): the two scores belong to different feature sets (fine-tuned
# vs raw), not to cross-validation folds, so this mean mixes two models and
# the "fold" wording in the printed labels is only kept to match earlier output.
average_test_score = np.mean(test_scores)

print("Test scores for each fold:", test_scores)
print("Average test score across all folds:", average_test_score)

Test scores for each fold: [0.42383190352737227, 0.556251939667463]
Average test score across all folds: 0.49004192159741766


In [2]:
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error
par_folder = 'features/afb6d2ed-b/'

# Two feature variants for split 0: fine-tuned features first, raw features
# second. The split index is a constant, so it is written literally rather
# than as a `{0}` placeholder inside the f-string.
train_files = [f'{par_folder}train_finetuned_0.npy', f'{par_folder}train_raw_0.npy']
test_files = [f'{par_folder}test_finetuned_0.npy', f'{par_folder}test_raw_0.npy']

# Each array row holds `n_features` feature columns followed by the target column.
n_features = 1024

# Candidate regularisation strengths for RidgeCV; the same grid is used for
# both variants, so it is built once.
alphas = np.logspace(-6, 6, 10)

# Mean absolute error (MAE) per feature variant, in the order listed above
test_scores = []

for train_path, test_path in zip(train_files, test_files):
    # Load training and test data for the current feature variant
    train_data = np.load(train_path)
    test_data = np.load(test_path)

    # Split features and targets
    X_train, y_train = train_data[:, :n_features], train_data[:, n_features]
    X_test, y_test = test_data[:, :n_features], test_data[:, n_features]

    # Fit a ridge regression whose alpha is picked from `alphas` by RidgeCV
    model = RidgeCV(alphas=alphas)
    model.fit(X_train, y_train)

    # Predict on the test set and clamp predictions into [0, 1]
    # (presumably the valid range of the target -- TODO confirm).
    y_pred = model.predict(X_test).clip(0, 1)

    # Score with mean absolute error. The metric was previously labelled
    # "mean squared error" / `mse`, which did not match the function called.
    mae = mean_absolute_error(y_test, y_pred)
    test_scores.append(mae)

# NOTE(review): the two scores belong to different feature sets (fine-tuned
# vs raw), not to cross-validation folds, so this mean mixes two models and
# the "fold" wording in the printed labels is only kept to match earlier output.
average_test_score = np.mean(test_scores)

print("Test scores for each fold:", test_scores)
print("Average test score across all folds:", average_test_score)

Test scores for each fold: [0.4162707082087792, 0.3841616926505785]
Average test score across all folds: 0.40021620042967887


In [5]:
# Inspect the dimensions of whichever training array was loaded last in this
# kernel session. The evaluation cells slice columns [:1024] as features and
# column 1024 as the target, so 1025 columns are expected.
train_data.shape

(11113, 1025)

In [2]:
import os
import datetime

# Specify the directory to scan
directory_path = "/home/cs-yang3/rds/rds-t2-cs177-KNoCjrg2atA/cs-yang3/code/KidSat/modelling/satmae/features"


# Collect (name, timestamp) pairs for every immediate sub-directory.
subfolders = []
with os.scandir(directory_path) as entries:
    for entry in entries:
        if not entry.is_dir():
            continue
        info = os.stat(entry.path)
        # On Unix, st_ctime (what os.path.getctime returns) is the time of the
        # last inode/metadata change, not the creation time. Prefer the true
        # birth time where the platform exposes it and fall back to st_ctime
        # otherwise, so on Linux the value is the same as before.
        creation_timestamp = getattr(info, "st_birthtime", info.st_ctime)
        subfolders.append((entry.name, creation_timestamp))

# Sort the subfolders by timestamp (ascending order: oldest first)
subfolders.sort(key=lambda item: item[1])

# Print out each subfolder's name and formatted date (local time).
# NOTE: on platforms without st_birthtime the "Created on" label is only an
# approximation, because the value shown is the metadata-change time.
for name, creation_timestamp in subfolders:
    creation_date = datetime.datetime.fromtimestamp(creation_timestamp)
    formatted_date = creation_date.strftime('%Y-%m-%d %H:%M:%S')
    print(f"Subfolder: {name}, Created on: {formatted_date}")

Subfolder: gc, Created on: 2024-11-11 09:47:05
Subfolder: e3d99309-6, Created on: 2025-01-06 11:14:47
Subfolder: 3af9468e-2, Created on: 2025-01-06 11:17:41
Subfolder: 165499c6-6, Created on: 2025-01-06 11:23:23
Subfolder: 47a635ea-5, Created on: 2025-01-06 11:30:00
Subfolder: ceb94742-a, Created on: 2025-01-06 11:30:39
Subfolder: 6515d153-0, Created on: 2025-01-06 11:32:05
Subfolder: f2378939-e, Created on: 2025-01-06 11:32:52
Subfolder: a61c2fc1-6, Created on: 2025-01-06 11:34:05
Subfolder: 3c769df6-a, Created on: 2025-01-06 11:38:21
Subfolder: dd6fbedc-d, Created on: 2025-01-06 12:48:13
Subfolder: 3cd529bf-3, Created on: 2025-01-06 14:33:55
Subfolder: a814a074-f, Created on: 2025-02-01 02:23:18
Subfolder: 0d29294f-6, Created on: 2025-02-01 02:33:07
Subfolder: 5c719ae5-1, Created on: 2025-02-01 02:36:19
Subfolder: 8dd9acb6-3, Created on: 2025-02-01 02:37:25
Subfolder: 29395816-b, Created on: 2025-02-02 08:09:26
Subfolder: 4b761580-4, Created on: 2025-02-02 08:18:50


In [3]:
# Display the RidgeCV candidate alpha grid currently bound in the kernel.
# NOTE(review): which cell assigned it depends on execution order; the
# evaluation cells use grids of 15 or 10 points from np.logspace(-6, 6, ...).
alphas

array([1.00000000e-06, 2.15443469e-05, 4.64158883e-04, 1.00000000e-02,
       2.15443469e-01, 4.64158883e+00, 1.00000000e+02, 2.15443469e+03,
       4.64158883e+04, 1.00000000e+06])