In [1]:
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

# Third-party numerical / ML stack
import numpy as np
import jax
import aeon
import torch
import torch.nn as nn
# NOTE(review): the conventional `F` alias is `torch.nn.functional`; `torch.functional`
# is a different module. `F` is not used in the visible cells -- confirm which one is intended.
import torch.functional as F

# Project-local helpers (found through the sys.path entries appended above)
from utils.utils import print_name, print_shape
from rocket import Rocket, RocketFeatures
from ridge_loocv import fit_ridge_LOOCV

jax.config.update('jax_platform_name', 'cpu') # Pin the JAX platform to CPU (other values select gpu, etc.)
np.set_printoptions(precision=3, threshold=5) # Print 3 decimals; summarize arrays longer than 5 elements with "..."

# Create my own here

In [3]:
import plotly.graph_objects as go
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV

def _make_scatter3d_figure(features, targets, title):
    """Build a 3D scatter plot of the first two feature columns against the target.

    Args:
        features: 2D array; only columns 0 and 1 are plotted (x and y axes).
        targets: 1D array of regression targets, used for both the z axis
            and the marker colour.
        title (str): Figure title.

    Returns:
        The configured plotly figure (not yet shown).
    """
    fig = go.Figure(data=go.Scatter3d(
        x=features[:, 0],
        y=features[:, 1],
        z=targets,
        mode='markers',
        marker=dict(
            size=5,
            color=targets,
            colorscale='Viridis',
            opacity=0.8
        )
    ))

    # Same axis labels for every figure so train and test plots are comparable
    fig.update_layout(
        scene=dict(
            xaxis_title='Feature 1',
            yaxis_title='Feature 2',
            zaxis_title='y',
        ),
        title=title
    )
    return fig


# Generate noisy regression data with 200 features (only the first two are plotted).
# The generator is seeded, like the split below, so the cell is reproducible.
X, y = make_regression(n_samples=500, n_features=200, noise=30, random_state=42)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One figure per split, built by the shared helper instead of duplicated code
fig_train = _make_scatter3d_figure(X_train, y_train, 'Train Data')
fig_test = _make_scatter3d_figure(X_test, y_test, 'Test Data')

# Show the train and test plots
fig_train.show()
fig_test.show()


In [4]:
# Generate lightly-noised regression data with 200 features.
# The generator is seeded, like the split below, so reruns produce the same data.
X, y = make_regression(n_samples=4000, n_features=200, noise=2, random_state=42)

# Repeat the feature columns n_tile times along axis 1 (n_tile=1 leaves X unchanged)
n_tile=1
X = np.tile(X, (1, n_tile))

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Baseline: let scikit-learn's RidgeCV choose the penalty from a log-spaced grid
from sklearn.linear_model import RidgeCV
alphas = np.logspace(-5, 3, 10)

# Fit on the training split, then score on the held-out split
model = RidgeCV(alphas=alphas).fit(X_train, y_train)
y_pred = model.predict(X_test)
error = np.square(y_pred - y_test).mean()  # test-set mean squared error

# Print every reported quantity followed by its label
for value, label in (
    (model.alpha_, "model.alpha_"),
    (model.coef_, "model.coef_"),
    (model.intercept_, "model.intercept_"),
    (y_pred, "y_pred"),
    (error, "error"),
):
    print(value, label)

0.03593813663804626 model.alpha_
[-0.005 -0.041  0.04  ... -0.051  0.023 -0.043] model.coef_
-0.00041279637934277247 model.intercept_
[ -78.471   98.531 -255.697 ... -111.061 -288.202   14.412] y_pred
4.2752924849954335 error


In [11]:
# Repeat the ridge experiment with the project's fit_ridge_LOOCV on torch tensors
device = "cpu"
X = torch.tensor(X_train, device=device)
y = torch.tensor(y_train, device=device)
t_X_test = torch.tensor(X_test, device=device)
t_y_test = torch.tensor(y_test, device=device)

# The solver returns the coefficients, the intercept and the selected penalty
beta, intercept, alpha = fit_ridge_LOOCV(X, y, alphas=alphas)
y_pred = t_X_test @ beta + intercept
error = (t_y_test - y_pred).pow(2).mean().item()  # test-set mean squared error

# Print every reported quantity followed by its label
for value, label in (
    (alpha, "alpha"),
    (beta, "beta"),
    (intercept, "intercept"),
    (y_pred, "y_pred"),
    (error, "error"),
):
    print(value, label)

0.03593813663804626 alpha
tensor([-4.7743e-03, -4.0564e-02,  4.0495e-02,  7.9469e-02, -4.6906e-02,
         4.1752e-02,  1.9720e-02, -3.7174e-02,  7.5400e-02,  1.3501e-02,
        -5.8184e-02,  4.8154e-02,  1.6854e-02, -6.8361e-03,  3.4172e-02,
        -7.5490e-03, -4.6435e-02, -1.1788e-02, -2.6351e-02, -4.0333e-02,
         3.1305e-02, -2.3018e-02,  6.4497e-02, -1.2011e-02, -2.4603e-02,
        -1.9675e-02,  3.6954e-02, -6.1324e-03, -5.4983e-03, -1.6872e-02,
        -1.7655e-02, -4.7965e-03,  6.5902e-03, -2.7913e-02, -6.7609e-04,
         6.1246e-02,  2.4102e-02, -8.9842e-02,  5.7571e-03,  5.9070e-02,
         8.2279e+01, -1.6058e-02, -2.7447e-02, -8.2188e-02, -6.8864e-02,
        -2.2585e-02,  4.6233e-02,  3.2761e-02, -2.6924e-02,  4.9093e-03,
        -6.8736e-03,  4.1350e-04,  2.1410e-02,  3.6814e-02, -1.3523e-02,
         2.6033e-03, -1.9418e-02, -6.4456e-03,  1.3410e-02, -2.6276e-02,
         4.1443e-02, -9.0871e-03,  5.2930e-03,  2.9523e-02, -6.3755e-03,
         6.4673e-02,  7.0

# Test on "Face Detection"

In [12]:
# Print the different datasets
from aeon.datasets.tsc_datasets import multivariate, univariate, univariate_equal_length
from aeon.datasets import load_classification

def _swap_last_two_axes(X):
    """Swap the last two axes of a collection of series (ndarray or list of 2D arrays)."""
    if isinstance(X, np.ndarray):
        return X.transpose(0, 2, 1)
    # Not a single array: treat it as a sequence of per-case 2D arrays
    # (aeon documents a list return for unequal-length collections).
    return [np.asarray(series).T for series in X]


def get_aeon_dataset(
        dataset_name:str,
        ) -> Tuple[
            Union[np.ndarray, List[np.ndarray]], np.ndarray,
            Union[np.ndarray, List[np.ndarray]], np.ndarray,
        ]:
    """Loads a dataset from the UCR/UEA archive using 
    the aeon library.

    The train and test splits are loaded separately and the last two axes
    of every series are swapped. Assuming aeon's documented
    (n_cases, n_channels, n_timepoints) layout, the result is
    (n_cases, n_timepoints, n_channels). Collections that aeon returns as a
    list of 2D arrays (unequal-length series) are transposed case by case
    instead of failing on the missing ``.transpose`` method.

    Args:
        dataset_name (str): Name of the dataset

    Returns:
        Tuple: 4-tuple of the form (X_train, y_train, X_test, y_test).
            Each X is an ndarray when the loader returned an ndarray,
            otherwise a list of 2D arrays. The labels are returned as loaded.
    """
    X_train, y_train = load_classification(dataset_name, split="train")
    X_test, y_test = load_classification(dataset_name, split="test")

    return _swap_last_two_axes(X_train), y_train, _swap_last_two_axes(X_test), y_test

In [14]:
# NOTE: the last recorded run of this cell failed with an HTTP 404 while downloading
# (website down? the dataset could not be found online either).
X_train, y_train, X_test, y_test = get_aeon_dataset("FaceDetection")

HTTPError: HTTP Error 404: 