# Code for running test

In [1]:
from load_datasets import get_aeon_dataset
from pathlib import Path
import os
import torch
import numpy as np

from tsml_eval.experiments import experiments, get_regressor_by_name, run_regression_experiment
from tsml_eval.evaluation.storage import load_regressor_results




def test_regressor(
        regressor, #= TSMLWrapperHydraBoost(),
        regressor_name = "HydraBoost",
    ):
    """Run one regression experiment on HouseholdPowerConsumption1 and print its test metrics.

    The dataset is loaded from ``<cwd>/../../Data/TSER``, the experiment is run
    through ``run_regression_experiment`` with results written below
    ``results/`` (relative to the current working directory), and the stored
    test-resample file is then loaded back and summarised on stdout.

    Parameters
    ----------
    regressor :
        Estimator instance handed to ``run_regression_experiment``.
    regressor_name : str, default="HydraBoost"
        Name passed to ``run_regression_experiment``; also the sub-directory of
        ``results/`` the predictions file is read back from.
    """
    # Data lives two levels above the working directory.
    working_dir = Path(os.getcwd())
    TSER_data_dir = working_dir.parent.parent / "Data" / "TSER"
    dataset_name = "HouseholdPowerConsumption1"
    resample_id = 0
    X_train, y_train, X_test, y_test = get_aeon_dataset(dataset_name, TSER_data_dir, "regression")

    #run regression experiment
    run_regression_experiment(
        X_train,
        y_train,
        X_test,
        y_test,
        regressor,
        regressor_name=regressor_name,
        results_path="results/",
        dataset_name=dataset_name,
        resample_id=resample_id,
    )

    # Read the results back from the same place they were written: "results/"
    # is relative to the working directory, so resolve against it rather than
    # against a hard-coded repository folder name.
    results_file = (
        working_dir
        / "results"
        / regressor_name
        / "Predictions"
        / dataset_name
        / f"testResample{resample_id}.csv"
    )
    rr = load_regressor_results(results_file)
    print(rr.predictions)
    print(rr.mean_squared_error, "mse")
    print(rr.root_mean_squared_error, "rmse")
    print(rr.mean_absolute_percentage_error, "mape")
    print(rr.r2_score, "r2")
    print(rr.fit_time, "fit time")




# Simple Wrapper no gridsearch

In [2]:
import torch
import pandas as pd
import numpy as np

from sklearn.base import ClassifierMixin, RegressorMixin
from tsml.base import BaseTimeSeriesEstimator

from models.random_feature_representation_boosting import HydraBoost


class TSMLWrapperHydraBoost(RegressorMixin, BaseTimeSeriesEstimator):
    """tsml-style regressor around a ``HydraBoost`` model with a fixed default configuration.

    ``fit`` standardises inputs and targets with scalar statistics taken over
    the whole training tensor; ``predict`` applies the same input scaling and
    maps the model output back to the original target scale.

    Parameters
    ----------
    **kwargs
        Extra keyword arguments forwarded to ``HydraBoost``. A key that matches
        one of the built-in defaults below overrides that default.
    """
    
    def __init__(self, **kwargs):
        super(TSMLWrapperHydraBoost, self).__init__()
        hydra_params = dict(
            n_layers=1,
            init_n_kernels=8,
            init_n_groups=64,
            n_kernels=8,
            n_groups=64,
            max_num_channels=3,
            hydra_batch_size=10000,
            l2_reg=10,
            l2_ghat=0.1,
            boost_lr=1,
            train_top_at = [0, 5, 10],
        )
        # Merge instead of passing both sets of keywords side by side, so that a
        # caller-supplied value replaces a default rather than raising
        # "got multiple values for keyword argument".
        hydra_params.update(kwargs)
        self.hydraboost = HydraBoost(**hydra_params)
        

    def fit(self, X: np.ndarray, y: np.ndarray) -> object:
        """Fit the estimator to training data.

        Parameters
        ----------
        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
            The training data.
        y : 1D np.ndarray of shape (n_instances)
            The target labels for fitting, indices correspond to instance indices in X

        Returns
        -------
        self :
            Reference to self.
        """
        X = torch.from_numpy(X).float()
        y = torch.from_numpy(y).float()
        y = y.unsqueeze(1)  # column vector (n_instances, 1)
        # Scalar statistics over all elements; reused unchanged in predict.
        self.X_mean = X.mean()
        self.X_std = X.std()
        self.y_mean = y.mean()
        self.y_std = y.std()
        X = (X - self.X_mean) / self.X_std
        y = (y - self.y_mean) / self.y_std
        self.hydraboost.fit(X, y)
        return self


    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predicts target values for sequences in X.

        Parameters
        ----------
        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
            The data to predict on.

        Returns
        -------
        y : 1D np.ndarray of shape (n_instances)
            Predicted target values on the original (un-standardised) scale.
        """
        X = torch.from_numpy(X).float()
        X = (X - self.X_mean) / self.X_std
        pred = self.hydraboost(X)
        pred = pred * self.y_std + self.y_mean
        # reshape(-1) rather than squeeze(): squeeze() collapses a single
        # prediction to a 0-d array, whereas callers expect shape (n_instances,).
        # NOTE(review): assumes one output value per instance, as implied by the
        # (n_instances, 1) target used in fit -- confirm for HydraBoost.
        return pred.detach().cpu().reshape(-1).numpy()
        
        

    def _more_tags(self) -> dict:
        return {
            "X_types": ["3darray"],
            "equal_length_only": True,
            "allow_nan": False,
        }


In [3]:
# test_regressor(
#     regressor = TSMLWrapperHydraBoost(),
#     regressor_name = "HydraBoost",
# )

# Gridsearch Wrapper

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import KFold, ShuffleSplit
from sklearn.metrics import roc_auc_score
from typing import Tuple, List, Union, Any, Optional, Dict, Literal, Callable

import numpy as np
import torch
import torch.nn as nn


class SKLearnWrapper(BaseEstimator, RegressorMixin):
    """Minimal scikit-learn adapter that builds and fits ``modelClass(**model_params)``.

    Parameters
    ----------
    modelClass : callable, default=None
        Model class (or factory); called with ``model_params`` in ``fit``.
    seed : int or None, default=None
        If not None, seeds numpy and torch (CPU and CUDA) at the start of
        ``fit``. ``seed`` is a wrapper-level setting and is never forwarded to
        ``modelClass``.
    **model_params
        Keyword arguments forwarded to ``modelClass``.
    """

    def __init__(self, modelClass=None, seed=None, **model_params,):
        self.modelClass = modelClass
        # Explicit constructor parameter so that get_params / set_params /
        # clone round-trip it; previously only set_params knew about it, so a
        # cloned estimator silently lost its seed.
        self.seed = seed
        self.model_params = model_params
        self.model = None
        
        
    def set_params(self, **params):
        """Update wrapper settings; unknown keys are treated as model parameters."""
        self.modelClass = params.pop('modelClass', self.modelClass)
        self.seed = params.pop('seed', self.seed)
        self.model_params.update(params)
        return self


    def get_params(self, deep=True):
        """Return ``modelClass``, ``seed`` and all model parameters in one flat dict.

        ``deep`` is accepted for scikit-learn compatibility and ignored.
        """
        params = {'modelClass': self.modelClass, 'seed': self.seed}
        params.update(self.model_params)
        return params
    
    
    def fit(self, X, y):
        """Seed the RNGs (if a seed is set), build a fresh model and fit it on ``(X, y)``."""
        if self.seed is not None:
            np.random.seed(self.seed)
            torch.manual_seed(self.seed)
            torch.cuda.manual_seed(self.seed)
        self.model = self.modelClass(**self.model_params)
        self.model.fit(X, y)
        # Regression only: no `classes_` bookkeeping is done here.
        return self


    def predict(self, X):
        """Return the fitted model's output for ``X`` with size-1 dimensions squeezed out.

        The result is whatever ``self.model(X).squeeze()`` yields; it is NOT
        converted to numpy here, since ``TSMLGridSearchWrapper.predict``
        post-processes it before conversion.
        """
        return self.model(X).squeeze()#.detach().cpu().squeeze().numpy()
        # #binary classification
        # if len(self.classes_) == 2:
        #     proba_1 = torch.sigmoid(self.model(X))
        #     return (proba_1 > 0.5).detach().cpu().numpy()
        # else:
        #     #multiclass
        #     return torch.argmax(self.model(X), dim=1).detach().cpu().numpy()
    
    # def predict_proba(self, X):
    #     #binary classification
    #     if len(self.classes_) == 2:
    #         proba_1 = torch.nn.functional.sigmoid(self.model(X))
    #         return torch.cat((1 - proba_1, proba_1), dim=1).detach().cpu().numpy()
    #     else:
    #         #multiclass
    #         logits = self.model(X)
    #         proba = torch.nn.functional.softmax(logits, dim=1)
    #         return proba.detach().cpu().numpy()
    
    # def decision_function(self, X):
    #     logits = self.model(X)
    #     return logits.detach().cpu().numpy()


    
    # def score(self, X, y):
    #     logits = self.model(X)
    #     if y.size(1) == 1:
    #         y_true = y.detach().cpu().numpy()
    #         y_score = logits.detach().cpu().numpy()
    #         auc = roc_auc_score(y_true, y_score)
    #         return auc
    #     else:
    #         pred = torch.argmax(logits, dim=1)
    #         y = torch.argmax(y, dim=1)
    #         acc = (pred == y).float().mean()
    #         return acc.detach().cpu().item()
    
    
    
class TSMLGridSearchWrapper(RegressorMixin, BaseTimeSeriesEstimator):
    """tsml-style regressor that tunes ``modelClass`` with ``GridSearchCV``.

    Inputs and targets are standardised with scalar statistics of the training
    tensors, a grid search over ``model_param_grid`` is run on either a k-fold
    or a single holdout split, and the refitted best estimator is used for
    prediction.

    Parameters
    ----------
    holdour_or_kfold : {"holdout", "kfold"}, default="kfold"
        Validation scheme. Any value other than ``"kfold"`` selects the single
        holdout split.
    kfolds : int, default=5
        Number of folds when using k-fold validation.
    holdout_percentage : float, default=0.2
        Fraction of the training data held out when using holdout validation.
    seed : int or None, default=None
        Random state of the splitter; also passed to the wrapped model wrapper
        as its ``seed``.
    modelClass : callable, default=None
        Model class handed to ``SKLearnWrapper``. Must be set before ``fit``.
    model_param_grid : dict of str -> list, default=None
        Grid of candidate values per model parameter. ``None`` means an empty grid.
    """
    
    def __init__(self,
                 holdour_or_kfold: Literal["holdout", "kfold"] = "kfold",
                 kfolds: Optional[int] = 5,
                 holdout_percentage: Optional[float] = 0.2,
                 seed: Optional[int] = None,
                 modelClass=None, 
                 model_param_grid: Optional[Dict[str, List[Any]]] = None
        ):
        self.holdour_or_kfold = holdour_or_kfold
        self.kfolds = kfolds
        self.holdout_percentage = holdout_percentage
        self.seed = seed
        self.modelClass = modelClass
        # A fresh dict per instance: a `{}` default in the signature would be one
        # object shared by every instance created without an explicit grid.
        self.model_param_grid = {} if model_param_grid is None else model_param_grid
        super(TSMLGridSearchWrapper, self).__init__()
        

    def fit(self, X: np.ndarray, y: np.ndarray) -> object:
        """Fit the estimator to training data, with gridsearch hyperparameter optimization
        on holdout or kfold cross-validation.

        Parameters
        ----------
        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
            The training data.
        y : 1D np.ndarray of shape (n_instances)
            The target labels for fitting, indices correspond to instance indices in X

        Returns
        -------
        self :
            Reference to self.

        Raises
        ------
        ValueError
            If ``modelClass`` was not provided.
        """
        if self.modelClass is None:
            # Fail early with a clear message instead of an obscure failure
            # from inside the grid search.
            raise ValueError("modelClass must be provided before calling fit")

        # TODO regression only
        X = torch.from_numpy(X).float()
        y = torch.from_numpy(y).float()
        y = y.unsqueeze(1)  # column vector (n_instances, 1)
        # Scalar statistics over all elements; reused unchanged in predict.
        self.X_mean = X.mean()
        self.X_std = X.std()
        self.y_mean = y.mean()
        self.y_std = y.std()
        X = (X - self.X_mean) / self.X_std
        y = (y - self.y_mean) / self.y_std
        
        # Configure cross validation
        if self.holdour_or_kfold == "kfold":
            cv = KFold(n_splits=self.kfolds, shuffle=True, random_state=self.seed)
        else:  # holdout
            cv = ShuffleSplit(n_splits=1, test_size=self.holdout_percentage, random_state=self.seed)
                
        # Perform grid search. The seed is added as a single-value grid axis so
        # that every candidate (and the refit) is seeded identically.
        param_grid = {**(self.model_param_grid or {}), "seed": [self.seed]}
        grid_search = GridSearchCV(
            estimator=SKLearnWrapper(modelClass=self.modelClass),
            param_grid=param_grid,
            cv=cv,
            scoring="neg_mean_squared_error", # TODO regression only???
        )
        grid_search.fit(X, y)

        # Store best model
        self.best_model = grid_search.best_estimator_
        self.best_params = grid_search.best_params_
        print("self.best_params", self.best_params)
        return self
        
        
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predicts target values for sequences in X.

        Parameters
        ----------
        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
            The data to predict on.

        Returns
        -------
        y : 1D np.ndarray of shape (n_instances)
            Predicted target values on the original (un-standardised) scale.
        """
        X = torch.from_numpy(X).float()
        X = (X - self.X_mean) / self.X_std
        pred = self.best_model.predict(X) #TODO regression only?
        pred = pred * self.y_std + self.y_mean
        # reshape(-1) rather than squeeze(): a single prediction must still come
        # back with shape (1,), not as a 0-d array.
        return pred.detach().cpu().reshape(-1).numpy()
        

    def _more_tags(self) -> dict:
        return {
            "X_types": ["3darray"],
            "equal_length_only": True,
            "allow_nan": False,
        }
        
        
    def get_params(self, deep=True):
        """Use for saving model configuration in tsml.

        Returns the seed plus the best grid-search parameters once fitted, and
        an empty dict before that. ``deep`` is accepted (and ignored) so that
        callers using the scikit-learn signature ``get_params(deep=...)`` do not
        fail with a TypeError.

        NOTE(review): this reports the tuned configuration, not the constructor
        arguments, so it does not follow the scikit-learn ``get_params``
        contract; ``sklearn.base.clone`` on this wrapper should not be relied on.
        """
        if hasattr(self, 'best_params'):
            return {
            "seed": self.seed,
            **self.best_params
            }
        else:
            return {}

In [None]:
# Grid-search run on a single holdout split. Every grid axis currently holds
# one candidate; the trailing "?" notes list values considered for a wider search.
test_regressor(
    regressor=TSMLGridSearchWrapper(
        holdour_or_kfold="holdout",
        seed=0,
        modelClass=HydraBoost,
        model_param_grid={
            "n_layers": [1],  # [0,1,3,6,10] ?
            "init_n_kernels": [8],
            "init_n_groups": [64],
            "n_kernels": [8],
            "n_groups": [64],
            "max_num_channels": [3],
            "hydra_batch_size": [10000],
            "l2_reg": [10],  # [0.01, 0.1, 1, 10] ?
            "l2_ghat": [0.1],  # [0.01, 0.1, 1, 10] ?
            "boost_lr": [1],
            "train_top_at": [[0, 1, 5, 10]],
        },
    ),
    regressor_name="HydraBoostGridSearch",
)
# 138.244700694982 rmse   ghat 0.01
# 138.11954862031027 rmse  ghat 0.1
# 132.26738702131547 rmse  ghat 0.1  l2reg 10
# 132.6139792366581 rmse   ghat 0.1  l2reg 10  n_layers 2
# 141.6371024323461 rmse   ghat 0.1  l2reg 100

# TODO: modify every virtualenv so that the best params are printed (variable "second" in experiments.run_regression_experiment).

# TODO: also rerun this cell and check that it works with the modified "second" output.



# 132.6139792366581 rmse    WITH RETRAIN TOP AT 1
# 0.11765984882729758 mape
# 0.9340512642088609 r2
# 73529.0 fit time

X tensor([[9.3420e-01, 1.4848e+00, 6.0643e-03,  ..., 1.1000e+01, 0.0000e+00,
         9.0000e+00],
        [1.7452e+00, 2.7932e+00, 4.5203e-01,  ..., 1.1300e+02, 1.6500e+02,
         1.8000e+02],
        [1.5180e+00, 2.5001e+00, 3.9788e-01,  ..., 1.2900e+02, 1.6900e+02,
         1.5300e+02],
        ...,
        [1.5690e+00, 2.7859e+00, 4.0276e-01,  ..., 1.1900e+02, 1.9600e+02,
         1.8500e+02],
        [1.4198e+00, 2.2397e+00, 1.9726e-01,  ..., 1.1800e+02, 2.0000e+02,
         1.7900e+02],
        [2.4941e+00, 3.6901e+00, 6.8267e-01,  ..., 1.1200e+02, 1.8400e+02,
         1.9200e+02]])
W Parameter containing:
tensor([[ 3.8904e-04,  2.1385e-05, -5.6805e-04,  ..., -9.5189e-05,
          6.2680e-04, -9.2276e-04]], requires_grad=True)
b tensor([[0.0030]])
self.X_mean tensor([[ 4.2483e-07, -2.5642e-07, -5.5004e-08,  ..., -1.7441e-07,
          3.2629e-02, -8.0486e-07]])
self.y_mean tensor([[-0.0030]])
self.y_std tensor([[1.0011]])
training W0
Phi0 shape torch.Size([596, 8192])
X tensor



# TSMLOptunaWrapper