In [None]:
import os
import argparse
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    PolynomialFeatures,
    StandardScaler,
)
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.base import BaseEstimator, TransformerMixin

from time_templates.utilities.plot import plot_hist, plot_profile_1d
from time_templates.datareader.get_data import fetch_MC_data_from_tree

from time_templates.templates.universality.S1000_model import (
    S1000_comp_model,
    set_Rmu_df,
)
from time_templates.templates.universality.names import DICT_COMP_SIGNALKEY
from time_templates.templates.universality.rho_model import XLABELS, RHOPIPEFILE
# Bare expression: echoes the imported XLABELS as this cell's output so the
# feature-column names used for X further down can be inspected.
XLABELS

In [None]:
# Load the pickled dataset. The path is relative, so it is resolved against
# the kernel's current working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load files from
# a trusted source. The object is expected to be a DataFrame, since later
# cells index it by column labels.
df = pd.read_pickle('df_rho.pl')

In [None]:
class CustomTransformer(BaseEstimator, TransformerMixin):
    """Pass-through transformer skeleton usable as an sklearn pipeline step.

    No custom feature engineering is implemented yet: ``fit`` learns nothing
    and ``transform`` hands back an unmodified copy of its input.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Stateless fit: nothing is learned from ``X``; returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X``.

        Parameters
        ----------
        X : object with a ``.copy()`` method (e.g. ndarray or DataFrame)
            Input features.
        y : ignored
            Accepted only for signature compatibility.

        Returns
        -------
        A copy of ``X``, so later in-place edits never touch the caller's data.
        """
        X_ = X.copy()
        # The copy must be returned explicitly: without this the method
        # implicitly returns None, which breaks fit_transform and any
        # downstream pipeline step.
        return X_

In [None]:
# Feature columns come from XLABELS; the target column is the one registered
# under the 'muon' key of DICT_COMP_SIGNALKEY.
Xlabels = XLABELS
ylabel = DICT_COMP_SIGNALKEY['muon']

X = df[Xlabels].values
y = df[ylabel].values

# Fixed seed for the shuffle so that the train/test partition -- and hence the
# fitted model and any score computed on the held-out set -- is the same on
# every Restart & Run All.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Base regressor. Its alpha is only a placeholder: the grid search below
# overrides it through "regr__regressor__alpha".
regr = Ridge(alpha=1)

# Standardise features, expand them polynomially, then fit the ridge model on
# log1p(y); predictions are mapped back to the original scale with expm1.
# The polynomial degree given here is likewise overridden by "poly__degree".
pipe = Pipeline(
    [
        ("scale", StandardScaler()),
        ("poly", PolynomialFeatures(4, include_bias=False)),
        (
            "regr",
            TransformedTargetRegressor(regr, func=np.log1p, inverse_func=np.expm1),
        ),
    ]
)

# Hyper-parameter candidates (currently a single point per parameter).
param_grid = {
    "poly__degree": [3],
    "regr__regressor__alpha": [0.01],
}

# 3-fold cross-validated search on the training split only; the test split
# stays untouched for later evaluation.
grid = GridSearchCV(pipe, param_grid, n_jobs=3, cv=3, verbose=10)
grid.fit(X_train, y_train)
print("Best parameters via grid search", grid.best_params_)

