### tunning ridge hyperparametr

In [154]:
from sklearn.model_selection import GridSearchCV, cross_val_predict, RepeatedKFold
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from pandas import read_csv
import numpy as np

# load the dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(url, header=None)
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
# define model
model = Ridge()
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# define grid
grid = dict()
grid['alpha'] = np.arange(0, 1, 0.01)
# define search
search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# perform the search
results = search.fit(X, y)
# summarize
print('MAE: %.3f' % results.best_score_)
print('Config: %s' % results.best_params_)

MAE: -3.379
Config: {'alpha': 0.51}


From data we can see that this is a regression problem.

In [96]:
X.shape, y.shape

((506, 13), (506,))

### implemented GridSearchCV

In [147]:
# ___Import Everything you need.
# ___For Example:

from sklearn import metrics
import copy
import itertools as it
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.model_selection import train_test_split

class MyGridSearchCV:
    
    def __init__(self, estimator, param_grid: dict, scoring=None, refit=True, cv=None):
        """
            estimator : model
            param_grid : list of parameters to search in the form of dict
            scoring : loss function
            cv : cross validation type
        """
        
        self.estimator = estimator
        self.estimator_type = type(estimator)
        self.param_grid = param_grid
        self.scoring = scoring
        self.refit = refit
        if cv is None:
            self.cv = KFold()
        else:
            self.cv = cv
            
            # TODO
            
        self.best_estimator_ = None
        self.best_score_ = None
        self.best_params_ = None

    def fit(self, X, y):
        
        X, y = np.array(X), np.array(y)
        
        # TODO
#         print(self.param_grid)
        
        for args in it.product(*self.param_grid.values()):
            params = { k : v for k, v in zip(self.param_grid.keys(), args)}
            self.estimator = self.estimator_type(**params)
            
            for train_ind, test_ind in self.cv.split(X):
                X_train, X_test = X[train_ind], X[test_ind]
                y_train, y_test = y[train_ind], y[test_ind]
                
                self.estimator.fit(X_train, y_train)
                
                y_pred = self.estimator.predict(X_test)
                score = self.scoring(y_test, y_pred)
                
                if self.best_score_ == None or self.best_score_ > score:
                    self.best_score_ = score
                    self.best_estimator_ = copy.deepcopy(self.estimator)
                    self.best_params_ = params
        
        return self

### testing implemented GridSearch

In [158]:
model = Ridge()

cv = KFold(n_splits=10, random_state=1, shuffle=True)

# define grid
param_grid = dict()
param_grid['alpha'] = np.arange(0.01, 1, 0.01)
# param_grid['max_iter'] = np.arange(100, 1000, 100)
# define search
search = MyGridSearchCV(model, param_grid, scoring=metrics.mean_absolute_error, cv=cv)
# perform the search
results = search.fit(X, y)
# summarize
print('MAE: %.3f' % results.best_score_)
print('Config: %s' % results.best_params_)

MAE: 2.274
Config: {'alpha': 0.99}


In [162]:
model = SVR()

cv = KFold(n_splits=10, random_state=1, shuffle=True)

# define grid
param_grid = dict()
param_grid['C'] = np.arange(0.01, 2, 0.1)
# param_grid['max_iter'] = np.arange(100, 1000, 100)
# define search
search = MyGridSearchCV(model, param_grid, scoring=metrics.mean_absolute_error, cv=cv)
# perform the search
results = search.fit(X, y)
# summarize
print('MAE: %.3f' % results.best_score_)
print('Config: %s' % results.best_params_)

MAE: 4.306
Config: {'C': 1.9100000000000001}
