In [1]:
import os
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, KFold
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
os.chdir('../')

In [3]:
selected_data = pd.read_csv('data/processed/data.csv')

In [4]:
X, y = selected_data.drop('Ilg', axis=1), selected_data['Ilg']

In [5]:
reg = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('estimator', SVR())
])

In [6]:
params = {
    'scaler': [StandardScaler(), MinMaxScaler()],
    'estimator__C': [0.1, 1.0, 10.0, 100.0],
    'estimator__epsilon': [0.1, 0.2, 0.5],
    'estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'estimator__degree': [2, 3, 4],
    'estimator__gamma': ['scale', 'auto'],
    'estimator__shrinking': [True, False],
    'estimator__tol': [1e-4, 1e-3, 1e-2]
}

In [7]:
with mlflow.start_run():
    
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    grid = GridSearchCV(estimator=reg, param_grid=params, scoring='neg_mean_squared_error', cv=kf)
    grid.fit(X, y)

    mlflow.log_param('model_type', 'SVR')
    for param, value in grid.best_params_.items():
        mlflow.log_param(param, value)
    
    mlflow.log_metric("best_score", np.sqrt(-grid.best_score_))
    mlflow.sklearn.log_model(grid.best_estimator_, 'SVR')

    print('Best score: ', np.sqrt(-grid.best_score_))
    print('Best params: ', grid.best_params_)



Best score:  938.1976115464191
Best params:  {'estimator__C': 100.0, 'estimator__degree': 2, 'estimator__epsilon': 0.1, 'estimator__gamma': 'scale', 'estimator__kernel': 'rbf', 'estimator__shrinking': True, 'estimator__tol': 0.0001, 'scaler': StandardScaler()}
