# 1. Training the Model 

In [10]:
import time
import pandas as pd
import scipy as sci
from typing import Tuple, Dict, Any
from sklearn import svm
from sklearn.preprocessing._data import StandardScaler

def train_svr_regressor(
        X_train: pd.DataFrame,
        tp_x_train: pd.DataFrame,
        tp_y_train: pd.DataFrame,
        kernel: str,
        gamma: str,
    ) -> Tuple[svm._classes.SVR,Dict]:

    # Initializing hyperparameters for tp_x
    tp_x_response_scale = sci.stats.iqr(tp_x_train)
    tp_x_box_constraint = tp_x_response_scale/1.349
    tp_x_epsilon = 2*tp_x_response_scale/13.49
    
    # Initializing hyperparameters for tp_y
    tp_y_response_scale = sci.stats.iqr(tp_y_train)
    tp_y_box_constraint = tp_y_response_scale/1.349
    tp_y_epsilon = 2*tp_y_response_scale/13.49

    # Initializing the model for tp_x
    tp_x_model = svm.SVR(
        kernel = kernel,
        C = tp_x_box_constraint,
        gamma = gamma,
        epsilon = tp_x_epsilon
    )

    # Initializing the model for tp_y
    tp_y_model = svm.SVR(
        kernel = kernel,
        C = tp_y_box_constraint,
        gamma = gamma,
        epsilon = tp_y_epsilon
    )

    # Training the tp_x model
    time_start = time.time()
    tp_x_model.fit(X_train.drop('partition_key', axis=1), tp_x_train)
    time_end = time.time()

    # Elapsed time in seconds for tp_x
    tp_x_elapsed_time = time_end - time_start

    # Training the tp_y model
    time_start = time.time()
    tp_y_model.fit(X_train.drop('partition_key', axis=1), tp_y_train)
    time_end = time.time()

    # Elapsed time in seconds for tp_x
    tp_y_elapsed_time = time_end - time_start

    metrics = {
        "tp_x_training_elapsed_time": {
            "value": float(tp_x_elapsed_time),
             "step": 1
         },
        "tp_y_training_elapsed_time": {
            "value": float(tp_y_elapsed_time),
            "step": 1
        },
    }

    return tp_x_model, tp_y_model, metrics

In [6]:
X_train = catalog.load("regressor_x_train")
tp_x_train = catalog.load("regressor_tp_x_train")
tp_y_train = catalog.load("regressor_tp_y_train")
svr_kernel = catalog.load("params:regressor.svr.hyperp.kernel")
svr_gamma = catalog.load("params:regressor.svr.hyperp.gamma")

2021-02-10 20:28:00,580 - kedro.io.data_catalog - INFO - Loading data from `regressor_x_train` (CSVDataSet)...
2021-02-10 20:28:00,589 - kedro.io.data_catalog - INFO - Loading data from `regressor_tp_x_train` (CSVDataSet)...
2021-02-10 20:28:00,595 - kedro.io.data_catalog - INFO - Loading data from `regressor_tp_y_train` (CSVDataSet)...
2021-02-10 20:28:00,602 - kedro.io.data_catalog - INFO - Loading data from `params:regressor.svr.hyperp.kernel` (MemoryDataSet)...
2021-02-10 20:28:00,604 - kedro.io.data_catalog - INFO - Loading data from `params:regressor.svr.hyperp.gamma` (MemoryDataSet)...


In [13]:
tp_x_trained_model, tp_y_trained_model, metrics = train_svr_regressor(
    X_train = X_train,
    tp_x_train = tp_x_train,
    tp_y_train = tp_y_train,
    kernel = svr_kernel,
    gamma = svr_gamma,
)

  and should_run_async(code)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


# 2. Evaluate the model

In [19]:
from sklearn.metrics import mean_squared_error

def evaluate_svr_regressor(
        tp_x_model: svm._classes.SVR,
        tp_y_model: svm._classes.SVR,
        X_scaler: StandardScaler,
        tp_x_scaler: StandardScaler,
        tp_y_scaler: StandardScaler,
        X_test: pd.DataFrame,
        tp_x_test: pd.DataFrame,
        tp_y_test: pd.DataFrame,
    ) -> Dict[str, Dict[str, Any]]:

    # Removing partition key from the data frame
    X_test.drop('partition_key', axis=1, inplace=True)

    # Generate predictions for tp_x
    tp_x_pred = tp_x_model.predict(X_test)
    tp_x_scaled_rmse = mean_squared_error(
        y_true = tp_x_test.values.reshape(tp_x_test.values.shape[0],),
        y_pred = tp_x_pred
    )
    
    # Generate predictions for tp_y
    tp_y_pred = tp_y_model.predict(X_test)
    tp_y_scaled_rmse = mean_squared_error(
        y_true = tp_y_test.values.reshape(tp_y_test.values.shape[0],),
        y_pred = tp_y_pred
    )

    # Inverse transform the data with the scaler
    inversed_X_test_data = X_scaler.inverse_transform(X_test)
    inversed_tp_x_test = tp_x_scaler.inverse_transform(tp_x_test)
    inversed_tp_y_test = tp_y_scaler.inverse_transform(tp_y_test)
    inversed_tp_x_pred = tp_x_scaler.inverse_transform(tp_x_pred)
    inversed_tp_y_pred = tp_y_scaler.inverse_transform(tp_y_pred)

    tp_x_unscaled_rmse = mean_squared_error(
        y_true = inversed_tp_x_test,
        y_pred = inversed_tp_x_pred
    )
    
    tp_y_unscaled_rmse = mean_squared_error(
        y_true = inversed_tp_y_test,
        y_pred = inversed_tp_y_pred
    )

    
    return {
        "tp_x_scaled_rmse": {"value": float(tp_x_scaled_rmse), "step": 1},
        "tp_x_unscaled_rmse": {"value": float(tp_x_unscaled_rmse), "step": 1},
        "tp_y_scaled_rmse": {"value": float(tp_y_scaled_rmse), "step": 1},
        "tp_y_unscaled_rmse": {"value": float(tp_y_unscaled_rmse), "step": 1},
    }

  and should_run_async(code)


In [20]:
metrics = evaluate_svr_regressor(
    tp_x_model = tp_x_trained_model, 
    tp_y_model = tp_y_trained_model,
    X_scaler = catalog.load("regressor_x_scaler"),
    tp_x_scaler = catalog.load("regressor_tp_x_scaler"),
    tp_y_scaler = catalog.load("regressor_tp_y_scaler"),
    X_test = catalog.load("regressor_x_test"),
    tp_x_test = catalog.load("regressor_tp_x_test"),
    tp_y_test = catalog.load("regressor_tp_y_test"),
)
metrics

2021-02-10 20:40:31,180 - kedro.io.data_catalog - INFO - Loading data from `regressor_x_scaler` (MlflowMlflowModelSaverDataSet)...
2021-02-10 20:40:31,187 - kedro.io.data_catalog - INFO - Loading data from `regressor_tp_x_scaler` (MlflowMlflowModelSaverDataSet)...
2021-02-10 20:40:31,196 - kedro.io.data_catalog - INFO - Loading data from `regressor_tp_y_scaler` (MlflowMlflowModelSaverDataSet)...
2021-02-10 20:40:31,209 - kedro.io.data_catalog - INFO - Loading data from `regressor_x_test` (CSVDataSet)...
2021-02-10 20:40:31,224 - kedro.io.data_catalog - INFO - Loading data from `regressor_tp_x_test` (CSVDataSet)...
2021-02-10 20:40:31,234 - kedro.io.data_catalog - INFO - Loading data from `regressor_tp_y_test` (CSVDataSet)...


{'tp_x_scaled_rmse': {'value': 0.9225904691474833, 'step': 1},
 'tp_x_unscaled_rmse': {'value': 31.956953281033293, 'step': 1},
 'tp_y_scaled_rmse': {'value': 1.1870944350459145, 'step': 1},
 'tp_y_unscaled_rmse': {'value': 30.430529492396364, 'step': 1}}