In [None]:
import csv
import time  # Just to compare fit times
from pathlib import Path
from pprint import pprint

In [None]:
import numpy as np
import ray
import xgboost as xgb
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tune_sklearn import TuneSearchCV
from xgboost import XGBRegressor

In [None]:
# Load the memory_profiler IPython extension for this kernel session
# (it is what provides the %memit / %%memit magics).
%load_ext memory_profiler

In [None]:
# Start a Ray head node from the shell, listening on port 6379 and advertising 20 CPUs.
# NOTE(review): the binary path assumes a per-user install under ~/.local/bin — confirm on the target machine.
!~/.local/bin/ray start --head --port=6379 --num-cpus=20

In [None]:
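# Uncomment and run the line below to stop the Ray processes launched by `ray start` once you are finished:
#!~/.local/bin/ray stop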

In [None]:
# Connect this kernel to an already-running Ray cluster (address='auto').
# NOTE(review): the Redis password is hardcoded. It looks like Ray's stock default rather than
# a real secret — confirm, and read it from the environment if a custom password is ever used.
ray.init(address='auto', _redis_password='5241590000000000')

In [None]:
# Disconnect this kernel from Ray.
# NOTE(review): on a top-to-bottom run this executes right after ray.init() and before the
# tuning cell, so the connection made above is dropped before it is used — presumably this
# cell is meant to be run manually at the end of the session; confirm.
ray.shutdown()

In [None]:
# Load the dataset from CSV, drop unusable rows, and build the train/test split.
# NOTE(review): `root` is not defined in the cells shown here — it must already exist in
# the kernel when this cell runs; confirm where it is set.
NODATA_VALUE = -9999.0  # presumably a "no data" sentinel used in the CSV — confirm with the data source
SPLIT_SEED = 42  # fixed seed so the train/test partition is identical on every run

train = np.genfromtxt(f"{root}/input_dataset.csv", delimiter=',').astype(np.float32)

# Discard every row that contains a NaN or the sentinel value in any column.
unusable_rows = np.isnan(train).any(axis=1) | (train == NODATA_VALUE).any(axis=1)
train = train[~unusable_rows]

# Column 0 is the regression target; all remaining columns are features.
X = train[:, 1:]
y = train[:, 0]

# Hold out 33% of the rows for evaluation. Seeding the split keeps the reported
# metrics comparable between runs instead of changing with every re-execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=SPLIT_SEED
)

In [None]:
# XGBoost hyperparameter search with tune-sklearn.
#
# Search space. The candidates are restricted to values that make sense for a
# squared-error regressor:
#   * eval_metric: rmse / mae are regression metrics (logloss, error and auc are
#     classification metrics and do not describe a reg:squarederror model).
#   * learning_rate: XGBoost documents eta as lying in [0, 1]; values such as 10
#     make boosting diverge, so only conventional step sizes are searched.
parameters = {
    "eval_metric": ["rmse", "mae"],
    "n_estimators": [2, 5, 10],
    "max_depth": [5, 10],
    "learning_rate": [0.01, 0.1, 0.3],
}

xgb_tune_search = TuneSearchCV(
    XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, alpha=10),
    parameters,

    search_optimization="bayesian",
    max_iters=10,
    # NOTE(review): the `ray start` cell in this notebook requests 20 CPUs, so 90
    # parallel jobs would oversubscribe that cluster — confirm the intended value.
    n_jobs=90,
    n_trials=100,
    early_stopping=True,

    verbose=1,
    return_train_score=True,
    loggers=["csv"],
)

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
xgb_tune_search.fit(X_train, y_train)
end = time.perf_counter()
print("Tune Fit Time:", end - start)

# Predict on the held-out split with the fitted search object; `y_pred` is
# consumed by the evaluation cell further down.
y_pred = xgb_tune_search.predict(X_test)


In [None]:
# Display the best estimator exposed by the fitted search object.
xgb_tune_search.best_estimator_

In [None]:
def regression_results(y_true, y_pred):
    """Summarise how well a set of regression predictions matches the targets.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    dict
        Metric name mapped to its value rounded to 4 decimal places, with the
        keys ``'r2'``, ``'MAE'``, ``'MSE'``, ``'RMSE'`` and
        ``'explained_variance'`` (in that order).
    """
    # MSE is computed once and reused for RMSE.
    mse = metrics.mean_squared_error(y_true, y_pred)

    # Only metrics that are actually reported are computed; the median absolute
    # error is not part of the summary, so it is not evaluated here.
    scores = {
        'r2': metrics.r2_score(y_true, y_pred),
        'MAE': metrics.mean_absolute_error(y_true, y_pred),
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        "explained_variance": metrics.explained_variance_score(y_true, y_pred),
    }
    return {name: round(value, 4) for name, value in scores.items()}

In [None]:
# Score the hold-out predictions (y_pred) against the held-out targets (y_test).
regression_results(y_test, y_pred)