In [None]:
from exp.features import create_train_features
from exp.run import run_experiment
from exp.mappings import alg_map
from exp.train import train_model
import pandas as pd
import json

### Create Training Features

In [None]:
# Path of the training CSV handed to create_train_features.
# NOTE(review): this is an absolute path on one specific machine; point it at
# your own copy of train.csv before running the notebook.
train_csv_path = r'C:\Users\arvin\dev\lanl\train.csv'

# Build the feature matrix and target that every later cell reuses.
X_tr, y_tr = create_train_features(train_csv_path)

### Example hyper-parameter experiments for different algorithms

In [None]:
# Hyper-parameter search space, keyed by the algorithm short name that is
# passed as `alg` to run_experiment. Each inner dict maps a constructor keyword
# to the list of candidate values for that keyword.
#
# Some candidate lists repeat a value (e.g. "best", None, 100); the repeats are
# kept exactly as they were — presumably they weight the random search towards
# those values (TODO confirm against run_experiment).
#
# The inner keys are expanded into the estimator constructor as keyword
# arguments, so they must match the keyword names exactly: the former
# "learning_rate " keys (with a trailing space) were fixed for that reason.
params = {
    "lr": {
        "fit_intercept": [False, True],
        "normalize": [False, True],
    },
    "ridge": {
        "alpha": [.000001, .00001, .0001, .001, .01, .1],
        "fit_intercept": [False, True],
        "normalize": [False, True],
    },
    "lasso": {
        "alpha": [.000001, .00001, .0001, .001, .01, .1],
        "fit_intercept": [False, True],
        "normalize": [False, True],
    },
    "elastic": {
        "alpha": [.000001, .00001, .0001, .001, .01, .1],
        "fit_intercept": [False, True],
        "normalize": [False, True],
        "l1_ratio": [.01, .99, .2, .4, .6, .8],
    },
    "dtreg": {
        "criterion": ["mse", "friedman_mse", "mae"],
        "splitter": ["best", "best", "random"],
        "max_depth": [None, None, None, 5, 10, 20, 50],
    },
    "rfreg": {
        "n_estimators": [5, 10, 50, 100, 200, 100],
        "criterion": ["mse", "friedman_mse", "mae"],
        # NOTE(review): if "rfreg" maps to scikit-learn's RandomForestRegressor,
        # "splitter" is not one of its constructor keywords — confirm against
        # alg_map and drop this entry if so.
        "splitter": ["best", "best", "random"],
        "max_depth": [None, None, None, 5, 10, 20, 50],
    },
    "abreg": {
        "n_estimators": [5, 10, 50, 100, 200, 100],
        "learning_rate": [1, .9, .5, .1],
        "loss": ["linear", "square", "exponential"],
    },
    "gbreg": {
        "n_estimators": [5, 10, 50, 100, 200, 100],
        "learning_rate": [1, .9, .5, .1],
        "loss": ["ls", "lad", "huber", "quantile"],
        "subsample": [.1, .2, .5, 1.0],
    },
}

### Run Experiment (for the `lr` linear-regression algorithm)

In [None]:
# Settings shared with the later cells: `n_fold` and `save_results` are reused
# by the next experiment and by the cell that reads the CSV back.
alg = "lr"
n_fold = 10
save_results = "results.csv"

# Run a random search with two searches over the grid registered for `alg`.
score_df = run_experiment(
    X=X_tr,
    Y=y_tr,
    n_fold=n_fold,
    alg=alg,
    alg_params=params[alg],
    search_type="random",
    num_searches=2,
    save_results=save_results,
)

In [None]:
# Render the value returned by the preceding run_experiment call.
display(score_df)

### Run Experiment (for Elastic algorithm) and append to CSV results

In [None]:
# Switch to the "elastic" grid; n_fold and save_results keep the values set in
# the first experiment cell.
alg = "elastic"

# The results file path is passed as `score_df` as well as `save_results` —
# presumably so run_experiment loads the earlier rows and appends to them
# (TODO confirm against exp.run).
score_df = run_experiment(
    score_df=save_results,
    X=X_tr,
    Y=y_tr,
    n_fold=n_fold,
    alg=alg,
    alg_params=params[alg],
    search_type="random",
    num_searches=2,
    save_results=save_results,
)

In [None]:
# Render the value returned by the preceding run_experiment call.
display(score_df)

### Display models ranked by CV scores

In [None]:
# Reorder the rows by the "score" column. sort_values sorts ascending unless
# told otherwise, so the row with the smallest score ends up first.
score_df = score_df.sort_values(by="score", axis=0)
display(score_df)

### Load results from CSV file and reproduce models ranked by CV scores

In [None]:
# Read the saved results file and order its rows by the "score" column in one
# chained expression (ascending, the sort_values default), then show the table.
score_df = (
    pd.read_csv(save_results)
    .sort_values(by="score", axis=0)
)
display(score_df)

### Rebuild and train the best model from the CSV results

In [None]:
# retrieve top scoring row: the first row of score_df as it is currently ordered
best = score_df.iloc[0]
display(best)

# retrieve model parameters from pandas row
alg = best["alg"]
params_json = best["params_json"]
print("alg: {}".format(alg))
print("params_json: {}".format(params_json))

# retrieve relevant values
alg_cls = alg_map[alg]
# Decode into its own name. Assigning to `params` here would overwrite the
# hyper-parameter grid of the same name, so re-running an experiment cell
# afterwards (which indexes `params[alg]`) would no longer see the grid.
best_params = json.loads(params_json)

# initialize model with the decoded keyword arguments
model = alg_cls(**best_params)

# train algorithm
train_model(X=X_tr, Y=y_tr, n_fold=n_fold, model=model)