In [None]:
from exp.features import create_train_features, create_test_features
from exp.run import run_experiment
from exp.mappings import alg_map
from exp.train import train_model
import pandas as pd
import json
import numpy as np
import os
import pickle

### Create Training Features

In [None]:
# Cache file names for the training artifacts (relative to the working directory).
X_save = "X_tr.csv"
y_save = "y_tr.csv"
X_save_scaled = "X_tr_scaled.csv"
scale_params_pickle = "scale_params.pickle"
other_params_json = "other.json"
# These are only populated on the branches that load or fit them; they stay
# None when the scaled features are read straight from the cache.
tr_scaler = None
classic_sta_lta5_mean_fill = None
classic_sta_lta7_mean_fill = None

if os.path.exists(X_save_scaled) and os.path.exists(y_save):
    # Fast path: the scaled features and the targets are already cached.
    X_train_scaled = pd.read_csv(X_save_scaled, index_col=0)
    y_tr = pd.read_csv(y_save, index_col=0)
elif (os.path.exists(X_save) and os.path.exists(y_save)
        and os.path.exists(scale_params_pickle)):
    # The unscaled features and the fitted scaler are cached: only redo the
    # scaling step. The scaler pickle is checked up front so that a missing
    # file falls through to full regeneration instead of raising.
    X_tr = pd.read_csv(X_save, index_col=0)
    y_tr = pd.read_csv(y_save, index_col=0)

    # NOTE: pickle.load can execute arbitrary code; only load a scaler file
    # that was written by this notebook.
    with open(scale_params_pickle, "rb") as scaler_file:
        tr_scaler = pickle.load(scaler_file)

    X_train_scaled = pd.DataFrame(tr_scaler.transform(X_tr), columns=X_tr.columns)
    X_train_scaled.to_csv(X_save_scaled)
else:
    # Nothing usable is cached: build the features from the raw training data
    # and persist every artifact so later runs can take one of the paths above.
    (X_tr, X_train_scaled, y_tr, tr_scaler,
     classic_sta_lta5_mean_fill, classic_sta_lta7_mean_fill) = create_train_features(
        r'C:\Users\arvin\dev\lanl\train.csv')
    X_tr.to_csv(X_save)
    y_tr.to_csv(y_save)
    X_train_scaled.to_csv(X_save_scaled)

    # `with` guarantees the file is closed even if pickling fails.
    with open(scale_params_pickle, "wb") as scaler_file:
        pickle.dump(tr_scaler, scaler_file)

    # The fill values are stored as JSON; the test-feature cell reads this
    # file back and forwards its entries as keyword arguments.
    with open(other_params_json, 'w') as fp:
        json.dump({"classic_sta_lta5_mean_fill": classic_sta_lta5_mean_fill,
                   "classic_sta_lta7_mean_fill": classic_sta_lta7_mean_fill}, fp)

### Create Test Features

In [None]:
# Cache file names for the test-set features (relative to the working directory).
X_test_save = "X_test.csv"
X_test_save_scaled = "X_test_scaled.csv"

if os.path.exists(X_test_save_scaled):
    # Scaled test features are already cached; just load them.
    X_test_scaled = pd.read_csv(X_test_save_scaled, index_col=0)
else:
    # Load the scaler that was persisted alongside the training features.
    # NOTE: pickle.load can execute arbitrary code; only load a scaler file
    # that was written by this notebook.
    # `with` guarantees the file is closed even if unpickling fails.
    with open(scale_params_pickle, "rb") as scaler_file:
        scaler = pickle.load(scaler_file)

    # The JSON entries are forwarded verbatim as keyword arguments.
    with open(other_params_json) as fp:
        other_params = json.load(fp)

    X_test, X_test_scaled = create_test_features(scaler=scaler, **other_params)
    X_test.to_csv(X_test_save)
    X_test_scaled.to_csv(X_test_save_scaled)

### Load results from CSV file and reproduce models ranked by CV scores

In [None]:
# CSV file holding the experiment results; it must provide a "score" column.
save_results = "exp2.csv"
# Read the results and order the rows by ascending score in one chained step.
score_df = pd.read_csv(save_results).sort_values(by="score", axis=0)

In [None]:
# Render the complete results table (sorted by "score") as rich notebook output.
display(score_df)

### Load best model from CSV File

In [None]:
# Retrieve the top-ranked row. `score_df` is sorted by "score" in ascending
# order, so position 0 is the first row of the ranking; the previous
# `iloc[1]` selected the second row instead.
# NOTE(review): assumes a lower "score" is better - confirm against how the
# experiment results were written.
best = score_df.iloc[0]
display(best)

# Pull the algorithm key and its JSON-encoded hyper-parameters from the row.
alg = best["alg"]
params_json = best["params_json"]
print("alg: {}".format(alg))
print("params_json: {}".format(params_json))

# Resolve the algorithm key to its class and decode the hyper-parameters.
alg_cls = alg_map[alg]
params = json.loads(params_json)

# Instantiate the model with the recorded hyper-parameters.
model = alg_cls(**params)

# Fit the model on the scaled training features and predict the scaled test set.
# NOTE(review): n_fold=None presumably disables cross-validation - confirm in exp.train.
model, preds = train_model(X=X_train_scaled, Y=y_tr, X_test=X_test_scaled, n_fold=None, model=model)

In [None]:
# Start from the sample submission (indexed by seg_id) and fill in the
# predictions as the time_to_failure column.
submission = pd.read_csv(
    r'C:\Users\arvin\dev\lanl\sample_submission.csv',
    index_col='seg_id',
).assign(time_to_failure=preds)

# Show the first rows as a quick check, then write the submission file.
print(submission.head())
submission.to_csv('submission.csv')