# Results

Generate and cache predictions for the best models, then collate training and validation metrics.

In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from data import train, validation
from data import X, y, categorical, numerical
from utils import StandardizedGridSearchCV

# Gather best models

In [3]:
# Load every saved hyper-parameter search found in the models directory,
# keyed by file stem (i.e. the model name).
root = Path('models')

# Sort the paths: glob order is filesystem-dependent, and the candidate row
# numbers (and therefore the column order of the cached prediction files)
# follow the order in which searches are loaded.
model_paths = sorted(root.glob('*.p'))
if not model_paths:
    # Fail here with a clear message instead of a KeyError further down.
    raise FileNotFoundError(f"No saved searches ('*.p') found in {root.resolve()}")

# NOTE(review): `StandardizedGridSearchCV.load` presumably unpickles the file;
# only load search files from a trusted source -- confirm in utils.
searches = pd.Series({
    path.stem: StandardizedGridSearchCV.load(path)
    for path in model_paths
})



In [4]:
from sklearn.base import clone

# Build one unfitted candidate estimator for each of the leading parameter
# sets of every loaded search.
TOP_K = 5  # number of leading rows of `search.results` kept per model

candidate_records = []
for model, search in searches.items():
    # The index of `search.results` carries the parameter values; turn the
    # first TOP_K index entries into one {param_name: value} dict each.
    # NOTE(review): assumes `results` is ordered best-first -- confirm in utils.
    top_params = (
        search.results
        .head(TOP_K)
        .index.to_frame()
        .to_dict(orient='records')
    )
    for params in top_params:
        candidate_records.append({
            'model': model,
            'params': params,
            # Clone first, then configure the copy: calling set_params on
            # `best_estimator_` itself would overwrite the stored best
            # estimator's parameters with each candidate's values.
            'estimator': clone(search.best_estimator_).set_params(**params),
        })

candidates = pd.DataFrame.from_records(candidate_records)
candidates.head()

Unnamed: 0,model,params,estimator
0,SVR_Radial,"{'C': 1300, 'epsilon': 3.0, 'gamma': 0.05}","SVR(C=1300, epsilon=3.0, gamma=0.05, max_iter=..."
1,SVR_Radial,"{'C': 1300, 'epsilon': 3.5, 'gamma': 0.05}","SVR(C=1300, epsilon=3.5, gamma=0.05, max_iter=..."
2,SVR_Radial,"{'C': 1350, 'epsilon': 2.5, 'gamma': 0.05}","SVR(C=1350, epsilon=2.5, gamma=0.05, max_iter=..."
3,SVR_Radial,"{'C': 1350, 'epsilon': 3.0, 'gamma': 0.05}","SVR(C=1350, epsilon=3.0, gamma=0.05, max_iter=..."
4,SVR_Radial,"{'C': 1350, 'epsilon': 3.5, 'gamma': 0.05}","SVR(C=1350, epsilon=3.5, gamma=0.05, max_iter=..."


# Cache predictions

In [5]:
# Fit every candidate on the training split. The return value of `fit` is not
# needed: the predict calls below use the same estimator objects.
for estimator in candidates['estimator']:
    estimator.fit(train[X], train[y])

# Predict on both splits; each cell holds that candidate's prediction array.
candidates['train_pred'] = candidates['estimator'].apply(lambda est: est.predict(train[X]))
candidates['val_pred'] = candidates['estimator'].apply(lambda est: est.predict(validation[X]))

# Save predictions: one column per candidate (labelled by its row number in
# `candidates`), one row per sample.
predictions_dir = Path('predictions')
# to_csv does not create missing directories, so make sure the target exists;
# otherwise a run on a fresh checkout fails after all the fitting work.
predictions_dir.mkdir(parents=True, exist_ok=True)
candidates['train_pred'].apply(pd.Series).T.to_csv(predictions_dir / 'train.csv')
candidates['val_pred'].apply(pd.Series).T.to_csv(predictions_dir / 'validation.csv')



# Generate Metrics

In [6]:
from sklearn.metrics import mean_squared_error

# Score each candidate's cached predictions against the true targets of the
# split they were made on.
train_truth = train[y]
val_truth = validation[y]

candidates['train_mse'] = candidates['train_pred'].apply(
    lambda predicted: mean_squared_error(train_truth, predicted)
)
candidates['val_mse'] = candidates['val_pred'].apply(
    lambda predicted: mean_squared_error(val_truth, predicted)
)

# Show the candidates ordered by validation error, lowest first.
summary_columns = ['model', 'params', 'train_mse', 'val_mse']
candidates.sort_values('val_mse')[summary_columns]

Unnamed: 0,model,params,train_mse,val_mse
0,SVR_Radial,"{'C': 1300, 'epsilon': 3.0, 'gamma': 0.05}",836.609672,1265.690847
2,SVR_Radial,"{'C': 1350, 'epsilon': 2.5, 'gamma': 0.05}",821.433063,1269.399724
1,SVR_Radial,"{'C': 1300, 'epsilon': 3.5, 'gamma': 0.05}",836.279711,1271.187547
3,SVR_Radial,"{'C': 1350, 'epsilon': 3.0, 'gamma': 0.05}",821.499634,1274.060259
4,SVR_Radial,"{'C': 1350, 'epsilon': 3.5, 'gamma': 0.05}",822.112624,1278.397123
13,lasso,{'model_fitting__method__alpha': 0.51818181818...,2305.169896,1646.081846
12,lasso,{'model_fitting__method__alpha': 0.52727272727...,2306.044925,1646.350145
10,lasso,{'model_fitting__method__alpha': 0.53636363636...,2306.936969,1646.644823
11,lasso,{'model_fitting__method__alpha': 0.54545454545...,2307.846834,1646.974342
14,lasso,{'model_fitting__method__alpha': 0.55454545454...,2308.740174,1647.216059
