---
## Evaluating Model Performance
We'll investigate several different algorithms and determine which is best at modeling the data.

### Implementation - Creating a Training and Predicting Pipeline

To properly evaluate the performance of various models, we need to create a training and predicting pipeline that allows quick and effective model training on various sizes of training data and then performs predictions on the testing data.

In [None]:
# def evaluate(results, mae):

def evaluate(results):
    """
    Visualization code to display results of various learners.

    Draws a 2x2 grid of bar charts (training time, error on the training
    subset, prediction time, error on the testing set) with one bar per
    learner for each of the three training-set sizes, then shows the figure.

    inputs:
      - results: dict mapping each learner name to a dict indexed by 0, 1, 2
        (plotted as the 1%, 10% and 100% training-set sizes); each entry is
        a dict of statistics with the keys 'train_time', 'mae_train',
        'pred_time' and 'mae_test', as returned by 'train_predict()'.

    note:
      The 'mae_*' key names are historical: 'train_predict()' stores the
      root-mean-squared error (RMSE) under them, so the panels are labelled
      as RMSE. Only three bar colors are defined, so at most three learners
      can be plotted.
    """
    # Import Patch directly instead of reaching for `plt.mpatches`, which is
    # not a documented part of the matplotlib.pyplot namespace.
    from matplotlib.patches import Patch

    # Create figure
    fig, ax = plt.subplots(2, 2, figsize=(7, 9))

    # Constants
    bar_width = 0.3
    colors = ['#A00000', '#00A0A0', '#00A000']

    # One (metric key, y-label, title) triple per panel, in row-major order
    panels = [
        ('train_time', "Time (in seconds)", "Model Training"),
        ('mae_train', "RMSE", "RMSE on Training Subset"),
        ('pred_time', "Time (in seconds)", "Model Predicting"),
        ('mae_test', "RMSE", "RMSE on Testing Set"),
    ]

    for j, (metric, y_label, title) in enumerate(panels):
        panel = ax[j // 2, j % 2]

        # Bars are grouped by training-set size (i) and offset per learner (k)
        for k, learner in enumerate(results):
            for i in range(3):
                panel.bar(i + k * bar_width, results[learner][i][metric],
                          width=bar_width, color=colors[k])

        # Axis decoration is the same for every bar, so apply it once per panel
        panel.set_xticks([0.45, 1.45, 2.45])
        panel.set_xticklabels(["1%", "10%", "100%"])
        panel.set_xlabel("Training Set Size")
        panel.set_xlim((-0.1, 3.0))
        panel.set_ylabel(y_label)
        panel.set_title(title)

    # Create patches for the legend (one colored swatch per learner)
    patches = [
        Patch(color=colors[k], label=learner)
        for k, learner in enumerate(results)
    ]
    # plt.legend attaches to the current axes; the anchor offsets move the
    # legend up above the grid of panels.
    plt.legend(handles=patches, bbox_to_anchor=(-.80, 2.53),
               loc='upper center', borderaxespad=0., ncol=3, fontsize='x-large')

    # Aesthetics
    plt.suptitle("Performance Metrics for Three Supervised Learning Models", fontsize=16, y=1.10)
    plt.tight_layout()
    plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

def train_predict(learner, sample_size, X_train, y_train, X_test, y_test):
    '''
    Fit a learner on the first 'sample_size' training samples, then time and
    score its predictions.

    inputs:
       - learner: the learning algorithm to be trained and predicted on
       - sample_size: the size of samples (number) to be drawn from training set
       - X_train: features training set
       - y_train: target training set
       - X_test: features testing set
       - y_test: target testing set

    returns:
       dict with the keys
       - 'train_time': elapsed time() around the call to .fit()
       - 'pred_time': elapsed time() around predicting on X_test and on the
         first 200 training samples
       - 'mae_train': root-mean-squared error (RMSE) on the first 200
         training samples
       - 'mae_test': RMSE on the test set
       The 'mae_*' key names are kept because 'evaluate()' reads them, even
       though the stored values are RMSE.
    '''
    # Number of leading training samples used to score the fit on seen data
    n_train_scored = 200

    results = {}

    # Fit the learner on the first 'sample_size' training samples
    start = time()  # Get start time
    learner = learner.fit(X_train[:sample_size], y_train[:sample_size])
    end = time()  # Get end time

    # Calculate the training time
    results['train_time'] = end - start

    # Get the predictions on the test set (X_test),
    #   then on the first 200 training samples (X_train) using .predict()
    start = time()  # Get start time
    predictions_test = learner.predict(X_test)
    predictions_train = learner.predict(X_train[:n_train_scored])
    end = time()  # Get end time

    # Calculate the total prediction time
    results['pred_time'] = end - start

    # RMSE is taken as the square root of the default (averaged) MSE instead
    # of passing 'squared=False', which newer scikit-learn releases no longer
    # accept. For a single-output target the two are the same value.
    results['mae_train'] = mean_squared_error(y_train[:n_train_scored], predictions_train) ** 0.5

    # Compute RMSE on test set
    results['mae_test'] = mean_squared_error(y_test, predictions_test) ** 0.5

    # Success
    print(f'{learner.__class__.__name__} trained on {sample_size} samples.')

    # Return the results
    return results

### Implementation: Initial Model Evaluation

**Note:** Depending on chosen algorithms, the following implementation may take some time to run!

In [None]:
# TODO: Import the three supervised learning models from sklearn

from sklearn.linear_model import TweedieRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor

# Instantiate the three regressors under comparison
rgsr_A = TweedieRegressor(power=2, link='log')
rgsr_B = RandomForestRegressor(random_state=5)
rgsr_C = AdaBoostRegressor(random_state=5)

# Training-set sizes to try: the whole training set, then 10% and 1% of it
# (np.int_ truncates the fractional sample counts to integers)
samples_100 = len(y_train)
samples_10 = np.int_(samples_100 * 0.1)
samples_1 = np.int_(samples_100 * 0.01)
sample_sizes = (samples_1, samples_10, samples_100)

# Train and score every regressor at every size.
# results[<class name>][i] holds the statistics for the i-th entry of sample_sizes.
results = {}
for rgsr in (rgsr_A, rgsr_B, rgsr_C):
    rgsr_name = rgsr.__class__.__name__
    per_size = {}
    results[rgsr_name] = per_size
    for i, samples in enumerate(sample_sizes):
        per_size[i] = train_predict(rgsr, samples, X_train, y_train, X_test, y_test)

# Plot the collected timings and scores for the three models
evaluate(results)