# Part 7 - Model Comparison
Let's load and compare the models we trained in the previous notebook. 

## TODO: Ensembling!

In [1]:
import time
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
%matplotlib inline

In [2]:
# Read the engineered dataset; every column except the 'price' target is a predictor.
df = pd.read_csv('./data/sf/data_clean_engineered.csv')
features = [column for column in df.columns if column != 'price']
X, y = df[features], df['price']

In [3]:
def load_models(model_dir='./models/sf',
                model_names=('multiple_linear', 'xgb', 'random_forest', 'mlp')):
    """Load pickled models from disk and return them as a list.

    Parameters
    ----------
    model_dir : str
        Directory holding the ``<name>.pkl`` files. Defaults to the
        directory this notebook has always read from.
    model_names : sequence of str
        File stems to load, in the order the models should be returned.
        The default order is linear, xgb, random forest, mlp.

    Returns
    -------
    list
        The unpickled objects, one per entry of ``model_names``, in order.

    Raises
    ------
    OSError
        If one of the pickle files cannot be opened.
    """
    models = []
    for name in model_names:
        # NOTE: unpickling can execute arbitrary code, so only point this at
        # model files that come from a trusted source.
        with open('{}/{}.pkl'.format(model_dir, name), 'rb') as f:
            models.append(pickle.load(f))
    return models
# Load the persisted models once; the resulting list is reused by the cells below.
models = load_models()

In [4]:
def avg_prediction(models: list, data=None) -> float:
    """Return the mean of the predictions several models make on the same input.

    Parameters
    ----------
    models : list
        Fitted estimators; each must expose ``predict(data)``.
    data : optional
        Feature rows passed to every model's ``predict``. When omitted, the
        notebook-level ``new_df`` is used so that existing
        ``avg_prediction(models)`` calls keep working; in that case
        ``new_df`` must already be defined.

    Returns
    -------
    float
        Mean over all predictions of all models. Values are pooled, so for
        multi-row input this is one grand mean, not a per-row mean.

    Raises
    ------
    ValueError
        If ``models`` is empty (there is nothing to average).
    """
    if len(models) == 0:
        raise ValueError("avg_prediction needs at least one model")
    if data is None:
        # Backward-compatible fallback to the notebook global; pass `data`
        # explicitly to avoid depending on cell execution order.
        data = new_df
    predictions = np.array([model.predict(data) for model in models])
    return float(np.average(predictions))

In [5]:
# Score one unseen listing with every model and compare against its known price.
actual_price = '$583,000'
sqft, bed, bath = 800, 1, 1
property_type, postal_code = 'condo', '94124'

# Numeric features first, then the indicator columns for the chosen categories.
new_data = dict(sqft=sqft, bed=bed, bath=bath)
new_data['property_type_{}'.format(property_type)] = 1
new_data['postal_code_{}'.format(postal_code)] = 1

# Lay the record out on the training columns; anything left missing is set to 0.
single_listing = pd.DataFrame(data=[new_data], columns=X.columns)
new_df = pd.get_dummies(single_listing.fillna(0))

for model in models:
    predicted_price = model.predict(new_df)
    print("method: {}".format(model.__class__),
          "predicted price: ${}M".format(predicted_price[0]/1e6),
          "actual price: {}".format(actual_price),
          sep="\n")
print(f"average price: ${avg_prediction(models)/1e6}M")

method: <class 'sklearn.linear_model.base.LinearRegression'>
predicted price: $0.087552M
actual price: $583,000
method: <class 'xgboost.sklearn.XGBRegressor'>
predicted price: $0.657329M
actual price: $583,000
method: <class 'sklearn.ensemble.forest.RandomForestRegressor'>
predicted price: $0.7235M
actual price: $583,000
method: <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
predicted price: $0.9891526897756421M
actual price: $583,000
average price: $0.6143834224439105M
