***Train model***

In [1]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23. Try the original import first so behaviour on old installs is unchanged,
# and fall back to the standalone `joblib` package where the vendored copy is gone.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib

# Load the housing data set (relative path: the CSV must be in the working directory)
df = pd.read_csv("house_data1.csv")

  from numpy.core.umath_tests import inner1d


In [2]:
# Pick the input feature columns (X) and the target column (y) out of the frame
feature_columns = ["sq_feet", "num_bedrooms", "num_bathrooms"]
target_column = "sale_price"

X = df[feature_columns]
y = df[target_column]


In [3]:
# Fixed seed so the train/test split and the fitted model are the same on every
# run (without it, each Restart & Run All produces different error figures).
RANDOM_SEED = 42

# Split the data set into a training set (75%) and a test set (25%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_SEED
)

# Create the gradient boosting regression model
model = GradientBoostingRegressor(random_state=RANDOM_SEED)

# Train the model
model.fit(X_train, y_train)

# Save the trained model to a file so we can use it to make predictions later.
# Left as the last expression so the cell still displays the written file name(s).
joblib.dump(model, "house_value_model.pkl")



['house_value_model.pkl']

In [6]:
# Report how well the model is performing
print("Model training results:")

# Mean absolute error on the data the model was fitted on.
# The metric is mean_absolute_error, so the variables are named `mae_*`
# (they were previously called `mse_*`, which suggested a squared error).
mae_train = mean_absolute_error(y_train, model.predict(X_train))
print(f" - Training Set Error: {mae_train}")

# Mean absolute error on the held-out test set
mae_test = mean_absolute_error(y_test, model.predict(X_test))
print(f" - Test Set Error: {mae_test}")

Model training results:
 - Training Set Error: 8920.117245027746
 - Test Set Error: 9288.742478936045


***Use model***

In [7]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23. Try the original import first so old installs keep using the same joblib
# that wrote the file, and fall back to the standalone package otherwise.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib

# Load our trained model.
# NOTE: joblib files are pickle-based; only load model files you created or trust.
model1 = joblib.load("house_value_model.pkl")

# Define the house that we want to value (with the values in the same order as in the training data)
house_1 = [
    2000, # Size in Square Feet
    3, # Number of Bedrooms
    2, # Number of Bathrooms
]

# scikit-learn predicts for a batch of samples at once, so it expects a 2-D array-like
# (one row per house). We only want to value a single house, so the batch has one row.
homes = [
    house_1
]

# Make a prediction for each house in the homes array (we only have one)
home_values = model1.predict(homes)

# Since we are only predicting the value of one house, grab the first prediction returned
predicted_value = home_values[0]

# Print the results
print("House details:")
print(f"- {house_1[0]} sq feet")
print(f"- {house_1[1]} bedrooms")
print(f"- {house_1[2]} bathrooms")
print(f" Estimated value: ${predicted_value:,.2f}")

House details:
- 2000 sq feet
- 3 bedrooms
- 2 bathrooms
 Estimated value: $377,866.26


In [9]:
# Scratch calculation: 1 - p_a^2 - p_b^2 for a 100 / 33 split of 133 items.
# NOTE(review): this has the form of a two-class Gini impurity — confirm intent.
n_items = 133
p_a = 100 / n_items
p_b = 33 / n_items
impurity = 1 - p_a**2 - p_b**2
print(impurity)

0.373113234213353


In [10]:
# Scratch calculation: average of 0.349 and 0.373, weighted by group sizes 164 and 133.
# NOTE(review): 0.373 matches the rounded result of the previous cell; the origin of
# 0.349 is not shown here — presumably the impurity of the other group, confirm.
size_a = 164
size_b = 133
size_all = size_a + size_b
weighted_value = (size_a / size_all) * 0.349 + (size_b / size_all) * 0.373
print(weighted_value)

0.35974747474747476
