In [None]:
# Report the interpreter version and the version of every library the
# notebook relies on, one "name: version" line each.
import sys
import scipy
import numpy as np
import matplotlib
import pandas as pd
import sklearn

version_lines = [
    'Python: {}'.format(sys.version),
    'scipy: {}'.format(scipy.__version__),
    'numpy: {}'.format(np.__version__),
    'matplotlib: {}'.format(matplotlib.__version__),
    'pandas: {}'.format(pd.__version__),
    'sklearn: {}'.format(sklearn.__version__),
]
for version_line in version_lines:
    print(version_line)

In [None]:
# Load libraries
from matplotlib import pyplot
from pandas import read_csv
from pandas.plotting import scatter_matrix
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Load the Boston housing data into one DataFrame: the feature columns
# followed by the regression target (median home value) as the LAST column.
# The train/test cell further down splits features from target positionally
# (last column = target), so the target must be present and must come last.
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2;
# this cell needs scikit-learn < 1.2 to run.
from sklearn.datasets import load_boston

boston = load_boston()
dataset = pd.DataFrame(boston.data, columns=boston.feature_names)
# Without this column the last *feature* would silently be used as the target.
dataset['MEDV'] = boston.target

In [None]:
# Bare last expression: Jupyter renders the DataFrame as this cell's output.
dataset

In [None]:
# Per-column summary statistics, printed as plain text.
summary_stats = dataset.describe()
print(summary_stats)

In [None]:
# One box-and-whisker plot per column, each in its own subplot with
# independent x and y axes.
pyplot.rcParams.update({"figure.figsize": (25, 8)})  # global setting; stays in effect for later figures
dataset.plot.box(subplots=True, sharex=False, sharey=False)
pyplot.show()

In [None]:
# Histogram of every column in the frame.
pyplot.rc("figure", figsize=(25, 10))  # same effect as assigning rcParams["figure.figsize"]
dataset.hist()
pyplot.show()

In [None]:
# Pairwise scatter plots for all columns.
pyplot.rcParams.update({"figure.figsize": (25, 20)})  # tall canvas; the grid has one panel per column pair
scatter_matrix(frame=dataset)
pyplot.show()

In [None]:
# Split the frame positionally: every column except the last is an input
# feature, the last column is the value to predict. Then hold out 20% of the
# rows as a test set.
array = dataset.values
X, y = array[:, :-1], array[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,  # fixed seed so the split is the same on every run
)

In [None]:
# Fit each candidate model on the training split and predict the test split.

# Linear Regression (ordinary least squares on the raw features).
regressor_lin = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator itself
y_pred_lin = regressor_lin.predict(X_test)

# Polynomial Regression: expand the features to degree 2, then fit a linear
# model on the expanded training matrix.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X_train)
regressor_poly = LinearRegression().fit(X_poly, y_train)
# The test split goes through the already-fitted expansion (transform only).
X_test_poly = poly_reg.transform(X_test)
y_pred_poly = regressor_poly.predict(X_test_poly)

# SVR (RBF kernel). The kernel is distance-based and not scale invariant, so
# the features are standardized first. The scaler lives inside the pipeline
# and is therefore fitted on the training split only; the test split is just
# transformed, so no test-set statistics leak into the model.
regressor_svr = make_pipeline(StandardScaler(), SVR(kernel='rbf'))
regressor_svr.fit(X_train, y_train)
y_pred_svr = regressor_svr.predict(X_test)

# KNN (k=5). Neighbours are found by distance in feature space, so unscaled
# columns with large magnitudes would dominate; standardize first. The scaler
# is part of the pipeline and is fitted on the training split only.
regressor_knn = make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=5))
regressor_knn.fit(X_train, y_train)
y_pred_knn = regressor_knn.predict(X_test)

# Decision Tree. Seeded so repeated runs build the same tree: without a
# random_state, ties between equally good splits are broken randomly and the
# reported score can change from run to run. Seed matches the train/test split.
regressor_tree = DecisionTreeRegressor(random_state=1)
regressor_tree.fit(X_train, y_train)
y_pred_tree = regressor_tree.predict(X_test)

# Random Forest (10 trees). Bootstrapping and feature sampling are random, so
# the ensemble is seeded to make its predictions and scores reproducible.
# Seed matches the one used for the train/test split.
regressor_forest = RandomForestRegressor(n_estimators=10, random_state=1)
regressor_forest.fit(X_train, y_train)
y_pred_forest = regressor_forest.predict(X_test)

In [None]:
# Compare models: compute R^2 of each model's test-set predictions and print
# one "name score" line per model, in the order listed in `names`.
names = ['lin', 'poly', 'svr', 'knn', 'tree', 'forest']
preds = [y_pred_lin, y_pred_poly, y_pred_svr, y_pred_knn, y_pred_tree, y_pred_forest]
results = [r2_score(y_test, model_pred) for model_pred in preds]

report_lines = []
for model_name, model_r2 in zip(names, results):
    report_lines.append("{} {}".format(model_name, model_r2))
res = "\n".join(report_lines)
print(res)

In [None]:
# Report MAE, MSE and R^2 (in that order, one value per line) for the
# random-forest predictions on the test split.
for metric in (mean_absolute_error, mean_squared_error, r2_score):
    print(metric(y_test, y_pred_forest))