
# Linear Regression Example (Simplified)

In [10]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split


# Load the housing dataset
boston = load_boston()

# reset data as pandas DataFrame
boston_df = pd.DataFrame(boston.data)

# label columns
boston_df.columns = boston.feature_names


# Use all features
X = boston_df

Y = boston.target

# Split the data into training/testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X,Y)

# Create linear regression object
lreg = LinearRegression()

# Train the model using the training sets
lreg.fit(X_train, Y_train)

# Make predictions using the testing set
pred_test = lreg.predict(X_test)

# Set a DataFrame from the Features
coeff_df = pd.DataFrame(boston_df.columns)
coeff_df.columns = ['Features']

# Set a new column lining up the coefficients from the linear regression
coeff_df["Coefficient Estimate"] = pd.Series(lreg.coef_)

# Show coefficients
print(coeff_df)

# The mean squared error
print("\nMean squared error: %.2f"
      % mean_squared_error(Y_test, pred_test))

# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(Y_test, pred_test))

   Features  Coefficient Estimate
0      CRIM             -0.122997
1        ZN              0.047103
2     INDUS              0.028702
3      CHAS              1.475443
4       NOX            -18.688069
5        RM              3.634476
6       AGE             -0.001070
7       DIS             -1.633152
8       RAD              0.338757
9       TAX             -0.012964
10  PTRATIO             -1.034120
11        B              0.009061
12    LSTAT             -0.529999

Mean squared error: 16.19
Variance score: 0.77
