# Linear Regression

In [None]:
# Common imports
import numpy as np
import pandas as pd

In [None]:
housing = pd.read_csv("datasets/House.txt", header = None, names=['Size', 'Bedrooms', 'Price'])
housing.head()

In [None]:
housing.info()

In [None]:
housing.describe()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
pd.plotting.scatter_matrix(housing, figsize=(8, 8),
                           marker='o', hist_kwds={'bins': 50}, s=60,
                           alpha=.8)

In [None]:
X = housing[["Size"]]

In [None]:
X.shape

In [None]:
y = housing.Price

In [None]:
y.shape

In [None]:
# Scatter plot of the raw data: feature X on the horizontal axis, target y
# on the vertical axis.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(X, y, marker='o', s=50, alpha=0.8)
ax.set_title('Least-squares linear regression')
ax.set_xlabel('Feature value (x)')
ax.set_ylabel('Target value (y)')
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
linreg = LinearRegression().fit(X, y)

In [None]:
print('linear model coeff (w): {}'
     .format(linreg.coef_))
print('linear model intercept (b): {:.3f}'
     .format(linreg.intercept_))
print('R-squared score: {:.3f}'
     .format(linreg.score(X, y)))

In [None]:
X_new = [[3000]]

In [None]:
linreg.predict(X_new)

In [None]:
# Data plus the fitted line, with the line computed by hand as w * x + b
# from the model's coefficient and intercept.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(X, y, marker='o', s=50, alpha=0.8)
ax.plot(X, linreg.coef_ * X + linreg.intercept_, 'r-')
ax.set_title('Least-squares linear regression')
ax.set_xlabel('Feature value (x)')
ax.set_ylabel('Target value (y)')
plt.show()

In [None]:
# Same figure as the hand-computed version, but the line is obtained from
# the model's own predict() instead of w * x + b.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(X, y, marker='o', s=50, alpha=0.8)
ax.plot(X, linreg.predict(X), 'r-')
ax.set_title('Least-squares linear regression')
ax.set_xlabel('Feature value (x)')
ax.set_ylabel('Target value (y)')
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

housing_predictions = linreg.predict(X)
lin_mse = mean_squared_error(y, housing_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

In [None]:
from sklearn.metrics import mean_absolute_error

lin_mae = mean_absolute_error(y, housing_predictions)
lin_mae

In [None]:
from sklearn.preprocessing import PolynomialFeatures

poly3 = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly3.fit_transform(X)
X_poly

In [None]:
linreg2 = LinearRegression().fit(X_poly, y)

In [None]:
print('linear model coeff (w): {}'
     .format(linreg2.coef_))
print('linear model intercept (b): {:.3f}'
     .format(linreg2.intercept_))
print('R-squared score: {:.3f}'
     .format(linreg2.score(X_poly, y)))

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Degree-3 polynomial expansion -> standardisation -> least squares.
# The final step is a *new* LinearRegression rather than the already-fitted
# `linreg`: Pipeline.fit fits the estimator objects it is given in place, so
# passing `linreg` would silently overwrite the single-feature model with a
# three-feature one and break any re-run of the cells that predict, plot or
# score with `linreg`.
pipeline_reg = Pipeline([('poly', poly3), ('scal', scaler), ('lin', LinearRegression())])
pipeline_reg.fit(X, y)

In [None]:
plt.figure(figsize=(10,8))
plt.scatter(X, y, marker= 'o', s=50, alpha=0.8)
# 100 evenly spaced feature values at which to draw the fitted curve.
X1 = np.linspace(1000, 4500, 100)
# Predict on a DataFrame with the training column name ("Size") instead of a
# bare NumPy column: the pipeline was fitted on a DataFrame, and recent
# scikit-learn versions warn when feature names are missing at predict time.
curve = pipeline_reg.predict(pd.DataFrame({'Size': X1}))
plt.plot(X1, curve)
plt.title('Least-squares linear regression')
plt.xlabel('Feature value (x)')
plt.ylabel('Target value (y)')
plt.show()

In [None]:
# Degree-10 polynomial expansion -> standardisation -> least squares, built
# from fresh estimator objects so no previously fitted model is modified.
poly10 = PolynomialFeatures(degree=10, include_bias=False)
scaler = StandardScaler()
lin_reg2 = LinearRegression()

pipeline_reg = Pipeline([('poly', poly10), ('scal', scaler), ('lin', lin_reg2)])
pipeline_reg.fit(X, y)

# R^2 on the same data the pipeline was fitted on.
print('R-squared score: {:.3f}'
     .format(pipeline_reg.score(X, y)))

plt.figure(figsize=(10,8))
plt.scatter(X, y, marker= 'o', s=50, alpha=0.8)

# 100 evenly spaced feature values at which to draw the fitted curve.
X1 = np.linspace(1000, 4500, 100)
# Predict on a DataFrame with the training column name ("Size") instead of a
# bare NumPy column: the pipeline was fitted on a DataFrame, and recent
# scikit-learn versions warn when feature names are missing at predict time.
curve = pipeline_reg.predict(pd.DataFrame({'Size': X1}))
plt.plot(X1, curve)

plt.title('Least-squares linear regression')
plt.xlabel('Feature value (x)')
plt.ylabel('Target value (y)')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Split into train and test parts using train_test_split's default
# proportions; random_state=0 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(PolynomialFeatures(), StandardScaler(),LinearRegression())
param_grid = {'polynomialfeatures__degree': range(1,10)}

grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1,return_train_score=True)
grid.fit(X_train, y_train)

In [None]:
df_grid = pd.DataFrame(grid.cv_results_)
df_grid

In [None]:
print("Score with poly features: {:.2f}".format(grid.score(X_test, y_test)))

## Using both Size and Bedrooms as features

In [None]:
X = housing[["Size","Bedrooms"]]

In [None]:
X.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0)

In [None]:
pipe = make_pipeline(PolynomialFeatures(), StandardScaler(),LinearRegression())
param_grid = {'polynomialfeatures__degree': range(1,10)}

grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1,return_train_score=True)
grid.fit(X_train, y_train)

In [None]:
df_grid = pd.DataFrame(grid.cv_results_)
df_grid

In [None]:
print("Score with poly features: {:.2f}".format(grid.score(X_test, y_test)))