## Linear regression

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

%matplotlib inline

### Linear regression with one variable

#### Load data from CSV file
- column 1: population (x)
- column 2: profit (y)

In [0]:
# Load the data set with np.loadtxt (column 1: population, column 2: profit).
# NOTE: np.loadtxt splits on whitespace by default, so a comma-separated file
# needs delimiter=','. Replace the `...` placeholder with the real arguments.
data = np.loadtxt(...)

In [0]:
# Separate the features (X) from the target (y) using np.hsplit.
# Replace the `...` placeholder with the array to split and how to split it;
# the call must yield exactly two sub-arrays for the unpacking below to work.
X, y = np.hsplit(...)

In [0]:
# Number of training examples m.
# TODO: replace the `...` placeholder (presumably with the row count of X --
# confirm); until then m is simply bound to the Ellipsis object.
m = ...

In [0]:
# Ordinary least-squares linear regression (no regularization term)
model = LinearRegression()
# Train on the population feature(s) X against the profit target y
model.fit(X=X, y=y)

In [0]:
# Read the fitted parameters off the model:
# theta0 is the intercept, theta1 the first entry of the coefficient array
theta0 = model.intercept_
theta1 = model.coef_[0]
# Show both values as the cell output
(theta0, theta1)

In [0]:
# Plot the training data together with the fitted regression line.
# The explicit Figure/Axes interface avoids relying on pyplot's implicit
# "current figure" state, and the trailing semicolon keeps the Legend repr
# out of the cell output so that only the figure is shown.
fig, ax = plt.subplots()
ax.plot(X, y, 'bx', label='Data')
ax.plot(X, model.predict(X), 'r-', label='Regression')
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.legend();

In [0]:
# Training mean squared error
# Use np.mean() of the squared difference between the predicted output and the actual output.
...

In [0]:
# Training score
# Use model.score(X, y); for a regressor this is the coefficient of
# determination R^2, not a classification accuracy.
...

In [0]:
# Predict profit for populations of 35,000 and 70,000.
# Pass the inputs as a 2-D np.array with one row per prediction, expressed in
# the same units as the population column of the training data
# (TODO confirm the units used in the CSV file).
model.predict(...) 

### Linear regression with multiple variables

In [0]:
# Load data from CSV file
# - column 1: house size in feet**2 (x1)
# - column 2: number of bedrooms (x2)
# - column 3: house price (y)
# TODO: replace the `...` placeholder with the actual loading call.
data = ...

In [0]:
# Separate features (x1, x2) from target (y).
# TODO: replace the `...` placeholder; unpacking the bare Ellipsis raises a
# TypeError. Later cells index X[:, 0] and X[:, 1], so X needs two columns.
X, y = ...

In [0]:
# Number of examples.
# TODO: replace the `...` placeholder (presumably with the row count of X --
# confirm); until then m is simply bound to the Ellipsis object.
m = ...

In [0]:
# The two pipeline stages: feature standardization, then linear regression
scaler, regr = StandardScaler(), LinearRegression()

# Chain the stages so the scaler is applied before the regressor
model = Pipeline(steps=[('scaler', scaler), ('regr', regr)])

In [0]:
# Fit the whole pipeline: the scaler is fitted on X and the regressor is
# trained on the scaled features
model.fit(X=X, y=y)

In [0]:
# Corner values of the feature space: row 0 holds the per-feature minimum,
# row 1 the per-feature maximum
grid_range = np.vstack((X.min(axis=0), X.max(axis=0)))
# Mesh spanning [min, max] of the first feature against [min, max] of the second
grid = np.meshgrid(grid_range[:, 0], grid_range[:, 1])

# Evaluate the fitted model at every mesh point, then fold the predictions
# back into the shape of the mesh
X_grid = np.column_stack((grid[0].ravel(), grid[1].ravel()))
y_grid = model.predict(X_grid).reshape(grid[0].shape)

def plot_data_and_regression(fig, fignum, view_angle):
    """Draw the training examples and the regression surface in one 3D panel.

    The panel is added to ``fig`` as slot ``fignum`` of a 2-row, 1-column
    layout. The examples (``X``, ``y``) and the surface (``grid``, ``y_grid``)
    are read from the notebook's global namespace.

    Parameters
    ----------
    fig : figure to which the 3D subplot is added.
    fignum : position of the subplot within the 2x1 layout.
    view_angle : elevation passed to ``view_init``; the azimuth is fixed at 90.
    """
    ax = fig.add_subplot(2, 1, fignum, projection='3d')
    ax.view_init(elev=view_angle, azim=90)

    # Axis labels and panel title
    ax.set_xlabel('Square feet')
    ax.set_ylabel('Bedrooms')
    ax.set_zlabel('Price')
    ax.set_title(f'angle = {view_angle}')

    # One tick per bedroom, one tick per 200000 of price
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=1.0))
    ax.zaxis.set_major_locator(ticker.MultipleLocator(base=200000))

    # Training examples as a scatter cloud
    ax.scatter(X[:, 0], X[:, 1], y)

    # Model predictions as a translucent surface over the feature grid
    xx, yy = grid
    ax.plot_surface(xx, yy, y_grid, color='y', alpha=0.2)

# Fresh figure that hosts both 3D panels
fig = plt.figure(num=2, figsize=(9, 10))

# Render the same scene twice: panel 1 at angle 20, panel 2 at angle 75
for panel, angle in enumerate((20, 75), start=1):
    plot_data_and_regression(fig, fignum=panel, view_angle=angle)

In [0]:
# Training mean squared error: average of the squared residuals
# (prediction minus target) over the training set
np.mean(np.square(model.predict(X) - y))

In [0]:
# Training score of the fitted pipeline, evaluated on the training data itself
model.score(X=X, y=y)