# Linear Regression with Python

In [1]:
import numpy as np
from ml_utils import ml_lib

## Linear Regression with one feature

In [2]:
# Read the comma-separated samples used by the single-feature example
data = np.loadtxt(
    '../data/data1.txt',
    delimiter=',',
)

In [3]:
# Gradient-descent settings: number of iterations and learning rate
num_iters, alpha = 1500, 0.01

# Split the table: the last column is the target (kept 2-D),
# every other column is a feature. Both are copied out of `data`.
y = np.array(data[:, -1:])
X = np.array(data[:, :-1])

# Prepend a column of ones (the intercept column), one row per sample
intercept_column = np.ones((y.size, 1))
X = np.concatenate((intercept_column, X), axis=1)

# Start from an all-zero parameter column, one row per column of X
theta = np.zeros((X.shape[1], 1))

In [4]:
# Evaluate the cost for the current theta before gradient descent is run
initial_cost = ml_lib.meanSquaredErrorCost(X, y, theta)
print('Cost at initialization : {}.'.format(initial_cost))

Cost at initialization : 32.072733877455676.


In [5]:
# Fit theta with the gradient-descent helper. J_history is the helper's second
# return value (presumably the per-iteration cost -- confirm in ml_lib).
# NOTE: theta is both the input and the assignment target, so re-running this
# cell starts from the previously returned theta; re-run the setup cell first
# for a fresh fit.
theta, J_history = ml_lib.linearRegressionGradientDescent(
    X, y, theta, alpha, num_iters
)
print('Theta computed from gradient descent : {}, {}.'.format(theta[0, 0], theta[1, 0]))

Theta computed from gradient descent : -3.63029143940436, 1.166362350335582.


In [6]:
# Cost of the current (post-gradient-descent) theta on the same X and y
final_cost = ml_lib.meanSquaredErrorCost(X, y, theta)
print('Cost after gradient descent : {}.'.format(final_cost))

Cost after gradient descent : 4.483388256587725.


In [7]:
# Input row is [intercept term, feature value]; 3.5 is reported below as a
# population of 35,000. The result is scaled by 10000
# (presumably the target is stored in units of 10,000 -- confirm against the dataset).
predict1 = np.dot(np.array([1, 3.5]), theta) * 10000
print(
    'For population = 35,000, we predict a profit of {}.'.format(predict1[0])
)

For population = 35,000, we predict a profit of 4519.7678677017675.


In [8]:
# Input row is [intercept term, feature value]; 7 is reported below as a
# population of 70,000. The result is scaled by 10000
# (presumably the target is stored in units of 10,000 -- confirm against the dataset).
predict2 = np.dot(np.array([1, 7]), theta) * 10000
print(
    'For population = 70,000, we predict a profit of {}.'.format(predict2[0])
)

For population = 70,000, we predict a profit of 45342.45012944714.


## Linear Regression with two features

In [9]:
# Read the comma-separated samples used by the two-feature examples
data2 = np.loadtxt(
    '../data/data2.txt',
    delimiter=',',
)

In [10]:
# Gradient-descent settings for the two-feature run
num_iters, alpha = 400, 0.1

# Features are every column but the last; the target is the last column,
# reshaped into a single-column 2-D array
X = data2[:, :-1]
y = data2[:, -1].reshape(-1, 1)

# Normalize the features (saves time for gradient descent). mu and sigma are
# kept because later inputs are scaled with them before prediction.
X, mu, sigma = ml_lib.normalizeFeature(X)

# Prepend the intercept column of ones, one row per sample
intercept_column = np.ones((y.size, 1))
X = np.concatenate((intercept_column, X), axis=1)

# Zero-initialized parameter column, one row per column of X
theta = np.zeros((X.shape[1], 1))

In [11]:
# Evaluate the cost for the current theta before gradient descent is run
initial_cost = ml_lib.meanSquaredErrorCost(X, y, theta)
print('Cost at initialization : {}.'.format(initial_cost))

Cost at initialization : 65591548106.45744.


In [12]:
# Fit theta with the gradient-descent helper. J_history is the helper's second
# return value (presumably the per-iteration cost -- confirm in ml_lib).
# NOTE: theta is both the input and the assignment target, so re-running this
# cell starts from the previously returned theta; re-run the setup cell first
# for a fresh fit.
theta, J_history = ml_lib.linearRegressionGradientDescent(
    X, y, theta, alpha, num_iters
)
print(
    'Theta computed from gradient descent : {}, {}, {}.'.format(
        theta[0, 0], theta[1, 0], theta[2, 0]
    )
)

Theta computed from gradient descent : 340412.65957446786, 110631.04895815473, -6649.47295012843.


In [13]:
# Cost of the current (post-gradient-descent) theta on the same X and y
final_cost = ml_lib.meanSquaredErrorCost(X, y, theta)
print('Cost after gradient descent : {}.'.format(final_cost))

Cost after gradient descent : 2043280050.602829.


In [14]:
# Raw feature values of the house to price (1650 and 3, as named in the message below)
test = np.array([1650, 3])
# Scale the raw values with the mu/sigma returned when the training features were normalized
normalizedTest = (test - mu) / sigma
# Prepend the intercept term; both pieces are flattened into one 1-D vector
test = np.concatenate((np.ones(1), np.ravel(normalizedTest)))
predict3 = np.dot(test, theta)
print(
    'Predicted price of a 1650 sq-ft, 3 br house (using gradient descent): {}.'.format(
        predict3[0]
    )
)

Predicted price of a 1650 sq-ft, 3 br house (using gradient descent): 293081.4646222758.


## Linear Regression with Normal Equation

In [15]:
# Select X and Y straight from data2 (features are left un-normalized here)
X = data2[:, :-1]
# reshape(-1, 1) infers the row count from data2 itself, so this cell no longer
# depends on a `y` left over from an earlier cell (which may be undefined or
# belong to a different dataset).
y = data2[:, -1].reshape(-1, 1)

# Add intercept column
X = np.append(np.ones((y.size, 1)), X, axis=1)

# Compute theta with ml_lib.normalEquation; this replaces the gradient-descent
# fit, so no initial theta is set up in this cell.
theta = ml_lib.normalEquation(X, y)
print('Theta computed from Normal Equation : {}, {}, {}.'.format(theta[0][0], theta[1][0], theta[2][0]))

Theta computed from Normal Equation : 89597.90954360693, 139.21067401759794, -8738.019112551454.


In [16]:
# Cost of the theta currently in scope (set by the normal-equation cell when run in order)
normal_eq_cost = ml_lib.meanSquaredErrorCost(X, y, theta)
print('Cost after Normal Equation : {}.'.format(normal_eq_cost))

Cost after Normal Equation : 2043280050.6028285.


In [17]:
# Input row is [intercept term, 1650, 3], the raw (un-normalized) feature values
# named in the message below
predict4 = np.dot(np.array([1, 1650, 3]), theta)
print(
    'Predicted price of a 1650 sq-ft, 3 br house (using normal equations): {}.'.format(
        predict4[0]
    )
)

Predicted price of a 1650 sq-ft, 3 br house (using normal equations): 293081.4643349892.
