In [1]:
import holoviews as hv
hv.extension('matplotlib')
import numpy as np
import pandas as pd

# Loading data

In [2]:
# Read the two-column data file; it carries no header row, so the column
# names are supplied here.
df = pd.read_csv(
    'ex1data1.txt',
    names=['x', 'y'],
    header=None,
)
df.head()

Unnamed: 0,x,y
0,6.1101,17.592
1,5.5277,9.1302
2,8.5186,13.662
3,7.0032,11.854
4,5.8598,6.8233


In [3]:
# Pull the feature column and the target column out as separate Series.
X, y = df['x'], df['y']

# Plotting data

In [4]:
%%output size=150
scatter = hv.Scatter(df, ('x', 'Population of City in 10,000s') ,('y', 'Profit in $10,000s'))

scatter

# Cost and gradient descent

In [5]:
df1 = df.copy()
# Build the design matrix straight from df so that re-running this cell always
# yields exactly two columns (intercept, x) instead of prepending yet another
# column of ones to an already-augmented X.
X = np.column_stack((np.ones(len(df)), df['x']))
# Equivalent:
# X = np.vstack((np.ones(len(df)), df['x'])).T

In [6]:
# One parameter per column of X, all starting at zero.
theta = np.zeros(X.shape[1])
theta

array([0., 0.])

In [7]:
iterations = 1500  # number of gradient-descent steps
alpha = 0.01  # learning rate (step size)

## Cost function

In [8]:
def compute_cost(X, y, theta):
    """Return the halved mean squared error, computed with an explicit loop.

    This is the deliberately un-vectorized baseline:
    ``J = 1 / (2 * m) * sum_i (theta . X[i] - y[i]) ** 2``.

    Parameters
    ----------
    X : array-like, one row per sample.
    y : array-like of targets, one per row of ``X``.
    theta : array-like of parameters, one per column of ``X``.

    Returns
    -------
    The scalar cost J(theta).
    """
    # Convert to plain arrays so rows and targets are always paired by
    # position; indexing a pandas Series with ``y[i]`` is a *label* lookup and
    # fails (or silently pairs the wrong rows) when the index is not 0..m-1.
    X = np.asarray(X)
    y = np.asarray(y)
    theta = np.asarray(theta)

    m = X.shape[0]
    total = 0.0  # not named ``sum``: that would shadow the builtin
    for i in range(m):
        total += (np.dot(theta, X[i]) - y[i]) ** 2
    return (1 / (2 * m)) * total

In [9]:
%%time
# Time the loop-based baseline at the current theta.
compute_cost(X, y, theta)

CPU times: user 2.43 ms, sys: 1.19 ms, total: 3.63 ms
Wall time: 2.58 ms


32.072733877455654

In [10]:
def compute_cost_better(X, y, theta):
    """Return the halved mean squared error using array operations.

    All predictions are formed with a single ``np.dot(X, theta)``; the
    squared residuals are then added up with ``np.sum`` and scaled by
    ``1 / (2 * m)``, where ``m`` is the number of rows of ``X``.
    """
    m = X.shape[0]
    residuals = np.dot(X, theta) - y
    squared_error_total = np.sum(residuals ** 2)
    return (1 / (2 * m)) * squared_error_total

In [11]:
%%time
# Time the np.sum-based variant on the same inputs.
compute_cost_better(X, y, theta)

CPU times: user 787 µs, sys: 204 µs, total: 991 µs
Wall time: 790 µs


32.072733877455676

In [12]:
def compute_cost_vectorized(X, y, theta):
    """Return the halved mean squared error as a single inner product.

    With residuals ``r = np.dot(X, theta) - y`` the cost is
    ``1 / (2 * m) * np.dot(r.T, r)``, where ``m`` is the number of rows of
    ``X``.
    """
    n_samples = X.shape[0]
    residuals = np.dot(X, theta) - y
    return (1 / (2 * n_samples)) * np.dot(residuals.T, residuals)

In [13]:
%%time
# Time the inner-product variant on the same inputs.
compute_cost_vectorized(X, y, theta)

CPU times: user 208 µs, sys: 24 µs, total: 232 µs
Wall time: 212 µs


32.07273387745567

In [14]:
# def compute_cost_vectorized_pandas(X, y, theta):
#     X = pd.DataFrame(X)
#     print(X.head())
#     y = pd.DataFrame(y)
#     print(y.head())
#     m = X.shape[0]
#     a = X @ theta
#     a = pd.DataFrame(a)
#     print(a.head())
#     J = (1 / (2 * m)) * (X @ theta - y).T @ (X @ theta - y)
#     return J

## Gradient descent

In [15]:
def gradient_descent_vectorized(X, y, theta=[[0],[0]], alpha=0.01, num_iter=1500):
    """Fit linear-regression parameters with batch gradient descent.

    Each step applies ``theta <- theta - (alpha / m) * X.T @ (X @ theta - y)``
    and records the cost reached after the update.

    Parameters
    ----------
    X : array of shape (m, n), one row per sample.
    y : array-like of m targets (1-D, or an (m, 1) column).
    theta : array-like of n starting parameters. The default is a two-element
        column of zeros; it is copied into a new array and never mutated.
    alpha : learning rate.
    num_iter : number of update steps.

    Returns
    -------
    (theta, history) : the final parameter array and a list holding the cost
        after each of the ``num_iter`` updates.
    """
    y = np.asarray(y, dtype=float)
    theta = np.asarray(theta, dtype=float)
    # A column-shaped theta against a 1-D y would make ``X @ theta - y``
    # broadcast to an (m, m) matrix; flatten theta so the shapes agree.
    if y.ndim == 1 and theta.ndim == 2 and theta.shape[1] == 1:
        theta = theta.ravel()

    m = X.shape[0]
    history = []
    for _ in range(num_iter):
        residuals = np.dot(X, theta) - y
        # Rebinds theta to a fresh array; the caller's object is left intact.
        theta = theta - (alpha / m) * np.dot(X.T, residuals)
        history.append(compute_cost_vectorized(X, y, theta))
    return theta, history

In [16]:
# Always start from a zero vector so that re-running this cell reproduces the
# same fit instead of continuing from the previously fitted theta.
theta, history = gradient_descent_vectorized(X, y, np.zeros(X.shape[1]), alpha, iterations)

In [17]:
# Fitted parameters: theta[0] is the intercept, theta[1] the slope.
theta

array([-3.63029144,  1.16636235])

In [18]:
%%output size=200
hv.Curve(history)

In [19]:
%%output size=200
xs = [i for i in range(30)]
line = hv.Curve((xs, [theta[0] + x * theta[1] for x in xs]))

scatter * line

# Feature normalization

In [20]:
# def feature_normalization(X):
#     mean = X.mean()
#     std = X.std()
#     return (X - mean) / std

In [21]:
# df_norm = feature_normalization(X)
# df_norm[:10]

# Visualizing J(theta_0, theta_1)

In [22]:
th0s = np.linspace(-10, 10, num=100)
th1s = np.linspace(-10, 10, num=100)
# z[i][j] is the cost at (th0s[i], th1s[j]); one inner list per th0 value.
z = [
    [compute_cost_vectorized(X, y, [t0, t1]) for t1 in th1s]
    for t0 in th0s
]

In [23]:
# Create grid coordinates for plotting
theta0 = np.linspace(-100, 100, 100)
theta1 = np.linspace(-100, 100, 100)
xx, yy = np.meshgrid(theta0, theta1, indexing='xy')
# With indexing='xy' the grids have shape (theta1.size, theta0.size), so size
# Z from the grid itself; (theta0.size, theta1.size) only matches while both
# axes happen to have the same number of points.
Z = np.zeros(xx.shape)

# Calculate Z-values (Cost) based on grid of coefficients
for (i, j), _ in np.ndenumerate(Z):
    Z[i, j] = compute_cost_vectorized(X, y, theta=[xx[i, j], yy[i, j]])

In [24]:
%%output size=250
# Surface of the cost values Z over the (xx, yy) parameter grid.
hv.Surface((xx, yy, Z))

