In [1]:
%matplotlib qt
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
def solveRegressionKaczmarz(A, b, x_0, i_max=1e4, discrepancy=1e-3):
    """Approximate a solution of ``A x = b`` with cyclic Kaczmarz sweeps.

    One sweep visits the rows of ``A`` in order and, for each row ``a_j``,
    moves the current iterate onto the hyperplane ``a_j . x = b[j]``.
    Sweeps repeat until the residual norm ``||A x - b||`` is no larger than
    ``discrepancy`` or ``i_max`` sweeps have been performed.

    Parameters
    ----------
    A : 2-D array
        Coefficient matrix; rows are indexed as ``A[j, :]``.
    b : 1-D array
        Right-hand side, one entry per row of ``A``.
    x_0 : 1-D array
        Initial guess, one entry per column of ``A``.
    i_max : number, optional
        Maximum number of full sweeps.
    discrepancy : float, optional
        Residual-norm threshold at which iteration stops.

    Returns
    -------
    x : 1-D array
        The last iterate (``x_0`` itself if no sweep was needed).
    X : array
        All iterates stacked row-wise, starting with ``x_0``.
    i : int
        Number of sweeps performed.
    """
    n_rows = np.size(A, 0)
    # The squared row norms never change between sweeps, so compute them once.
    row_sq_norms = [A[j, :].dot(A[j, :]) for j in range(n_rows)]

    i, x = 0, x_0
    err = np.linalg.norm(A @ x - b)
    X = [x]
    while i < i_max and err > discrepancy:
        z = x
        for j in range(n_rows):
            if row_sq_norms[j] == 0:
                # An all-zero row defines no hyperplane to project onto;
                # dividing by its zero norm would turn the iterate into NaN/inf.
                continue
            row = A[j, :]
            z = z + 1 / row_sq_norms[j] * (b[j] - row.dot(z)) * row
        x = z
        err = np.linalg.norm(A @ x - b)
        X.append(x)
        i += 1
    return x, np.array(X), i

In [3]:
# Load dataset
# The data directory defaults to the original location; set the DATABOOK_DIR
# environment variable to point at it on another machine.
data_dir = os.environ.get('DATABOOK_DIR', '/home/rob/data/databookuw')
H = np.loadtxt(os.path.join(data_dir, 'housing.data'))
b = H[:, -1] # Housing values in $1000s
A = H[:, :-1] # Other factors

# Pad with ones for nonzero offset
A = np.pad(A, [(0,0),(0,1)], mode='constant', constant_values=1)

In [4]:
# Solve Ax=b using SVD
U, S, VT = np.linalg.svd(A, full_matrices=False)
# x = V diag(1/S) U^T b. Dividing by S inverts the diagonal factor directly,
# avoiding a dense diag(S) matrix and a general-purpose matrix inverse.
# NOTE: assumes every singular value in S is nonzero.
x_svd = VT.T @ ((U.T @ b) / S)
print('x_svd =', x_svd)

x_svd = [-1.08011358e-01  4.64204584e-02  2.05586264e-02  2.68673382e+00
 -1.77666112e+01  3.80986521e+00  6.92224640e-04 -1.47556685e+00
  3.06049479e-01 -1.23345939e-02 -9.52747232e-01  9.31168327e-03
 -5.24758378e-01  3.64594884e+01]


In [5]:
# Run the Kaczmarz solver from an all-zero initial guess
# (one unknown per column of A), capped at 5000 sweeps.
x_0 = np.zeros(A.shape[1])
x_kac, X, n_steps = solveRegressionKaczmarz(A, b, x_0, i_max=5000)

# Report the final iterate, the number of sweeps used and the residual norm.
print('x_kac =', x_kac)
print('no. of iterations =', n_steps)
print('Min. norm error =', np.linalg.norm(A @ x_kac - b))

x_kac = [-9.31260053e-02  1.74683101e-04 -1.34940452e-01 -4.30878256e-01
  5.40106814e-02  5.12454224e+00 -3.48918049e-02 -2.96417787e-01
  6.77591996e-01 -3.83922080e-02  9.88110037e-02 -8.90012254e-03
 -3.50519400e-01  1.67102469e-01]
no. of iterations = 5000
Min. norm error = 272.9607754262492


In [6]:
# Figure-wide defaults: wide canvas and larger text
plt.rcParams.update({'figure.figsize': [16, 8], 'font.size': 18})

fig = plt.figure()

# Left panel: data and both fits, rows in their original order
ax1 = fig.add_subplot(121)
ax1.plot(b, color='k', linewidth=2, label='Housing Value') # True relationship
ax1.plot(A@x_svd, '-o', color='r', linewidth=2, markersize=6, label='SVD')
ax1.plot(A@x_kac, '--', color='b', linewidth=3, markersize=6, label='Kaczmarz')
ax1.set_xlabel('Neighborhood')
ax1.set_ylabel('Median Home Value [$1k]')
ax1.legend()

# Right panel: the same three curves with rows reordered by increasing value
ax2 = fig.add_subplot(122)
sort_ind = np.argsort(H[:,-1])
ax2.plot(b[sort_ind], color='k', linewidth=2, label='Housing Value') # True relationship
ax2.plot(A[sort_ind,:]@x_svd, '-o', color='r', linewidth=2, markersize=6, label='SVD')
ax2.plot(A[sort_ind,:]@x_kac, '--', color='b', linewidth=3, markersize=6, label='Kaczmarz')
ax2.set_xlabel('Neighborhood')
ax2.legend()

plt.show()