In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
def abline(a, b, label_, c=None):
    """Draw the straight line ``y = a * x + b`` on the current axes.

    The line is drawn between the two x-limits that the current axes
    report at the moment of the call, so callers that want the line to
    span the final view should fix the limits before calling.

    Parameters
    ----------
    a : slope of the line.
    b : intercept of the line.
    label_ : legend label forwarded to ``plt.plot``.
    c : color forwarded to ``plt.plot``.
    """
    x_ends = np.array(plt.gca().get_xlim())
    # zorder=1 is fixed here; callers in this notebook draw points at zorder=2.
    plt.plot(x_ends, a * x_ends + b, color=c, label=label_, zorder=1)

In [None]:
# Seed the global NumPy generator so that the synthetic data (and every
# fit and figure derived from it) is reproducible on a fresh kernel.
SEED = 0
np.random.seed(SEED)

num_sample = 290   # points generated from the ground-truth line
num_outlier = 10   # points generated independently of the line
total_samples = num_sample + num_outlier

theta = [-1, 0.2]  # ground truth, used below as [slope, intercept]

# Inliers: x ~ N(0, 0.5), y = slope * x + intercept + N(0, 0.1) noise.
X = np.random.normal(0, 0.5, num_sample)
y = theta[0] * X + theta[1]
y += np.random.normal(0, 0.1, num_sample)

# Outliers: x ~ N(-2, 0.5) and y ~ N(0, 0.5), drawn without reference
# to the ground-truth line.
X_out = np.random.normal(-2, 0.5, num_outlier)
y_out = np.random.normal(0, 0.5, num_outlier)

# Scatter is drawn before abline() so the line spans the data-driven x-limits.
plt.figure(figsize=(8, 8))
plt.scatter(X, y, s=2, label='the major cluster of data')
plt.scatter(X_out, y_out, s=2, label="outliers")
abline(theta[0], theta[1], 'ground truth', "red")
plt.legend()

# From here on X / y hold the inliers followed by the outliers;
# X_out / y_out keep the outliers on their own.
y = np.append(y, y_out)
X = np.append(X, X_out)

assert X.shape == y.shape == (total_samples,)
print(X.shape, y.shape)

In [None]:
iters = 20000
learning_rate = 0.01

# Design matrix: first column is x, second column is all ones (intercept).
X = X.reshape((total_samples, 1))
intercept = np.ones((X.shape[0], 1))
X_concatenate = np.concatenate((X, intercept), axis=1)


def _residuals_and_sq_errors(design, targets, params):
    """Return ``(targets - design @ params, that residual squared)``."""
    residual = targets - np.dot(design, params)
    return residual, residual ** 2


def _fit_tilted(design, targets, t, n_iters, step):
    """Fit linear-model parameters by fixed-step gradient descent from zero.

    Each step moves along the average of the per-sample squared-error
    gradients, where sample ``i`` is weighted by ``exp(t * error_i)``
    normalised over all samples.  With ``t == 0`` every weight is equal
    (ordinary mean-squared-error gradient); ``t > 0`` puts more weight on
    samples with large error and ``t < 0`` puts less.

    Parameters
    ----------
    design : (n, d) array of inputs, including any intercept column.
    targets : (n,) array of regression targets.
    t : scalar multiplying the squared error inside the exponential.
    n_iters : number of gradient steps.
    step : learning rate.

    Returns
    -------
    (d,) array with the parameters after the last step.
    """
    params = np.zeros(design.shape[1])
    for _ in range(n_iters):
        residual, sq_err = _residuals_and_sq_errors(design, targets, params)
        # Subtracting the maximum before exponentiating leaves the
        # normalised weights unchanged but keeps exp() away from
        # overflow / all-zero underflow.
        scaled = t * sq_err
        weights = np.exp(scaled - scaled.max())
        weights /= weights.sum()
        grad = np.dot(-1 * design.T, np.multiply(weights, 2 * residual))
        params = params - step * grad
    return params


thetas = []
max_ = []
min_ = []
avg_ = []
var_ = []
ts = []

# Two ranges are concatenated (instead of one arange(-10, 10, 0.2)) so that
# the non-negative half starts at exactly t = 0.0 and the negative half
# keeps the same values as a stand-alone arange(-10, 0, 0.2).
t_values = np.concatenate((np.arange(-10, 0, 0.2), np.arange(0, 10, 0.2)))

for t in t_values:
    theta_hat = _fit_tilted(X_concatenate, y, t, iters, learning_rate)

    thetas.append([theta_hat[0], theta_hat[1]])
    print(theta_hat)

    # Evaluate the loss at the final parameters, i.e. the same theta_hat
    # that was just stored and printed.
    _, error = _residuals_and_sq_errors(X_concatenate, y, theta_hat)
    loss = error * 0.5

    ts.append(t)
    avg_.append(np.mean(loss))
    max_.append(loss.max())
    min_.append(loss.min())
    var_.append(np.var(loss))

    print("t={}, max loss: {}, min loss: {}, avg loss: {}, variance: {}".format(t, loss.max(), loss.min(), np.mean(loss), np.var(loss)))



In [None]:
import matplotlib.pylab as pl
from matplotlib import rc
# Render all text through LaTeX (requires a LaTeX installation); this is a
# global rc setting and stays in effect for later figures too.
rc('text', usetex=True)

# 50 shades each, from the light end of the colormap up to 80% of its range.
colors_positive = pl.cm.Reds(np.linspace(0, 0.8, 50))
colors_negative = pl.cm.Blues(np.linspace(0, 0.8, 50))

plt.figure(figsize=(4, 3.5))
ax = plt.subplot(1, 1, 1)

# Fix the view limits BEFORE drawing any line: abline() spans the x-limits
# in effect at call time, and on still-empty axes those start at the
# default range and only widen a little with each plotted line, which
# would leave the first lines as short stubs inside the final window.
plt.xlim(-3.5, 2.5)
plt.ylim(-1.2, 1.8)

# Position in `thetas` of the fit that is highlighted separately below.
# Assumes `thetas` holds 50 negative-t fits followed by the t >= 0 fits,
# so this entry is the t = 0 fit -- confirm against the training cell.
zero_t_index = 50

print(len(thetas))
for i, (slope, offset) in enumerate(thetas):
    if i > zero_t_index:
        # Darker red the further the index is above the t = 0 entry
        # (shade index capped at 39).
        shade = min(int((i - zero_t_index) * 1.1), 39)
        abline(slope, offset, None, c=colors_positive[shade])
    elif i < zero_t_index:
        # Darker blue the further the index is below the t = 0 entry
        # (shade index capped at 49).
        shade = min(int((zero_t_index - 1 - i) * 3.2), 49)
        abline(slope, offset, None, c=colors_negative[shade])

# Points sit above the lines (zorder 2 vs. the zorder 1 used by abline).
plt.scatter(X, y, s=1, c='#8c564b', zorder=2)
plt.scatter(X_out, y_out, s=3, c='#8c564b', zorder=2)

# The highlighted fit is drawn last so it lies on top of the other lines.
abline(thetas[zero_t_index][0], thetas[zero_t_index][1], None, c='#e377c2')

# Light-grey frame and tick marks.
ax.tick_params(color='#dddddd')
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('#dddddd')

plt.title("linear regression", fontsize=17)
plt.xlabel(r'$x$', fontsize=17)
plt.ylabel(r'$y$', fontsize=17)
plt.tight_layout()
plt.savefig("2-linear_regression.pdf")