# Practical Optimization
## for Stats Nerds & Snake Charmers

[Ryan J. O'Neil](mailto:ryanjoneil@gmail.com)  
Data Science DC  
March 20, 2017

### Least Squares

$$y = 3x^2 - 2x + 10 + \epsilon$$

$$\epsilon \sim N\left(0, 25^2\right)$$

In [156]:
# Generate some random data.
import numpy as np
import random

# Fix the seed so that Restart & Run All reproduces the same sample (and
# therefore the same fitted coefficients) on every run.
SEED = 42
N_SAMPLES = 500
NOISE_SD = 25  # random.normalvariate takes a standard deviation, not a variance

random.seed(SEED)

# Draw x uniformly on [-10, 10] and build y = 3x^2 - 2x + 10 + noise.
xs = []
ys = []
for _ in range(N_SAMPLES):
    xi = random.uniform(-10, 10)
    eps = random.normalvariate(0, NOISE_SD)
    yi = 3*xi**2 - 2*xi + 10 + eps

    xs.append(xi)
    ys.append(yi)

# Later cells call ndarray methods on these, so expose them as arrays.
x = np.array(xs)
y = np.array(ys)

In [157]:
from bokeh.charts import Scatter, output_notebook, show
# Enable Bokeh's notebook output before any chart is shown.
output_notebook()

# Scatter plot of the raw (x, y) sample generated above.
raw_points = {'x': x, 'y': y}
scatter = Scatter(raw_points, width=750, height=400)
show(scatter)

In [158]:
from sklearn.linear_model import LinearRegression

# Design matrix for a quadratic fit: one row [x^2, x, 1] per observation.
# The explicit column of ones plays the role of the intercept, which is why
# fit_intercept is switched off below.
X = np.column_stack((x**2, x, np.ones_like(x)))

lin = LinearRegression(fit_intercept=False).fit(X, y)
print(lin.coef_)

# Plot the fitted curve evaluated at the observed x values.
fitted = lin.predict(X)
scatter = Scatter({'x': x, 'y': fitted}, width=750, height=400)
show(scatter)

[ 3.02697999 -1.93969738  8.37864024]


$\min_x\ f(x) = \sum_i \left(y_i - a_i' x\right)^2 = \|y - Ax\|^2_2$

# TODO


In [159]:
from numpy.linalg import inv

# Closed-form least squares via the normal equations:
#   beta = (X'X)^-1 X' y
Xt = X.transpose()
# NOTE: despite its name, this holds only (X'X)^-1. For full-column-rank X the
# Moore-Penrose pseudo-inverse is this matrix multiplied by X'.
pseudo_inv = inv(np.matmul(Xt, X))
beta = np.matmul(np.matmul(pseudo_inv, Xt), y)

# Fitted values come straight from the design matrix (X beta) rather than
# re-expanding the polynomial by hand, matching how predictions are passed
# to Scatter for the sklearn fit.
scatter = Scatter({'x': x, 'y': np.matmul(X, beta)}, width=750, height=400)

print(beta)
show(scatter)

[ 3.02697999 -1.93969738  8.37864024]


In [160]:
import cvxopt as cvx

# Least squares posed as an unconstrained quadratic program. Expanding
# (1/2)||y - X b||^2 and dropping the constant term gives
#   minimize (1/2) b'Pb + q'b   with   P = X'X,   q = -X'y
gram = np.matmul(X.transpose(), X)
linear_term = -1 * np.matmul(y.transpose(), X)

P = cvx.matrix(gram)
q = cvx.matrix(linear_term)
solution = cvx.solvers.qp(P, q)
beta = solution['x']

# Evaluate the fitted quadratic at every observed x using the QP solution.
fitted = [beta[0]*xi**2 + beta[1]*xi + beta[2] for xi in x]
scatter = Scatter({'x': x, 'y': fitted}, width=750, height=400)

print(beta)
show(scatter)

[ 3.03e+00]
[-1.94e+00]
[ 8.38e+00]

