# 1. Regression: quadratic function optimization

---

## Quadratic function optimization

At the point that minimizes $E(w)=\sum_{i=1}^{n}(f(x_i)-t_i)^2$,
<br> 
$\partial E(w)/\partial w_j=0$ for $j = 0,...,d$, where $d$ is the number of features ($x_0 = 1$ for all data points).

<br>

**a)** For $f(x) = w_0 + w_1 x$, it is equivalent to $\sum_{i=1}^{n}2x_{ij}(f(x_i)-t_i)=0$ for $j = 0,...,d$.

<br>

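**b)** For $f(x) = w_0 + w_1\cos(\pi x)$, it is equivalent to $\sum_{i=1}^{n}2(f(x_i)-t_i)=0$ (derivative with respect to $w_0$) and $\sum_{i=1}^{n}2\cos(\pi x_i)(f(x_i)-t_i)=0$ (derivative with respect to $w_1$).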

<br>

The process is as follows:

In [4]:
import numpy as np
from sympy import symbols, solve 
from fractions import Fraction

# Symbolic unknowns: the two model weights.
w0, w1 = symbols('w0 w1')

# Training set; each row is one (x, t) pair.
given_data = np.array([(-1, 1), (0, 1), (1, 1), (1, 0)])


def _print_equations(heading, d_w0, d_w1):
    """Print the heading, then both stationarity equations as `... = 0`."""
    print(heading)
    print(f"dE(w0, w1)/dw0 = {d_w0} = 0")
    print(f"dE(w0, w1)/dw1 = {d_w1} = 0")


def _print_weights(solution):
    """Print the solved weights w0 and w1, preceded by a blank line."""
    print(f"\nw0 = {solution[w0]},\nw1 = {solution[w1]}")


# ---- a) f(x) = w0 + w1*x ----
# Each derivative of E is the sum over the data of
# 2 * (basis value) * (f(x) - t); the basis value paired with w0 is the
# constant x0 = 1.
derivative_w0 = sum(2*1*(w0 + w1*x - t) for x, t in given_data)
derivative_w1 = sum(2*x*(w0 + w1*x - t) for x, t in given_data)
_print_equations("a)", derivative_w0, derivative_w1)

# Both derivatives must vanish: solve the pair of equations for (w0, w1).
solution_for_a = solve([derivative_w0, derivative_w1], (w0, w1))
_print_weights(solution_for_a)

# ---- b) f(x) = w0 + w1*cos(pi*x) ----
# Same construction, with cos(pi*x) as the basis value paired with w1.
derivative_w0 = sum(
    2*1*(w0 + w1*np.cos(np.pi*x) - t)
    for x, t in given_data
)
derivative_w1 = sum(
    2*np.cos(np.pi*x)*(w0 + w1*np.cos(np.pi*x) - t)
    for x, t in given_data
)
_print_equations("\nb)", derivative_w0, derivative_w1)

solution_for_b = solve([derivative_w0, derivative_w1], (w0, w1))
_print_weights(solution_for_b)

# Show the weights of b) once more as fractions: the string form of each
# solved value is parsed by Fraction and reduced with limit_denominator().
frac_w0 = Fraction(str(solution_for_b[w0])).limit_denominator()
frac_w1 = Fraction(str(solution_for_b[w1])).limit_denominator()
print(f"in fraction:\nw0 = {frac_w0}, \nw1 = {frac_w1}")


a)
dE(w0, w1)/dw0 = 8*w0 + 2*w1 - 6 = 0
dE(w0, w1)/dw1 = 2*w0 + 6*w1 = 0

w0 = 9/11,
w1 = -3/11

b)
dE(w0, w1)/dw0 = 8*w0 - 4.0*w1 - 6 = 0
dE(w0, w1)/dw1 = -4.0*w0 + 8.0*w1 + 2.0 = 0

w0 = 0.833333333333333,
w1 = 0.166666666666667
in fraction:
w0 = 5/6, 
w1 = 1/6


---

# 2. Regression based on Matrix operations (1)

---

## Generalized version of linear regression

When $f(x) = w_0 + w_1h_1(x) + ... + w_dh_d(x) = w_0 + \sum_{j=1}^{d} w_j h_j(x)$,

> Let's say that
> $$ H = \begin{bmatrix}
> h_0(x_1)=1 & h_1(x_1) & ... & h_d(x_1) \\
> h_0(x_2)=1 & h_1(x_2) & ... & h_d(x_2) \\
> ... & ... & ... & ... \\
> h_0(x_n)=1 & h_1(x_n) & ... & h_d(x_n) \\
> \end{bmatrix}
> $$
>
> $$ w = \begin{bmatrix}
> w_0 \\
> w_1 \\
> ... \\
> w_d \\
> \end{bmatrix}
> $$
>
> $$ T = \begin{bmatrix}
> t_1 \\
> t_2 \\
> ... \\
> t_n \\
> \end{bmatrix}
> $$
> 

Then the weights that minimize the squared error are $w = (H^T H)^{-1} H^T T$.




In [5]:
# Redefine the dataset in matrix form: X holds one sample per row (a single
# feature column), t holds the matching targets.
X = np.array([[-1], [0], [1], [1]])
t = np.array([1, 1, 1, 0])

# (a)
# Design matrix for f(x) = w0 + w1*x: a leading column of ones (x0 = 1 for
# all data points) next to the raw feature column.
X_a = np.hstack([np.ones((X.shape[0], 1)), X])

# Weights w = (H^T*H)^-1*H^T*t, obtained by solving the normal equations
# (H^T*H) w = H^T*t directly instead of forming the inverse explicitly.
w = np.linalg.solve(X_a.T @ X_a, X_a.T @ t)

# print the result; each weight is converted to a fraction by parsing its
# string form and reducing it with limit_denominator()
print(f"""a) f(x) = w0 + w1*x

X = 
{X_a},

t = {t},

w0 in fraction = {Fraction(str(w[0])).limit_denominator()},
w1 in fraction = {Fraction(str(w[1])).limit_denominator()}""")

a) f(x) = w0 + w1*x

X = 
[[ 1. -1.]
 [ 1.  0.]
 [ 1.  1.]
 [ 1.  1.]],

t = [1 1 1 0],

w0 in fraction = 9/11,
w1 in fraction = -3/11


In [6]:
# (b)
# Design matrix H for f(x) = w0 + w1*cos(pi*x): a column of ones (paired
# with w0) next to the basis column cos(pi*x) (paired with w1).
H_b = np.hstack([np.ones((X.shape[0], 1)), np.cos(np.pi*X)])

# Weights w = (H^T*H)^-1*H^T*t, obtained by solving the normal equations
# (H^T*H) w = H^T*t directly instead of forming the inverse explicitly.
w = np.linalg.solve(H_b.T @ H_b, H_b.T @ t)

# print the result; each weight is converted to a fraction by parsing its
# string form and reducing it with limit_denominator()
print(f"""\nb) f(x) = w0 + w1*cos(pi*x)

H =
{H_b},

t = {t},

w0 in fraction = {Fraction(str(w[0])).limit_denominator()},
w1 in fraction = {Fraction(str(w[1])).limit_denominator()}""")


b) f(x) = w0 + w1*cos(pi*x)

H =
[[ 1. -1.]
 [ 1.  1.]
 [ 1. -1.]
 [ 1. -1.]],

t = [1 1 1 0],

w0 in fraction = 5/6,
w1 in fraction = 1/6


In [7]:
# c)
# f(x) = w0 + w1*x + w2*x^2
# Design matrix H: a column of ones, the feature column x, and its
# element-wise square x^2.
H_c = np.hstack([np.ones((X.shape[0], 1)), X, X**2])

# Weights w = (H^T*H)^-1*H^T*t, obtained by solving the normal equations
# (H^T*H) w = H^T*t directly instead of forming the inverse explicitly.
w = np.linalg.solve(H_c.T @ H_c, H_c.T @ t)

# print the result; each weight is converted to a fraction by parsing its
# string form and reducing it with limit_denominator()
print(f"""\nc) f(x) = w0 + w1*x + w2*x^2

H =
{H_c},

t = {t},

w0 in fraction = {Fraction(str(w[0])).limit_denominator()},
w1 in fraction = {Fraction(str(w[1])).limit_denominator()},
w2 in fraction = {Fraction(str(w[2])).limit_denominator()}""")


c) f(x) = w0 + w1*x + w2*x^2

H =
[[ 1. -1.  1.]
 [ 1.  0.  0.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]],

t = [1 1 1 0],

w0 in fraction = 1,
w1 in fraction = -1/4,
w2 in fraction = -1/4


In [8]:
# d)
# f(x) = w0 + w1*exp(-x^2) + w2*exp(-(x+1)^2)
# Design matrix H: a column of ones followed by the two exponential basis
# columns exp(-x^2) and exp(-(x+1)^2).
H_d = np.hstack([np.ones((X.shape[0], 1)), np.exp(-X**2), np.exp(-(X+1)**2)])

# Weights w = (H^T*H)^-1*H^T*t, obtained by solving the normal equations
# (H^T*H) w = H^T*t directly instead of forming the inverse explicitly.
w = np.linalg.solve(H_d.T @ H_d, H_d.T @ t)

# print the result: raw floats first, then fractions.
# NOTE: limit_denominator() returns the closest fraction whose denominator
# does not exceed its default bound (1,000,000), so the fractions printed
# here are rational approximations of the floats above.
print(f"""\nd) f(x) = w0 + w1*exp(-x^2) + w2*exp(-(x+1)^2)

H =
{H_d},

t = {t},

w0 = {w[0]},
w1 = {w[1]},
w2 = {w[2]}

w0 in fraction = {Fraction(str(w[0])).limit_denominator()},
w1 in fraction = {Fraction(str(w[1])).limit_denominator()},
w2 in fraction = {Fraction(str(w[2])).limit_denominator()}""")


d) f(x) = w0 + w1*exp(-x^2) + w2*exp(-(x+1)^2)

H =
[[1.         0.36787944 1.        ]
 [1.         1.         0.36787944]
 [1.         0.36787944 0.01831564]
 [1.         0.36787944 0.01831564]],

t = [1 1 1 0],

w0 = 0.30329976958021276,
w1 = 0.5093286801818862,
w2 = 0.509328680181887

w0 in fraction = 283265/933944,
w1 in fraction = 26562/52151,
w2 in fraction = 26562/52151


---

# 3. Regression based on Matrix operations (2)

Use the same logic as in #2, but with the following $H$ matrix.

> $$
> H = \begin{bmatrix}
> x_0 = 1 & Brand_1 \cdot \sqrt{MPG_1} & \log(HP_1) & HP_1/AGE_1 \\
> x_0 = 1 & Brand_2 \cdot \sqrt{MPG_2} & \log(HP_2) & HP_2/AGE_2 \\
> ... & ... & ... & ... \\
> x_0 = 1 & Brand_n \cdot \sqrt{MPG_n} & \log(HP_n) & HP_n/AGE_n \\
> \end{bmatrix}
> $$

where $x_0 = 1$ for all data points.




In [9]:
# given data points (x1, x2, x3, x4, t)
# x1 : age, x2 : HP, x3 : Brand, x4 : MPG, t : Price
X = np.array([[2, 200, 4, 27],
              [5, 150, 3, 35],
              [3, 180, 4, 25],
              [1, 230, 2, 10],
              [5, 180, 5, 40],
              [4, 210, 3, 30]])
t = np.array([30000, 20000, 25000, 21000, 38000, 31000])

# Name each feature column once; the k:k+1 slices keep them 2-D (n, 1) so
# they can be stacked side by side with hstack.
age = X[:, 0:1]
hp = X[:, 1:2]
brand = X[:, 2:3]
mpg = X[:, 3:4]

# construct the matrix H for
# f(x) = w0 + w1 * x3 * sqrt(x4) + w2 * log(x2) + w3 * x2 / x1
H = np.hstack([np.ones((X.shape[0], 1)),
               brand * np.sqrt(mpg),
               np.log(hp),
               hp / age])

# Weights w = (H^T*H)^-1*H^T*t, obtained by solving the normal equations
# (H^T*H) w = H^T*t directly instead of forming the inverse explicitly.
w = np.linalg.solve(H.T @ H, H.T @ t)

# print the result
print(f"""\n
f(x) = w0 + w1 * x3 * sqrt(x4) + w2 * log(x2) + w3 * x2/x1

H =
{H},

t = {t},

w0 = {w[0]},
w1 = {w[1]},
w2 = {w[2]},
w3 = {w[3]}""")

print(f"""\nThe best Price function is:
Price = {w[0]} + 
        {w[1]} * Brand * sqrt(MPG) + 
        {w[2]} * log(HP) + 
        {w[3]} * HP/Age""")





f(x) = w0 + w1 * x3 * sqrt(x4) + w2 * log(x2) + w3 * x2/x1

H =
[[  1.          20.78460969   5.29831737 100.        ]
 [  1.          17.74823935   5.01063529  30.        ]
 [  1.          20.           5.19295685  60.        ]
 [  1.           6.32455532   5.43807931 230.        ]
 [  1.          31.6227766    5.19295685  36.        ]
 [  1.          16.43167673   5.34710753  52.5       ]],

t = [30000 20000 25000 21000 38000 31000],

w0 = -185350.17920454353,
w1 = 799.5978401950955,
w2 = 38172.70195692336,
w3 = -29.22653054540816

The best Price function is:
Price = -185350.17920454353 + 
        799.5978401950955 * Brand * sqrt(MPG) + 
        38172.70195692336 * log(HP) + 
        -29.22653054540816 * HP/Age
