Implemented the gradient descent function for linear regression. However, it still requires additional testing. All functions should also take care of checking for correct input sizes and of adding the x_0 (intercept) column of ones to x.
mansenfranzen committed Apr 24, 2016
1 parent a4c253a commit ba8fefb
Showing 3 changed files with 88 additions and 15 deletions.
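The follow-up work the message describes (input-size checks and automatic handling of the x_0 column of ones) is not part of this commit; a minimal standalone sketch of what such shared helpers could look like, with hypothetical names add_intercept and check_inputs:

import numpy as np

def add_intercept(x):
    # Prepend the x_0 column of ones unless the first column already is ones.
    x = np.asarray(x, dtype=float)
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    if not np.all(x[:, 0] == 1):
        x = np.append(np.ones((x.shape[0], 1)), x, axis=1)
    return x

def check_inputs(x, y):
    # Fail fast when the design matrix and target disagree in sample count.
    if x.shape[0] != y.shape[0]:
        raise ValueError("x has %d rows but y has %d" % (x.shape[0], y.shape[0]))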
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -17,6 +17,7 @@
import os

sys.path.insert(0, os.path.abspath('../../'))
+sys.path.insert(0, os.path.abspath('../../mlearn/algorithms/'))
import mlearn

# If extensions (or modules to document with autodoc) are in another directory,
100 changes: 86 additions & 14 deletions mlearn/algorithms/linreg.py
@@ -1,6 +1,7 @@
"""This module contains simple functions for linear regressions."""

import numpy as np
+import transform

def sum_of_squared_residuals(x, y, beta):
"""
@@ -28,7 +29,7 @@ def sum_of_squared_residuals(x, y, beta):
    Notes
    -----
-    .. math:: SSR(\\beta_m) = \\displaystyle\\sum_{i=1}^{n}(x_i\\beta_m-y_i)^2
+    .. math:: SSR(\\beta) = \\displaystyle\\sum_{i=1}^{n}(x_i\\beta-y_i)^2
    .. math:: SSR(\\beta) = (x\\beta-y)^T (x\\beta-y)
    Examples
@@ -55,7 +56,7 @@ def sum_of_squared_residuals(x, y, beta):

    return ssr.ravel()
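As a quick check of the two equivalent forms in the Notes above, the summation and the vectorized expression agree on toy data (a standalone sketch, not part of the diff):

import numpy as np

x = np.array([[1., 11.], [1., 15.], [1., 22.], [1., 27.]])
y = np.array([[12.], [15.], [19.], [22.]])
beta = np.array([[1.], [0.5]])

residuals = np.dot(x, beta) - y  # (x beta - y), shape (4, 1)
summation = sum(((np.dot(x[i], beta) - y[i]) ** 2).item() for i in range(len(y)))
vectorized = np.dot(residuals.T, residuals).item()  # (x beta - y)^T (x beta - y)
assert np.isclose(summation, vectorized)  # both give SSR(beta)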

-def gradient_descent(x, y, beta_init=None, gamma=1, max_iter=200,
+def gradient_descent(x, y, beta_init=None, gamma=0.01, max_iter=200,
                      threshold=0.01, scaling=True, regularize=False):
"""
Numerically estimates the unknown parameters :math:`\\beta_i` for a linear
Expand Down Expand Up @@ -104,22 +105,65 @@ def gradient_descent(x, y, beta_init=None, gamma=1, max_iter=200,
    Notes
    -----
-    .. math:: (1) \\text{The cost function is: } f(\\beta)=\\frac{1}{2n}\\
-        \\displaystyle\\sum_{i=1}^{n}(x_i\\beta_m-y_i)^2 = \\
+    .. math:: f(\\beta)=\\frac{1}{2n}\\
+        \\displaystyle\\sum_{i=1}^{n}(x_i\\beta-y_i)^2 = \\
        \\frac{1}{2n}(x\\beta-y)^T (x\\beta-y)=\\frac{1}{2n}SSR(\\beta)
-    .. math:: (2) \\text{The aim is: } \\min_\\beta f(\\beta)
-    .. math:: (3) \\text{The gradient function is: }f'(\\beta_m)=\\beta_m-\\
-        \\gamma\\frac{1}{n}\\displaystyle\\sum_{i=1}^{n}\\
-        (x_i\\beta_m-y_i)^2 x_i
-    .. math:: (4) f'(\\beta)=\\beta-\\gamma\\frac{1}{n}x^T(x\\beta-y)
+    .. math:: f'(\\beta)=\\beta-\\
+        \\frac{\\gamma}{n}\\displaystyle\\sum_{i=1}^{n}\\
+        (x_i\\beta-y_i) x_i = \\beta-\\gamma\\frac{1}{n}x^T(x\\beta-y)
+    .. math:: f'_{reg}(\\beta)=\\beta(1-\\gamma \\frac{\\lambda}{n} \\
+        \\beta_{reg})-\\frac{\\gamma}{n}x^T(x\\beta-y) \\text{ where } \\
+        \\beta_{reg} = \\begin{bmatrix} 0 \\\\ 1 \\\\ \\vdots \\\\ 1_m \\
+        \\end{bmatrix}
    References
    ----------
    [4] https://en.wikipedia.org/wiki/Gradient_descent
    """

-    pass
+    n, m = x.shape
+
+    if scaling:
+        # Standardize the feature columns only; re-attach the intercept column.
+        x = transform.standardize(x[:, 1:])
+        x = np.append(np.ones((x.shape[0], 1)), x, axis=1)
+
+    if beta_init is None:  # "if not beta_init" would fail on a numpy array
+        beta_init = np.ones((m, 1))
+
+    reg_term = np.ones((m, 1))
+    if regularize:
+        # Do not penalize the intercept coefficient beta_0.
+        beta_reg = np.ones((m, 1))
+        beta_reg[0, 0] = 0
+        reg_term = 1 - (gamma * (regularize / float(n)) * beta_reg)
+
+    beta = beta_init
+    current_cost = None
+    n_iter = 0
+
+    while True:
+        # Gradient step: beta <- beta * reg_term - (gamma / n) * x^T (x beta - y)
+        right_term = (gamma / float(n))
+        right_term = right_term * np.dot(np.transpose(x), np.dot(x, beta) - y)
+
+        left_term = beta * reg_term
+        beta = left_term - right_term
+
+        cost = sum_of_squared_residuals(x, y, beta)
+
+        # Stop once the cost improvement falls below the threshold.
+        if current_cost is None:
+            current_cost = cost
+        elif np.abs(cost - current_cost) < threshold:
+            break
+        else:
+            current_cost = cost
+
+        if n_iter == max_iter:
+            break
+        else:
+            n_iter += 1
+
+    return beta
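A minimal usage sketch of the new function, reusing the toy data from the __main__ block further down (scaling disabled so the coefficients stay in the original feature units; gamma and threshold here are illustrative values, not the defaults):

import numpy as np

x = np.array([[1., 11.], [1., 15.], [1., 22.], [1., 27.]])
y = np.array([[12.], [15.], [19.], [22.]])

beta = gradient_descent(x, y, scaling=False, gamma=0.001,
                        max_iter=10000, threshold=1e-9)
print(sum_of_squared_residuals(x, y, beta))  # approaches the OLS minimum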


def ordinary_least_squares(x, y, regularize=False):
"""
Expand Down Expand Up @@ -199,19 +243,47 @@ def ordinary_least_squares(x, y, regularize=False):
    return beta
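The body of ordinary_least_squares is collapsed in this view. For context, a sketch of the closed-form normal-equation estimator it presumably implements, with the intercept left unpenalized to mirror the regularized gradient above (the name ols_sketch and the ridge-style lam term are assumptions, not the committed code):

import numpy as np

def ols_sketch(x, y, lam=0.0):
    # Normal equations: beta = (x^T x + lam * I)^(-1) x^T y
    reg = lam * np.eye(x.shape[1])
    reg[0, 0] = 0.0  # do not penalize the intercept beta_0
    return np.linalg.solve(np.dot(x.T, x) + reg, np.dot(x.T, y))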

if __name__ == "__main__":
"""
x = np.array([[1, 11, 104],
[1, 15, 99],
[1, 22, 89],
[1, 27, 88]])
x2 = np.array([[1, 11],
[1, 15],
[1, 22],
[1, 27]])
y = np.array([[12],
[15],
[19],
[22]])
beta_zero = np.zeros((3,1))
beta_zero = np.zeros((2,1))
print(sum_of_squared_residuals(x2, y, beta_zero))
beta_min = ordinary_least_squares(x2, y)
print(sum_of_squared_residuals(x2, y, beta_min))
beta_grad = gradient_descent(x2, y, scaling=False, gamma=0.001)
print(sum_of_squared_residuals(x2, y, beta_grad))
"""

+    from sklearn import datasets, linear_model
+    diabetes = datasets.load_diabetes()
+
+    # Create linear regression object
+    regr = linear_model.LinearRegression()
+
+    # Train the model using the training sets
+    regr.fit(diabetes.data, diabetes.target)
+    print(sum_of_squared_residuals(diabetes.data, diabetes.target, regr.coef_))
+
+    x = np.append(np.ones((diabetes.data.shape[0], 1)), diabetes.data, axis=1)
+    y = diabetes.target
+    ols = ordinary_least_squares(x, y)
+    print(sum_of_squared_residuals(x, y, ols))
+
+    beta_zero = np.zeros((x.shape[1], 1))  # was undefined: only set inside the string above
+    print(sum_of_squared_residuals(x, y, beta_zero))
+    gd = gradient_descent(x, np.reshape(y, (442, 1)))
+    print(sum_of_squared_residuals(x, y, gd.ravel()))

2 changes: 1 addition & 1 deletion mlearn/algorithms/transform.py
@@ -3,7 +3,7 @@
import numpy as np


-def standardize(x,):
+def standardize(x):
"""
Perform z-transformation to get standard score of input matrix.
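The rest of standardize is truncated in this view; a minimal sketch consistent with its docstring (column-wise z-score; standardize_sketch is a hypothetical stand-in, and using the population standard deviation is an assumption):

import numpy as np

def standardize_sketch(x):
    # Column-wise z-score: subtract each column's mean, divide by its std.
    x = np.asarray(x, dtype=float)
    return (x - x.mean(axis=0)) / x.std(axis=0)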
