In [4]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import StandardScaler


In [5]:
df = pd.read_csv('https://raw.githubusercontent.com/rasbt/'
                 'python-machine-learning-book-2nd-edition'
                 '/master/code/ch10/housing.data.txt',
                 header=None,
                 sep='\s+')

df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 
              'NOX', 'RM', 'AGE', 'DIS', 'RAD', 
              'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
df.head()


# create the training and test sets 
X = df.drop('MEDV',axis = 1).values
y = df['MEDV'].values



sc_x = StandardScaler()
sc_y = StandardScaler()
X_std = sc_x.fit_transform(X)
y_std = sc_y.fit_transform(y[:, np.newaxis]).flatten()

In [6]:
class LinearRegressionGD(object):
    # n_iter is # of passes through training set
    # eta is the leanring rate. 
    def __init__(self, eta=0.01, n_iter=100,lam = .3):
        self.eta = eta
        self.n_iter = n_iter
        self.lam = lam
        self.w_ = None
        self.wols_ = None
        
     # we initialize the weights, w_ by setting them = to 0 
    # the reason we add 1 is because one of them will act as our intercept and we 
    # have to deal with it separately
    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.wols_ = np.zeros(1 + X.shape[1])
        
        

        # where we will store the value of our cost function 
        self.cost_ = []
        self.ovun_ = []
        self.ols_ = []
        self.rige_ = []
        self.cost_ols_ = []
   
        for i in range(self.n_iter):
            # this will be the cost of when we update the weights
            # for each row of individual training data
            cost = []
            cost_ols = []
            ols = []
            rige = []
            ovun = []
            
            for xi, target in zip(X,y):
                cost.append(self._update_weights(xi,target)[0])
                ovun.append(self._update_weights(xi,target)[1])
                rige.append(self._update_weights(xi,target)[2])
                ols.append(self._update_weights(xi,target)[3])
                cost_ols.append(self._update_weights(xi,target)[4])

            # So we basically just want all of our 'fitted' points generated from our model with the current weights
           # output = self.net_input(X)
           # errors = (y - output)
            # we see how far off we are. 
            # uhhhhh why are we updating the weights outside of update weights???
            # no thanks, man. 
            #self.w_[1:] += self.eta * X.T.dot(errors)
            #self.w_[0] += self.eta * errors.sum()
            #cost = (errors**2).sum() / 2.0
            avg_cost = sum(cost) / len(y)
            avg_ols_cost = sum(cost_ols) / len(y)
            self.cost_.append(avg_cost)
            self.cost_ols_.append(avg_ols_cost)
            self.ovun_.append(ovun)
            self.ols_.append(ols)
            self.rige_.append(rige)
            
        return self
    
    def _update_weights(self, xi, target):
        """Apply Adaline learning rule to update the weights"""
        
        # for ridge 
        output = self.net_input_ridge(xi)
        error = (target - output)
        # for ols 
        ols_output = self.net_input_ols(xi)
        ols_error = (target - ols_output)
        #ovun = []
        #ridge_weights = []
        #ls_weights = []
        # so we keep track of whether we were overshooting or undershooting
        # and what the weights are for ridge regression v. ols so that we can compare... 
        # AND FINALLY so we can tell which ones are bigger/smaller. 
    
        ### this elif block is so we can know if we guessed too high 
        # or too low so we can know what the penalty does in 
        # either case.... 
        if error >0:
            ovun = "undershot"
        elif error < 0:
            ovun = "overshot"
        else:
            ovun = "correct"
    
        
        
        # this is the penalty added to our cost function  (before taking derivative wrt weights)
        
        penalty = np.dot(self.w_[1:].T,self.w_[1:]) * self.lam
        #penalty = np.dot(self.w_[0:].T,self.w_[0:]) * self.lam
        
        
        
        
        # alright, for comparision purposes,we will capture the new weights
        # for OLS and for Ridge, and store them so that they can be accesed and compared after running the function 
        
        ridge_weights = self.w_[1:] + (self.eta * (xi.dot(error) - (self.lam * self.w_[1:])))
        ls_weights = self.wols_[1:] + (self.eta * xi.dot(ols_error))
        
        
        # so these are all of the weights that aren't the intercept
        # actually updating the weight here!
        self.w_[1:] += (self.eta * (xi.dot(error) - (self.lam * np.sum(self.w_[1:]))))
        self.wols_[1:] += (self.eta * xi.dot(ols_error))

        # then update the intercept
        # allegedly, we don't have to penalize the intercept
        # besides, it wouldn't really make sense to do that 
        self.w_[0] += (self.eta * error)
        self.wols_[0] += (self.eta * ols_error)
        # return the value of the cost function we wish to minimize
        
        cost = (0.5 * (error**2 ))+ (0.5 * penalty)
        cost_ols = (0.5 * (error**2 ))
        return cost, ovun, ridge_weights, ls_weights, cost_ols
    
    
    def net_input_ridge(self, X):
        # Here, we're just plugging in our datapoints to the linear regression equation 
        # with new weights (well, with the exception of the first time you run through the data)
        return np.dot(X, self.w_[1:]) + self.w_[0]
    
    def net_input_ols(self,X):
          return np.dot(X, self.wols_[1:]) + self.wols_[0]

    def predict_ridge(self, X):
        return self.net_input_ridge(X)
    def predict_ols(self,X):
        return self.net_input_ols(X)

In [8]:
lr = LinearRegressionGD()

lr.fit(X_std, y_std)
lr.rige_
# this is still a work in progress, but you can run this to get an idea of what's happening to the weights with 
# and without the penalty in OLS v. Ridge Regression

# if you uncomment what we had before. 
#for i in range(0,len(lr.rige_)):
##    for update in range(0,len(lr.rige_[0])): 
#        print("You {} your target, your new ols weight is {} and your new ridge weight is {}".format(lr.ovun_[i][update],lr.ols_[i][update],lr.rige_[i][update]))

print(y_std.max() - y_std.min())
print((np.sum(lr.predict_ols(X_std)-y_std)**2)/len(X_std))
# as you can see, the error really wasn't very small... but we have to remember that you need to choose a lambda that will 
# give you the best test error. I will attempt to create a gridSearch for Lambda by the end of the semester

4.897686488337717
24.24856837148799
