In [3]:
#Imports
import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from scipy.special import expit


# Load the car-evaluation table. The columns indexed below ('buy_price',
# 'maint_price', ...) are not passed to read_csv, so they must come from the
# header row of car.data.
df = pd.read_csv('car.data')

# Every categorical column is replaced by an ordinal integer code.
# NOTE: the builtin `int` is used instead of `np.int`; `np.int` was only an
# alias for it, was deprecated in NumPy 1.20 and removed in 1.24.
# A category missing from a map becomes NaN and makes astype(int) raise, which
# surfaces unexpected values early.
buying_maint_map = {'vhigh':3,'high':2,'med':1,'low':0}
df['buy_price'] = df['buy_price'].map(buying_maint_map).astype(int)
df['maint_price'] = df['maint_price'].map(buying_maint_map).astype(int)
doors_map = {'2':0,'3':1,'4':2,'5more':3}
df['doors'] = df['doors'].map(doors_map).astype(int)
persons_map = {'2':0,'3':1,'4':2,'more':3}
df['persons'] = df['persons'].map(persons_map).astype(int)
trunk_map = {'small':0,'med':1,'big':2}
df['trunk_size'] = df['trunk_size'].map(trunk_map).astype(int)
safety_map = {'low':0,'med':1,'high':2}
df['safety'] = df['safety'].map(safety_map).astype(int)
class_map = {'unacc':0,'acc':1,'good':2,'vgood':3}
df['class'] = df['class'].map(class_map).astype(int)


feature_cols = ['buy_price','maint_price','doors','persons','trunk_size','safety']
class_cols = ['class']

# Make X a 2D numpy array.
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement.
X = df[feature_cols].to_numpy()
# Make y a 1D numpy array: 1 where the class code is 0 ('unacc'), else 0.
y = (df[class_cols]==0).astype(int).to_numpy().ravel()
# The full four-level class code (0..3) as a 1D array.
y_not_binary = df[class_cols].astype(int).to_numpy().ravel()


In [4]:
#Taken from notebook 6
class BinaryLogisticRegression:
    """Two-class logistic regression fitted with full-batch gradient steps.

    Parameters
    ----------
    eta : float
        Step size applied to every gradient update.
    iterations : int, default 20
        Number of gradient updates performed by ``fit``.
    C : float, default 0.001
        Strength of the L2 penalty; the bias weight is not penalised.

    After ``fit`` the weights live in ``self.w_`` (sklearn-style trailing
    underscore) as a column vector whose first entry is the bias.
    """

    def __init__(self, eta, iterations=20, C=0.001):
        self.eta, self.iters, self.C = eta, iterations, C

    def __str__(self):
        # w_ only exists once fit() has been called.
        if not hasattr(self, 'w_'):
            return 'Untrained Binary Logistic Regression Object'
        return 'Binary Logistic Regression Object with coefficients:\n' + str(self.w_)

    # ---- private helpers -------------------------------------------------
    @staticmethod
    def _add_bias(X):
        """Return X with a leading column of ones (the bias feature)."""
        ones_col = np.ones((X.shape[0], 1))
        return np.hstack((ones_col, X))

    @staticmethod
    def _sigmoid(theta):
        """Logistic function; scipy's expit is used for numerical stability."""
        return expit(theta)

    def _get_gradient(self, X, y):
        """Mean log-likelihood gradient over all rows, with the L2 term added.

        X must already contain the bias column and y must be 1-D.
        The result has the same shape as ``self.w_``.
        """
        residual = y - self.predict_proba(X, add_bias=False).ravel()
        # Scale each row by its residual, then average over the rows.
        grad = (X * residual[:, np.newaxis]).mean(axis=0)
        grad = grad.reshape(self.w_.shape)
        # Regularise everything except the bias (index 0).
        l2_term = -2 * self.w_[1:] * self.C
        grad[1:] += l2_term
        return grad

    # ---- public API ------------------------------------------------------
    def predict_proba(self, X, add_bias=True):
        """Return P(y=1) for each row of X as sigmoid(X @ w_).

        Pass ``add_bias=False`` when X already carries the bias column.
        """
        design = X if not add_bias else self._add_bias(X)
        return self._sigmoid(design @ self.w_)

    def predict(self, X):
        """Return a boolean array that is True where P(y=1) exceeds 0.5."""
        return self.predict_proba(X) > 0.5

    def fit(self, X, y):
        """Start from zero weights and take ``self.iters`` gradient steps."""
        Xb = self._add_bias(X)
        n_weights = Xb.shape[1]
        self.w_ = np.zeros((n_weights, 1))

        for _ in range(self.iters):
            step = self._get_gradient(Xb, y)
            self.w_ += self.eta * step

In [5]:
%%time
class StochasticLogisticRegression(BinaryLogisticRegression):
    """Logistic regression whose every update uses one randomly chosen row."""

    def _get_gradient(self,X,y):
        """Return the L2-regularised gradient estimated from a single sample.

        One value is drawn from ``np.random.rand`` to pick the row, so results
        depend on the global NumPy random state.
        """
        pick = int(len(y) * np.random.rand()) # index of the sampled instance
        row = X[pick]
        # With a 1-D row and a column-vector w_, the probability (and so the
        # residual) comes back as a length-1 array, which is why it can be
        # indexed with [:, np.newaxis] below.
        residual = y[pick] - self.predict_proba(row, add_bias=False)
        grad = (row * residual[:, np.newaxis]).reshape(self.w_.shape)

        # Penalise every weight except the bias (index 0).
        l2_term = -2 * self.w_[1:] * self.C
        grad[1:] += l2_term

        return grad
    
    
# StochasticLogisticRegression picks its training rows with np.random, so the
# global generator is seeded here; otherwise the coefficients and accuracy
# printed by this cell change on every run.
np.random.seed(0)

slr = StochasticLogisticRegression(0.1,1000, C=0.001) # take a lot more steps!!

slr.fit(X,y)

# Accuracy is measured on the same rows the model was fitted on.
yhat = slr.predict(X)
print(slr)
print('Accuracy of: ',accuracy_score(y,yhat))

Binary Logistic Regression Object with coefficients:
[[ 3.14017307]
 [ 0.87231865]
 [ 1.05457088]
 [ 0.18844524]
 [-1.26338706]
 [-0.13929759]
 [-2.18322287]]
Accuracy of:  0.8692129629629629
CPU times: user 15.6 ms, sys: 0 ns, total: 15.6 ms
Wall time: 22.3 ms


In [6]:
class MultiClassLogisticRegression:
    """One-vs-rest logistic regression built from binary classifiers.

    Parameters
    ----------
    eta : float
        Step size handed to every binary classifier.
    iterations : int, default 20
        Number of gradient updates per binary classifier.
    C : float, default 0.0001
        L2 penalty strength handed to every binary classifier.
    optimize_func : {'steepest', 'stochastic'}, default 'steepest'
        'steepest' trains BinaryLogisticRegression models, 'stochastic'
        trains StochasticLogisticRegression models.

    Attributes set by ``fit``
    -------------------------
    unique_ : sorted array of the distinct class labels seen in y.
    classifiers_ : one fitted binary classifier per entry of ``unique_``.
    w_ : weight matrix with one row per class (bias first).
    """

    def __init__(self, eta, iterations=20, C=0.0001, optimize_func='steepest'):
        self.eta = eta
        self.iters = iterations
        self.C = C
        self.classifiers_ = []
        self.optimize_func = optimize_func
        # internally we will store the weights as self.w_ to keep with sklearn conventions

    def __str__(self):
        if hasattr(self, 'w_'):  # w_ exists only after fit()
            return 'MultiClass Logistic Regression Object with coefficients:\n'+ str(self.w_)
        return 'Untrained MultiClass Logistic Regression Object'

    def _make_binary_classifier(self):
        """Build an untrained binary classifier for the configured optimiser.

        Raises
        ------
        ValueError
            If ``optimize_func`` is not 'steepest' or 'stochastic'. Without
            this check an unknown value surfaced as an unbound local variable.
        """
        if self.optimize_func == 'stochastic':
            return StochasticLogisticRegression(self.eta, self.iters, self.C)
        if self.optimize_func == 'steepest':
            return BinaryLogisticRegression(self.eta, self.iters, self.C)
        raise ValueError(
            "optimize_func must be 'steepest' or 'stochastic', got %r"
            % (self.optimize_func,)
        )

    def fit(self, X, y):
        """Train one binary classifier per distinct label in y.

        Raises ValueError when ``optimize_func`` is not recognised.
        """
        y = np.asarray(y)  # makes the elementwise comparison below safe for lists
        self.unique_ = np.unique(y)  # np.unique already returns sorted values
        self.classifiers_ = []
        for yval in self.unique_:
            # Builtin int replaces np.int, which NumPy removed in 1.24.
            y_binary = (y == yval).astype(int)  # this class vs. everything else
            hblr = self._make_binary_classifier()
            hblr.fit(X, y_binary)
            self.classifiers_.append(hblr)

        # Each classifier holds a column vector; stack them and transpose so
        # that every row of w_ belongs to one class.
        self.w_ = np.hstack([clf.w_ for clf in self.classifiers_]).T

    def predict_proba(self, X):
        """Return an array with one row per sample and one column per class.

        Column j holds the j-th binary classifier's probability; rows are not
        normalised to sum to one.
        """
        probs = [
            clf.predict_proba(X).reshape((len(X), 1))
            for clf in self.classifiers_
        ]
        return np.hstack(probs)

    def predict(self, X):
        """Return, for each row, the label whose classifier is most confident.

        The argmax column index is mapped back through ``unique_`` so that the
        original labels are returned even when they are not 0..k-1.
        """
        best = np.argmax(self.predict_proba(X), axis=1)
        return self.unique_[best]
    



In [7]:
%%time
# One-vs-rest model trained with full-batch ('steepest') gradient steps.
lr = MultiClassLogisticRegression(
    eta=0.1,
    iterations=8,
    C=0.00001,
    optimize_func='steepest',
)
lr.fit(X, y_not_binary)
print(lr)

# Predictions are scored against the same rows that were used for fitting.
yhat = lr.predict(X)
print('Accuracy of: ', accuracy_score(y_not_binary, yhat))

MultiClass Logistic Regression Object with coefficients:
[[ 0.08093106  0.18241802  0.17404447  0.08358793 -0.05967811  0.03953186
  -0.05569466]
 [-0.10943317 -0.15751531 -0.16004672 -0.13135618 -0.02709511 -0.0853471
  -0.01265203]
 [-0.17085057 -0.27019536 -0.27019536 -0.23320433 -0.23126378 -0.16183375
  -0.14974222]
 [-0.17436727 -0.27103951 -0.25973252 -0.23233586 -0.23616838 -0.14860788
  -0.13741636]]
Accuracy of:  0.7002314814814815
CPU times: user 15.6 ms, sys: 15.6 ms, total: 31.2 ms
Wall time: 7.28 ms


In [8]:
%%time
# The 'stochastic' optimiser samples training rows through np.random; seeding
# the global generator makes the coefficients and accuracy below repeatable.
np.random.seed(0)

lr_st = MultiClassLogisticRegression(0.1,iterations=50,C=0.00001,optimize_func='stochastic')
lr_st.fit(X,y_not_binary)
print(lr_st)

# Accuracy is measured on the same rows the model was fitted on.
yhat_st = lr_st.predict(X)
print('Accuracy of: ',accuracy_score(y_not_binary,yhat_st))

MultiClass Logistic Regression Object with coefficients:
[[ 0.37429721  0.41336732  0.64324467  0.28234004 -0.45950797 -0.10530225
  -0.33073976]
 [-0.13606086  0.08825639 -0.46378114  0.35885522  0.37971928 -0.3577954
   0.29727138]
 [-0.28578377 -0.75799361 -0.57174902 -0.24092036 -0.27953794 -0.28539655
  -0.23057175]
 [-0.39878982 -0.5466606  -0.53826877 -0.54204305 -0.40924341 -0.37562083
  -0.2849944 ]]
Accuracy of:  0.7048611111111112
CPU times: user 15.6 ms, sys: 31.2 ms, total: 46.9 ms
Wall time: 11.3 ms
