In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import pandas as pd

In [4]:
def min_max_normalize(lst):
    """
        Helper function for movielens dataset, not useful for discrete multi class clasification.

        Return:
        
        Normalized list x, in range [0, 1]
    """
    maximum = max(lst)
    minimum = min(lst)
    toreturn = []
    for i in range(len(lst)):
        toreturn.append((lst[i]- minimum)/ (maximum - minimum))
    return toreturn

In [5]:
def z_standardize(X_inp):
    """
        Z-score Standardization.
        Standardize the feature matrix, and store the standarize rule.

        Parameter:
        X_inp: Input feature matrix.

        Return:
        Standardized feature matrix.
    """
    
    toreturn = X_inp.copy()
    for i in range(X_inp.shape[1]):
        std = np.std(X_inp[:, i])               # ------ Find the standard deviation of the feature
        mean = np.mean(X_inp[:, i])             # ------ Find the mean value of the feature
        temp = []
        for j in np.array(X_inp[:, i]):
            
            """    
                #TODO: 1. implement the standardize function
            """
            temp.append((j-mean)/std)
            
        toreturn[:, i] = temp
    return toreturn

In [6]:
def sigmoid(x):
    """ 
        Sigmoid Function

        Return:
        transformed x.
    """
    """    
        #TODO: 2. implement the sigmoid function
    """
    return 1 / (1 + np.exp(-x))

In [7]:
def logistic_loss(y_true, y_hat):
    """
        Calculate the logistic loss.

        Parameter:
            y_true: Matrix or 2-D array. True target value.
            y_hat: Matrix or 2-D array. Predicted target value.

        Return:
            logistic loss
    """

    n = len(y_true)
    loss = -np.sum(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))

    return loss

In [78]:
class Logistic_Regression():
    
    def __init__(self):
        """
            Some initializations, if neccesary
        """
        
        self.model_name = 'Logistic Regression'
    
    def fit(self, X_train, y_train):
        """
            Save the datasets in our model, and do normalization to y_train
            
            Parameter:
                X_train: Matrix or 2-D array. Input feature matrix.
                Y_train: Matrix or 2-D array. Input target value.
        """
        
        self.X = X_train
        self.y = y_train
        
        count = 0
        uni = np.unique(y_train)
        for y in y_train:
            if y == min(uni):
                self.y[count] = -1
            else:
                self.y[count] = 1
            count += 1        
        
        n,m = X_train.shape
        self.theta = np.zeros(m)
        self.b = 0
    
    def gradient(self, X_inp, y_inp, theta, b):
        """
            Calculate the grandient of Weight and Bias, given sigmoid_yhat, true label, and data

            Parameter:
                X_inp: Matrix or 2-D array. Input feature matrix.
                y_inp: Matrix or 2-D array. Input target value.
                theta: Matrix or 1-D array. Weight matrix.
                b: int. Bias.

            Return:
                grad_theta: gradient with respect to theta
                grad_b: gradient with respect to b

        NOTE: There are several ways of implementing the gradient. We are merely providing you one way
        of doing it. Feel free to change the code and implement the way you want.
        """
        grad_b = 0
        grad_theta = 0
        
        """
            TODO: 3. Update grad_b and grad_theta using the Sigmoid function
        """
#         for (xi, yi) in zip(X_inp, y_inp):
#             z = np.dot(xi, theta) + b
#             sigmoid_yhat = sigmoid(z)
#             grad_b += (sigmoid_yhat - yi)
#             grad_theta += (sigmoid_yhat - yi) * xi
#         grad_b/=len(y_inp)
#         grad_theta/=len(y_inp)
#         return grad_theta, grad_b
        z = np.dot(X_inp, theta) + b
        y_hat = sigmoid(z)
        error = y_inp - y_hat
        grad_theta = -np.dot(X_inp.T, error)
        grad_b = -np.sum(error) 
        return grad_theta, grad_b


    def gradient_descent_logistic(self, alpha, num_pass, early_stop=0, standardized = True):
        """
            Logistic Regression with gradient descent method

            Parameter:
                alpha: (Hyper Parameter) Learning rate.
                num_pass: Number of iteration
                early_stop: (Hyper Parameter) Least improvement error allowed before stop. 
                            If improvement is less than the given value, then terminate the function and store the coefficents.
                            default = 0.
                standardized: bool, determine if we standardize the feature matrix.
                
            Return:
                self.theta: theta after training
                self.b: b after training
        """
        
        if standardized:
            self.X = z_standardize(self.X)
        
        n, m = self.X.shape
        

        for i in range(num_pass):    
            
            """
                TODO: 4. Modify the following code to implement gradient descent algorithm
            """
            grad_theta, grad_b = self.gradient(self.X, self.y, self.theta, self.b)
            temp_theta = self.theta - alpha * grad_theta
            temp_b = self.b - alpha * grad_b

            """
                TODO: 5. Modify the following code to implement early Stop Mechanism (use Logistic Loss when calculating error)
            """ 
            previous_y_hat = sigmoid(np.dot(self.X, self.theta) + self.b)
            temp_y_hat = sigmoid(np.dot(self.X, temp_theta) + temp_b)
#             pre_error = -np.mean(self.y * np.log(previous_y_hat) + (1 - self.y) * np.log(1 - previous_y_hat))
#             temp_error = -np.mean(self.y * np.log(temp_y_hat) + (1 - self.y) * np.log(1 - temp_y_hat))
            pre_error = logistic_loss(self.y,previous_y_hat)
            temp_error = logistic_loss(self.y,temp_y_hat)
            if (abs(pre_error - temp_error) < early_stop) | (abs(abs(pre_error - temp_error) / pre_error) < early_stop):
                return temp_theta, temp_b
            

            self.theta = temp_theta
            self.b = temp_b
        return self.theta, self.b
    
    def predict_ind(self, x: list):
        """
            Predict the most likely class label of one test instance based on its feature vector x.

            Parameter:
            x: Matrix, array or list. Input feature point.
            
            Return:
                p: prediction of given data point
        """
        
        """
            TODO: 7. Implement the prediction function
        """
        p = sigmoid(np.dot(x, self.theta) + self.b)                 # -------- calculate probability (you can use the sigmoid function)
        
        return p
    
    def predict(self, X):
        """
            X is a matrix or 2-D numpy array, represnting testing instances. 
            Each testing instance is a feature vector. 
            
            Parameter:
            x: Matrix, array or list. Input feature point.
            
            Return:
                p: prediction of given data matrix
        """
        
        """
            TODO: 8. Revise the following for-loop to call predict_ind to get predictions.
        """
        
        ret = [self.predict_ind(x) for x in X]                # -------- Use predict_ind to generate the prediction list
        return ret

In [79]:
url_Wine = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
#names = ['f_acid', 'v_acid', 'c_acid', 'sugar', 'chlorides', 'f_SO2', 't_SO2', 'density', 'ph', 'sulphates', 'alcohol', 'quality']
wine = pd.read_csv(url_Wine, delimiter=';')

In [80]:
wine5 = wine.loc[wine.quality == 5]
wine6 = wine.loc[wine.quality == 6]
wineall = pd.concat([wine5,wine6])
wineall

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
5,7.4,0.66,0.00,1.8,0.075,13.0,40.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1592,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1593,6.8,0.62,0.08,1.9,0.068,28.0,38.0,0.99651,3.42,0.82,9.5,6
1595,5.9,0.55,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6


In [81]:
X = np.array(wineall.iloc[:,:10])
Y = np.array(wineall.quality)

In [82]:
count = 0
for y in Y:
    if y == 5:
        Y[count] = -1
    else:
        Y[count] = 1
    count += 1

In [83]:
logit = Logistic_Regression()
logit.fit(X, Y)

In [84]:
g = logit.gradient_descent_logistic(0.001, 10000)

  loss = -np.sum(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))
  loss = -np.sum(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))
  return 1 / (1 + np.exp(-x))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [85]:
w, b = g

In [86]:
g

(array([  398.41682565, -1096.69732389,   -84.11435186,   137.02982506,
         -485.38897091,    97.98909252, -1696.28462547,  -753.98008483,
          456.15498662,   888.194281  ]),
 -2458.977546990029)

In [87]:
hat = np.array(w.dot(z_standardize(X).T) + b)

In [88]:
hat1 = sigmoid(hat)
hat1

  return 1 / (1 + np.exp(-x))


array([0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
       0.00000000e+000, 4.08877102e-280, 1.12903177e-004])

In [89]:
count = 0
for i in range(len(hat)):
    if hat1[i] < 0.5:
        if Y[i] == -1:
            count += 1
    else:
        if Y[i] == 1:
            count += 1
count

795

In [90]:
801/1319

0.6072782410917361

My own data

In [91]:
df = pd.read_csv('diabetes_binary_health_indicators_BRFSS2015.csv')
df_clean = df.drop(columns = ['Fruits','AnyHealthcare','Sex','NoDocbcCost'])
X = np.array(df.iloc[:, 1:])
Y = np.array(df['Diabetes_binary'])

In [92]:
count = 0
for y in Y:
    if y == 1.0:
        Y[count] = -1
    else:
        Y[count] = 1
    count += 1

In [93]:
logit = Logistic_Regression()
logit.fit(X, Y)

In [103]:
g = logit.gradient_descent_logistic(0.001, 10000)
g

  loss = -np.sum(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))
  loss = -np.sum(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))
  return 1 / (1 + np.exp(-x))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


(array([-21.39993916, -18.5523915 , -17.39593164,  -9.77192725,
         -8.93513449, -18.30472478, -26.76561094,   9.34538913,
         -2.13500047,  -1.09241708,   9.2704025 ,  -3.62026529,
         -3.70970653, -36.39431695, -11.55516597, -22.77958956,
        -26.6968044 ,  -3.36234408, -24.77381339,   5.72381706,
         14.30216026]),
 1.9597663304855786)

In [95]:
w, b = g

In [104]:
hat = np.array(w.dot(z_standardize(X).T) + b)
hat

array([-36.915243  , 239.49568536,  35.6507856 , ..., 135.927575  ,
        53.48570481,  13.67614433])

In [101]:
hat1 = sigmoid(hat)

array([9.28781732e-17, 1.00000000e+00, 1.00000000e+00, ...,
       1.00000000e+00, 1.00000000e+00, 9.99998850e-01])

In [98]:
count = 0
for i in range(len(hat)):
    if hat1[i] < 0.5:
        if Y[i] == -1:
            count += 1
    else:
        if Y[i] == 1:
            count += 1
count

202164

In [99]:
count/len(Y)

0.7969252601702933

In [100]:
from sklearn.metrics import classification_report