In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [74]:
#Naive Bayes Classifier Class
class NaiveBayes:    
    def prior(self, X, y):
        # calculate prior probability P(y)
        self.prior = (X.groupby(y).apply(lambda x: len(x)) / self.rows).to_numpy()
        return self.prior
    
    def stats(self, X, y):
        #calculate mean, variance for each column and convert to numpy array
        self.mu = X.groupby(y).apply(np.mean).to_numpy()
        self.sigma = X.groupby(y).apply(np.var).to_numpy()              
        return self.mu, self.sigma
    
    def density(self, i, x):     
        #calculate probability from gaussian density function (normally distributed)
        mu = self.mu[i]
        sigma = self.sigma[i]
        n = np.exp((-1 / 2) * ((x - mu) ** 2) / (2 * sigma))
        d = np.sqrt(2 * np.pi * sigma)
        p = n / d
        return p
    
    def posterior(self, x):
        posteriors = []
        # calculate posterior probability for each class
        for i in range(self.count):
            prior = np.log(self.prior[i]) ## use the log to make it more numerically stable
            conditional = np.sum(np.log(self.density(i, x))) # use the log to make it more numerically stable
            posterior = prior + conditional
            posteriors.append(posterior)
        # return class with highest posterior probability
        return self.classes[np.argmax(posteriors)]     

    def fit(self, X, y):
        self.classes = np.unique(y)
        self.count = len(self.classes)
        self.feature_nums = X.shape[1]
        self.rows = X.shape[0]
        
        self.stats(X, y)
        self.prior(X, y)
        
    def predict(self, X):
        preds = [self.posterior(f) for f in X.to_numpy()]
        return preds

In [19]:
class LogisticRegression:
    def __init__(self, learning_rate = 0.01, epoch = 100000, fit_intercept = True, lam = 0.1, b = 0.5, func = "grad", isprint = False):
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.fit_intercept = fit_intercept
        self.lam = lam
        self.b = b
        self.isprint = isprint
        self.func = func
        
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def loss(self, y_h, y):
        y = np.squeeze(np.asarray(y))
        y_h = np.squeeze(np.asarray(y_h))
        if self.func == "grad":
            return -np.mean(y * np.log(y_h) + (1.0001 - y) * (np.log(1.0001 - y_h)))
        elif self.func == "l1":
            return -np.mean(y * np.log(y_h) + (1.0001 - y) * (np.log(1.0001 - y_h))) + (self.lam * (np.sum(self.theta)))
        else:
            return -np.mean(y * np.log(y_h) + (1.0001 - y) * (np.log(1.0001 - y_h))) + (self.lam * (np.sum(np.square(self.theta))))
    
    def add_intercept(self, X):
        i = np.ones((X.shape[0], 1))
        return np.concatenate((i, X), axis=1)
    
    def fit(self, X, y):
        if self.fit_intercept:
            X = self.add_intercept(X)            
        self.theta = np.zeros((X.shape[1], 1))
        self.b = np.zeros((len(y), 1))
        
        for i in range(self.epoch):
            if self.func == "grad":
                z = np.dot(X, self.theta)
                y_h = self.sigmoid(z)
                gradient = np.dot(X.T, (y_h - y)) / len(y)                
            else:
                z = np.dot(X, self.theta) + self.lam
                y_h = self.sigmoid(z)
                if self.func == "l1":
                    gradient = np.dot(X.T, (y_h - y)) / len(y) + self.lam
                else:
                    gradient = np.dot(X.T, (y_h - y)) / len(y) + self.lam * self.theta            
            self.theta = self.theta - (self.learning_rate * gradient)          
            self.b = self.b - (self.learning_rate * np.sum(y_h - y))
            loss = self.loss(y_h, y)
            
            if(self.isprint == True and i % self.epoch == 0):
                print(self.func, "Loss:", loss)
    
    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.add_intercept(X)    
        return self.sigmoid(np.dot(X, self.theta))
    
    def predict(self, X):
        return self.predict_prob(X).round()

In [168]:
df = pd.read_csv('data/banknote.csv')
df.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [169]:
labelencoder=LabelEncoder()
for column in df.columns:
    df[column] = labelencoder.fit_transform(df[column])
X = df.drop(['class'], axis=1)
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)
y_train1 = np.asmatrix(y_train).T
y_test1 = np.asmatrix(y_test).T
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(960, 4) (960,)
(412, 4) (412,)


In [80]:
lrg = LogisticRegression(learning_rate=0.0001, epoch=1000,func="grad")
%time lrg.fit(X_train, y_train1)
pg = lrg.predict(X_test)
print("Logistic Regression with Gradient Descent Classifier report: \n\n", classification_report(y_test1, pg))
lrl1 = LogisticRegression(learning_rate=0.0001, epoch=1000,func="l1")
%time lrl1.fit(X_train, y_train1)
pl1 = lrl1.predict(X_test)
print("Logistic Regression with L1 regularization Classifier report: \n\n", classification_report(y_test1, pl1))
lrl2 = LogisticRegression(learning_rate=0.0001, epoch=1000,func="l2")
%time lrl2.fit(X_train, y_train1)
pl2 = lrl2.predict(X_test)
print("Logistic Regression with L2 Regularization Classifier report: \n\n", classification_report(y_test1, pl2))
nb = NaiveBayes()
%time nb.fit(X_train, y_train)
pnb = nb.predict(X_test)
print("Naive Bayes Classifier report: \n\n", classification_report(y_test, pnb))

Wall time: 107 ms
Logistic Regression with Gradient Descent Classifier report: 

               precision    recall  f1-score   support

           0       0.70      0.98      0.82       229
           1       0.96      0.48      0.64       183

   micro avg       0.76      0.76      0.76       412
   macro avg       0.83      0.73      0.73       412
weighted avg       0.82      0.76      0.74       412

Wall time: 118 ms
Logistic Regression with L1 regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.57      1.00      0.73       229
           1       1.00      0.07      0.12       183

   micro avg       0.58      0.58      0.58       412
   macro avg       0.79      0.53      0.43       412
weighted avg       0.76      0.58      0.46       412

Wall time: 126 ms
Logistic Regression with L2 Regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.78      0.97      0.86

In [127]:
diab = pd.read_csv("data/diabetes.csv")
diab.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [128]:
diab.isna().any()

Pregnancies                 False
Glucose                     False
BloodPressure               False
SkinThickness               False
Insulin                     False
BMI                         False
DiabetesPedigreeFunction    False
Age                         False
Outcome                     False
dtype: bool

In [129]:
diab.dtypes

Pregnancies                   int64
Glucose                       int64
BloodPressure                 int64
SkinThickness                 int64
Insulin                       int64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                           int64
Outcome                       int64
dtype: object

In [158]:
labelencoder=LabelEncoder()
for column in diab.columns:
    diab[column] = labelencoder.fit_transform(diab[column])
X = diab.drop(['Outcome'], axis=1)
y = diab['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 5)
print(X_train[:5])
x_train = np.reshape(X_train.values, (-1, 1))
yy_train = np.reshape(y_train.values, (-1, 1))

     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  BMI  \
140            3       66             26              0        0   14   
358           12       26             23             33       26  140   
143           10       46             19              0        0  113   
674            8       29             28              0        0  143   
121            6       49             17             32        0  129   

     DiabetesPedigreeFunction  Age  
140                       136   34  
358                       212   27  
143                       140   21  
674                       331   47  
121                       129    3  


In [159]:
#X_train = np.asmatrix(X_train)
y_train1 = np.asmatrix(y_train).T
#X_test = np.asmatrix(X_test)
y_test1 = np.asmatrix(y_test).T
print(X_train.shape, y_train1.shape)
print(X_test.shape, y_test.shape)

(614, 8) (614, 1)
(154, 8) (154,)


In [165]:
lrg = LogisticRegression(learning_rate=0.0001, epoch=1000,func="grad")
%time lrg.fit(X_train, y_train1)
pg = lrg.predict(X_test)
print("Logistic Regression with Gradient Descent Classifier report: \n\n", classification_report(y_test1, pg))
lrl1 = LogisticRegression(learning_rate=0.0001, epoch=1000,func="l1")
%time lrl1.fit(X_train, y_train1)
pl1 = lrl1.predict(X_test)
print("Logistic Regression with L1 regularization Classifier report: \n\n", classification_report(y_test1, pl1))
lrl2 = LogisticRegression(learning_rate=0.0001, epoch=1000,func="l2")
%time lrl2.fit(X_train, y_train1)
pl2 = lrl2.predict(X_test)
print("Logistic Regression with L2 Regularization Classifier report: \n\n", classification_report(y_test1, pl2))
nb = NaiveBayes()
%time nb.fit(X_train, y_train)
pnb = nb.predict(X_test)
print("Naive Bayes Classifier report: \n\n", classification_report(y_test, pnb))

Wall time: 83.8 ms
Logistic Regression with Gradient Descent Classifier report: 

               precision    recall  f1-score   support

           0       0.73      0.87      0.79       100
           1       0.63      0.41      0.49        54

   micro avg       0.71      0.71      0.71       154
   macro avg       0.68      0.64      0.64       154
weighted avg       0.70      0.71      0.69       154

Wall time: 100 ms
Logistic Regression with L1 regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.71      0.89      0.79       100
           1       0.62      0.33      0.43        54

   micro avg       0.69      0.69      0.69       154
   macro avg       0.67      0.61      0.61       154
weighted avg       0.68      0.69      0.67       154

Wall time: 105 ms
Logistic Regression with L2 Regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.71      0.89      0.7

In [149]:
df = pd.read_csv("data/mushrooms.csv")
labelencoder=LabelEncoder()
for column in df.columns:
    df[column] = labelencoder.fit_transform(df[column])
df = df.drop(["veil-type"],axis=1)
X = df.drop(['class'], axis=1)
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.1)
y_train1 = np.asmatrix(y_train).T
y_test1 = np.asmatrix(y_test).T
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(7311, 21) (7311,)
(813, 21) (813,)


In [152]:
lrg = LogisticRegression(learning_rate=0.0001, epoch=1000,func="grad")
%time lrg.fit(X_train, y_train1)
pg = lrg.predict(X_test)
print("Logistic Regression with Gradient Descent Classifier report: \n\n", classification_report(y_test1, pg))
lrl1 = LogisticRegression(learning_rate=0.0001, epoch=1000,func="l1")
%time lrl1.fit(X_train, y_train1)
pl1 = lrl1.predict(X_test)
print("Logistic Regression with L1 regularization Classifier report: \n\n", classification_report(y_test1, pl1))
lrl2 = LogisticRegression(learning_rate=0.0001, epoch=1000,func="l2")
%time lrl2.fit(X_train, y_train1)
pl2 = lrl2.predict(X_test)
print("Logistic Regression with L2 Regularization Classifier report: \n\n", classification_report(y_test1, pl2))
nb1 = NaiveBayes()
%time nb1.fit(X_train, y_train)
pnb = nb1.predict(X_test)
print("Naive Bayes Classifier report: \n\n", classification_report(y_test, pnb))

Wall time: 423 ms
Logistic Regression with Gradient Descent Classifier report: 

               precision    recall  f1-score   support

           0       0.71      0.96      0.82       433
           1       0.92      0.56      0.70       380

   micro avg       0.77      0.77      0.77       813
   macro avg       0.82      0.76      0.76       813
weighted avg       0.81      0.77      0.76       813

Wall time: 394 ms
Logistic Regression with L1 regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.67      0.98      0.79       433
           1       0.95      0.44      0.60       380

   micro avg       0.73      0.73      0.73       813
   macro avg       0.81      0.71      0.70       813
weighted avg       0.80      0.73      0.70       813

Wall time: 424 ms
Logistic Regression with L2 Regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.67      0.98      0.80

In [174]:
df = pd.read_csv('data/hrdata.csv')
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.1,0.9,7,286,4,0,1,0,sales,low
1,0.89,0.93,4,249,3,0,0,0,sales,low
2,0.38,0.5,2,132,3,0,1,0,accounting,low
3,0.95,0.71,4,151,4,0,0,0,sales,medium
4,0.84,0.84,5,163,3,0,0,0,technical,low


In [175]:
labelencoder=LabelEncoder()
for column in df.columns:
    df[column] = labelencoder.fit_transform(df[column])
X = df.drop(['left'], axis=1)
y = df['left']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.1)
y_train1 = np.asmatrix(y_train).T
y_test1 = np.asmatrix(y_test).T
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(10114, 9) (10114,)
(1124, 9) (1124,)


In [188]:
lrg = LogisticRegression(learning_rate=0.00001, epoch=1000,func="grad")
%time lrg.fit(X_train, y_train1)
pg = lrg.predict(X_test)
print("Logistic Regression with Gradient Descent Classifier report: \n\n", classification_report(y_test1, pg))
lrl1 = LogisticRegression(learning_rate=0.00001, epoch=1000,func="l1")
%time lrl1.fit(X_train, y_train1)
pl1 = lrl1.predict(X_test)
print("Logistic Regression with L1 regularization Classifier report: \n\n", classification_report(y_test1, pl1))
lrl2 = LogisticRegression(learning_rate=0.00001, epoch=1000,func="l2")
%time lrl2.fit(X_train, y_train1)
pl2 = lrl2.predict(X_test)
print("Logistic Regression with L2 Regularization Classifier report: \n\n", classification_report(y_test1, pl2))
nb1 = NaiveBayes()
%time nb1.fit(X_train, y_train)
pnb = nb1.predict(X_test)
print("Naive Bayes Classifier report: \n\n", classification_report(y_test, pnb))

Wall time: 339 ms
Logistic Regression with Gradient Descent Classifier report: 

               precision    recall  f1-score   support

           0       0.79      0.98      0.88       833
           1       0.84      0.26      0.40       291

   micro avg       0.80      0.80      0.80      1124
   macro avg       0.81      0.62      0.64      1124
weighted avg       0.80      0.80      0.75      1124

Wall time: 304 ms
Logistic Regression with L1 regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.79      1.00      0.88       833
           1       0.96      0.26      0.41       291

   micro avg       0.81      0.81      0.81      1124
   macro avg       0.88      0.63      0.64      1124
weighted avg       0.84      0.81      0.76      1124

Wall time: 314 ms
Logistic Regression with L2 Regularization Classifier report: 

               precision    recall  f1-score   support

           0       0.79      1.00      0.88