In [1]:
import pandas as pd
import os.path as osp
import numpy as np

In [2]:
# class LogistRegression(object):
#     def __init__(self):
#         self.intercept_ = None
#         self.coef_ = None
#         self.__theta = None
#     def __sigmoid(self, t):
#         return 1 / (1.+ np.exp(-t))
#     def fit(self, X_train, y_train,eta = 0.01, n_iters = 1e4 ):
#         def J(theta, X_b, y):
#             y_hat = self.__sigmoid(X_b.dot(theta))
#             try:
#                 return - np.sum(y*np.log(y_hat) + (1-y)*np.log(1-y_hat))/len(X_b)
#             except:
#                 return float('inf')
#         def dJ(theta, X_b, y):
#             return X_b.T.dot(self.__sigmoid(X_b.dot(theta)) - y)/len(X_b)

#         def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
#             theta = initial_theta
#             i_iters = 0
#             while i_iters < n_iters:
#                 last_theta = theta
#                 gradient = dJ(theta, X_b, y)
#                 theta = theta - gradient * eta
#                 if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
#                     break
#                 i_iters += 1
#             return theta

#         X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
#         inital_theta = np.zeros(X_b.shape[1])
#         self.__theta = gradient_descent(X_b, y_train, inital_theta, eta, n_iters)
#         self.coef_ = self.__theta[1:]
#         self.intercept_ = self.__theta[0]
#         return self
#     def predict_proba(self, X_test):
#         X_b = np.hstack([np.ones((len(X_test), 1)), X_test])
#         return self.__sigmoid(X_b.dot(self.__theta))
#     def predict(self, X_test):
#         proba = self.predict_proba(X_test)
#         return np.array(proba >=0.5, dtype='int')
#     def score(self, X_test, y_test):
#         return R_square(y_test, self.predict(X_test))
#     def __repr__(self):
#         return "LogisticRegression()"

In [106]:
class LogisticRegression:
    
    def __init__(self, add_bias=True, learning_rate=.1, epsilon=1e-4, max_iters=1e5, verbose=False):
        self.add_bias = add_bias
        self.learning_rate = learning_rate
        self.epsilon = epsilon                        #to get the tolerance for the norm of gradients 
        self.max_iters = max_iters                    #maximum number of iteration of gradient descent
        self.verbose = verbose
    def gradient(self, x, y):
        logistic = lambda z: 1./ (1 + np.exp(-z))       #logistic function
        N,D = x.shape
        yh = logistic(np.dot(x, self.w))    # predictions  size N
        grad = np.dot(x.T, yh - y)/N        # divide by N because cost is mean over N points
        return grad  
        
    def fit(self, x, y):
        logistic = lambda z: 1./ (1 + np.exp(-z))       #logistic function
        if x.ndim == 1:
            x = x[:, None]
        if self.add_bias:
            N = x.shape[0]
            x = np.column_stack([x,np.ones(N)])
        N,D = x.shape
        self.w = np.zeros(D)
        g = np.inf 
        t = 0
        # the code snippet below is for gradient descent
        while np.linalg.norm(g) > self.epsilon and t < self.max_iters:
            g = self.gradient(x, y)
            self.w = self.w - self.learning_rate * g 
            t += 1
        
        if self.verbose:
            print(f'terminated after {t} iterations, with norm of the gradient equal to {np.linalg.norm(g)}')
            print(f'the weight found: {self.w}')
        return self
    def cost_fn(x, y, w):
        N, D = x.shape                                                       
        z = np.dot(x, w)
        J = np.mean(y * np.log1p(np.exp(-z)) + (1-y) * np.log1p(np.exp(z)))  #log1p calculates log(1+x) to remove floating point inaccuracies 
        return J
    def predict(self, x):
        logistic = lambda z: 1./ (1 + np.exp(-z))       #logistic function
        if x.ndim == 1:
            x = x[:, None]
        Nt = x.shape[0]
        if self.add_bias:
            x = np.column_stack([x,np.ones(Nt)])
        yh = logistic(np.dot(x,self.w))            #predict output
        
        res = np.array(yh >=0.5, dtype='int')
        print(res)
        return res
    def score(self, X_test, y_test):
        return R_square(y_test, self.predict(X_test))
    def __repr__(self):
        return "LogisticRegression()"
    def accuracy(self, X_test, y_test):
        y_hat = self.predict(X_test)
       

        return sum(y_hat == y_test)/len(y_hat)
class StandardScaler(object):
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    def fit(self, X):
        self.mean_ = [np.mean(X[:, i]) for i in range(X.shape[1])]
        self.scale_ = [np.std(X[:, i]) for i in range(X.shape[1])]
        return self
    def transform(self, X):
        resX = np.empty(shape=X.shape, dtype=float)
        for col in range(X.shape[1]):
            resX[:, col] = (X[:, col] - self.mean_[col])/self.scale_[col]

        # k = X_train.shape
        # for n in k[1] :
        #     X_train[:, n] = (X_train[:, n] - np.mean(X_train[:, n]))/np.std(X_train[:, n])
        return resX

In [107]:
import numpy as np
def accuracy_score(y_hat, y_true):
    return sum(y_hat == y_true)/len(y_true)
def mean_square_error(y_true, y_predict):
    assert  len(y_true) ==len(y_predict)
    return np.sum((y_true-y_predict)**2)/len(y_true)
def rse(y_true, y_predict):
    return np.sqrt(mean_square_error(y_true, y_predict))
def mae(y_true, y_predict):
    return np.sum(np.abs(y_true - y_predict)) / len(y_true)
def R_square(y_true, y_predict):
    return 1 - mean_square_error(y_true, y_predict)/np.var(y_true)

In [108]:
def gradient(self, x, y):
        N,D = x.shape
        yh = logistic(np.dot(x, self.w))    # predictions  size N
        grad = np.dot(x.T, yh - y)/N        # divide by N because cost is mean over N points
        return grad  
    
logistic = lambda z: 1./ (1 + np.exp(-z))       #logistic function
def cost_fn(x, y, w):
    N, D = x.shape                                                       
    z = np.dot(x, w)
    J = np.mean(y * np.log1p(np.exp(-z)) + (1-y) * np.log1p(np.exp(z)))  #log1p calculates log(1+x) to remove floating point inaccuracies 
    return J

In [109]:
df_train = pd.read_csv("./data_A2/diabetes/diabetes_train.csv")
df_test = pd.read_csv("./data_A2/diabetes/diabetes_test.csv")
df_val = pd.read_csv("./data_A2/diabetes/diabetes_val.csv")

In [110]:
train_x = df_train.iloc[:, :-1].to_numpy()
train_y = df_train.iloc[:, -1].to_numpy()
val_x = df_val.iloc[:, :-1].to_numpy()
val_y = df_val.iloc[:, -1].to_numpy()

In [111]:
print(train_x.shape)

(600, 8)


In [112]:
# train_x = np.ndarray(train_x)

In [113]:
# train_y = df_train.iloc[:, -1].to_numpy()

In [114]:
print(train_y)

[1 1 1 1 1 0 1 0 1 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0
 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 1 1 1 0 1 0 1
 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 1 0 1 0 1 0
 1 1 0 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1
 1 0 0 1 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0
 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 1 0 1 0 0 1 0 0
 0 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1 1 0 1 0 0 0 1 1 1 0 0 0
 0 1 0 0 1 0 1 0 1 0 1 1 0 1 0 1 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 0 1 1 1 0 0
 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1
 0 1 1 0 0 1 1 1 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 0 0 1 0 1 0 0 0 0 0 1 0
 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0
 0 0 1 1 0 1 1 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 1 0 1 1 0 0 0 1 1 0 0 0
 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 1 1 0
 0 0 0 0 0 1 0 1 1 0 0 0 

In [115]:
df_train

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,5,144,82,26,285,32.0,0.452,58,1
1,0,128,68,19,180,30.5,1.391,25,1
2,9,156,86,28,155,34.3,1.189,42,1
3,1,144,82,46,180,46.1,0.335,46,1
4,0,179,90,27,0,44.1,0.686,23,1
...,...,...,...,...,...,...,...,...,...
595,4,158,78,0,0,32.9,0.803,31,1
596,12,84,72,31,0,29.7,0.297,46,1
597,2,158,90,0,0,31.6,0.805,66,1
598,2,83,66,23,50,32.2,0.497,22,0


In [116]:
std_scaler = StandardScaler()

In [117]:
std_scaler.fit(train_x)
train_x_std = std_scaler.transform(train_x)

In [118]:
std_scaler.fit(val_x)
val_x_std = std_scaler.transform(val_x)

In [119]:
lr_clf = LogisticRegression(learning_rate=1e-3, verbose=True, epsilon=1e-6)

In [120]:
lr_clf.fit(train_x_std, train_y)

terminated after 100000 iterations, with norm of the gradient equal to 9.388086494019812e-05
the weight found: [ 0.490168    1.17023562 -0.22574154 -0.05961615 -0.05721845  0.69279054
  0.30081972  0.12838712 -0.84671712]


LogisticRegression()

In [128]:
p = lr_clf.predict(val_x_std)

[1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 1
 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0]


In [129]:
val_y

array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int64)

In [130]:
lr_clf.score(val_x, val_y)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


-1.8571428571428577

In [131]:
lr_clf.accuracy(val_x, val_y)


[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
[0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0
 0 1 0 1 0 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1
 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0]
35


0.35

In [132]:
len(val_y)

100

In [133]:
p

array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0])

In [134]:
sum(p==val_y)

75