In [324]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [352]:
def sklearn_model(X, t):
    """
    Fits scikit-learn's LogisticRegression on (X, t) and prints the
    coefficient vector of the fitted model.

    C is set to a very large value; in scikit-learn C is the inverse of the
    regularization strength, so the penalty becomes negligible and the
    coefficients can be compared with an unregularized implementation.
    Nothing is returned.
    """

    classifier = LogisticRegression(C=1000000000000, random_state=0)
    classifier.fit(X, t)

    # coef_ holds one row per decision function; print the first row
    print(classifier.coef_[0])

In [293]:
def normalize(data, method='min-max'):

    """
    Rescales every feature of `data` independently.

    Computes statistics assuming each sample is a row.

    Parameters
    ----------
    data : array-like
        Samples along axis 0, features along the remaining axis.
    method : str
        'min-max'          -> (data - min) / (max - min), per feature.
        'standardization'  -> (data - mean) / std, per feature.

    Returns
    -------
    numpy.ndarray
        The rescaled data, same shape as the input.

    Raises
    ------
    ValueError
        If `method` is not one of the two supported names.
    """

    data = np.asarray(data)

    if method == 'min-max':
        offset = np.min(data, axis=0)
        spread = np.max(data, axis=0) - offset
    elif method == 'standardization':
        offset = np.mean(data, axis=0)
        spread = np.std(data, axis=0)
    else:
        raise ValueError(
            "Unknown normalization method: %r "
            "(expected 'min-max' or 'standardization')" % (method,)
        )

    # A constant feature has zero spread; dividing by 1 instead maps it to 0
    # rather than producing nan/inf from a 0/0 division.
    spread = np.where(spread == 0, 1, spread)

    return (data - offset) / spread
    

In [294]:
def segregate_target(data):
    """
    Splits a 2-D array into its feature columns and its target column.

    The last column is treated as the target and is returned as a
    single-column 2-D slice; every other column is returned as features.
    """

    feature_columns = slice(None, -1)
    target_column = slice(-1, None)

    return data[:, feature_columns], data[:, target_column]

In [295]:
def train_test_validation_split(X, t):
    """
    Splits (X, t) into train, validation and test partitions.

    A first split holds out 33% of the samples; that held-out part is then
    split in half into validation and test. Both splits use random_state=42.

    Returns a dict with keys 'X_train', 't_train', 'X_valid', 't_valid',
    'X_test' and 't_test'.
    """

    X_train, X_holdout, t_train, t_holdout = train_test_split(
        X, t, test_size=0.33, random_state=42
    )
    X_valid, X_test, t_valid, t_test = train_test_split(
        X_holdout, t_holdout, test_size=0.5, random_state=42
    )

    return dict(
        X_train=X_train,
        t_train=t_train,
        X_valid=X_valid,
        t_valid=t_valid,
        X_test=X_test,
        t_test=t_test,
    )

In [296]:
def load_data(path):
    """
    Reads a saved NumPy array from `path` via np.load and returns it.
    """

    return np.load(path)

In [297]:
def initialize_parameters(num, random=True):
    """
    Creates the initial weights and bias.

    Returns a (num, 1) weight column and a bias of 0. The weights are drawn
    from np.random.randn when `random` is true, otherwise they are all zero.
    """

    if not random:
        return np.zeros((num, 1)), 0

    draws = np.random.randn(num)
    return draws.reshape(-1, 1), 0

In [298]:
def sigmoid(x):
    """
    Applies the logistic function 1 / (1 + e^(-x)) element-wise and returns
    the result reshaped into a single column.
    """

    decay = np.exp(-x)
    squashed = 1 / (1 + decay)

    return squashed.reshape(-1, 1)

In [299]:
def compute_error(y, t):
    """
    Computes the summed binary cross-entropy between predictions and targets.

    Parameters
    ----------
    y : numpy.ndarray
        Predicted probabilities.
    t : numpy.ndarray
        Targets, broadcastable against `y`.

    Returns
    -------
    The scalar -sum(t * log(y) + (1 - t) * log(1 - y)).

    Saturated predictions are pulled away from the boundaries before the
    logarithms are taken: y >= 1 becomes 0.99 and y <= 0 becomes 0.01, so
    neither log(y) nor log(1 - y) evaluates log(0).
    """

    y = np.where(y >= 1, 0.99, y)
    # Mirror of the clamp above: without it a prediction of exactly 0 gives
    # log(0) = -inf and the whole cost turns into nan or inf.
    y = np.where(y <= 0, 0.01, y)

    return -np.sum(np.multiply(t, np.log(y)) + np.multiply(1 - t, np.log(1 - y)))

In [300]:
def propagate(X, w, b, t):
    """
    Runs one forward and backward pass of the logistic regression.

    The forward pass applies sigmoid to w.T X + b; the cost comes from
    compute_error. The gradients are the un-averaged sums X (y - t) for the
    weights and sum(y - t) for the bias.

    Returns (grads, cost) where grads is a dict with keys 'dw' and 'db'.
    """

    # forward pass
    activations = sigmoid(np.dot(w.T, X) + b)

    # cost of the current parameters
    loss = compute_error(activations, t)

    # backward pass: both gradients share the same residual
    residual = activations - t
    gradients = {
        'dw': np.dot(X, residual),
        'db': np.sum(residual),
    }

    return gradients, loss

In [301]:
def update(w, b, grads, learningRate=0.001):
    """
    Takes one gradient-descent step.

    Subtracts learningRate times grads['dw'] from w and learningRate times
    grads['db'] from b, and returns the new (w, b) pair.
    """

    weight_step = learningRate * grads['dw']
    bias_step = learningRate * grads['db']

    return w - weight_step, b - bias_step

In [302]:
def predict(w, b, X):
    """
    Predicts a 0/1 class label for every sample in X.

    Parameters
    ----------
    w : numpy.ndarray
        Weight column, used as w.T in the linear score.
    b : scalar
        Bias added to the linear score.
    X : numpy.ndarray
        Input matrix multiplied as np.dot(w.T, X).

    Returns
    -------
    numpy.ndarray
        1-D integer array with 1 where the predicted probability is strictly
        greater than 0.5 and 0 otherwise.
    """

    probabilities = sigmoid(np.dot(w.T, X) + b)

    # Vectorized threshold; avoids a Python-level loop that takes the truth
    # value of one-element arrays. ravel() keeps the result 1-D.
    predictions = (probabilities > 0.5).astype(int).ravel()

    return predictions

In [303]:
def model(X, t, learningRate=0.01, maxIters=1000):
    """
    Trains the logistic regression with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Training inputs with one sample per column (main passes the
        transposed training split), so X.shape[0] is the number of features.
    t : numpy.ndarray
        Training targets passed through to propagate.
    learningRate : float
        Step size handed to update.
    maxIters : int
        Number of gradient-descent iterations to run.

    Returns
    -------
    (w, b, costs) : the final weights, the final bias, and the list of costs
    recorded once per iteration (each computed before that iteration's update).
    """

    # One weight per feature; features run along axis 0 of X.
    num_features = X.shape[0]

    w, b = initialize_parameters(num_features)
    costs = []

    for _ in range(maxIters):

        grads, cost = propagate(X, w, b, t)
        w, b = update(w, b, grads, learningRate)
        costs.append(cost)

    return w, b, costs

In [332]:
def main():
    """
    Trains the hand-written logistic regression on 'data.npy' and prints,
    in order: the learned weights and bias, the validation error rate, and
    the training error rate.

    All three splits are min-max scaled with the minimum and range of the
    training split, so validation and test samples live in the same feature
    space the model was trained in and no statistics leak from held-out data.
    """

    path = 'data.npy'
    data = load_data(path)

    # from here each sample is a row
    X, t = segregate_target(data)
    data = train_test_validation_split(X, t)

    # Scaling statistics come from the training split only.
    train_min = np.min(data['X_train'], axis=0)
    train_range = np.max(data['X_train'], axis=0) - train_min
    # A constant training feature has zero range; divide by 1 to avoid nan.
    train_range = np.where(train_range == 0, 1, train_range)

    # X_test is scaled for completeness but is not evaluated in this function.
    for split in ('X_train', 'X_valid', 'X_test'):
        data[split] = (data[split] - train_min) / train_range

    # from here each sample is a column
    X_train = data['X_train'].T
    X_valid = data['X_valid'].T
    t_train = data['t_train'].reshape(-1, 1)
    t_valid = data['t_valid'].reshape(-1, 1)

    w, b, _ = model(X_train, t_train, learningRate=0.2, maxIters=50000)

    print(w, b)

    def error_rate(features, targets):
        # Fraction of samples whose predicted label differs from the target.
        mismatches = np.abs(predict(w, b, features).reshape(-1, 1) - targets)
        return np.sum(mismatches) / features.shape[1]

    print(error_rate(X_valid, t_valid))
    print(error_rate(X_train, t_train))

In [333]:
# Run the full pipeline: prints the learned weights and bias, then the
# validation error rate, then the training error rate.
main()

[[-118.48513109]
 [-121.18275152]
 [-132.30682719]
 [  -6.00342466]] 165.28233836168224
0.01327433628318584
0.007616974972796518


In [354]:
# Reference run: repeat the load / split / min-max steps that main() performs
# and fit scikit-learn's LogisticRegression on the training split, so its
# coefficients can be compared with the hand-written model's weights.
path = 'data.npy'
data = load_data(path)

# Last column is the target; the remaining columns are the features.
X, t = segregate_target(data)
data = train_test_validation_split(X, t)

# NOTE(review): each split is scaled with its own min/max here; only the
# training split is actually used below.
data['X_train'] = normalize(data['X_train'])
data['X_valid'] = normalize(data['X_valid'])
data['X_test'] = normalize(data['X_test'])

# Samples stay as rows here (no transpose).
X_train = data['X_train']
t_train = data['t_train']

# print(X_train.shape)

# t_train is a single-column 2-D slice; np.ravel flattens it to 1-D for fit().
sklearn_model(X_train, np.ravel(t_train))

[-113.22650398 -116.01325007 -126.50360739   -5.68533025]


