In [121]:
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.special import expit

In [129]:
def normalize(data, method='min-max'):
    
    """
    Computes statistics assuming each sample is a row.
    """
    
    if (method == 'min-max'):
        numerator = data - np.min(data, axis=0)
        denominator = np.max(data, axis=0) - np.min(data, axis=0)
        return numerator/denominator
    
    if (method == 'standardization'):
        numerator = data - np.mean(data, axis=0)
        denominator = np.std(data, axis=0)
        return numerator/denominator
    
def segregate_target(data):
    
    X = data[:, :-1]
    t = data[:, -1:]
    
    return X, t

def train_test_validation_split(X, t, test_ratio=0.33):
    
    X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=test_ratio, random_state=42)
    X_valid, X_test, t_valid, t_test = train_test_split(X_test, t_test, test_size=0.5, random_state=42)
    
    data = {
        'X_train': X_train,
        't_train': t_train,
        'X_valid': X_valid,
        't_valid': t_valid,
        'X_test': X_test,
        't_test': t_test
    }
    
    return data

def load_data(path):
    
    data = np.load(path)
    return data

In [144]:
class Model:
    
    def __init__(self, learning_rate=0.1, random_init=True, maxIters=10000):
        
        self.learning_rate = learning_rate
        self.random_init = random_init
        self.maxIters = maxIters
        self.w = None
        self.b = None
        
    
    def initalize_parameters(self, dims):
        
        if self.random_init:
            self.w = np.random.randn(dims).reshape(-1, 1)
        else:
            self.w = np.zeros((dims, 1))
        self.b = 0
    
    def sigmoid(self, x):
        
        M = x.shape[1]
        x[x > 15] = 15
        x[x < -15] = -15
            
        return (1 / (1 + np.exp(-x)))
    
    def predict(self, X):
        
        M = X.shape[1]
        preds = np.zeros((1, M))
        
        probabilities = self.sigmoid(np.dot(self.w.T, X) + self.b)
        
        for i in range(M):
            preds[0][i] = 0 if probabilities[0][i] <= 0.5 else 1
            
        return preds
    
    def calculate_accuracy(self, y, t):
        
        return 100 - (np.mean(np.abs(y - t))*100)
        
    
    def fit(self, X_train, t_train):
        
        # number of features
        N = X_train.shape[0]
        # number of samples
        M = X_train.shape[1]
        
        self.initalize_parameters(N)
        costs = []
        
        for iteration in range(self.maxIters - 1):
            
            # forward calculation
            y = self.sigmoid(np.dot(self.w.T, X_train) + self.b)
            
            # cost calculation
            cost = -np.sum((t_train*np.log(y)) + ((1 - t_train)*np.log((1 - y))))
            costs.append(cost)
            
            # gradient calculation
            dw = np.dot(X_train, (y - t_train).T)
            db = np.sum(y - t_train)
            
            # backward update
            self.w = self.w - self.learning_rate*dw
            self.b = self.b - self.learning_rate*db
            
        predictions = self.predict(X_train)
        train_accuracy = self.calculate_accuracy(predictions, t_train)
        print('Training accuracy is: ', train_accuracy)
        
        return costs

In [150]:
path = 'data/data.npy'
data = load_data(path)

# from here each sample is a row
X, t = segregate_target(data)
data = train_test_validation_split(X, t, test_ratio=0.33)

data['X_train'] = normalize(data['X_train'])
data['X_valid'] = normalize(data['X_valid'])
data['X_test'] = normalize(data['X_test'])

# from here each sample is a column
X_train = data['X_train'].T
t_train = data['t_train'].reshape(1,-1)
X_test = data['X_test'].T
t_test = data['t_test'].reshape(1,-1)

model = Model(learning_rate=1.5, random_init=False, maxIters=100000)
costs = model.fit(X_train, t_train)
preds = model.predict(X_test)
test_accuracy = model.calculate_accuracy(preds, t_test)
print('Testing accuracy is: ', test_accuracy)

Training accuracy is:  99.34711643090316
Testing accuracy is:  90.30837004405286


In [151]:
X_val = data['X_valid'].T
t_val = data['t_valid'].T
preds = model.predict(X_val)t
val_accuracy = model.calculate_accuracy(preds, t_val)
print('Validation accuracy is: ', val_accuracy)

Validation accuracy is:  97.34513274336283


In [125]:
"""
X_train = np.array([[1.2, 1.4, 1.1], [-2.3, -1.6, -1.5]])
t_train = np.array([[1, 0, 1]])
X_test = np.array([[1.3], [-1.8]])
t_test = np.array([[0]])

model = Model(maxIters=2)
model.fit(X_train, t_train)
"""

'\nX_train = np.array([[1.2, 1.4, 1.1], [-2.3, -1.6, -1.5]])\nt_train = np.array([[1, 0, 1]])\nX_test = np.array([[1.3], [-1.8]])\nt_test = np.array([[0]])\n\nmodel = Model(maxIters=2)\nmodel.fit(X_train, t_train)\n'