In [1]:
%config Completer.use_jedi = False

In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [3]:
# Load the breast-cancer dataset and pull out the feature matrix and labels.
cancer = load_breast_cancer()

x, y = cancer.data, cancer.target

# Two successive stratified 80/20 splits with a fixed seed:
# first hold out the test set, then split the remainder into train/validation.
x_train_all, x_test, y_train_all, y_test = train_test_split(
    x,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train_all,
    y_train_all,
    stratify=y_train_all,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Report the shape of each split, one line per split.
for split_name, split_x in (("train", x_train), ("val", x_val), ("test", x_test)):
    print("{}: {}".format(split_name, split_x.shape))

train: (364, 30)
val: (91, 30)
test: (114, 30)


In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Learn the standardisation statistics from the training split only, then
# apply that same fitted transformation to the validation split.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_val_scaled = scaler.transform(x_val)

In [12]:
class DualLayer:
    """Two-layer fully connected network for binary classification.

    A sigmoid hidden layer of ``units`` neurons feeds a single output unit.
    Training is full-batch gradient descent on the logistic loss, with
    optional L1/L2 penalties applied to the weight matrices (not the biases).
    """

    def __init__(self, units=10, learning_rate=0.1, l1=0, l2=0):
        """Store the hyper-parameters; weights are created in ``fit``.

        Args:
            units: Number of hidden neurons.
            learning_rate: Step size of each gradient-descent update.
            l1: L1 penalty coefficient.
            l2: L2 penalty coefficient.
        """
        self.w1 = None          # hidden weights, shape (n_features, units)
        self.b1 = None          # hidden biases, shape (units,)
        self.w2 = None          # output weights, shape (units, 1)
        self.b2 = None          # output bias (scalar)
        self.a1 = None          # hidden activations cached by the last forpass
        self.units = units
        self.losses = []        # per-epoch training loss
        self.val_losses = []    # per-epoch validation loss
        self.lr = learning_rate
        self.l1 = l1
        self.l2 = l2

    def init_weights(self, x):
        """Initialise all weights to one and all biases to zero, sized from ``x``."""
        self.w1 = np.ones((x.shape[1], self.units))
        self.b1 = np.zeros(self.units)
        self.w2 = np.ones((self.units, 1))
        self.b2 = 0

    def activation(self, z):
        """Element-wise sigmoid of ``z``."""
        # Bound z from below so that exp(-z) cannot overflow.
        z = np.clip(z, -100, None)
        a = 1 / (1 + np.exp(-z))
        return a

    def forpass(self, x):
        """Run the forward pass and return the output pre-activation ``z2``.

        The hidden activations are stored in ``self.a1`` for ``backprop``.
        """
        z1 = np.dot(x, self.w1) + self.b1
        self.a1 = self.activation(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        return z2

    def backprop(self, x, err):
        """Compute sample-averaged gradients from the output error.

        Must be called after ``forpass(x)`` because it reads ``self.a1``.

        Args:
            x: Input batch that produced ``err``.
            err: Output error (prediction minus target), one row per sample.

        Returns:
            Tuple ``(w1_grad, b1_grad, w2_grad, b2_grad)``.
        """
        m = len(x)
        w2_grad = np.dot(self.a1.T, err) / m
        b2_grad = np.sum(err) / m
        # Push the error back through w2 and the sigmoid derivative a1 * (1 - a1).
        hidden_to_err = np.dot(err, self.w2.T) * self.a1 * (1 - self.a1)
        w1_grad = np.dot(x.T, hidden_to_err) / m
        b1_grad = np.sum(hidden_to_err, axis=0) / m
        return w1_grad, b1_grad, w2_grad, b2_grad

    def reg_loss(self):
        """Return the L1 + L2 penalty summed over both weight matrices."""
        return self.l1 * (np.sum(np.abs(self.w1)) + np.sum(np.abs(self.w2))) + \
               self.l2 / 2 * (np.sum(self.w1**2) + np.sum(self.w2**2))

    def update_val_losses(self, x_val, y_val):
        """Append the current regularised validation loss to ``self.val_losses``.

        Args:
            x_val: Validation inputs.
            y_val: Validation targets as a column vector of 0/1 values.
        """
        z = self.forpass(x_val)
        a = self.activation(z)
        # Keep probabilities strictly inside (0, 1) so the logs stay finite.
        a = np.clip(a, 1e-10, 1-1e-10)
        val_loss = np.sum(-(y_val*np.log(a) + (1-y_val)*np.log(1-a)))
        self.val_losses.append((val_loss + self.reg_loss()) / len(y_val))

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train with full-batch gradient descent.

        Weights are re-initialised on every call. One entry is appended to
        ``self.losses`` per epoch, and one to ``self.val_losses`` when
        validation data is supplied.

        Args:
            x: Training inputs, shape (n_samples, n_features).
            y: Training targets (0/1), any shape reshapable to a column.
            epochs: Number of passes over the training data.
            x_val: Optional validation inputs.
            y_val: Optional validation targets (0/1).
        """
        y = y.reshape(-1, 1)
        # Validation tracking is optional: only enabled when both arrays are given.
        has_val = x_val is not None and y_val is not None
        if has_val:
            y_val = y_val.reshape(-1, 1)
        self.init_weights(x)
        m = len(x)
        # The progress bar is cosmetic; fall back to a plain range without tqdm.
        try:
            from tqdm import tqdm
            epoch_iter = tqdm(range(epochs))
        except ImportError:
            epoch_iter = range(epochs)
        for _ in epoch_iter:
            z = self.forpass(x)
            a = self.activation(z)
            err = - (y - a)
            w1_grad, b1_grad, w2_grad, b2_grad = self.backprop(x, err)
            # Add the gradient of the penalty terms (weights only).
            w1_grad += (self.l1 * np.sign(self.w1) + self.l2 * self.w1) / m
            w2_grad += (self.l1 * np.sign(self.w2) + self.l2 * self.w2) / m
            self.w1 -= self.lr * w1_grad
            self.b1 -= self.lr * b1_grad
            self.w2 -= self.lr * w2_grad
            self.b2 -= self.lr * b2_grad
            # Keep probabilities strictly inside (0, 1) so the logs stay finite.
            a = np.clip(a, 1e-10, 1-1e-10)
            loss = np.sum(-(y*np.log(a) + (1-y)*np.log(1-a)))
            self.losses.append((loss + self.reg_loss()) / m)
            if has_val:
                self.update_val_losses(x_val, y_val)

    def predict(self, x):
        """Return a boolean column vector: True where the output logit is positive."""
        z = self.forpass(x)
        return z > 0

    def score(self, x, y):
        """Return the fraction of samples in ``x`` whose prediction equals ``y``."""
        return np.mean(self.predict(x) == y.reshape(-1, 1))

In [13]:
import matplotlib.pyplot as plt

In [14]:
class RandomInitNetwork(DualLayer):
    """DualLayer variant whose weights start from seeded standard-normal draws."""

    def init_weights(self, x):
        """Draw w1 and w2 from N(0, 1) after seeding NumPy's global RNG with 42.

        Biases start at zero. Layer sizes are taken from ``x.shape[1]`` and
        ``self.units``.
        """
        n_features = x.shape[1]
        # Fixed seed so repeated fits start from the same weights.
        np.random.seed(42)
        self.w1 = np.random.normal(loc=0, scale=1, size=(n_features, self.units))
        self.w2 = np.random.normal(loc=0, scale=1, size=(self.units, 1))
        self.b1 = np.zeros(self.units)
        self.b2 = 0

In [15]:
class MinibatchNetwork(RandomInitNetwork):
    """Randomly initialised two-layer network trained with mini-batch gradient descent.

    ``reg_loss`` and ``update_val_losses`` are inherited unchanged from the
    base class.
    """

    def __init__(self, units=10, batch_size=32, learning_rate=0.1, l1=0, l2=0):
        """Store the hyper-parameters.

        Args:
            units: Number of hidden neurons.
            batch_size: Number of samples per mini-batch.
            learning_rate: Step size of each gradient-descent update.
            l1: L1 penalty coefficient.
            l2: L2 penalty coefficient.
        """
        super().__init__(units, learning_rate, l1, l2)
        self.batch_size = batch_size

    def gen_batch(self, x, y):
        """Yield ``(x_batch, y_batch)`` pairs covering a fresh shuffle of the data.

        The last batch is smaller when ``len(x)`` is not a multiple of
        ``batch_size``.
        """
        length = len(x)
        bins = length // self.batch_size
        if length % self.batch_size:
            bins += 1       # one extra, partial batch for the remainder
        indexes = np.random.permutation(np.arange(length))
        x = x[indexes]
        y = y[indexes]
        for i in range(bins):
            start = self.batch_size * i
            end = self.batch_size * (i + 1)
            yield x[start:end], y[start:end]

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train with mini-batch gradient descent.

        Weights are re-initialised on every call. One entry is appended to
        ``self.losses`` per epoch, and one to ``self.val_losses`` when
        validation data is supplied.

        Args:
            x: Training inputs, shape (n_samples, n_features).
            y: Training targets (0/1).
            epochs: Number of passes over the training data.
            x_val: Optional validation inputs.
            y_val: Optional validation targets (0/1).
        """
        # Validation tracking is optional: only enabled when both arrays are given.
        has_val = x_val is not None and y_val is not None
        if has_val:
            y_val = y_val.reshape(-1, 1)
        self.init_weights(x)
        np.random.seed(42)      # fixed seed for the per-epoch shuffles
        # The progress bar is cosmetic; fall back to a plain range without tqdm.
        try:
            from tqdm import tqdm
            epoch_iter = tqdm(range(epochs))
        except ImportError:
            epoch_iter = range(epochs)
        for _ in epoch_iter:
            loss = 0
            for x_batch, y_batch in self.gen_batch(x, y):
                m = len(x_batch)

                y_batch = y_batch.reshape(-1, 1)

                z = self.forpass(x_batch)
                a = self.activation(z)
                err = - (y_batch - a)

                w1_grad, b1_grad, w2_grad, b2_grad = self.backprop(x_batch, err)

                # Penalty gradient is scaled by the batch size, like the data gradient.
                w1_grad += (self.l1 * np.sign(self.w1) + self.l2 * self.w1) / m
                w2_grad += (self.l1 * np.sign(self.w2) + self.l2 * self.w2) / m

                self.w1 -= self.lr * w1_grad
                self.b1 -= self.lr * b1_grad

                self.w2 -= self.lr * w2_grad
                self.b2 -= self.lr * b2_grad

                # Keep probabilities strictly inside (0, 1) so the logs stay finite.
                a = np.clip(a, 1e-10, 1-1e-10)

                loss += np.sum(- (y_batch * np.log(a) + (1 - y_batch) * np.log(1 - a)))

            # Epoch loss: summed batch losses plus penalty, averaged over all samples.
            self.losses.append((loss + self.reg_loss()) / len(x))
            if has_val:
                self.update_val_losses(x_val, y_val)

In [None]:
class MiniClassNetwork():
    """Two-layer network for multi-class classification with one-hot targets.

    A sigmoid hidden layer feeds a softmax output layer. Training is
    mini-batch gradient descent on the cross-entropy loss, with optional
    L1/L2 penalties applied to the weight matrices (not the biases).
    """

    def __init__(self, units=10, batch_size=32, learning_rate=0.1, l1=0, l2=0):
        """Store the hyper-parameters; weights are created in ``fit``.

        Args:
            units: Number of hidden neurons.
            batch_size: Number of samples per mini-batch.
            learning_rate: Step size of each gradient-descent update.
            l1: L1 penalty coefficient.
            l2: L2 penalty coefficient.
        """
        self.w1 = None          # hidden weights, shape (n_features, units)
        self.b1 = None          # hidden biases, shape (units,)
        self.w2 = None          # output weights, shape (units, n_classes)
        self.b2 = None          # output biases, shape (n_classes,)
        self.a1 = None          # hidden activations cached by the last forpass
        self.units = units
        self.batch_size = batch_size
        self.losses = []        # per-epoch training loss
        self.val_losses = []    # per-epoch validation loss
        self.lr = learning_rate
        self.l1 = l1
        self.l2 = l2

    def init_weights(self, x, y):
        """Draw weights from N(0, 1) with a fixed seed; biases start at zero.

        Layer sizes come from ``x.shape[1]`` (features) and ``y.shape[1]``
        (classes).
        """
        np.random.seed(seed=42)
        self.w1 = np.random.normal(0, 1, (x.shape[1], self.units))
        self.b1 = np.zeros(self.units)
        self.w2 = np.random.normal(0, 1, (self.units, y.shape[1]))
        # One bias per class so each output can learn its own offset.
        self.b2 = np.zeros(y.shape[1])

    def sigmoid(self, z):
        """Element-wise sigmoid of ``z``."""
        # Bound z from below so that exp(-z) cannot overflow.
        z = np.clip(z, -100, None)
        a = 1 / (1 + np.exp(-z))
        return a

    def softmax(self, z):
        """Row-wise softmax of a 2-D array of logits (one row per sample)."""
        # Subtracting each row's maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing on large logits.
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        # Normalise across the class axis so that every row sums to one.
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forpass(self, x):
        """Run the forward pass and return the output logits ``z2``.

        The hidden activations are stored in ``self.a1`` for ``backprop``.
        """
        z1 = np.dot(x, self.w1) + self.b1
        self.a1 = self.sigmoid(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        return z2

    def backprop(self, x, err):
        """Compute sample-averaged gradients from the output error.

        Must be called after ``forpass(x)`` because it reads ``self.a1``.

        Args:
            x: Input batch that produced ``err``.
            err: Output error (softmax output minus one-hot target),
                shape (batch, n_classes).

        Returns:
            Tuple ``(w1_grad, b1_grad, w2_grad, b2_grad)``.
        """
        m = len(x)
        w2_grad = np.dot(self.a1.T, err) / m
        # Per-class bias gradient. Summing over every element would always be
        # ~0, since each softmax error row sums to zero.
        b2_grad = np.sum(err, axis=0) / m
        # Push the error back through w2 and the sigmoid derivative a1 * (1 - a1).
        hidden_to_err = np.dot(err, self.w2.T) * self.a1 * (1 - self.a1)
        w1_grad = np.dot(x.T, hidden_to_err) / m
        b1_grad = np.sum(hidden_to_err, axis=0) / m
        return w1_grad, b1_grad, w2_grad, b2_grad

    def reg_loss(self):
        """Return the L1 + L2 penalty summed over both weight matrices."""
        return self.l1 * (np.sum(np.abs(self.w1)) + np.sum(np.abs(self.w2))) + \
               self.l2 / 2 * (np.sum(self.w1**2) + np.sum(self.w2**2))

    def update_val_losses(self, x_val, y_val):
        """Append the current regularised validation loss to ``self.val_losses``.

        Args:
            x_val: Validation inputs.
            y_val: One-hot validation targets.
        """
        z = self.forpass(x_val)
        a = self.softmax(z)
        # Keep probabilities strictly inside (0, 1) so the log stays finite.
        a = np.clip(a, 1e-10, 1-1e-10)
        val_loss = np.sum(- y_val*np.log(a))
        self.val_losses.append((val_loss + self.reg_loss()) / len(y_val))

    def gen_batch(self, x, y):
        """Yield ``(x_batch, y_batch)`` pairs covering a fresh shuffle of the data.

        The last batch is smaller when ``len(x)`` is not a multiple of
        ``batch_size``.
        """
        length = len(x)
        bins = length // self.batch_size
        if length % self.batch_size:
            bins += 1       # one extra, partial batch for the remainder
        indexes = np.random.permutation(np.arange(length))
        x = x[indexes]
        y = y[indexes]
        for i in range(bins):
            start = self.batch_size * i
            end = self.batch_size * (i + 1)
            yield x[start:end], y[start:end]

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train with mini-batch gradient descent.

        Weights are re-initialised on every call. One entry is appended to
        ``self.losses`` per epoch, and one to ``self.val_losses`` when
        validation data is supplied.

        Args:
            x: Training inputs, shape (n_samples, n_features).
            y: One-hot training targets, shape (n_samples, n_classes).
            epochs: Number of passes over the training data.
            x_val: Optional validation inputs.
            y_val: Optional one-hot validation targets.
        """
        # Validation tracking is optional: only enabled when both arrays are given.
        has_val = x_val is not None and y_val is not None
        self.init_weights(x, y)
        np.random.seed(42)      # fixed seed for the per-epoch shuffles
        for _ in range(epochs):
            loss = 0
            for x_batch, y_batch in self.gen_batch(x, y):
                m = len(x_batch)

                z = self.forpass(x_batch)
                a = self.softmax(z)
                err = - (y_batch - a)

                w1_grad, b1_grad, w2_grad, b2_grad = self.backprop(x_batch, err)

                # Penalty gradient is scaled by the batch size, like the data gradient.
                w1_grad += (self.l1 * np.sign(self.w1) + self.l2 * self.w1) / m
                w2_grad += (self.l1 * np.sign(self.w2) + self.l2 * self.w2) / m

                self.w1 -= self.lr * w1_grad
                self.b1 -= self.lr * b1_grad

                self.w2 -= self.lr * w2_grad
                self.b2 -= self.lr * b2_grad

                # Keep probabilities strictly inside (0, 1) so the log stays finite.
                a = np.clip(a, 1e-10, 1-1e-10)

                loss += np.sum(- y_batch * np.log(a))

            # Epoch loss: summed batch losses plus penalty, averaged over all samples.
            self.losses.append((loss + self.reg_loss()) / len(x))
            if has_val:
                self.update_val_losses(x_val, y_val)

    def predict(self, x):
        """Return the index of the largest output logit for each sample."""
        z = self.forpass(x)
        return np.argmax(z, axis=1)

    def score(self, x, y):
        """Return the fraction of samples predicted as the arg-max class of one-hot ``y``."""
        return np.mean(self.predict(x) == np.argmax(y, axis=1))
    