In [35]:
import numpy as np

NUM_EXAMPLES = 1000
NUM_MIDDLE_FEATURES = 10
NUM_FEATURES = 20

# The second half of every feature row is derived from the first half (see
# below), so both halves must have the same width.
assert NUM_FEATURES % 2 == 0, "NUM_FEATURES must be even"
NUM_FREE_FEATURES = NUM_FEATURES // 2

# NOTE(review): no random seed is set, so the generated data (and the counts
# printed at the end of this cell) change on every run.

# Binary labels, each drawn with probability 0.5.
training_y = np.random.binomial(1, 0.50, NUM_EXAMPLES)

# One row of per-feature log-odds per class: row 0 for label 0, row 1 for label 1.
middle_log_odds_matrix = np.random.normal(
    size = (2, NUM_MIDDLE_FEATURES)
)

# Every example picks up the log-odds row of its own class; the sigmoid turns
# those into Bernoulli probabilities from which the "middle" features are drawn.
# NOTE(review): middle_x is not read by any cell visible in this notebook.
middle_x_odds = middle_log_odds_matrix[training_y]
middle_x_probs = 1 / (1 + np.exp(-middle_x_odds))
middle_x = np.random.binomial(1, middle_x_probs, (NUM_EXAMPLES, NUM_MIDDLE_FEATURES))

training_x = np.random.binomial(1, 0.50, (NUM_EXAMPLES, NUM_FEATURES))

NUM_POSITIVES = np.sum(training_y == 1)
NUM_NEGATIVES = np.sum(training_y == 0)

# Encode the label as a parity relation between the two halves of each row:
# for label 0 the second half copies the first half, for label 1 it is the
# bitwise complement. Adding the label (0 or 1) modulo 2 expresses both cases.
training_x[:, NUM_FREE_FEATURES:] = (
    training_x[:, :NUM_FREE_FEATURES]
    +
    training_y[:, np.newaxis]
) % 2

print(NUM_POSITIVES)
print(NUM_NEGATIVES)

# final_log_odds_matrix = np.random.uniform(1, 0.50, NUM_EXAMPLES)

515
485


In [36]:
# Write InputNode and LogisticNode

class InputNode:
    """Leaf node of the network that simply stores an externally supplied value.

    Attributes:
        _successors: nodes that read from this one; LogisticNode.add_input
            appends to this list.
        _value: the most recently stored value, or None if set_value() has
            not been called yet.
    """

    def __init__(self):
        self._successors = []
        # Declared here so that value() is well-defined (returns None) even
        # before the first set_value() call instead of raising AttributeError.
        self._value = None

    def set_value(self, value):
        """Store `value` as this node's current output."""
        self._value = value

    def value(self):
        """Return the value last passed to set_value() (None if never set)."""
        return self._value

class LogisticNode:
    """A sigmoid unit: squashes bias + weighted sum of its inputs' values.

    Attributes:
        _inputs: upstream nodes, in the order they were connected.
        _weights: one weight per entry of _inputs (same order).
        _successors: downstream nodes that have this node as an input.
        _bias: scalar offset, drawn from a standard normal at construction.
    """

    def __init__(self):
        self._bias = np.random.normal()
        self._weights = []
        self._inputs = []
        self._successors = []

    def add_input(self, prior_node):
        """Connect `prior_node` as an input with a freshly drawn normal weight.

        Also registers this node in `prior_node._successors`.
        """
        prior_node._successors.append(self)
        self._inputs.append(prior_node)
        initial_weight = np.random.normal()
        self._weights.append(initial_weight)

    def value(self):
        """Return sigmoid(bias + sum_i weight_i * input_i.value())."""
        pre_activation = self._bias
        for position in range(len(self._inputs)):
            contribution = self._weights[position] * self._inputs[position].value()
            pre_activation += contribution

        return 1 / (1 + np.exp(-pre_activation))

    def randomize_weights(self):
        """Replace every weight with a uniform draw from [-0.125, 0.125).

        The bias is left as it is.
        """
        fresh_weights = []
        for _ in range(len(self._weights)):
            fresh_weights.append(np.random.uniform(low = -.125, high = +0.125))
        self._weights = fresh_weights

class OutputNode(LogisticNode):
    """Logistic unit used as the network's final node.

    Adds nothing to LogisticNode in this definition; it exists as a distinct
    type so that a separate `deriv_wrt_activation` can be attached to it.
    """



In [37]:
def deriv_wrt_activation(self, label):
    """Derivative of the log loss with respect to this node's activation.

    With prediction p = self.value(), the loss is -log(1 - p) for label 0 and
    -log(p) otherwise, so the derivative is 1 / (1 - p) or -1 / p.

    The prediction is clamped to [eps, 1 - eps] first: a saturated sigmoid
    returns exactly 0.0 or 1.0 in floating point, which would otherwise make
    the division blow up (inf, or an exception under np.seterr(divide='raise')).

    Args:
        label: the true class; 0 selects the negative branch, anything else
            is treated as the positive class.

    Returns:
        The (finite) derivative as a float.
    """
    eps = 1e-12
    prediction = min(max(self.value(), eps), 1.0 - eps)
    if label == 0:
        return 1 / (1 - prediction)
    return -1 / prediction

# Attach the function defined just above to OutputNode. This must happen here:
# the module-level name `deriv_wrt_activation` is redefined immediately below
# with a different implementation that is attached to LogisticNode.
OutputNode.deriv_wrt_activation = deriv_wrt_activation

def deriv_wrt_activation(self, label):
    """Derivative of the loss with respect to this (non-output) node's activation.

    Chain rule through every downstream node: each successor contributes
    (weight on the edge from this node) * (successor's derivative w.r.t. its
    pre-activation). With exactly one successor this is the single product
    weight * successor.deriv_wrt_zvalue(label).

    Assumes this node was connected to any given successor at most once;
    `list.index` locates the first matching edge only.

    Args:
        label: the true class, forwarded unchanged to the successors.

    Returns:
        The summed derivative; 0.0 if the node has no successors.
    """
    total = 0.0
    for successor in self._successors:
        # Position of this node among the successor's inputs == position of
        # the corresponding weight.
        position = successor._inputs.index(self)
        total += successor._weights[position] * successor.deriv_wrt_zvalue(label)
    return total

def deriv_wrt_zvalue(self, label):
    """Derivative of the loss with respect to this node's pre-activation z.

    Multiplies the derivative w.r.t. the activation a by the sigmoid slope
    da/dz = a * (1 - a).

    Args:
        label: the true class, forwarded to deriv_wrt_activation.
    """
    upstream = self.deriv_wrt_activation(label)
    # value() recomputes the forward pass through all upstream nodes, so
    # evaluate it once and reuse the result.
    activation = self.value()
    return upstream * (activation * (1 - activation))

def deriv_wrt_weights(self, label):
    """Return the loss derivative for each incoming weight, in weight order.

    Entry i is deriv_wrt_zvalue(label) times the current value of input i.
    """
    upstream = self.deriv_wrt_zvalue(label)
    gradients = []
    for position in range(len(self._weights)):
        gradients.append(upstream * self._inputs[position].value())
    return gradients

# Attach the three derivative functions defined above to LogisticNode as
# methods, each under its own function name.
for _derivative in (deriv_wrt_activation, deriv_wrt_zvalue, deriv_wrt_weights):
    setattr(LogisticNode, _derivative.__name__, _derivative)


In [48]:
# Have them do SGD with batch size of 1 vs 1024 to see the difference.

from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD

# Reference implementation in Keras: one sigmoid hidden layer of
# NUM_MIDDLE_FEATURES units followed by a single sigmoid output unit.
model = Sequential()
model.add(Dense(
    NUM_MIDDLE_FEATURES,
    activation = 'sigmoid',
    input_shape = (NUM_FEATURES,)
))
# Only the first layer declares an input shape; this layer consumes the
# NUM_MIDDLE_FEATURES outputs of the layer above, so repeating
# input_shape = (NUM_FEATURES,) here was misleading and has been dropped.
model.add(Dense(
    1,
    activation = 'sigmoid'
))

# NOTE(review): newer Keras releases spell the SGD argument `learning_rate`;
# `lr` is kept because it is what this notebook's Keras version was run with.
model.compile(
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
    optimizer = SGD(lr = 1.0)
)

# batch_size = 1 means one weight update per example.
model.fit(
    training_x,
    training_y,
    epochs = 5,
    batch_size = 1,
    verbose = 1
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x11c13ea20>

In [49]:
# Step-size multiplier read by train_dataset at call time.
LEARNING_RATE = 1.00

def train_example(x, y, input_nodes, logistic_nodes):
    """Load one example into the input nodes and return every node's weight gradients.

    No weights are modified here; all gradients are computed against the
    current weights and returned for the caller to apply.

    Args:
        x: feature values for one example; x[i] is stored in input_nodes[i].
            Values beyond len(input_nodes) are ignored.
        y: the example's label, passed to each node's deriv_wrt_weights.
        input_nodes: nodes exposing set_value().
        logistic_nodes: nodes exposing deriv_wrt_weights(label).

    Returns:
        A list with one entry per node in `logistic_nodes`, each being that
        node's deriv_wrt_weights(y) result.

    Raises:
        ValueError: if `x` has fewer values than there are input nodes.
    """
    # Pair inputs with features directly rather than relying on a global
    # feature count, so the function works for any network width.
    if len(x) < len(input_nodes):
        raise ValueError(
            "expected at least %d feature values, got %d"
            % (len(input_nodes), len(x))
        )
    for input_node, feature in zip(input_nodes, x):
        input_node.set_value(feature)

    return [lr_node.deriv_wrt_weights(y) for lr_node in logistic_nodes]

def train_dataset(training_x, training_y, input_nodes, logistic_nodes):
    """Run one pass of per-example gradient descent over the dataset.

    For each example, gradients for all nodes are computed first (via
    train_example) and then subtracted from the weights, scaled by the global
    LEARNING_RATE divided by the number of examples in `training_x`.

    NOTE(review): only `_weights` are updated; each node's `_bias` keeps its
    initial value — confirm that this is intended.

    Args:
        training_x: indexable as training_x[i, :] for i in range(len(training_x)).
        training_y: labels, indexable by the same i.
        input_nodes: passed through to train_example.
        logistic_nodes: nodes whose `_weights` lists are updated in place.
    """
    # Use the size of the dataset actually passed in instead of a global
    # example count, so subsets or other datasets work too.
    num_examples = len(training_x)
    for example_idx in range(num_examples):
        x = training_x[example_idx, :]
        y = training_y[example_idx]
        node_derivs = train_example(x, y, input_nodes, logistic_nodes)
        for node_idx, node_deriv in enumerate(node_derivs):
            node_weights = logistic_nodes[node_idx]._weights
            for weight_idx, deriv in enumerate(node_deriv):
                node_weights[weight_idx] -= (
                    LEARNING_RATE * deriv / num_examples
                )


In [50]:
# Input layer: one node per feature column.
input_nodes = [InputNode() for _ in range(NUM_FEATURES)]

# Hidden layer: four logistic units, each connected to every input node.
logistic_nodes = []
for _ in range(4):
    hidden_node = LogisticNode()
    for source_node in input_nodes:
        hidden_node.add_input(source_node)
    logistic_nodes.append(hidden_node)

# Output unit reads from every hidden unit. It is appended last, so
# logistic_nodes holds the hidden units followed by the output node.
output_node = OutputNode()
for hidden_node in logistic_nodes:
    output_node.add_input(hidden_node)
logistic_nodes.append(output_node)

# input_nodes = []
# for _ in range(NUM_FEATURES):
#     input_nodes.append(InputNode())

# logistic_nodes = []

# output_node = OutputNode()
# for input_node in input_nodes:
#     output_node.add_input(input_node)
# logistic_nodes.append(output_node)


In [51]:
def predict(x, input_nodes, output_node):
    """Load `x` into the input nodes and return the output node's value.

    Args:
        x: feature values for one example; x[i] is stored in input_nodes[i].
            Values beyond len(input_nodes) are ignored.
        input_nodes: nodes exposing set_value().
        output_node: node whose value() is returned.

    Raises:
        ValueError: if `x` has fewer values than there are input nodes.
    """
    # Pair inputs with features directly rather than relying on a global
    # feature count, so the function works for any network width.
    if len(x) < len(input_nodes):
        raise ValueError(
            "expected at least %d feature values, got %d"
            % (len(input_nodes), len(x))
        )
    for input_node, feature in zip(input_nodes, x):
        input_node.set_value(feature)
    return output_node.value()

def classify(x, input_nodes, output_node):
    """Threshold the network's prediction for `x`: 1 if it exceeds 0.50, else 0."""
    probability = predict(x, input_nodes, output_node)
    return 1 if probability > 0.50 else 0

def accuracy(training_x, training_y, input_nodes, output_node):
    """Return the fraction of examples whose predicted class equals the label.

    Args:
        training_x: indexable as training_x[i, :] for i in range(len(training_x)).
        training_y: labels, indexable by the same i.
        input_nodes: passed through to classify.
        output_node: passed through to classify.

    Raises:
        ValueError: if `training_x` contains no examples.
    """
    # Use the size of the dataset actually passed in instead of a global
    # example count, so subsets or other datasets work too.
    num_examples = len(training_x)
    if num_examples == 0:
        raise ValueError("accuracy is undefined for an empty dataset")

    num_correct = 0
    for example_idx in range(num_examples):
        predicted_class = classify(
            training_x[example_idx, :], input_nodes, output_node
        )
        if predicted_class == training_y[example_idx]:
            num_correct += 1
    return num_correct / num_examples

# Accuracy of the network as currently initialised, before the training loop below.
accuracy(training_x, training_y, input_nodes, output_node)
    

0.515

In [54]:
# train_dataset divides every update by the dataset size, so this global
# multiplier is large on purpose.
LEARNING_RATE = 100.0

# Make NumPy raise on division by zero instead of only warning.
np.seterr(divide = 'raise')

# Restart every node from small uniform weights, then show the first node's
# starting weights.
for node in logistic_nodes:
    node.randomize_weights()
print(logistic_nodes[0]._weights)

# 100 passes over the data; after each pass print the pass number followed by
# the training-set accuracy.
for epoch in range(100):
    train_dataset(training_x, training_y, input_nodes, logistic_nodes)
    print(epoch)
    epoch_accuracy = accuracy(training_x, training_y, input_nodes, output_node)
    print(epoch_accuracy)

[-0.032529248671876065, -0.07979917705240139, -0.00019327249228467958, -0.02018625511053837, 0.08613315290993487, -0.012331011238479656, -0.07325468806907173, 0.09828197595356883, -0.07219191143361614, 0.01377994067625582, 0.06918819109814964, -0.1171010949176435, 0.10022885919182692, -0.03128634908350872, -0.08909180848249548, 0.0778585047352218, 0.027564688567319168, -0.12146862096266209, -0.03206121273944118, -0.047313172860915076]
0
0.485
1
0.517
2
0.576
3
0.639
4
0.601
5
0.74
6
0.886
7
0.905
8
0.935
9
0.964
10
0.98
11
0.991


KeyboardInterrupt: 