In [1]:
%matplotlib inline

import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
from miniflow import *
import matplotlib.pyplot as plt

In [2]:
# Load data
data = load_boston()
X_ = data['data']
y_ = data['target']
X_, y_ = shuffle(X_, y_)

In [3]:
# Explore data
print ("Total datasets: {}".format(X_.shape[0]))
print ("Features: {}".format(X_.shape[1]))

Total datasets: 506
Features: 13


In [4]:
# Normalized data
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

In [5]:
# Split data
t = int(X_.shape[0] * 0.9)
X_, X_test = X_[:t], X_[t:]
y_, y_test = y_[:t], y_[t:]

t = int(X_.shape[0] * 0.9)
X_train, X_validation = X_[:t], X_[t:]
y_train, y_validation = y_[:t], y_[t:]

In [6]:
n_input = X_.shape[1]
n_output = 1
n_hidden1 = 64
n_hidden2 = 128

In [7]:
W1_ = np.random.normal(0, n_hidden1**-0.5, (n_input, n_hidden1))
b1_ = np.random.randn(n_hidden1)

W2_ = np.random.normal(n_hidden1, n_hidden2)
b2_ = np.random.randn(n_hidden2)

W3_ = np.random.normal(n_hidden2, n_output)
b3_ = np.random.randn(n_output)

In [8]:
X, y = Input(), Input()  
W1, b1 = Input(), Input()  
W2, b2 = Input(), Input()  
W3, b3 = Input(), Input()  

In [9]:
l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
s2 = Sigmoid(l2)
l3 = Linear(s2, W3, b3)
cost = MSE(y, l3)

In [10]:
feed_dict = {
    X: X_train,
    y: y_train,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_,
    W3: W3_,
    b3: b3_
}

In [11]:
# Hyperparameter
epochs = 5000
lr = 0.001
batch_size = 100

In [12]:
m = X_train.shape[0]
steps_per_epoch = m // batch_size
graph = topological_sort(feed_dict)
trainables = [W1, b1, W2, b2, W3, b3]
losses = {'train': [], 'validation': []}

In [13]:
#Training
for i in range(epochs):
    train_loss = 0
    val_loss = 0
    for j in range(steps_per_epoch):
        X_batch, y_batch = resample(X_train, y_train, n_samples=batch_size)
        X.value, y.value = X_batch, y_batch
        
        forward_and_backward(graph)
        sgd_update(trainables, lr)
        train_loss += graph[-1].value
        
    train_loss = train_loss / steps_per_epoch
    losses['train'].append(train_loss)
    
    X.value, y.value = X_validation, y_validation
    forward_and_backward(graph, training=False)
    val_loss = graph[-1].value
    losses['validation'].append(val_loss)
    
    #if (i+1) % batch_size == 0:
    print("Epoch: {}, Train Loss: {:.3f}, Validation Loss: {:.3f}".format(i+1, train_loss, val_loss))

fig, ax = plt.subplots()
x = np.arange(len(losses['train']))
ax.plot(x, losses['train'], label='Train')
ax.plot(x, losses['validation'], label='Validation')
ax.legend()

ValueError: operands could not be broadcast together with shapes (100,64) (128,) 

In [14]:
# Test
feed_dict[X] = X_test
feed_dict[y] = y_test

graph = topological_sort(feed_dict)
forward_and_backward(graph, training=False)
loss = graph[-1].value
print("Test loss: {:.3f}".format(loss))

Test loss: 35.341
