In [1]:
import random
import numpy as np

from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, LSTM

Using TensorFlow backend.


### Learning simplest test: learn to multiply by 2

In [2]:
x,y = np.arange(10), 2*np.arange(10)
print("input:",x)
print("target:", y)
w = -3.0                 # initialize the weight 
alpha = 0.01             # learning rate

input: [0 1 2 3 4 5 6 7 8 9]
target: [ 0  2  4  6  8 10 12 14 16 18]


In [99]:
i = np.random.randint(len(x))        # choose i.i.d training sample 
w += alpha*(y[i]-w*x[i])             # w = w + alpha*error  
error = np.sum(np.power(x[i]*w - y[i],2))
print("Current w:", w, "  Error:", error)

Current w: 1.9518768524322019   Error: 0.14821358923725272


#### Test Phase: Samples not in the training set!

In [100]:
x = np.random.uniform(10,size=5)
y = 2*x                                 # usually DataSet is divided between training set and test set
print("Test Set", x)
error = np.sum(np.power(x*w-y,2))
print("Error on test set: ", error)

Test Set [9.30579349 8.54358114 5.27636348 2.98318257 4.77010884]
Error on test set:  0.5073626727991621


### Learning simplest test with biass: learn to multiply and sum

In [101]:
x = np.arange(10)
y = 2*x+1
print("input:",x)
print("target:", y)

input: [0 1 2 3 4 5 6 7 8 9]
target: [ 1  3  5  7  9 11 13 15 17 19]


In [193]:
# one weight cannot learn biass
i = np.random.randint(len(x))     # choose i.i.d sample 
w += alpha*(y[i]-w*x[i])          # w = w + alpha*error    
error = np.sum(np.power(x*w - y,2))
print("Current w:", w, "  Error:", error)

Current w: 2.2628196240162732   Error: 6.032367947431148


In [194]:
# lets consider two weights
w = [0.1, 3.3]  # w0 = biass, w1 = the weight
alpha = 0.01

In [197]:
def g(x,w):
    return w[1]*x + w[0]

gsteps, error = 0, 1000000
while gsteps < 500 and error > 0.05:
    i = np.random.randint(len(x))               # choose random training sample: must be i.i.d. 
    w[0] += alpha*(y[i]-g(x[i],w))      
    w[1] += alpha*(y[i]-g(x[i],w))*x[i] 
    error = np.sum(np.power(g(x,w) - y,2))
    print(gsteps, "w:", w, "  Error:", error)
    gsteps += 1

0 w: [0.8734382847155945, 2.0188684172399727]   Error: 0.04672183840934185


## Introducing one layer Neural Network in keras (Similar to Perceptron)

In [2]:
class Perceptron_function():
    
    def __init__(self, inputs=2, outputs=1):
        self.inputs, self.outputs = inputs, outputs
        self.learning_rate = 0.001        
        self.model = Sequential(name="PerceptronNetwork")
        self.model.add(Dense(outputs, input_shape=(inputs,), activation="linear", kernel_initializer='random_uniform'))        
        self.model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        self.model.summary()

    def predict(self, s):
        s = s.flatten()         # we can discard input dimension
        s_batch = np.reshape(s, [1, s.shape[0]])
        return self.model.predict(s_batch)[0][0]

    def update(self, inputs, targets):
        inputs = inputs.flatten()
        inputs_batch = np.reshape(inputs, [1, inputs.shape[0]])
        targets_batch = np.array([targets])
        self.model.train_on_batch(inputs_batch, targets_batch)

    def update_batch(self, inputs, targets):
        self.model.train_on_batch(inputs, targets)
        
    def print_weights(self):
        w = self.model.get_weights()
        print("w:",w[0].flatten(), "b:",w[1])

### Learn logic gates

In [3]:
def gen_input(igate=0):
    or_gate = {(0,0):0, (0,1):1, (1,0):1, (1,1):1}
    and_gate = {(0,0):0, (0,1):0, (1,0):0, (1,1):1}
    xor_gate = {(0,0):0, (0,1):1, (1,0):1, (1,1):0}
    
    if igate == 0: gate = or_gate
    elif igate == 1: gate = and_gate
    else: gate = xor_gate
    
    key = random.sample(list(gate.keys()),1)[0]
    s = np.array(key) 
    y = gate[key]
    return s,y

f = Perceptron_function(inputs=2)
f.print_weights()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "PerceptronNetwork"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1)                 3         
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________
w: [-0.00557284 -0.04531506] b: [0.]


###### And Train

In [4]:
igate = 1 # OR Gate
for _ in range(5000):          # train the network with input-output pairs
    s,y = gen_input(igate)
    f.update(s, y)
    #s_batch, y_batch = np.array([s]), np.array([y])
    #f.update_batch(s_batch, y_batch)

f.print_weights()

error = 0
for _ in range(10):          # test the network with mean squared error (MSE)
    s,y = gen_input(igate)
    print(s,y,f.predict(s))
    error += np.power(f.predict(s)-y,2)  # MSE
    
print("MSE error =", error)


w: [0.48819682 0.49553928] b: [-0.23419617]
[1 0] 0 0.25400066
[0 1] 0 0.26134312
[1 0] 0 0.25400066
[1 0] 0 0.25400066
[0 0] 0 -0.23419617
[1 0] 0 0.25400066
[1 0] 0 0.25400066
[1 0] 0 0.25400066
[1 0] 0 0.25400066
[1 0] 0 0.25400066
MSE error = 0.6392787712717338


### Multi Layer NN

In [80]:
class NN_function():
    
    def __init__(self, inputs=4, outputs=1):
        self.inputs, self.outputs = inputs, outputs
        self.model = Sequential(name="NN")
        self.model.add(Dense(8, input_shape=(inputs,), activation="relu"))
        self.model.add(Dense(16, activation="relu"))
        self.model.add(Dense(outputs, activation="linear"))

        self.learning_rate = 0.001        
        self.model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        self.model.summary()

    def predict(self, s):
        s = s.flatten()         # we can discard input dimension
        s_batch = np.reshape(s, [1, s.shape[0]])
        return self.model.predict(s_batch)[0][0]

    def update(self, inputs, targets):
        inputs = inputs.flatten()
        inputs_batch = np.reshape(inputs, [1, inputs.shape[0]])
        targets_batch = np.array([targets])
        self.model.train_on_batch(inputs_batch, targets_batch)
    
    def update_batch(self, inputs, targets):
        self.model.train_on_batch(inputs, targets)
        
    def print_weights(self):
        w = self.model.get_weights()
        print("w:",w[0].flatten(), "b:",w[1])

##### Learn logic gates

In [8]:
# Exercise: Train using NN_function (not the perceptron) all logic gates


### Train a Neural Network to Sum 
$f(x=[...]) = \sum_{i \in x}{i}$

In [2]:
# Exercise: Make a NN (use NN_function) learn to sum 4 input numbers 

### Define an LSTM Network 

In [87]:
class LSTM_function():
    
    def __init__(self, inputs=4, outputs=1, steps=10):
        self.inputs, self.outputs = inputs, outputs
        self.model = Sequential(name="LSTM_network")
        self.model.add(LSTM(steps, activation='relu', input_shape=(steps, inputs), return_sequences=False, stateful=False))
        self.model.add(Dense(16, activation="relu"))
        self.model.add(Dense(outputs, activation='linear'))
        
        self.learning_rate = 0.001        
        self.model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        self.model.summary()

    def predict(self, s):
        s_batch = np.reshape(s, [1]+list(s.shape))
        return self.model.predict(s_batch)[0][0]

    def update(self, s, y):
        s_batch = np.reshape(s, [1]+list(s.shape))
        y_batch = np.reshape(np.array([y]), [1, self.outputs])
        self.model.fit(s_batch, y_batch, verbose=0)

    def update_batch(self, inputs, targets):
        self.model.train_on_batch(inputs, targets)

### Train an LSTM Network to predict the sin function
$f([sin(x_{t-4}+dx),...,sin(x_{t-1}+dx)])=sin(x_t)$

In [93]:
def gen_input(nsteps):
    x_ini = np.random.rand()
    x_step = np.random.rand()
    sin_seq = [np.sin(x_ini+i*x_step)  for i in range(nsteps+1)]
    s = np.array(sin_seq[:-1]) 
    s = s[:, np.newaxis]         # array (N,) needs conversion to row vector
    y = sin_seq[-1]  
    return np.array(s),y

nsteps = 15
f = NN_function(inputs=nsteps, outputs=1)
f_lstm = LSTM_function(inputs=1, outputs=1, steps=nsteps)

n = 1000
for _ in range(n):            # train the network with input-output pairs
    s,y = gen_input(nsteps)
    f.update(s, y)
    f_lstm.update(s, y)

error, error_lstm = 0, 0
for _ in range(int(n/10)):         # test the network with mean squared error (MSE)
    s,y = gen_input(nsteps)    
    error += np.power(f.predict(s)-y,2)  # MSE
    error_lstm += np.power(f_lstm.predict(s)-y,2)  # MSE
    
print("MSE errors (NN,LSTM) =", error, error_lstm)

Model: "NN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_140 (Dense)            (None, 8)                 128       
_________________________________________________________________
dense_141 (Dense)            (None, 16)                144       
_________________________________________________________________
dense_142 (Dense)            (None, 1)                 17        
Total params: 289
Trainable params: 289
Non-trainable params: 0
_________________________________________________________________
Model: "LSTM_network"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_28 (LSTM)               (None, 15)                1020      
_________________________________________________________________
dense_143 (Dense)            (None, 16)                256       
____________________________________________