In [1]:
import random
import numpy as np

from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, LSTM

# remove Deprecation Warnings
import os
import tensorflow as tf
from tensorflow.python.util import deprecation
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
deprecation._PRINT_DEPRECATION_WARNINGS = False
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

Using TensorFlow backend.


### Learning simplest test: learn to multiply by 2

In [2]:
x,y = np.arange(10), 2*np.arange(10)
print("input:",x)
print("target:", y)
w = -3.0                 # initialize the weight 
alpha = 0.01             # learning rate

input: [0 1 2 3 4 5 6 7 8 9]
target: [ 0  2  4  6  8 10 12 14 16 18]


In [3]:
i = np.random.randint(len(x))        # choose i.i.d training sample 
w += alpha*(y[i]-w*x[i])             # w = w + alpha*error  
error = np.sum(np.power(x[i]*w - y[i],2))
print("Current w:", w, "  Error:", error)

Current w: -2.75   Error: 564.0625


#### Test Phase: Samples not in the training set!

In [4]:
x = np.random.uniform(10,size=5)
y = 2*x                                 # usually DataSet is divided between training set and test set
print("Test Set", x)
error = np.sum(np.power(x*w-y,2))
print("Error on test set: ", error)

Test Set [7.56361447 8.12029168 6.42409207 5.02418941 9.36334402]
Error on test set:  6257.281820304739


### Learning simplest test with biass: learn to multiply and sum

In [5]:
x = np.arange(10)
y = 2*x+1
print("input:",x)
print("target:", y)

input: [0 1 2 3 4 5 6 7 8 9]
target: [ 1  3  5  7  9 11 13 15 17 19]


In [6]:
# one weight cannot learn biass
i = np.random.randint(len(x))     # choose i.i.d sample 
w += alpha*(y[i]-w*x[i])          # w = w + alpha*error    
error = np.sum(np.power(x*w - y,2))
print("Current w:", w, "  Error:", error)

Current w: -2.55   Error: 6319.7125


In [7]:
# lets consider two weights
w = [0.1, 3.3]  # w0 = biass, w1 = the weight
alpha = 0.01

In [8]:
def g(x,w):
    return w[1]*x + w[0]   # output computation

gsteps, error = 0, 1000000
while gsteps < 500 and error > 0.05:
    i = np.random.randint(len(x))               # choose random training sample: must be i.i.d. 
    w[0] += alpha*(y[i]-g(x[i],w))      
    w[1] += alpha*(y[i]-g(x[i],w))*x[i]         # the error in the direction of the derivative of the output wrt w[1] -> x
    error = np.sum(np.power(g(x,w) - y,2))
    print(gsteps, "w:", w, "  Error:", error)
    gsteps += 1

0 w: [0.08300000000000002, 3.26634]   Error: 360.9286935459999
1 w: [0.041516400000000016, 3.102064944]   Error: 260.26483749459794
2 w: [0.051101236000000015, 3.102064944]   Error: 261.0326980805787
3 w: [-0.016554322439999998, 2.6332119240108]   Error: 66.67418035327486
4 w: [-0.03171725617603201, 2.5731667064161132]   Error: 51.051487519166315
5 w: [-0.06725342012756072, 2.291720287920006]   Error: 7.623452896927059
6 w: [-0.06824969744308534, 2.287775029750528]   Error: 7.346294507387979
7 w: [-0.0690782016586756, 2.284494153056791]   Error: 7.123120324902607
8 w: [-0.0726121272949284, 2.2670012211573396]   Error: 6.047531301129443
9 w: [-0.08324610371456628, 2.1827801279138073]   Error: 3.4360376068640437
10 w: [-0.07241364267742062, 2.1827801279138073]   Error: 3.3807228800018665
11 w: [-0.07813971776288907, 2.1317607989022833]   Error: 3.786621777813439
12 w: [-0.07526396851939718, 2.148842749408625]   Error: 3.4717906873426054
13 w: [-0.07493032129280695, 2.1511549246888952]   

247 w: [0.41206883977668873, 2.0685110395249677]   Error: 1.169173068559173
248 w: [0.4172630409836722, 2.0736532987198815]   Error: 1.0790490929681578
249 w: [0.4223538775866366, 2.0786932269568164]   Error: 1.0105328914599156
250 w: [0.4218348806542249, 2.074582771252116]   Error: 1.0471830202694545
251 w: [0.42761653184768267, 2.074582771252116]   Error: 1.0194713552869665
252 w: [0.4333403665292058, 2.074582771252116]   Error: 0.9926953619450929
253 w: [0.43751530743887146, 2.0828491542532537]   Error: 0.926000971074326
254 w: [0.43816920510928753, 2.086733306415525]   Error: 0.9148485329554888
255 w: [0.44378751305819464, 2.086733306415525]   Error: 0.8958899166183111
256 w: [0.4484823048634574, 2.0913811503027353]   Error: 0.8857654247726572
257 w: [0.446686989790604, 2.0771622549257365]   Error: 0.9159075851834046
258 w: [0.45222011989269795, 2.0771622549257365]   Error: 0.8934080746100294
259 w: [0.45769791869377097, 2.0771622549257365]   Error: 0.8717367156738314
260 w: [0.461

383 w: [0.6248769475864395, 2.0802668213760467]   Error: 0.5334662356672127
384 w: [0.6230095006142519, 2.067325413858786]   Error: 0.42874737610151903
385 w: [0.6207201183608186, 2.0469270179806958]   Error: 0.4642809715962476
386 w: [0.6235743768175965, 2.052578449725116]   Error: 0.42357429656239404
387 w: [0.6247097105631647, 2.058198351765679]   Error: 0.4080218706047646
388 w: [0.6249707123515923, 2.059748702388939]   Error: 0.40721708899624737
389 w: [0.6239411090369612, 2.051594244137061]   Error: 0.4266405320014356
390 w: [0.6261538706224797, 2.058166146046051]   Error: 0.4047829696618049
391 w: [0.6298923319162549, 2.058166146046051]   Error: 0.3965412146042557
392 w: [0.6312667627552503, 2.0636088921684728]   Error: 0.40185383850155637
393 w: [0.6311375615975894, 2.0628414372919672]   Error: 0.3998870420035451
394 w: [0.629798870998256, 2.0522390077452477]   Error: 0.4077246611810143
395 w: [0.6308889319010111, 2.057634809213885]   Error: 0.3945064683955589
396 w: [0.6305456

## Introducing one layer Neural Network in keras (Similar to Perceptron)

In [9]:
class Perceptron_function():
    
    def __init__(self, inputs=2, outputs=1):
        self.inputs, self.outputs = inputs, outputs
        self.learning_rate = 0.001        
        self.model = Sequential(name="PerceptronNetwork")
        self.model.add(Dense(outputs, input_shape=(inputs,), activation="linear", kernel_initializer='random_uniform'))        
        self.model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        self.model.summary()

    def predict(self, s):
        s = s.flatten()         # we can discard input dimension
        s_batch = np.reshape(s, [1, s.shape[0]])
        return self.model.predict(s_batch)[0][0]

    def update(self, inputs, targets):
        inputs = inputs.flatten()
        inputs_batch = np.reshape(inputs, [1, inputs.shape[0]])
        targets_batch = np.array([targets])
        self.model.train_on_batch(inputs_batch, targets_batch)

    def update_batch(self, inputs, targets):
        self.model.train_on_batch(inputs, targets)
        
    def print_weights(self):
        w = self.model.get_weights()
        print("Network weights   w:",w[0].flatten(), "b:",w[1])

### Learn logic gates

In [25]:
def gen_input(igate=0):
    or_gate = {(0,0):0, (0,1):1, (1,0):1, (1,1):1}
    and_gate = {(0,0):0, (0,1):0, (1,0):0, (1,1):1}
    xor_gate = {(0,0):0, (0,1):1, (1,0):1, (1,1):0}
    
    if igate == 0: gate = or_gate
    elif igate == 1: gate = and_gate
    else: gate = xor_gate
    
    key = random.sample(list(gate.keys()),1)[0]
    s = np.array(key) 
    y = gate[key]
    return s,y

perceptron = Perceptron_function(inputs=2)
perceptron.print_weights()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 1)                 3         
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________
Network weights   w: [ 0.01388923 -0.02316765] b: [0.]


###### And Train

In [11]:
igate = 0 # OR Gate
for _ in range(5000):          # train the network with input-output pairs
    s,y = gen_input(igate)
    perceptron.update(s, y)

perceptron.print_weights()
print("\nTest:\n-----")
error = 0
for _ in range(10):          # test the network with mean squared error (MSE)
    s,y = gen_input(igate)
    print(s,y,perceptron.predict(s))
    error += np.power(perceptron.predict(s)-y,2)  # MSE
    
print("\nMSE error =", error)

Network weights   w: [0.47002763 0.4955667 ] b: [0.26184595]

Test:
--------
[0 1] 1 0.7574127
[1 1] 1 1.2274402
[1 1] 1 1.2274402
[0 0] 0 0.26184595
[1 1] 1 1.2274402
[1 0] 1 0.7318736
[0 0] 0 0.26184595
[1 0] 1 0.7318736
[0 0] 0 0.26184595
[1 0] 1 0.7318736

MSE error = 0.6354010404493629


### Multi Layer NN

In [22]:
class NN_function():
    
    def __init__(self, inputs=4, outputs=1):
        self.inputs, self.outputs = inputs, outputs
        self.model = Sequential(name="NN")
        self.model.add(Dense(8, input_shape=(inputs,), activation="relu"))
        self.model.add(Dense(16, activation="relu"))
        self.model.add(Dense(outputs, activation="linear"))

        self.learning_rate = 0.01        
        self.model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        self.model.summary()

    def predict(self, s):
        s = s.flatten()         # we can discard input dimension
        s_batch = np.reshape(s, [1, s.shape[0]])
        return self.model.predict(s_batch)[0][0]

    def update(self, inputs, targets):
        inputs = inputs.flatten()
        inputs_batch = np.reshape(inputs, [1, inputs.shape[0]])
        targets_batch = np.array([targets])
        self.model.train_on_batch(inputs_batch, targets_batch)
    
    def update_batch(self, inputs, targets):
        self.model.train_on_batch(inputs, targets)
        
    def print_weights(self):
        w = self.model.get_weights()
        print("w:",w[0].flatten(), "b:",w[1])

##### Learn logic gates

In [29]:
# Exercise: Train using NN_function (not the perceptron) all logic gates


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_26 (Dense)             (None, 8)                 24        
_________________________________________________________________
dense_27 (Dense)             (None, 16)                144       
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 17        
Total params: 185
Trainable params: 185
Non-trainable params: 0
_________________________________________________________________
w: [-0.5298358   0.0582848  -1.0668472  -0.31442523  0.7308842   0.34775135
  0.4854556  -0.16777332 -0.46177095  0.42184255  0.8142855  -0.3049459
 -0.8104457   0.65985906 -0.13300207 -0.20622776] b: [ 0.         -0.08586695 -0.02056406  0.         -0.08816142 -0.12429226
 -0.1035414  -0.18486941]

Test:
-----
[0 0] 0 0.0
[1 0] 1 1.0
[0 0] 0 0.0
[0 0] 0 0.0
[1 0] 1 1.0
[0 0] 0 0.0
[1 1] 0 0.0
[1 1] 0 0.0

### Train a Neural Network to Sum 
$f(x=[...]) = \sum_{i \in x}{i}$

In [14]:
# Exercise: Make a NN (use NN_function) learn to sum 4 input numbers 

### Define an LSTM Network 

In [15]:
class LSTM_function():
    
    def __init__(self, inputs=4, outputs=1, steps=10):
        self.inputs, self.outputs = inputs, outputs
        self.model = Sequential(name="LSTM_network")
        self.model.add(LSTM(steps, activation='relu', input_shape=(steps, inputs), return_sequences=False, stateful=False))
        self.model.add(Dense(16, activation="relu"))
        self.model.add(Dense(outputs, activation='linear'))
        
        self.learning_rate = 0.001        
        self.model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        self.model.summary()

    def predict(self, s):
        s_batch = np.reshape(s, [1]+list(s.shape))
        return self.model.predict(s_batch)[0][0]

    def update(self, s, y):
        s_batch = np.reshape(s, [1]+list(s.shape))
        y_batch = np.reshape(np.array([y]), [1, self.outputs])
        self.model.fit(s_batch, y_batch, verbose=0)

    def update_batch(self, inputs, targets):
        self.model.train_on_batch(inputs, targets)

### Train an LSTM Network to predict the sin function
$f([sin(x_{t-4}),...,sin(x_{t-1})])=sin(x_t)$

In [30]:
def gen_input_sum(nsteps):
    x_ini = np.random.rand()
    x_step = np.random.rand()
    sin_seq = [np.sin(x_ini+i*x_step)  for i in range(nsteps+1)]
    s = np.array(sin_seq[:-1]) 
    s = s[:, np.newaxis]         # array (N,) needs conversion to row vector
    y = sin_seq[-1]  
    return np.array(s),y

nsteps = 15
f_nn = NN_function(inputs=nsteps, outputs=1)
f_lstm = LSTM_function(inputs=1, outputs=1, steps=nsteps)

n = 1000
for _ in range(n):            # train the network with input-output pairs
    s,y = gen_input_sum(nsteps)
    f_nn.update(s, y)
    f_lstm.update(s, y)

error, error_lstm = 0, 0
for _ in range(int(n/10)):         # test the network with mean squared error (MSE)
    s,y = gen_input_sum(nsteps)    
    error += np.power(f_nn.predict(s)-y,2)  # MSE
    error_lstm += np.power(f_lstm.predict(s)-y,2)  # MSE
    
print("MSE errors (NN,LSTM) =", error, error_lstm)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_29 (Dense)             (None, 8)                 128       
_________________________________________________________________
dense_30 (Dense)             (None, 16)                144       
_________________________________________________________________
dense_31 (Dense)             (None, 1)                 17        
Total params: 289
Trainable params: 289
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 15)                1020      
_________________________________________________________________
dense_32 (Dense)             (None, 16)                256       
_________________________________________________________________
dense_33 (De