In [None]:
import tensorflow as tf
import matplotlib.pylab as plt
import numpy as np

In [None]:
#Let's try to fit a sinusoid using a neural network
#Train/validation/test inputs are drawn uniformly from [0,1) and the targets are
#one period of a sine evaluated at those inputs.
#A seeded Generator is used so that Restart & Run All reproduces the same points.

rng = np.random.default_rng(0)

x = rng.uniform(0,1,(60,)).astype('float32')
y = np.sin(2.*np.pi*x)
x_val = rng.uniform(0,1,(20,)).astype('float32')
y_val = np.sin(2.*np.pi*x_val)
x_test = rng.uniform(0,1,(20,)).astype('float32')
y_test = np.sin(2.*np.pi*x_test)

plt.plot(x,y,'o')
plt.xlabel('x')
plt.ylabel('sin(2*pi*x)')
plt.title('training samples');

In [None]:
#We'll first build one from scratch
#Let's reshape our data so that it mimics more common use cases
#Usually, the first index is reserved for the sample index
#In this example, we have 60 training samples of the curve. Each sample has dimension 1
#Using -1 lets NumPy infer the number of samples, so the set sizes are not
#repeated here and the cell stays valid (and re-runnable) if they change.

x = np.reshape(x,(-1,1))
y = np.reshape(y,(-1,1))
x_val = np.reshape(x_val,(-1,1))
y_val = np.reshape(y_val,(-1,1))
x_test = np.reshape(x_test,(-1,1))
y_test = np.reshape(y_test,(-1,1))

In [None]:
#Parameters of a NN with two hidden layers, both of width 10:
#three weight matrices and three bias vectors, all initialised from tf.random.normal.
#Each weight matrix is stored as (outputs, inputs), which is the layout the
#'ij,bj->bi' contraction in NN expects.

W1 = tf.Variable(initial_value=tf.random.normal(shape=(10, 1)))
W2 = tf.Variable(initial_value=tf.random.normal(shape=(10, 10)))
W3 = tf.Variable(initial_value=tf.random.normal(shape=(1, 10)))

b1 = tf.Variable(initial_value=tf.random.normal(shape=(10,)))
b2 = tf.Variable(initial_value=tf.random.normal(shape=(10,)))
b3 = tf.Variable(initial_value=tf.random.normal(shape=(1,)))

#flat list of every trainable parameter; gradients are returned in this same order
p = [W1, W2, W3, b1, b2, b3]

In [None]:
#We'll make a function to evaluate the neural network

def NN(x):
    """Evaluate the two-hidden-layer network on a batch of inputs.

    x is indexed as (sample, feature). Each layer contracts the feature index
    of its input with the second index of its weight matrix and adds the bias;
    the two hidden layers apply ELU, the output layer is linear.
    Reads the module-level parameters W1, W2, W3, b1, b2, b3.
    """
    contraction = 'ij,bj->bi'
    hidden1 = tf.nn.elu(tf.einsum(contraction, W1, x) + b1)
    hidden2 = tf.nn.elu(tf.einsum(contraction, W2, hidden1) + b2)
    return tf.einsum(contraction, W3, hidden2) + b3

#at initialization we get
#(labelled so the target and the untrained prediction can be told apart)
plt.plot(x_test,y_test,'o',label='target')
plt.plot(x_test,NN(x_test),'o',label='NN (untrained)')
plt.xlabel('x')
plt.ylabel('y')
plt.legend();

In [None]:
#Defining a loss function

def loss(x,y):
    """Mean of the squared difference between NN(x) and y."""
    residual = NN(x) - y
    return tf.reduce_mean(residual**2)

#loss on the training set before any update
loss(x, y)

In [None]:
#We can compute the gradient of the parameters using backprop

def grad(x,y):
    """Return the gradient of loss(x, y) with respect to every entry of p.

    The result is a list aligned with p (same order, one gradient per parameter).
    """
    with tf.GradientTape() as recorder:
        recorder.watch(p)
        batch_loss = loss(x, y)
    gradients = recorder.gradient(batch_loss, p)
    return gradients

In [None]:
#define a function to update the parameters using sgd
#alpha is the step size
alpha=.02
@tf.function
def update(x,y):
    """Apply one gradient-descent step to every parameter in p using batch (x, y).

    NOTE(review): alpha is a plain Python float read from module scope; it is
    presumably fixed when tf.function traces this function -- confirm before
    changing alpha between calls.
    """
    gradients = grad(x, y)
    for param, g in zip(p, gradients):
        param.assign(param - alpha*g)

In [None]:
#update using minibatching. tracking validation loss

batch_size=10
#number of minibatches is derived from the training set itself; a hard-coded
#100//10 would produce slices past the end of the 60 samples, i.e. empty batches
batches = len(x)//batch_size
epochs = 200

#validation loss recorded once per epoch
loss_val = []
for _ in range(epochs):
    for i in range(batches):
        update(x[i*batch_size:(i+1)*batch_size],y[i*batch_size:(i+1)*batch_size])
    loss_val.append(loss(x_val,y_val).numpy())

In [None]:
#left: target vs. prediction on the test points; right: validation loss per epoch
fig,ax = plt.subplots(1,2,figsize=(8,5))
ax[0].plot(x_test,y_test,'o',label='target')
ax[0].plot(x_test,NN(x_test),'o',label='NN (SGD)')
ax[0].set(xlabel='x',ylabel='y',title='fit on test points')
ax[0].legend()
#epochs are numbered from 1: an x value of 0 cannot be placed on a log axis
ax[1].loglog(np.arange(1,len(loss_val)+1),loss_val)
ax[1].set(xlabel='epoch',ylabel='validation loss')
fig.tight_layout()
print('test loss: ',loss(x_test,y_test).numpy())

In [None]:
#define a function to update the parameters using Adam
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)
@tf.function
def update_adam(x,y):
    """Apply one Adam step to the parameters in p using batch (x, y)."""
    grads_and_params = zip(grad(x, y), p)
    opt.apply_gradients(grads_and_params)

In [None]:
#update using minibatching. tracking validation loss

batch_size=10
#number of minibatches is derived from the training set itself; a hard-coded
#100//10 would produce slices past the end of the 60 samples, and Adam would
#still take a step on each of those empty batches
batches = len(x)//batch_size
epochs = 200

#validation loss recorded once per epoch
loss_val = []
for _ in range(epochs):
    for i in range(batches):
        update_adam(x[i*batch_size:(i+1)*batch_size],y[i*batch_size:(i+1)*batch_size])
    loss_val.append(loss(x_val,y_val).numpy())

In [None]:
#left: target vs. prediction on the test points; right: validation loss per epoch
fig,ax = plt.subplots(1,2,figsize=(8,5))
ax[0].plot(x_test,y_test,'o',label='target')
ax[0].plot(x_test,NN(x_test),'o',label='NN (Adam)')
ax[0].set(xlabel='x',ylabel='y',title='fit on test points')
ax[0].legend()
#epochs are numbered from 1: an x value of 0 cannot be placed on a log axis
ax[1].loglog(np.arange(1,len(loss_val)+1),loss_val)
ax[1].set(xlabel='epoch',ylabel='validation loss')
fig.tight_layout()
print('test loss: ',loss(x_test,y_test).numpy())

In [None]:
#Now let's try out the Keras library
#The same architecture as above: an Input for `x` (one feature per sample),
#two hidden Dense layers of 10 ELU units, and a linear 1-unit output layer

Layer0 = tf.keras.layers.Input(shape=(1,))
Layer1 = tf.keras.layers.Dense(units=10, activation='elu')
Layer2 = tf.keras.layers.Dense(units=10, activation='elu')
Layer3 = tf.keras.layers.Dense(units=1)

In [None]:
#We combine these into a model by stacking them in order

model = tf.keras.models.Sequential()
model.add(Layer0)
model.add(Layer1)
model.add(Layer2)
model.add(Layer3)
model.summary()

In [None]:
#Training configuration for the Keras model: MSE loss, Adam optimizer, RMSE reported as a metric
#NOTE: `loss` and `opt` are rebound here. From this cell on, `loss` is
#tf.keras.losses.mse rather than the from-scratch loss(x, y) helper defined earlier.
loss = tf.keras.losses.mse
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)
metrics = [tf.keras.metrics.RootMeanSquaredError()]
batch_size = 10
model.compile(optimizer=opt, loss=loss, metrics=metrics)

In [None]:
#train with Keras' built-in loop, evaluating on the validation set after every epoch
model.fit(
    x,
    y,
    batch_size=10,
    epochs=200,
    validation_data=(x_val, y_val),
)

In [None]:
#left: validation RMSE recorded by model.fit; right: target vs. prediction on the test points
val_rmse = model.history.history['val_root_mean_squared_error']
fig,ax = plt.subplots(1,2,figsize=(8,5))
#epochs are numbered from 1: an x value of 0 cannot be placed on a log axis
ax[0].loglog(np.arange(1,len(val_rmse)+1),val_rmse)
ax[0].set(xlabel='epoch',ylabel='validation RMSE')
ax[1].plot(x_test,y_test,'o',label='target')
ax[1].plot(x_test,model(x_test),'o',label='Keras model')
ax[1].set(xlabel='x',ylabel='y',title='fit on test points')
ax[1].legend()
fig.tight_layout();

In [None]:
#we can also mix and match Keras with the low level interface
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)
@tf.function
def update_model(x,y):
    """Apply one Adam step to the Keras model's trainable variables using batch (x, y).

    `loss` is whatever the module-level name refers to when this runs; in
    notebook order that is tf.keras.losses.mse, called on (model(x), y).
    NOTE(review): that loss is presumably one value per sample, in which case
    the tape differentiates their sum -- confirm if the gradient scale matters.
    """
    weights = model.trainable_variables
    with tf.GradientTape() as recorder:
        recorder.watch(weights)
        batch_loss = loss(model(x), y)
    gradients = recorder.gradient(batch_loss, weights)
    opt.apply_gradients(zip(gradients, weights))

#update using minibatching. tracking validation loss
batch_size=10
#number of minibatches is derived from the training set itself; a hard-coded
#100//10 would produce slices past the end of the 60 samples, and the optimizer
#would still take a step on each of those empty batches
batches = len(x)//batch_size
epochs = 200

#summed validation loss recorded once per epoch
loss_val = []
for _ in range(epochs):
    for i in range(batches):
        update_model(x[i*batch_size:(i+1)*batch_size],y[i*batch_size:(i+1)*batch_size])
    loss_val.append(tf.reduce_sum(loss(model(x_val),y_val)).numpy())

#left: target vs. prediction on the test points; right: summed validation loss per epoch
fig,ax = plt.subplots(1,2,figsize=(8,5))
ax[0].plot(x_test,y_test,'o',label='target')
ax[0].plot(x_test,model(x_test),'o',label='Keras model')
ax[0].set(xlabel='x',ylabel='y',title='fit on test points')
ax[0].legend()
#epochs are numbered from 1: an x value of 0 cannot be placed on a log axis
ax[1].loglog(np.arange(1,len(loss_val)+1),loss_val)
ax[1].set(xlabel='epoch',ylabel='validation loss (sum)')
fig.tight_layout()
#`loss` was rebound to tf.keras.losses.mse in the compile cell, so it must be given the model's
#predictions (not the raw inputs) and reduced to a single number to be a test loss
print('test loss: ',tf.reduce_mean(loss(model(x_test),y_test)).numpy())

In [None]:
#Subclassing keras.Model lets you have more flexibility than tf.keras.models.Sequential

class SkipLayer(tf.keras.Model):
    """Model with two parallel Dense branches that are summed before the output.

    The input passes through Layer1; that result feeds both Layer2 and Layer4,
    the two outputs are added, and Layer3 maps the sum to a single unit.
    """

    def __init__(self,width):
        super().__init__()
        # number of units in every Dense layer except the 1-unit output layer
        self.width=width

    def build(self,input_shape):
        # input_shape is not used here; the Dense layers are only constructed
        Dense = tf.keras.layers.Dense
        self.Layer1 = Dense(self.width,activation='elu')
        self.Layer2 = Dense(self.width,activation='elu')
        self.Layer3 = Dense(1)
        self.Layer4 = Dense(self.width,activation='elu')

    def call(self,x):
        trunk = self.Layer1(x)
        branch_a = self.Layer2(trunk)
        branch_b = self.Layer4(trunk)
        return self.Layer3(branch_a + branch_b)
        

#instantiate with width 4 and call it once on the training inputs
#before printing the summary
model = SkipLayer(width=4)
model(x)
model.summary()

In [None]:
#You can also subclass a Layer

class Residual(tf.keras.layers.Layer):
    """Residual block: returns inputs + activation(inputs @ w + b).

    The weight matrix is square (last input dimension on both sides), so the
    transformed features have the same shape as the input and can be added
    back onto it.

    Args:
        activation: callable applied to the affine transform, e.g. tf.nn.elu.
        **kwargs: forwarded to tf.keras.layers.Layer (for example `name`).
    """

    def __init__(self,activation,**kwargs):
        super().__init__(**kwargs)
        self.activation = activation

    def build(self, input_shape):
        # feature count is taken from the last axis of the input
        features = input_shape[-1]
        self.w = self.add_weight(
            shape=(features, features),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(features,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        # skip connection: the input is added back onto the transformed features,
        # which is what makes this layer residual rather than a plain dense layer
        return inputs + self.activation(tf.matmul(inputs, self.w) + self.b)

#build the layer with an ELU activation and apply it to the training inputs
res = Residual(activation=tf.nn.elu)

res(x)