# Tensorflow learning

## 1 Basics

https://www.tensorflow.org/guide/basics

### 1.1 Tensors

In [None]:
import tensorflow as tf

In [None]:
# A 2x3 float tensor built from a nested Python list.
rows = [[1., 2., 3.],
        [4., 5., 6.]]
x = tf.constant(rows)

# Show the tensor itself, then its shape and its element type.
for item in (x, x.shape, x.dtype):
    print(item)

In [None]:
tf.nn.softmax(x, axis= -1)

In [None]:
# An empty device list is falsy, so this reports whether any GPU is visible.
gpus = tf.config.list_physical_devices('GPU')
message = ("TensorFlow **IS** using the GPU" if gpus
           else "TensorFlow **IS NOT** using the GPU")
print(message)

### 1.2 Variables

`tf.Tensor` zijn onveranderlijk <-> `tf.Variable` zijn veranderlijk = nodig voor modelgewichten

In [None]:
var = tf.Variable([0.0, 0.0, 0.0])
var.assign([1, 2, 3])

### 1.3 Automatic differentiation

Use calculus to compute the gradients!

In [None]:
x = tf.Variable(1.0) #variable since mutable

def f(x):
    """Evaluate the quadratic x**2 + 2*x - 5 at `x`."""
    return x**2 + 2*x - 5

In [None]:
f(x)

In [None]:
# The tape records the operations executed inside the `with` block.
# Trainable tf.Variables are watched automatically; a plain tensor would
# have to be registered explicitly with `tape.watch(...)`.
with tf.GradientTape() as tape:
  y = f(x)

g_x = tape.gradient(y, x)  # g(x) = dy/dx

g_x

This also works for non-scalar tensors.

### 1.4 Graphs and tf.function

In [None]:
# Separate the pure TensorFlow code from the rest!
class test(tf.Module):
  """Small module exposing a single graph-compiled reduction."""

  @tf.function  # compiles to a tf graph
  def my_func(self, x):
    """Return the sum of all elements of `x`.

    `self` is required because this is an instance method: without it,
    `test_obj.my_func(x)` passes two arguments to a one-argument function.
    """
    # Python side effect: this only prints while the function is being traced.
    print('Tracing.\n')
    return tf.reduce_sum(x)

In [None]:
x = tf.constant([1, 2, 3])
test_obj = test()
# test_obj.my_func(x)

You can export the module so that it can be used on systems that do not have Python!

In [None]:
tf.saved_model.save(test_obj, 'data/')

### 1.5 Modules, layers and models

`tf.Module` is a class for managing your `tf.Variable` objects, and the `tf.function` objects that operate on them

In [None]:
class MyModule(tf.Module):
  """Minimal module holding one variable and a graph-compiled method using it."""

  def __init__(self, value):
    # Let tf.Module set up its own state (e.g. the module name) first.
    super().__init__()
    # Stored as a tf.Variable so the value can be updated later.
    self.weight = tf.Variable(value)

  @tf.function
  def multiply(self, x):
    """Return `x * self.weight`."""
    return x * self.weight

In [None]:
mod = MyModule(3)
mod.weight.numpy()

In [None]:
mod.multiply(tf.constant([1,2,3]))

In [None]:
save_path = './data'
tf.saved_model.save(mod, save_path)

In [None]:
reloaded = tf.saved_model.load(save_path)
reloaded.multiply(tf.constant([1,2,3]))

In [None]:
reloaded.weight

### 1.6 Training loops

In [None]:
import matplotlib
from matplotlib import pyplot as plt

matplotlib.rcParams['figure.figsize'] = [9, 6]

In [None]:
x = tf.linspace(-2, 2, 201)
x

In [None]:
x = tf.cast(x, tf.float32) #from float64 to float32
x

In [None]:
def f(x):
    """Ground-truth curve: the quadratic x**2 + 2*x - 5."""
    return x**2 + 2*x - 5

# Noisy observations: the true curve plus one random-normal draw per x value.
# The noise shape is taken from x instead of repeating the hard-coded 201.
y = f(x) + tf.random.normal(shape=x.shape)

# `.numpy()` is the method that converts a tensor to a NumPy array.
plt.plot(x.numpy(), y.numpy(), '.', label='Data')
plt.plot(x, f(x), label='Ground truth')
plt.legend()

In [None]:
x.numpy()

In [None]:
class Model(tf.Module):
  """Quadratic model `w_q * x**2 + w_l * x + b` with randomly initialised parameters."""

  def __init__(self):
    # Let tf.Module set up its own state (e.g. the module name) first.
    super().__init__()
    # Randomly generate the two weights and the bias term
    rand_init = tf.random.uniform(shape=[3], minval=0., maxval=5., seed=22)
    # The parameters must be tf.Variables so the training loop can update them
    self.w_q = tf.Variable(rand_init[0])
    self.w_l = tf.Variable(rand_init[1])
    self.b = tf.Variable(rand_init[2])
    print('I am initalised')

  @tf.function  # compiles the call into a graph
  def __call__(self, x):  # lets an instance be called like a function
    """Return the model prediction for `x`."""
    # Quadratic model: quadratic_weight * x^2 + linear_weight * x + bias
    # Python side effect: this only prints while tf.function traces the call.
    print('I am being called as a function')
    return self.w_q * (x**2) + self.w_l * x + self.b
  

In [None]:
model_instance = Model()

In [None]:
y_mod = model_instance(x)
# y is a tensor

In [None]:
plt.figure()
plt.plot(x, y, '.', label='Data')
plt.plot(x, f(x), label='Ground truth')
plt.plot(x, model_instance(x), label='Predictions')
plt.title('Before training')
plt.legend()

In [None]:
def mse_loss(y_pred, y):
    """Return the mean of the squared differences between `y_pred` and `y`."""
    squared_error = tf.square(y_pred - y)
    return tf.reduce_mean(squared_error)

In [None]:
batch_size = 32 #standard batch size!
dataset = tf.data.Dataset.from_tensor_slices((x,y))
list(dataset.as_numpy_iterator()) #so all pairs of features and outputs

"For perfect shuffling, a buffer size greater than or equal to the full size of the dataset is required." Here equal to is uzsed

In [None]:
list(dataset.shuffle(buffer_size = x.shape[0]).as_numpy_iterator())

So clearly this just shuffled the dataset above!

In [None]:
dataset = dataset.shuffle(buffer_size = x.shape[0]).batch(batch_size)
batch_list = list(dataset.as_numpy_iterator())

In [None]:
batch_list[0]

Example training

In [None]:
model_instance.variables

In [None]:
epochs = 200
learning_rate = 0.01
losses = []

# Mini-batch gradient descent written out by hand
for epoch in range(epochs):
    for x_batch, y_batch in dataset:
        # Record the forward pass so the batch loss can be differentiated
        with tf.GradientTape() as tape:
            predictions = model_instance(x_batch)
            batch_loss = mse_loss(predictions, y_batch)
        # One gradient per model variable, in the same order as the variables
        variables = model_instance.variables
        gradients = tape.gradient(batch_loss, variables)
        for gradient, variable in zip(gradients, variables):
            variable.assign_sub(learning_rate * gradient)
    # Loss on the full data set, recorded once per epoch
    loss = mse_loss(model_instance(x), y)
    losses.append(loss)
    if epoch % 10 == 0:
        print(f'Mean squared error for step {epoch}: {loss.numpy():0.3f}')

# Plot how the per-epoch loss evolved during training
print("\n")
plt.plot(range(epochs), losses)
plt.xlabel("Epoch")
plt.ylabel("Mean Squared Error (MSE)")
plt.title('MSE loss vs training iterations');


In [None]:
plt.figure()
plt.plot(x, y, '.', label='Data')
plt.plot(x, f(x), label='Ground truth')
plt.plot(x, model_instance(x), label='Predictions')
plt.title('After training')
plt.legend()

A lot of these things are available through the Keras API!

- start with `tf.keras.Sequential` for sequential group of layers! https://www.tensorflow.org/api_docs/python/tf/keras/Sequential 
- `tf.keras.layers.Dense` is the standard linear regression layer: $Y = WX + \mathbf{b}$. Note that you CAN use an activation function here by specifying e.g. `activation='relu'` https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense 

Here we want both $x$ and $x^2$ as input!


In [None]:
def test_fun(x):
    """Stack `x` and `x**2` along axis 1."""
    return tf.stack([x, x**2], axis=1)


test_fun(tf.constant([3,4,5]))

In [None]:
new_model = tf.keras.Sequential([
    # Lambda wraps an arbitrary expression as a Layer object; here it stacks x and x**2.
    tf.keras.layers.Lambda(lambda x: tf.stack([x, x**2], axis=1)),
    # A single output unit (units=1), kernel initialised with tf.random.normal.
    tf.keras.layers.Dense(units=1, kernel_initializer=tf.random.normal)])

In [None]:
new_model.compile(
    optimizer = tf.keras.optimizers.SGD(learning_rate = 0.01), #SGD = Stochastic Gradient Descent
    loss = tf.keras.losses.MSE
)

history = new_model.fit(x, y,
                         epochs = 100,
                         batch_size = 32,
                         )

In [None]:
plt.plot(history.epoch, history.history['loss'])
plt.xlabel('Epoch')
plt.ylim([0, max(plt.ylim())])
plt.ylabel('Loss [Mean Squared Error]')
plt.title('Keras training progress');

## 2 Tensors

https://www.tensorflow.org/guide/tensor

Tensors may have 0 (scalar), 1 (vector), 2 (matrix), 3 or more axes.

Use `np.array(tensor)` (or `tensor.numpy()`) to convert a tensor to a NumPy array.

`.shape` voor vorm

## 3 Variables

https://www.tensorflow.org/guide/variable


## 4 Automatic differentiation

https://www.tensorflow.org/guide/autodiff

Needed for backpropagation:

Network: ${\displaystyle g(x):=f^{L}(W^{L}f^{L-1}(W^{L-1}\cdots f^{1}(W^{1}x)\cdots ))}$

Cost: ${\displaystyle C(y_{i},g(x_{i}))}$

"Backpropagation efficiently computes the gradient by avoiding duplicate calculations and not computing unnecessary intermediate values, by computing the gradient of each layer – specifically, the gradient of the weighted input of each layer, denoted by  ${\displaystyle \delta ^{l}}$ – from back to front."

*TensorFlow "records" relevant operations executed inside the context of a `tf.GradientTape` onto a "tape". TensorFlow then uses that tape to compute the gradients of a "recorded" computation using reverse mode differentiation.*

In [None]:
x = tf.Variable(3.0)

with tf.GradientTape() as tape:
  y = x**2

# dy = 2x * dx
dy_dx = tape.gradient(y, x)  # (target, source)
dy_dx.numpy() # dy/dx == 2*x == 6 at x = 3

In [None]:
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])

with tf.GradientTape() as tape:
  # Forward pass
  y = layer(x)
  loss = tf.reduce_mean(y**2)

# Calculate gradients with respect to every trainable variable
grad = tape.gradient(loss, layer.trainable_variables)
grad

In [None]:
for var, g in zip(layer.trainable_variables, grad):
  print(f'{var.name}, shape: {g.shape}')

So there is a gradient for both the bias and the $w$ values!

# Keras

## 1 The Sequential model

https://www.tensorflow.org/guide/keras/sequential_model



In [None]:
from tensorflow import keras
# Take `layers` from the Keras bundled with TensorFlow. A bare
# `from keras import layers` loads the separately installed `keras` package,
# which is not guaranteed to match the `keras` imported on the line above.
from tensorflow.keras import layers


goed voor 'plain stack', dus één input en één output tensor per laag!

In [None]:
model = keras.Sequential(
    [
    layers.Dense(2, activation = "relu", name = 'layer1'), #first number = dimensionality of the output space!
    layers.Dense(3, activation = "relu", name = 'layer2'),
    layers.Dense(4, name = 'layer3'),
    ]
)

x = tf.ones((3,3))
print(x)
y = model(x)
print(y)

Equivalently, you can also nest the layer calls (one inside the other).

In [None]:
model.layers

alternative below!

In [None]:
model_alt = keras.Sequential()
model_alt.add(layers.Dense(2, activation = 'relu'))
model_alt.add(layers.Dense(3, activation = 'relu'))
model_alt.add(layers.Dense(4))

`model.pop()` to remove a layer

In [None]:
model_alt.weights

Before the model has been given an input, it has no weights yet, because their shapes depend on the size of the input!

In [None]:
x.shape

In [None]:
y = model_alt(x)
# now the model has been built
model_alt.summary()

You can define the shape of your input in advance!

In [None]:
model = keras.Sequential()
model.add(keras.Input(shape=(4,)))
model.add(layers.Dense(2, activation="relu"))

model.summary()

In [None]:
#equivalent
model = keras.Sequential()
model.add(layers.Dense(2, activation='relu', input_shape = (4,)))
model.summary()

**In general, it's a recommended best practice to always specify the input shape of a Sequential model in advance if you know what it is.**

## 2 RNN in Keras

https://www.tensorflow.org/guide/keras/rnn

In [None]:
import numpy as np

In [None]:
model = keras.Sequential()
model.add(layers.Embedding(input_dim = 1000, output_dim = 64))
#1000 characters input vocab to output embedding

model.add(layers.LSTM(128)) #128 internal units

model.add(layers.Dense(10))

model.summary()


In [None]:
model_test = keras.Sequential()
model_test.add(layers.LSTM(5, input_shape = (100,11,)))
model_test.summary()

By default, only the output of the last timestep is returned, so the output has shape `(batch_size, units)`.
If you want the output for every timestep, set `return_sequences=True`; the output then has shape `(batch_size, timesteps, units)`.

In [None]:
model = keras.Sequential()
model.add(layers.Embedding(input_dim=1000, output_dim=64))

# The output of GRU will be a 3D tensor of shape (batch_size, timesteps, 256)
model.add(layers.GRU(256, return_sequences=True))

# The output of SimpleRNN will be a 2D tensor of shape (batch_size, 128)
model.add(layers.SimpleRNN(128))

model.add(layers.Dense(10))

model.summary()

If you also want to retrieve the (final) internal states, you have to specify this!

In [None]:
encoder_vocab = 1000
decoder_vocab = 2000

encoder_input = layers.Input(shape=(None,))
encoder_embedded = layers.Embedding(input_dim=encoder_vocab, output_dim=64)(
    encoder_input
)

# Return states in addition to output
output, state_h, state_c = layers.LSTM(64, return_state=True, name="encoder")(
    encoder_embedded
)
encoder_state = [state_h, state_c]

decoder_input = layers.Input(shape=(None,))
decoder_embedded = layers.Embedding(input_dim=decoder_vocab, output_dim=64)(
    decoder_input
)

# Pass the 2 states to a new LSTM layer, as initial state
decoder_output = layers.LSTM(64, name="decoder")(
    decoder_embedded, initial_state=encoder_state
)
output = layers.Dense(10)(decoder_output)

model = keras.Model([encoder_input, decoder_input], output)
model.summary()

An RNN cell processes one timestep at a time <=> an RNN layer by default processes the whole sequence at once!

`RNN(LSTMCell(10))` is an RNN 'for loop' around the `LSTMCell` structure!

Maybe useful: https://www.tensorflow.org/guide/keras/rnn#cross-batch_statefulness however not sure that this is the goal...

# Basic regression with Keras

https://www.tensorflow.org/tutorials/keras/regression

# Frederik Kratzert: LSTM for rainfall-runoff modelling

