# TensorFlow for ICT Applications

## Lab session 3: Neural networks

**Content for this lab session:**

1. We analyze a simple medical scenario for predictive diagnostics.
2. How to use tf.data to load the dataset.
3. How to build models with tf.keras.layers.
4. How to train and optimize these models.
5. How to use the high-level Keras training interface.

In [None]:
# Get the dataset from here (wdbc.data):
# https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29

### COLAB ONLY: Download the dataset

In [None]:
# IPython shell escape: fetch wdbc.data into the current working directory
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data

### Load and preprocess the dataset

In [None]:
# Pandas is a great library to load and process text data
import pandas as pd
# header=None: the first line is read as data and columns are labelled 0, 1, 2, ...
df = pd.read_csv('wdbc.data', header=None)

In [None]:
# Inspect the first rows (head() shows the first 5 by default)
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# Separate targets and inputs.
# Column 1 holds the class label as a string (it is compared against 'B' when
# the targets are binarized), so it must not be cast to float here.
# Columns 2 onwards are the numeric input features.
import numpy as np
X = df.values[:, 2:].astype(np.float64)
y = df.values[:, 1]

In [None]:
# Normalize data: standardize each feature column to zero mean and unit variance
# NOTE(review): the statistics are computed on the full dataset, i.e. before the
# train/test split below, so held-out rows influence the scaling - confirm this
# is acceptable for the lab.
from sklearn import preprocessing
X = preprocessing.scale(X)

In [None]:
# Encode the labels as a float column vector of shape (n, 1):
# 1.0 where the label equals 'B', 0.0 everywhere else
y_num = (y == 'B').astype(np.float64).reshape(-1, 1)

In [None]:
# Get a test split of the dataset (25% of the rows are held out).
# Split the numeric targets (y_num), not the raw labels in y: ytrain/ytest are
# later wrapped in a tf.data pipeline and passed to the binary cross-entropy
# loss, which both need numbers.
# Stratifying keeps the class proportions the same in both splits.
from sklearn import model_selection
Xtrain, Xtest, ytrain, ytest = model_selection.train_test_split(
    X, y_num, test_size=0.25, stratify=y_num)

In [None]:
# Sanity check: count the NaN entries in the feature matrix (expected result: 0)
import numpy as np
np.isnan(X).sum()

### Loading data with tf.data

In [None]:
import tensorflow as tf

In [None]:
# tf.data has many ways of loading data, this one is good for tensors:
# the arrays are sliced along their first axis, so each dataset element is
# one (input row, target) pair
train_dataset = tf.data.Dataset.from_tensor_slices((Xtrain, ytrain))

In [None]:
# Peek at the first mini-batch only: print the shape of a batch of 4 inputs,
# then stop (the `break` prevents a full pass over the dataset)
for xb, yb in train_dataset.batch(4):
    print(xb.shape)
    break

### Defining a model with tf.keras.layers

In [None]:
# Logistic regression can be modeled as a single layer:
# one output unit (a linear combination of the inputs plus a bias) followed
# by a sigmoid, which squashes the result into (0, 1)
model = tf.keras.layers.Dense(1, activation='sigmoid')

### Defining an optimizer

In [None]:
# Define an optimizer and loss
# Plain stochastic gradient descent with a fixed learning rate of 0.1
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
# Binary cross-entropy; it is called as loss(targets, predictions) in the
# training loop and its output is averaged there with tf.reduce_mean
loss = tf.losses.binary_crossentropy

### Main train loop

In [None]:
# Number of full passes over the training set
epochs = 250

In [None]:
# Loss value of every mini-batch, appended during training and plotted afterwards
loss_history = []

In [None]:
# Main training loop: `epochs` full passes over the training set
for epoch in range(epochs):
    
    # The pipeline is rebuilt on every epoch: shuffle (buffer of 1000 elements),
    # then group into mini-batches of 32
    for xb, yb in train_dataset.shuffle(1000).batch(32):
        
        # Loss computation (done inside the tape so it can be differentiated)
        with tf.GradientTape() as tape:
            ypred = model(xb)
            # Average the loss values of the batch into a single scalar
            l = tf.reduce_mean(loss(yb, ypred))
        
        # Keep the scalar batch loss as a NumPy value for plotting later
        loss_history.append(l.numpy())
        
        # Gradient computation (w.r.t. the layer's trainable variables)
        grad = tape.gradient(l, model.trainable_variables)
        
        # Optimization: one SGD step, pairing each gradient with its variable
        opt.apply_gradients(zip(grad, model.trainable_variables))

In [None]:
# Plot the training loss; the x-axis is the mini-batch update index
# (one entry is appended to loss_history per batch, not per epoch)
import matplotlib.pyplot as plt
plt.plot(loss_history)
plt.show()