# Introduction to Neural Network Regression with Tensorflow

There are many definitions for a regression problem but in our case we are going to simplify it: predicting numerical variable based on some othere combination of variables.

In [1]:
%load_ext tensorboard

In [2]:
import tensorflow as tf

In [3]:
from gpu import print_gpu_info
print_gpu_info()

Tensorflow version: 2.10.0
Tensorflow GPU support: True
GPU device(s): [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Creating data to view and fit

In [4]:
import numpy as np
import matplotlib.pyplot as plt

# Create features
X = np.array([-7.0, -4.0, -1.0, 2.0, 5.0, 8.0, 11.0, 14.0])

# Create labels
y = np.array([3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0])

# Visualize it
plt.scatter(X, y)

<matplotlib.collections.PathCollection at 0x7f9f14fe85b0>

## Input and Ouput Shapes

In [5]:
# Create a demo tensor for housing price prediction problem
house_info = tf.constant(["bedroom", "bathroom", "garage"])
house_price = tf.constant([939700])

house_info, house_price

(<tf.Tensor: shape=(3,), dtype=string, numpy=array([b'bedroom', b'bathroom', b'garage'], dtype=object)>,
 <tf.Tensor: shape=(1,), dtype=int32, numpy=array([939700], dtype=int32)>)

In [6]:
input_shape = X[0].shape
output_shape = y[0].shape
input_shape, output_shape

((), ())

In [7]:
# Turn our np arrays into tensors
X = tf.constant(tf.cast(X, dtype=tf.float32))
y = tf.constant(tf.cast(y, dtype=tf.float32))
X.shape, y.shape

(TensorShape([8]), TensorShape([8]))

## Steps in modeling with Tensorflow

1. **Creating a model** - define the input and output layers, as well as the hidden layers of a deep learning model
2. **Compile a model** - define the loss function - the function wich tells our model how wrong it is - and the optimizer, as well as evaluation metrics
3. **Fitting a model** - letting a model try to find patterns between X and y (features and labels)

In [8]:
tf.random.set_seed(42)

# 1. Create a model using sequential API
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
model.compile(
    loss=tf.keras.losses.mae, # mae = mean absolute error
    optimizer=tf.keras.optimizers.SGD(),
    metrics=["mae"]
)

# 3. fit the model
model.fit(X, y, epochs=5)

Epoch 1/5


ValueError: in user code:

    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/input_spec.py", line 250, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential" "                 f"(type Sequential).
    
    Input 0 of layer "dense" is incompatible with the layer: expected min_ndim=2, found ndim=1. Full shape received: (None,)
    
    Call arguments received by layer "sequential" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None,), dtype=float32)
      • training=True
      • mask=None


In [None]:
# Try to make a prediction with this model
y_pred = model.predict([17.0])
y_pred

## Improving the model

Because right now it sucks

In [None]:
# 1. Create a model using sequential API
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, activation=None),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
model.compile(
    loss=tf.keras.losses.mae, # mae = mean absolute error
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=["mae"]
)

# 3. fit the model
model.fit(X, y, epochs=100, verbose=0)

In [None]:
# Try to make a prediction with this model
y_pred = model.predict([17.0])
y_pred

## Evaluating a model


In [None]:
X = tf.range(-100, 100, 4, dtype=tf.float32)

y = X + 10.0
y = y + tf.random.uniform(y.shape, 0, 10, dtype=tf.float32)
plt.scatter(X, y)

## The three sets

* **Training set**: The data to train the model
* **Validation set**: The data to tune the model
* **Test set**: The data to evaluate the model

Proportions are usually 70% - 15% - 15% (or 70/30 if you skip the validation set)

In [None]:
# Split the data into train and test sets
X_train = X[:40] # first 40 samples
y_train = y[:40]

X_test = X[40:] # last 10 samples
y_test = y[40:]

In [None]:
# Visualize train data
plt.figure(figsize=(10, 7))
plt.scatter(X_train, y_train, c="b", label="Training data")
plt.scatter(X_test, y_test, c="g", label="Testing data")
plt.legend();

In [None]:
# Build a neural network to model the data
# import datetime
tf.random.set_seed(42)

# 1. Create a model using sequential API
model = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(1, name="output_layer"),
], name="linear_regression_model")

# 2. Compile the model
model.compile(
    loss=tf.keras.losses.mae, # mae = mean absolute error
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=["mae"]
)

# log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# # 3. fit the model
model.fit(X_train, y_train, epochs=500, callbacks=[], verbose=0)

## Visualizing the model

In [None]:
model.summary()

* Total params - total number of parameters in the model
* Trainable params - these are the parameters (patterns) the model can update as it trains
* Non trainable params - parameters that are not updated during training (typical when bringin in already learn patterns ot parameters from other models during **transfer learning**)

In [None]:
from tensorflow.keras.utils import plot_model

plot_model(model, show_shapes=True)

## Visualizing model predictions

In [None]:
# Make some predictions
y_pred = model.predict(X_test)
y_pred

In [None]:
def plot_predictions(train_data=X_train,
                     train_labels=y_train,
                     test_data=X_test,
                     test_labels=y_test,
                     predictions=y_pred):
    """
    Plots training, test and predictions
    """
    plt.figure(figsize=(10, 7))
    plt.scatter(train_data, train_labels, c="b", label="Training data")
    plt.scatter(test_data, test_labels, c="g", label="Testing data")
    plt.scatter(test_data, predictions, c="r", label="Predicted data")
    plt.legend();

In [None]:
plot_predictions()

## Evaluating model predictions with regression evaluation metrics

In [None]:
model.evaluate(X_test, y_test)

In [None]:
# Calculate the mean absolute error
mae = tf.metrics.mean_absolute_error(y_true=y_test, y_pred=tf.constant(y_pred, shape=(10,)))
mae

In [None]:
mse = tf.metrics.mean_squared_error(y_true=y_test, y_pred=tf.constant(y_pred, shape=(10,)))
mse

In [None]:
# Tensorboard?
# %tensorboard --logdir logs/fit

### Saving models

In [None]:
model.save("saved_models/model1") # use .h5 for h5 format

## A larger example

In [None]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
insurances = pd.read_csv("./datasets/insurance.csv")
insurances.head(5)

### Numerical encoding
We need to encode the data in order to pass it to a dnn

In [None]:
insurances_one_hot = pd.get_dummies(insurances)
insurances_one_hot.head(5)

In [None]:
X = insurances_one_hot.drop("charges", axis=1)
y = insurances_one_hot["charges"]
X.head(1), y.head(1)

In [None]:
# Create train data

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, train_size=0.80, random_state=42)
len(X), len(X_train), len(X_test)

In [None]:
# Build a dnn
tf.random.set_seed(42)

insurance_model = tf.keras.Sequential([
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

insurance_model.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.SGD(),
    metrics=["mae"]
)

insurance_model.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
# Check the results of the model against test data
insurance_model.evaluate(X_test, y_test)

In [None]:
tf.reduce_mean(y_train)

### Improve the model

1. Add an extra layer
2. Train for longer
3. ...

In [None]:
insurance_model_2 = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

insurance_model_2.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["mae"]
)

insurance_model_2.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
insurance_model_2.evaluate(X_test, y_test)

In [None]:
insurance_model_3 = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

insurance_model_3.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["mae"]
)

history = insurance_model_3.fit(X_train, y_train, epochs=200, verbose=0)

In [None]:
insurance_model_3.evaluate(X_test, y_test)

In [None]:
# Plot history (the loss/training curve)
pd.DataFrame(history.history).plot()
plt.ylabel("loss")
plt.xlabel("epochs")

### Preprocessing data (normalization and standardization)

In terms of scaling values neural networks tend to prefer normalization.

If you're not sure on which to use, you could try both and see wich performs better.

In [None]:
insurance = pd.read_csv("./datasets/insurance.csv")
insurance.head()

Use sklearn to prepare the data

In [None]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

# Create a column transformer
ct = make_column_transformer(
    (MinMaxScaler(), ["age", "bmi", "children"]), # turn these columns in values from 0 to 1
    (OneHotEncoder(handle_unknown="ignore"), ["sex", "smoker", "region"])
)

# Create X and y
X = insurance.drop("charges", axis=1)
y = insurance["charges"]

# Split the sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, train_size=0.80, random_state=42)
len(X), len(X_train), len(X_test)

# Fit the column transformer to train our data
ct.fit(X_train)

# Transform trainig and test data with normalization (MinMaxScaler and OneHotEncoder)
X_train_normal = ct.transform(X_train)
X_test_normal = ct.transform(X_test)

In [None]:
# What does our data look like now?
X_train.loc[0], X_train_normal[0], X_train.shape, X_train_normal.shape

In [None]:
from plot import plot_loss_curve

tf.random.set_seed(42)

# Create the model
insurance_model_4 = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

# Compile
insurance_model_4.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["mae"]
)

halt_when_no_longer_learning = tf.keras.callbacks.EarlyStopping(
    monitor="loss", min_delta=5, patience=5, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

# Fit
history = insurance_model_4.fit(
    X_train_normal, y_train,
    epochs=200, verbose=0,
    callbacks=[halt_when_no_longer_learning]
)

plot_loss_curve(history)

In [None]:
insurance_model_4.evaluate(X_test_normal, y_test)

In [None]:
X_test_normal[0], insurance_model_4.predict(X_test_normal)[0]
# X_test_normal[0].shape

In [None]:
y_test.head(1)