In [None]:
import tensorflow as tf
print(tf.__version__)

# Data Pipeline

 ## Coding tutorials
 #### [1. Keras datasets](#coding_tutorial_1)
 #### [2. Dataset generators](#coding_tutorial_2)
 #### [3. Keras image data augmentation](#coding_tutorial_3)
 #### [4. The Dataset class](#coding_tutorial_4)
 #### [5. Training with Datasets](#coding_tutorial_5)

***
<a id="coding_tutorial_1"></a>
## Keras datasets

For a list of Keras datasets and documentation on recommended usage, see [this link](https://keras.io/datasets/).

In [None]:
import numpy as np
import matplotlib.pyplot as plt

#### Load the CIFAR-100 Dataset

In [None]:
from tensorflow.keras.datasets import cifar100

In [None]:
# Load the CIFAR-100 dataset

(train_images, train_labels), (test_images, test_labels) = cifar100.load_data(label_mode = 'fine')

In [None]:
# Confirm that reloading the dataset does not require a download

for i in train_images, train_labels, test_images, test_labels:
    print(i.shape)

#### Examine the Dataset

#### Import the data

The additional files required for this tutorial can be downloaded from the following link:

cifar100_fine_labels: https://drive.google.com/open?id=1WFW1cj8v_5z1pGvq6htQyFUPrJP-Z2v5

cifar100_coarse_labels: https://drive.google.com/open?id=1Jmt7o-6sP85D7iRORk5tJqJMN3wCP12p

You should store these files in Drive for use in this Colab notebook.

In [None]:
# Run this cell to connect to your Drive folder

from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Examine the shape of the data.


In [None]:
# Examine one of the images and its corresponding label

plt.imshow(train_images[500])
print(train_labels[500])

In [None]:
# Load the list of labels from a JSON file

import json

with open('data/cifar100_fine_labels.json', 'r') as fine_labels:
    cifar100_fine_labels = json.load(fine_labels)

The list of labels for the CIFAR-100 dataset are available [here](https://www.cs.toronto.edu/~kriz/cifar.html).

In [None]:
# Print a few of the labels

cifar100_fine_labels[:10]

In [None]:
# Print the corresponding label for the example above

cifar100_fine_labels[41]

#### Load the data using different label modes

In [None]:
# Display a few examples from category 87 (index 86) and the list of labels

examples = train_images[(train_labels.T == 86)[0]][:3]
fig, ax = plt.subplots(1,3)
ax[0].imshow(examples[0])
ax[1].imshow(examples[1])
ax[2].imshow(examples[2])

In [None]:
# Reload the data using the 'coarse' label mode


(train_images, train_labels), (test_images, test_labels) = cifar100.load_data(label_mode = 'coarse')

In [None]:
# Display three images from the dataset with the label 6 (index 5)

examples = train_images[(train_labels.T == 5)[0]][:3]
fig, ax = plt.subplots(1,3)
ax[0].imshow(examples[0])
ax[1].imshow(examples[1])
ax[2].imshow(examples[2])

In [None]:
# Load the list of coarse labels from a JSON file

with open('data/cifar100_coarse_labels.json', 'r') as coarse_labels:
    cifar100_coarse_labels = json.load(coarse_labels)

In [None]:
# Print a few of the labels

cifar100_coarse_labels[:10]

In [None]:
# Print the corresponding label for the example above

print(cifar100_fine_labels)
cifar100_coarse_labels[5]

#### Load the IMDB Dataset

In [None]:
from tensorflow.keras.datasets import imdb

In [None]:
# Load the IMDB dataset

(train_data, train_labels), (test_data, test_labels) = imdb.load_data()

In [None]:
# Print an example from the training dataset, along with its corresponding label

print(train_data[0])
print(train_labels[0])

In [None]:
# Get the lengths of the input sequences

sequence_lengths = [len(seq) for seq in train_data]

In [None]:
# Determine the maximum and minimum sequence length

print(np.max(sequence_lengths))
print(np.min(sequence_lengths))

#### Using Keyword Arguments

In [None]:
# Load the data ignoring the 50 most frequent words, use oov_char=2 (this is the default)

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(skip_top = 50, oov_char = 2)

In [None]:
# Get the lengths of the input sequences

sequence_lengths = [len(seq) for seq in train_data]

In [None]:
# Determine the maximum and minimum sequence length

print(np.max(sequence_lengths))
print(np.min(sequence_lengths))

In [None]:
# Define functions for filtering the sequences

def remove_oov_char(element):
    ''' Filter function for removing the oov_char. '''
    return [word for word in element if word!=2]

def filter_list(lst):
    ''' Run remove_oov_char on elements in a list. '''
    return [remove_oov_char(element) for element in lst]

In [None]:
# Remove the oov_char from the sequences using the filter_list function

train_data = filter_list(train_data)

In [None]:
# Get the lengths of the input sequences

sequence_lengths = [len(seq) for seq in train_data]

In [None]:
# Determine the maximum and minimum sequence length

print(np.max(sequence_lengths))
print(np.min(sequence_lengths))

***
<a id="coding_tutorial_2"></a>
## Dataset generators

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#### Load the UCI Fertility Dataset

We will be using a dataset available at https://archive.ics.uci.edu/ml/datasets/Fertility from UC Irvine.

#### Import the data

The dataset required for this tutorial can be downloaded from the following link:

https://drive.google.com/open?id=1OA0lwa5YLDs1njS377jbqPpMSlH5TzQV

You should store this file in Drive for use in this Colab notebook.

In [None]:
# Run this cell to connect to your Drive folder

from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Load the fertility dataset

headers = ['Season', 'Age', 'Diseases', 'Trauma', 'Surgery', 'Fever', 'Alcohol', 'Smoking', 'Sitting', 'Output']
fertility = pd.read_csv('data/fertility_diagnosis.txt', delimiter=',', header=None, names=headers)

In [None]:
# Print the shape of the DataFrame

fertility.shape

In [None]:
# Show the head of the DataFrame

fertility.head()

#### Process the data

In [None]:
# Map the 'Output' feature from 'N' to 0 and from 'O' to 1

fertility['Output'] = fertility['Output'].map(lambda x : 0.0 if x=='N' else 1.0)

In [None]:
# Show the head of the DataFrame

fertility.head()

In [None]:
# Convert the DataFrame so that the features are mapped to floats

fertility = fertility.astype('float32')

In [None]:
# Shuffle the DataFrame

fertility = fertility.sample(frac=1).reset_index(drop=True)

In [None]:
# Show the head of the DataFrame

fertility.head()

In [None]:
# Convert the field Season to a one-hot encoded vector

fertility = pd.get_dummies(fertility, prefix='Season', columns=['Season'])

In [None]:
# Show the head of the DataFrame

fertility.head()

In [None]:
# Move the Output column such that it is the last column in the DataFrame

fertility.columns = [col for col in fertility.columns if col != 'Output'] + ['Output']

In [None]:
# Show the head of the DataFrame

fertility.head()

In [None]:
# Convert the DataFrame to a numpy array.

fertility = fertility.to_numpy()


In [None]:
fertility

#### Split the Data

In [None]:
# Split the dataset into training and validation set

training = fertility[0:70]
validation = fertility[70:100]

In [None]:
# Verify the shape of the training data

print(training.shape, validation.shape)

In [None]:
# Separate the features and labels for the validation and training data

training_features = training[:,0:-1]
training_labels = training[:,-1]
validation_features = validation[:,0:-1]
validation_labels = validation[:,-1]

#### Create the Generator

In [None]:
# Create a function that returns a generator producing inputs and labels

def get_generator(features, labels, batch_size=1):
        for n in range(int(len(features)/batch_size)):
            yield (features[n*batch_size: (n+1)*batch_size], labels[n*batch_size: (n+1)*batch_size])

In [None]:
# Apply the function to our training features and labels with a batch size of 10

train_generator = get_generator(training_features, training_labels, batch_size=10)

In [None]:
# Test the generator using the next() function

next(train_generator)

#### Build the model

In [None]:
# Create a model using Keras with 3 layers

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, BatchNormalization

input_shape = (12,)
output_shape = (1,)

model_input = Input(input_shape)
batch_1 = BatchNormalization(momentum=0.8)(model_input)
dense_1 = Dense(100, activation='relu')(batch_1)
batch_2 = BatchNormalization(momentum=0.8)(dense_1)
output = Dense(1, activation='sigmoid', kernel_regularizer = 'l1')(batch_2)

model = Model([model_input], output)

In [None]:
# Display the model summary to show the resultant structure

model.summary()

#### Compile the model

In [None]:
# Create the optimizer object

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)

In [None]:
# Compile the model with loss function and metric

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

#### Train and evaluate the model using the generator

In [None]:
# Calculate the number of training steps per epoch for the given batch size.

batch_size = 5
train_steps = len(training) // batch_size

In [None]:
# Set the epochs to 3

epochs = 3

In [None]:
# Train the model

"""
training_features = training[:,0:-1]
training_labels = training[:,-1]
validation_features = validation[:,0:-1]
validation_labels = validation[:,-1]
"""
for epoch in range(epochs):
    train_generator = get_generator(training_features, training_labels, batch_size=batch_size)
    validation_generator = get_generator(validation_features, validation_labels, batch_size=batch_size)
    model.fit_generator(train_generator, steps_per_epoch=train_steps, validation_data = validation_generator, validation_steps = 1)

In [None]:
# Try to run the fit_generator function once more; observe what happens

model.fit_generator(train_generator, steps_per_epoch=train_steps)

In [None]:
model.layers[-1].get_weights()[0]

#### Make an infinitely looping generator

In [None]:
# Create a function that returns an infinitely looping generator

def get_generator_cyclic(features, labels, batch_size=1):
    # Add in while True to make it cyclic, infinitely generating
    while True:
        for n in range(int(len(features)/batch_size)):
            yield (features[n*batch_size: (n+1)*batch_size], labels[n*batch_size: (n+1)*batch_size])
    # After yielding, the generator will permute the data
    permuted = np.random.permutation(len(features))
    features = features[permuted]
    labels = labels[permuted]

In [None]:
# Create a generator using this function.

train_generator_cyclic = get_generator_cyclic(training_features, training_labels, batch_size=batch_size)

In [None]:
# Assert that the new cyclic generator does not raise a StopIteration

for i in range(2*train_steps):
    next(train_generator_cyclic)

In [None]:
# Generate a cyclic validation generator

validation_generator_cyclic = get_generator_cyclic(validation_features, validation_labels, batch_size=batch_size)

In [None]:
# Train the model

model.fit_generator(train_generator_cyclic, steps_per_epoch=train_steps, validation_data = validation_generator_cyclic,
                    validation_steps = 1, epochs = 20)

#### Evaluate the model and get predictions

In [None]:
# Let's obtain a validation data generator.

validation_generator = get_generator(validation_features, validation_labels, batch_size=30)

In [None]:
# Get predictions on the validation data

predictions = model.predict_generator(validation_generator, steps = 1)
print(np.round(predictions.T[0]))

In [None]:
# Print the corresponding validation labels

print(validation_labels)

In [None]:
# Obtain a validation data generator

validation_generator = get_generator(validation_features, validation_labels, batch_size=30)

In [None]:
# Evaluate the model

evaluations = model.evaluate_generator(validation_generator, steps = 1)

In [None]:
evaluations

***
<a id="coding_tutorial_3"></a>
## Keras image data augmentation

#### Import the data

The dataset required for this tutorial can be downloaded from the following link:

https://drive.google.com/open?id=11Y43ta5gT672L3sfJFR2DvPs-ralY5Pd

You should store these files in Drive for use in this Colab notebook.

In [None]:
# Run this cell to connect to your Drive folder

from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

#### Load the CIFAR-10 Dataset

In [None]:
from tensorflow.keras.datasets import cifar10

In [None]:
# Load the CIFAR-10 dataset

(training_features, training_labels), (test_features, test_labels) = cifar10.load_data()

In [None]:
# Convert the labels to a one-hot encoding

num_classes = 10

training_labels = tf.keras.utils.to_categorical(training_labels, num_classes)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)

#### Create a generator function

In [None]:
# Create a function that returns a data generator

def get_generator(features, labels, batch_size=1):
    for n in range(int(len(features)/batch_size)):
        yield (features[n*batch_size:(n+1)*batch_size], labels[n*batch_size:(n+1)*batch_size])

In [None]:
# Use the function we created to get a training data generator with a batch size of 1

training_generator = get_generator(training_features, training_labels)

In [None]:
# Assess the shape of the items generated by training_generator using the `next` function to yield an item.

image, label = next(training_generator)
print(image.shape)
print(label.shape)

In [None]:
# Test the training generator by obtaining an image using the `next` generator function, and then using imshow to plot it.
# Print the corresponding label

from matplotlib.pyplot import imshow

image, label = next(training_generator)
image_unbatched = image[0,:,:,:]
imshow(image_unbatched)
print(label)

In [None]:
# Reset the generator by re-running the `get_generator` function.

train_generator = get_generator(training_features, training_labels)

#### Create a data augmention generator

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Create a function to convert an image to monochrome

def monochrome(x):
    def func_bw(a):
        average_colour = np.mean(a)
        return [average_colour, average_colour, average_colour]
    x = np.apply_along_axis(func_bw, -1, x)
    return x

In [None]:
# Create an ImageDataGenerator object

image_generator = ImageDataGenerator(preprocessing_function = monochrome, # Preprocessing function
                                    rotation_range=180,
                                    rescale = (1/255.0))

image_generator.fit(training_features)

Check [the documentation](https://keras.io/preprocessing/image/) for the full list of image data augmentation options. 

In [None]:
# Create an iterable generator using the `flow` function

image_generator_iterable = image_generator.flow(training_features, training_labels, shuffle = False, batch_size = 1)

In [None]:
# Show a sample from the generator and compare with the original

image, label = next(image_generator_iterable)
image_orig, label_orig = next(train_generator)
figs, axes = plt.subplots(1,2)
axes[0].imshow(image[0,:,:,:])
axes[0].set_title('Transformed')
axes[1].imshow(image_orig[0,:,:,:])
axes[1].set_title('Original')
plt.show()

#### Flow from directory

In [None]:
# Inspect the directory structure

train_path = 'data/flowers-recognition-split/train'
val_path = 'data/flowers-recognition-split/val'

In [None]:
# Create an ImageDataGenerator object

datagenerator = ImageDataGenerator(rescale=(1/255.0))

In [None]:
classes = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [None]:
# Create a training data generator

# Classes Parameter:
# Optional list of class subdirectories
# (e.g. `['dogs', 'cats']`). Default: None.
# If not provided, the list of classes will be automatically
# inferred from the subdirectory names/structure
# under `directory`, where each subdirectory will
# be treated as a different class

train_generator = datagenerator.flow_from_directory(train_path, # Specify where to take the images from
                                                   batch_size = 64, # Put them into batches
                                                   classes = classes, # Identify their classes
                                                   target_size = (16,16))

In [None]:
# Create a validation data generator

val_generator = datagenerator.flow_from_directory(val_path, # Specify where to take the images from
                                                   batch_size = 64, # Put them into batches
                                                   classes = classes, # Identify their classes
                                                   target_size = (16,16))

In [None]:
# Get and display an image and label from the training generator

x = next(train_generator)
imshow(x[0][4])
print(x[1][4])

In [None]:
# Reset the training generator

train_generator = datagenerator.flow_from_directory(train_path, # Specify where to take the images from
                                                   batch_size = 64, # Put them into batches
                                                   classes = classes, # Identify their classes
                                                   target_size = (16,16))

#### Create a model to train

In [None]:
# Build a CNN model

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense

model = tf.keras.Sequential()
model.add(Input((16,16,3)))
model.add(Conv2D(8, (8, 8), padding='same', activation='relu'))
model.add(MaxPooling2D((4,4)))
model.add(Conv2D(8, (8, 8), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(4, (4, 4), padding='same', activation='relu'))
model.add(Flatten())
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(5, activation='softmax'))

In [None]:
# Create an optimizer object

optimizer = tf.keras.optimizers.Adam(1e-3)

In [None]:
# Compile the model

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the model summary

model.summary()

#### Train the model

In [None]:
# Calculate the training generator and test generator steps per epoch

train_steps_per_epoch = train_generator.n // train_generator.batch_size
val_steps = val_generator.n // val_generator.batch_size
print(train_steps_per_epoch, val_steps)

In [None]:
# Fit the model

model.fit_generator(train_generator,
                   steps_per_epoch = train_steps_per_epoch,
                   epochs = 5)

#### Evaluate the model

In [None]:
# Evaluate the model

# steps: Total number of steps (batches of samples)
#         to yield from `generator` before stopping.

model.evaluate_generator(val_generator, # Evaluate => Validation
                        steps = val_steps 
                        )

#### Predict using the generator

In [None]:
# Predict labels with the model

predictions = model.predict_generator(val_generator,
                                     steps = 1)
print(np.argmax(np.round(predictions, 2), axis = 1))

***
<a id="coding_tutorial_4"></a>
## The Dataset Class

#### Import the data

The dataset required for this tutorial can be downloaded from the following link:

https://drive.google.com/open?id=1BAjGPFlpqsDdWof50Ng3Fmju5O8F1_uZ

You should store these files in Drive for use in this Colab notebook.

In [None]:
# Run this cell to connect to your Drive folder

from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import tensorflow as tf

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

#### Create a simple dataset

In [None]:
x = np.zeros((100,10,2,2))

In [None]:
# Create a dataset from the tensor x

dataset1 = tf.data.Dataset.from_tensor_slices(x) # from_tensor_slices

In [None]:
# Inspect the Dataset object

dataset1.element_spec

In [None]:
x2 = [np.zeros((10,2,2)), np.zeros((5,2,2))]

In [None]:
# Try creating a dataset from the tensor x2

dataset2 = tf.data.Dataset.from_tensor_slices(x2)

In [None]:
x2 = [np.zeros((10,1)), np.zeros((10,1)), np.zeros((10,1))]

In [None]:
# Create another dataset from the new x2 and inspect the Dataset object

dataset2 = tf.data.Dataset.from_tensor_slices(x2)

In [None]:
# Print the element_spec

print(dataset2.element_spec)

#### Create a zipped dataset

In [None]:
# Combine the two datasets into one larger dataset

dataset_zipped = tf.data.Dataset.zip((dataset1, dataset2))

In [None]:
# Print the element_spec

print(dataset_zipped.element_spec)

In [None]:
# Define a function to find the number of batches in a dataset

def get_batches(dataset):
    iter_dataset = iter(dataset)
    i = 0
    try:
        while next(iter_dataset):
            i = i+1
    except:
        return i

In [None]:
# Find the number of batches in the zipped Dataset

get_batches(dataset_zipped)

# The larger dataset will be trimmed to accomodate the smaller dataset size.

#### Create a dataset from numpy arrays

In [None]:
# Load the MNIST dataset

(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.mnist.load_data()

print(type(train_features), type(train_labels))

In [None]:
# Create a Dataset from the MNIST data

mnist_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels) # Pass in a tuple as arg
                                                  )

In [None]:
# Inspect the Dataset object

print(mnist_dataset.element_spec)

In [None]:
# Inspect the length of an element using the take method

print(mnist_dataset.take(1))
print(iter(mnist_dataset.take(1)))
print(next(iter(mnist_dataset.take(1))))
element = next(iter(mnist_dataset.take(1)))
print(len(element))

In [None]:
# Examine the shapes of the data

print(element[0].shape)
print(element[1].shape)

#### Create a dataset from text data

In [None]:
# Print the list of text files

text_files = sorted([f.path for f in os.scandir('data/shakespeare')])

print(text_files)

In [None]:
# Load the first file using python and print the first 5 lines.

with open(text_files[0], 'r') as fil:
    contents = [fil.readline() for i in range(5)]
    for line in contents:
        print(line)

In [None]:
# Load the lines from the files into a dataset using TextLineDataset

shakespeare_dataset = tf.data.TextLineDataset(text_files)

In [None]:
# Use the take method to get and print the first 5 lines of the dataset

first_5_lines_dataset = iter(shakespeare_dataset.take(5))
lines = [line for line in first_5_lines_dataset]
for line in lines:
    print(line)

In [None]:
# Compute the number of lines in the first file

lines = []
with open(text_files[0], 'r') as fil:
    line = fil.readline()
    while line:
        lines.append(line)
        line = fil.readline()
    print(len(lines))

In [None]:
# Compute the number of lines in the shakespeare dataset we created

shakespeare_dataset_iterator = iter(shakespeare_dataset)
lines = [line for line in shakespeare_dataset_iterator]
print(len(lines))

#### Interleave lines from the text data files

In [None]:
# Create a dataset of the text file strings

text_files_dataset = tf.data.Dataset.from_tensor_slices(text_files)
files = [file for file in text_files_dataset]
for file in files:
    print(file)

In [None]:
# Interleave the lines from the text files
"""
cycle_length: (Optional.) The number of input elements that will be
    processed concurrently. If not specified, the value will be derived from
    the number of available CPU cores. If the `num_parallel_calls` argument
    is set to `tf.data.experimental.AUTOTUNE`, the `cycle_length` argument
    also identifies the maximum degree of parallelism.
"""

interleaved_shakespeare_dataset = text_files_dataset.interleave(tf.data.TextLineDataset, cycle_length = 9)
interleaved_shakespeare_dataset.element_spec

In [None]:
# Print the first 10 elements of the interleaved dataset

lines = [line for line in iter(interleaved_shakespeare_dataset.take(20))]
for line in lines:
    print(line)

***
<a id="coding_tutorial_5"></a>
## Training with Datasets

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

#### Load the UCI Bank Marketing Dataset

#### Import the data

The dataset required for this tutorial can be downloaded from the following link:

https://drive.google.com/open?id=1cNtP4iDyGhF620ZbmJdmJWYQrRgJTCum

You should store these files in Drive for use in this Colab notebook.

In [None]:
# Run this cell to connect to your Drive folder

from google.colab import drive
drive.mount('/content/gdrive')

In [1]:
# Load the CSV file into a pandas DataFrame
import pandas as pd
import tensorflow as tf

bank_dataframe = pd.read_csv('data/bank/bank-full.csv', delimiter=';')

In [2]:
# Show the head of the DataFrame

bank_dataframe.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [3]:
# Print the shape of the DataFrame

print(bank_dataframe.shape)

(45211, 17)


In [4]:
# Select features from the DataFrame

features = ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
            'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']

bank_dataframe = bank_dataframe.filter(features + labels)

In [5]:
# Show the head of the DataFrame

bank_dataframe.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,campaign,pdays,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,1,-1,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,1,-1,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,1,-1,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,1,-1,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,1,-1,unknown,no


#### Preprocess the data

In [6]:
# Convert the categorical features in the DataFrame to one-hot encodings

from sklearn.preprocessing import LabelBinarizer

encoder = LabelBinarizer()
categorical_features = ['default', 'housing', 'job', 'loan', 'education', 'contact', 'poutcome']

for feature in categorical_features:
    bank_dataframe[feature] = tuple(encoder.fit_transform(bank_dataframe[feature]))

In [7]:
# Show the head of the DataFrame

bank_dataframe.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,campaign,pdays,poutcome,y
0,58,"(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)",married,"(0, 0, 1, 0)","(0,)",2143,"(1,)","(0,)","(0, 0, 1)",1,-1,"(0, 0, 0, 1)",no
1,44,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0)",single,"(0, 1, 0, 0)","(0,)",29,"(1,)","(0,)","(0, 0, 1)",1,-1,"(0, 0, 0, 1)",no
2,33,"(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)",married,"(0, 1, 0, 0)","(0,)",2,"(1,)","(1,)","(0, 0, 1)",1,-1,"(0, 0, 0, 1)",no
3,47,"(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",married,"(0, 0, 0, 1)","(0,)",1506,"(1,)","(0,)","(0, 0, 1)",1,-1,"(0, 0, 0, 1)",no
4,33,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)",single,"(0, 0, 0, 1)","(0,)",1,"(0,)","(0,)","(0, 0, 1)",1,-1,"(0, 0, 0, 1)",no


In [8]:
# Shuffle the DataFrame

bank_dataframe = bank_dataframe.sample(frac=1).reset_index(drop=True)
bank_dataframe

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,campaign,pdays,poutcome,y
0,38,"(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",married,"(0, 0, 0, 1)","(0,)",5,"(1,)","(0,)","(0, 0, 1)",2,-1,"(0, 0, 0, 1)",no
1,36,"(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)",divorced,"(0, 0, 1, 0)","(0,)",133,"(1,)","(0,)","(0, 0, 1)",1,-1,"(0, 0, 0, 1)",no
2,25,"(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",single,"(0, 1, 0, 0)","(0,)",53,"(1,)","(0,)","(1, 0, 0)",6,-1,"(0, 0, 0, 1)",no
3,41,"(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",married,"(1, 0, 0, 0)","(0,)",407,"(1,)","(1,)","(0, 1, 0)",2,-1,"(0, 0, 0, 1)",no
4,32,"(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)",single,"(1, 0, 0, 0)","(0,)",1770,"(0,)","(1,)","(1, 0, 0)",2,188,"(1, 0, 0, 0)",no
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,44,"(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",single,"(0, 0, 0, 1)","(0,)",0,"(1,)","(0,)","(0, 0, 1)",4,-1,"(0, 0, 0, 1)",no
45207,54,"(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)",married,"(0, 1, 0, 0)","(0,)",2895,"(1,)","(0,)","(1, 0, 0)",2,256,"(1, 0, 0, 0)",no
45208,33,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0)",single,"(0, 1, 0, 0)","(0,)",-32,"(0,)","(0,)","(1, 0, 0)",12,-1,"(0, 0, 0, 1)",no
45209,41,"(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",married,"(0, 1, 0, 0)","(0,)",114,"(1,)","(0,)","(0, 0, 1)",3,-1,"(0, 0, 0, 1)",no


#### Create the Dataset object

In [9]:
# Convert the DataFrame to a Dataset

bank_dataset = tf.data.Dataset.from_tensor_slices(dict(bank_dataframe))

In [10]:
# Inspect the Dataset object

bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'job': TensorSpec(shape=(12,), dtype=tf.int32, name=None),
 'marital': TensorSpec(shape=(), dtype=tf.string, name=None),
 'education': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'balance': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'contact': TensorSpec(shape=(3,), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'poutcome': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.string, name=None)}

#### Filter the Dataset

In [11]:
# First check that there are records in the dataset for non-married individuals

def check_divorced():
    bank_dataset_iterable = iter(bank_dataset)
    for x in bank_dataset_iterable:
        if x['marital'] != 'divorced':
            print('Found a person with marital status: {}'.format(x['marital']))
            return
    print('No non-divorced people were found!')

check_divorced()

Found a person with marital status: b'married'


In [37]:
for x in bank_dataset:
    print(x)
    print(tf.equal(x['marital'], tf.constant([b'divorced'])))

{'age': <tf.Tensor: id=185087, shape=(), dtype=int32, numpy=36>, 'job': <tf.Tensor: id=185094, shape=(12,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])>, 'education': <tf.Tensor: id=185092, shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0])>, 'default': <tf.Tensor: id=185091, shape=(1,), dtype=int32, numpy=array([0])>, 'balance': <tf.Tensor: id=185088, shape=(), dtype=int32, numpy=133>, 'housing': <tf.Tensor: id=185093, shape=(1,), dtype=int32, numpy=array([1])>, 'loan': <tf.Tensor: id=185095, shape=(1,), dtype=int32, numpy=array([0])>, 'contact': <tf.Tensor: id=185090, shape=(3,), dtype=int32, numpy=array([0, 0, 1])>, 'campaign': <tf.Tensor: id=185089, shape=(), dtype=int32, numpy=1>, 'pdays': <tf.Tensor: id=185096, shape=(), dtype=int32, numpy=-1>, 'poutcome': <tf.Tensor: id=185097, shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1])>, 'y': <tf.Tensor: id=185098, shape=(), dtype=int32, numpy=0>}


KeyError: 'marital'

In [12]:
# Filter the Dataset to retain only entries with a 'divorced' marital status

# The input to the lambda function, x, is always referring to the DATASET object.

bank_dataset = bank_dataset.filter(lambda x : tf.equal(x['marital'], tf.constant([b'divorced']))[0])

In [40]:
for x in bank_dataset.take(5):
    print(len(x))
    
print(bank_dataset)

12
12
12
12
12
<MapDataset shapes: {age: (), job: (12,), education: (4,), default: (1,), balance: (), housing: (1,), loan: (1,), contact: (3,), campaign: (), pdays: (), poutcome: (4,), y: ()}, types: {age: tf.int32, job: tf.int32, education: tf.int32, default: tf.int32, balance: tf.int32, housing: tf.int32, loan: tf.int32, contact: tf.int32, campaign: tf.int32, pdays: tf.int32, poutcome: tf.int32, y: tf.int32}>


In [13]:
# Check the records in the dataset again

check_divorced()

No non-divorced people were found!


#### Map a function over the dataset

In [14]:
# Convert the label ('y') to an integer instead of 'yes' or 'no'

# The input to the lambda function, x, is always referring to the DATASET object.

def map_label(x):
    
    x['y'] = 0 if (x['y'] == tf.constant([b'no'], dtype = tf.string)) else 1
    return x

In [15]:
# Inspect the Dataset object
bank_dataset = bank_dataset.map(map_label)
bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'job': TensorSpec(shape=(12,), dtype=tf.int32, name=None),
 'marital': TensorSpec(shape=(), dtype=tf.string, name=None),
 'education': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'balance': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'contact': TensorSpec(shape=(3,), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'poutcome': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

In [16]:
print(next(iter(bank_dataset.take(2))))

{'age': <tf.Tensor: id=78217, shape=(), dtype=int32, numpy=36>, 'job': <tf.Tensor: id=78224, shape=(12,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])>, 'marital': <tf.Tensor: id=78226, shape=(), dtype=string, numpy=b'divorced'>, 'education': <tf.Tensor: id=78222, shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0])>, 'default': <tf.Tensor: id=78221, shape=(1,), dtype=int32, numpy=array([0])>, 'balance': <tf.Tensor: id=78218, shape=(), dtype=int32, numpy=133>, 'housing': <tf.Tensor: id=78223, shape=(1,), dtype=int32, numpy=array([1])>, 'loan': <tf.Tensor: id=78225, shape=(1,), dtype=int32, numpy=array([0])>, 'contact': <tf.Tensor: id=78220, shape=(3,), dtype=int32, numpy=array([0, 0, 1])>, 'campaign': <tf.Tensor: id=78219, shape=(), dtype=int32, numpy=1>, 'pdays': <tf.Tensor: id=78227, shape=(), dtype=int32, numpy=-1>, 'poutcome': <tf.Tensor: id=78228, shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1])>, 'y': <tf.Tensor: id=78229, shape=(), dtype=int32, numpy=0>}


In [17]:
# Remove the 'marital' column

bank_dataset = bank_dataset.map(lambda x: {key:val for key,val in x.items() if key != 'marital'})

In [18]:
# Inspect the Dataset object
print(next(iter(bank_dataset.take(2))))
bank_dataset.element_spec

{'age': <tf.Tensor: id=78265, shape=(), dtype=int32, numpy=36>, 'job': <tf.Tensor: id=78272, shape=(12,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])>, 'education': <tf.Tensor: id=78270, shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0])>, 'default': <tf.Tensor: id=78269, shape=(1,), dtype=int32, numpy=array([0])>, 'balance': <tf.Tensor: id=78266, shape=(), dtype=int32, numpy=133>, 'housing': <tf.Tensor: id=78271, shape=(1,), dtype=int32, numpy=array([1])>, 'loan': <tf.Tensor: id=78273, shape=(1,), dtype=int32, numpy=array([0])>, 'contact': <tf.Tensor: id=78268, shape=(3,), dtype=int32, numpy=array([0, 0, 1])>, 'campaign': <tf.Tensor: id=78267, shape=(), dtype=int32, numpy=1>, 'pdays': <tf.Tensor: id=78274, shape=(), dtype=int32, numpy=-1>, 'poutcome': <tf.Tensor: id=78275, shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1])>, 'y': <tf.Tensor: id=78276, shape=(), dtype=int32, numpy=0>}


{'age': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'job': TensorSpec(shape=(12,), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'balance': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'contact': TensorSpec(shape=(3,), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'poutcome': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

#### Create input and output data tuples

In [19]:
# Create an input and output tuple for the dataset

def map_feature_label(x):
    features = [
        [x['age']], [x['balance']], [x['campaign']], x['contact'], x['default'],
        x['education'], x['housing'], x['job'], x['loan'], [x['pdays']], x['poutcome']
    ]
    return (tf.concat(features, axis=0), x['y'])

In [20]:
# Map this function over the dataset

bank_dataset.map(map_feature_label)

<MapDataset shapes: ((30,), ()), types: (tf.int32, tf.int32)>

In [21]:
# Inspect the Dataset object
print(next(iter(bank_dataset.take(2))))
bank_dataset.element_spec

{'age': <tf.Tensor: id=78307, shape=(), dtype=int32, numpy=36>, 'job': <tf.Tensor: id=78314, shape=(12,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])>, 'education': <tf.Tensor: id=78312, shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0])>, 'default': <tf.Tensor: id=78311, shape=(1,), dtype=int32, numpy=array([0])>, 'balance': <tf.Tensor: id=78308, shape=(), dtype=int32, numpy=133>, 'housing': <tf.Tensor: id=78313, shape=(1,), dtype=int32, numpy=array([1])>, 'loan': <tf.Tensor: id=78315, shape=(1,), dtype=int32, numpy=array([0])>, 'contact': <tf.Tensor: id=78310, shape=(3,), dtype=int32, numpy=array([0, 0, 1])>, 'campaign': <tf.Tensor: id=78309, shape=(), dtype=int32, numpy=1>, 'pdays': <tf.Tensor: id=78316, shape=(), dtype=int32, numpy=-1>, 'poutcome': <tf.Tensor: id=78317, shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1])>, 'y': <tf.Tensor: id=78318, shape=(), dtype=int32, numpy=0>}


{'age': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'job': TensorSpec(shape=(12,), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'balance': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'contact': TensorSpec(shape=(3,), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'poutcome': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

#### Split into a training and a validation set

In [22]:
# Determine the length of the Dataset

dataset_length = 0
for _ in bank_dataset:
    dataset_length += 1
print(dataset_length)

5207


In [23]:
# Make training and validation sets from the dataset

training_elements = int(dataset_length * 0.7)
train_dataset = bank_dataset.take(training_elements)
validation_dataset = bank_dataset.skip(training_elements)

In [24]:
train_dataset_length = 0
for _ in train_dataset:
    train_dataset_length += 1
print(train_dataset_length)

3644


In [25]:
# Inspect the Dataset object
print(next(iter(bank_dataset.take(2))))
bank_dataset.element_spec

{'age': <tf.Tensor: id=184552, shape=(), dtype=int32, numpy=36>, 'job': <tf.Tensor: id=184559, shape=(12,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])>, 'education': <tf.Tensor: id=184557, shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0])>, 'default': <tf.Tensor: id=184556, shape=(1,), dtype=int32, numpy=array([0])>, 'balance': <tf.Tensor: id=184553, shape=(), dtype=int32, numpy=133>, 'housing': <tf.Tensor: id=184558, shape=(1,), dtype=int32, numpy=array([1])>, 'loan': <tf.Tensor: id=184560, shape=(1,), dtype=int32, numpy=array([0])>, 'contact': <tf.Tensor: id=184555, shape=(3,), dtype=int32, numpy=array([0, 0, 1])>, 'campaign': <tf.Tensor: id=184554, shape=(), dtype=int32, numpy=1>, 'pdays': <tf.Tensor: id=184561, shape=(), dtype=int32, numpy=-1>, 'poutcome': <tf.Tensor: id=184562, shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1])>, 'y': <tf.Tensor: id=184563, shape=(), dtype=int32, numpy=0>}


{'age': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'job': TensorSpec(shape=(12,), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'balance': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'contact': TensorSpec(shape=(3,), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'poutcome': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

#### Build a classification model

Now let's build a model to classify the features.

In [26]:
# Build a classifier model

from tensorflow.keras.layers import Dense, Input, Concatenate, BatchNormalization
from tensorflow.keras import Sequential

model = Sequential()
model.add(Input(shape=(30,)))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(400, activation='relu'))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(400, activation='relu'))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(1, activation='sigmoid'))

In [27]:
# Compile the model

optimizer = tf.keras.optimizers.Adam(1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [28]:
# Show the model summary

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 30)                120       
_________________________________________________________________
dense (Dense)                (None, 400)               12400     
_________________________________________________________________
batch_normalization_1 (Batch (None, 400)               1600      
_________________________________________________________________
dense_1 (Dense)              (None, 400)               160400    
_________________________________________________________________
batch_normalization_2 (Batch (None, 400)               1600      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 401       
Total params: 176,521
Trainable params: 174,861
Non-trainable params: 1,660
______________________________________________

#### Train the model

In [29]:
# Create batched training and validation datasets

train_dataset = train_dataset.shuffle(1000)
train_dataset = train_dataset.batch(20, drop_remainder = True)
train_dataset = train_dataset.repeat()
validation_dataset = validation_dataset.batch(100)

In [51]:
print(next(iter(train_dataset.take(2))))
train_dataset.element_spec

{'age': <tf.Tensor: id=326232, shape=(), dtype=int32, numpy=36>, 'job': <tf.Tensor: id=326239, shape=(12,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])>, 'education': <tf.Tensor: id=326237, shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0])>, 'default': <tf.Tensor: id=326236, shape=(1,), dtype=int32, numpy=array([0])>, 'balance': <tf.Tensor: id=326233, shape=(), dtype=int32, numpy=7821>, 'housing': <tf.Tensor: id=326238, shape=(1,), dtype=int32, numpy=array([0])>, 'loan': <tf.Tensor: id=326240, shape=(1,), dtype=int32, numpy=array([0])>, 'contact': <tf.Tensor: id=326235, shape=(3,), dtype=int32, numpy=array([0, 0, 1])>, 'campaign': <tf.Tensor: id=326234, shape=(), dtype=int32, numpy=1>, 'pdays': <tf.Tensor: id=326241, shape=(), dtype=int32, numpy=-1>, 'poutcome': <tf.Tensor: id=326242, shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1])>, 'y': <tf.Tensor: id=326243, shape=(), dtype=int32, numpy=0>}


{'age': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'job': TensorSpec(shape=(12,), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'balance': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(1,), dtype=tf.int32, name=None),
 'contact': TensorSpec(shape=(3,), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'poutcome': TensorSpec(shape=(4,), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

In [66]:
# Shuffle the training data

train_dataset = train_dataset.shuffle(1000)
train_dataset = train_dataset.repeat()

In [69]:
type(train_dataset)

tensorflow.python.data.ops.dataset_ops.RepeatDataset

In [34]:
# Fit the model

history = model.fit(train_dataset, steps_per_epoch = train_dataset_length//20, epochs = 5)

Train for 182 steps
  1/182 [..............................] - ETA: 4s

KeyError: 'input_1'

In [None]:
# Plot the training and validation accuracy

