In [50]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Load the UCI Fertility Dataset

We will be using the dataset available at https://archive.ics.uci.edu/ml/machine-learning-databases/00244/

In [11]:
# Load the fertility dataset

headers = ['Season', 'Age', 'Diseases', 'Trauma', 'Surgery', 'Fever', 'Alcohol', 'Smoking', 'Sitting', 'Output']
fertility = pd.read_csv('data/fertility_Diagnosis.txt', delimiter=',', header=None, names=headers)

In [13]:
# Print the shape of the dataFrame

print(fertility.shape)

(100, 10)


In [14]:
# Show the head of the dataframe

fertility.head()

Unnamed: 0,Season,Age,Diseases,Trauma,Surgery,Fever,Alcohol,Smoking,Sitting,Output
0,-0.33,0.69,0,1,1,0,0.8,0,0.88,N
1,-0.33,0.94,1,0,1,0,0.8,1,0.31,O
2,-0.33,0.5,1,0,0,0,1.0,-1,0.5,N
3,-0.33,0.75,0,1,1,0,1.0,-1,0.38,N
4,-0.33,0.67,1,1,0,0,0.8,-1,0.5,O


### Process the data

In [15]:
# Map the output feature from 'N' to 0 and 'O' to 1

fertility['Output'] = fertility['Output'].map(lambda x: 0.0 if x=='N' else 1.0)

In [16]:
fertility.head()

Unnamed: 0,Season,Age,Diseases,Trauma,Surgery,Fever,Alcohol,Smoking,Sitting,Output
0,-0.33,0.69,0,1,1,0,0.8,0,0.88,0.0
1,-0.33,0.94,1,0,1,0,0.8,1,0.31,1.0
2,-0.33,0.5,1,0,0,0,1.0,-1,0.5,0.0
3,-0.33,0.75,0,1,1,0,1.0,-1,0.38,0.0
4,-0.33,0.67,1,1,0,0,0.8,-1,0.5,1.0


In [17]:
# Convert the dataFrame so that the features are mapped to floats

fertility = fertility.astype('float32')

In [18]:
# Shuffle the dataFrame

fertility = fertility.sample(frac=1).reset_index(drop=True)

In [19]:
# Show the head of the dataFrame

fertility.head()

Unnamed: 0,Season,Age,Diseases,Trauma,Surgery,Fever,Alcohol,Smoking,Sitting,Output
0,-0.33,0.5,1.0,0.0,1.0,-1.0,0.8,-1.0,0.5,0.0
1,1.0,0.64,0.0,0.0,1.0,0.0,0.8,-1.0,0.25,0.0
2,1.0,0.67,1.0,0.0,0.0,0.0,1.0,-1.0,0.25,0.0
3,0.33,0.78,1.0,0.0,0.0,0.0,1.0,1.0,0.06,0.0
4,-1.0,0.53,1.0,1.0,0.0,1.0,0.8,-1.0,0.38,0.0


In [20]:
# Convert the field Season to a one-hot encoded vector

fertility = pd.get_dummies(fertility, prefix='Season', columns=['Season'])

In [21]:
# Show the head of the DataFrame

fertility.head()

Unnamed: 0,Age,Diseases,Trauma,Surgery,Fever,Alcohol,Smoking,Sitting,Output,Season_-1.0,Season_-0.33000001311302185,Season_0.33000001311302185,Season_1.0
0,0.5,1.0,0.0,1.0,-1.0,0.8,-1.0,0.5,0.0,0,1,0,0
1,0.64,0.0,0.0,1.0,0.0,0.8,-1.0,0.25,0.0,0,0,0,1
2,0.67,1.0,0.0,0.0,0.0,1.0,-1.0,0.25,0.0,0,0,0,1
3,0.78,1.0,0.0,0.0,0.0,1.0,1.0,0.06,0.0,0,0,1,0
4,0.53,1.0,1.0,0.0,1.0,0.8,-1.0,0.38,0.0,1,0,0,0


In [23]:
# Move the Output column such that it is the last column in the dataFrame

fertility.columns = [col for col in fertility.columns if col!='Output']+['Output']

In [24]:
fertility.head()

Unnamed: 0,Age,Diseases,Trauma,Surgery,Fever,Alcohol,Smoking,Sitting,Season_-1.0,Season_-0.33000001311302185,Season_0.33000001311302185,Season_1.0,Output
0,0.5,1.0,0.0,1.0,-1.0,0.8,-1.0,0.5,0.0,0,1,0,0
1,0.64,0.0,0.0,1.0,0.0,0.8,-1.0,0.25,0.0,0,0,0,1
2,0.67,1.0,0.0,0.0,0.0,1.0,-1.0,0.25,0.0,0,0,0,1
3,0.78,1.0,0.0,0.0,0.0,1.0,1.0,0.06,0.0,0,0,1,0
4,0.53,1.0,1.0,0.0,1.0,0.8,-1.0,0.38,0.0,1,0,0,0


In [25]:
# Convert the dataFrame to a numpy array

fertility = fertility.to_numpy()

### Split the data

In [27]:
# Split the dataset into training and validation set

training = fertility[0:70]
validation = fertility[70:100]

In [28]:
# Verify the shape of the training data

training.shape

(70, 13)

In [29]:
# Separate the features and labels for the validation and training data

training_features = training[:,0:-1]
training_labels = training[:,-1]
validation_features = validation[:,0:-1]
validation_labels = validation[:,-1]

### Create the generator

In [42]:
# Create a function that returns a generator producing inputs and labels

def get_generator(features, labels, batch_size=1):
    for n in range(int(len(features)/batch_size)):
        yield (features[n*batch_size: (n+1)*batch_size], labels[n*batch_size: (n+1)*batch_size])

In [43]:
# Apply the function to our training features and labels with a batch size of 10

train_generator = get_generator(training_features, training_labels, batch_size=10)

In [44]:
# Test the generator with the next() function

next(train_generator)

(array([[ 0.5 ,  1.  ,  0.  ,  1.  , -1.  ,  0.8 , -1.  ,  0.5 ,  0.  ,
          0.  ,  1.  ,  0.  ],
        [ 0.64,  0.  ,  0.  ,  1.  ,  0.  ,  0.8 , -1.  ,  0.25,  0.  ,
          0.  ,  0.  ,  0.  ],
        [ 0.67,  1.  ,  0.  ,  0.  ,  0.  ,  1.  , -1.  ,  0.25,  0.  ,
          0.  ,  0.  ,  0.  ],
        [ 0.78,  1.  ,  0.  ,  0.  ,  0.  ,  1.  ,  1.  ,  0.06,  0.  ,
          0.  ,  0.  ,  1.  ],
        [ 0.53,  1.  ,  1.  ,  0.  ,  1.  ,  0.8 , -1.  ,  0.38,  0.  ,
          1.  ,  0.  ,  0.  ],
        [ 0.81,  1.  ,  1.  ,  1.  ,  1.  ,  0.8 , -1.  ,  0.38,  0.  ,
          0.  ,  1.  ,  0.  ],
        [ 0.75,  1.  ,  1.  ,  1.  ,  0.  ,  1.  ,  1.  ,  0.25,  0.  ,
          0.  ,  0.  ,  0.  ],
        [ 0.67,  0.  ,  0.  ,  1.  ,  0.  ,  0.8 , -1.  ,  0.25,  0.  ,
          0.  ,  0.  ,  0.  ],
        [ 0.81,  1.  ,  0.  ,  0.  ,  0.  ,  1.  , -1.  ,  0.38,  0.  ,
          0.  ,  0.  ,  0.  ],
        [ 0.61,  1.  ,  0.  ,  0.  ,  0.  ,  0.8 ,  0.  ,  0.5 ,  0.  ,
 

### Build the model

In [45]:
# Create a model using Keras with 3 layers

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, BatchNormalization

input_shape = (12,)
output_shape = (1,)

model_input = Input(input_shape)
batch_1 = BatchNormalization(momentum=0.8)(model_input)
dense_1 = Dense(100, activation='relu')(batch_1)
batch_2 = BatchNormalization(momentum=0.8)(dense_1)
output = Dense(1, activation='sigmoid')(batch_2)

model = Model([model_input], output)

In [47]:
# Display the model summary to show the resultant structure

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 12)]              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 12)                48        
_________________________________________________________________
dense (Dense)                (None, 100)               1300      
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 1,849
Trainable params: 1,625
Non-trainable params: 224
_________________________________________________________________


### Compile the model

In [52]:
# Create the optimizer object

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)

In [53]:
# Compile the model with loss function and metric

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

### Train and evaluate the model using the generator

In [54]:
# Calculate the number of training steps per epoch for the given batch size

batch_size=5
train_steps = len(training) // batch_size

In [55]:
# Set the epochs to 3

epochs = 3

In [57]:
# Train the model

for epoch in range(epochs):
    train_generator = get_generator(training_features, training_labels, batch_size=batch_size)
    validation_generator = get_generator(validation_features, validation_labels, batch_size=30)
    model.fit(train_generator, steps_per_epoch = train_steps, validation_data = validation_generator, validation_steps=1)



In [59]:
# Try to run the fit function once more; observe what happens

model.fit(train_generator, steps_per_epoch=train_steps)
# we run out of data

StopIteration: 

### Making an infinitely looping generator

In [69]:
# Create a function that returns an ininitely looping generator

def get_generator_cyclic(features, labels, batch_size=1):
    while True:
        for n in range(int(len(features)/batch_size)):
            yield (features[n*batch_size: (n+1)*batch_size], labels[n*batch_size: (n+1)*batch_size])
        permuted = np.random.permutation(len(features))
        features = features[permuted]
        labels = labels[permuted]

In [70]:
# Create a generator using this function

train_generator_cyclic = get_generator_cyclic(training_features, training_labels, batch_size=batch_size)

In [72]:
# Assert that the new cyclic generator does not raise a StopIteration

for i in range(2*train_steps):
    next(train_generator_cyclic)

In [73]:
# Generate a cyclic validation generator

validation_generator_cyclic = get_generator_cyclic(validation_features, validation_labels, batch_size=batch_size)

In [76]:
# Train the model

model.fit(train_generator_cyclic, steps_per_epoch=train_steps, validation_data=validation_generator_cyclic, validation_steps=1, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7ff8cc20fe80>

### Evaluate the model and get predictions

In [82]:
# Let's obtain a validation data generator

validation_generator = get_generator(validation_features, validation_labels, batch_size=30)

In [83]:
# Get predictions on the validation data

predictions = model.predict(validation_generator, steps=1)

In [84]:
print(np.round(predictions.T[0]))

[0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1.
 0. 0. 0. 0. 0. 1.]


In [85]:
# Print the corresponding validation labels

print(validation_labels)

[0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1.
 0. 0. 0. 0. 0. 1.]


In [86]:
# Obtain a validation data generator

validation_generator = get_generator(validation_features, validation_labels, batch_size=30)

In [87]:
# Evaluate the model

print(model.evaluate(validation_generator))

[0.0002040407562162727, 1.0]
