# Homework 8

In [9]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

### Prepare the data

In [43]:
# Define the function to load images

from tensorflow.keras.preprocessing.image import ImageDataGenerator

def setup_img(img_dir, target_size, batch_size, shuffle):
    
    # Create image data generator instance
    generator = ImageDataGenerator(rescale=1./255)
    
    # Create the batches of images
    img_ds = generator.flow_from_directory(
        img_dir, 
        target_size=target_size,
        batch_size= batch_size,
        shuffle=shuffle,
        class_mode='binary'
    )
    
    return img_ds

# Create the training dataset
train_ds = setup_img('train', (150, 150), 20, True)

# Create the testing dataset
test_ds = setup_img('test', (150, 150), 20, True)


Found 1594 images belonging to 2 classes.
Found 394 images belonging to 2 classes.


### Create Model

In [31]:
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense

# Create the model
def conv_model(input_shape):
    
    # Create input image
    inputs = tf.keras.Input(shape=input_shape)
    
    # First layer of Conv2D
    x = Conv2D(
        filters=32,
        kernel_size=(3,3),
        activation='relu',
        input_shape=input_shape
    )(inputs)
    
    # Maxpooling layer
    x = MaxPool2D(
        pool_size=(2, 2)
    )(x)
    
    # Flatten layer
    x = Flatten()(x)
    
    # Dense layer
    x = Dense(64, activation='relu')(x)
    
    # Output layer
    output_layer = Dense(1, activation='sigmoid')(x)
    
    # Compile the model
    model = tf.keras.Model(inputs=inputs, outputs=output_layer, name='ml_zoomcamp_model')
    
    return model

In [33]:
model = conv_model((150, 150, 3))

# Find the number of parameters
model.summary()

Model: "ml_zoomcamp_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 150, 150, 3)]     0         
                                                                 
 conv2d_2 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 74, 74, 32)       0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 175232)            0         
                                                                 
 dense_4 (Dense)             (None, 64)                11214912  
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                 

### Train the Model

In [63]:
# Define the training function

def train_model(model, optimizer, loss, metrics, train_ds, batch_size, epochs, with_validation=False, test_ds=None):
    
    # Compile model with the correct loss, optimizer, and metric
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=metrics
    )
    
    # Train with validation
    if with_validation == True:
        history = model.fit(train_ds, epochs=epochs, validation_data=test_ds)
    
    # Train the model with no validation
    else:
        history = model.fit(train_ds, epochs=epochs)
    
    return history
    

In [50]:
# Train the model
history = train_model(
    model=model,
    optimizer=tf.keras.optimizers.SGD(lr=0.002, momentum=0.8),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
    train_ds=train_ds,
    batch_size=20,
    epochs=10  
)

Epoch 1/10


2022-11-18 01:57:40.424710: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-11-18 01:57:41.068578: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-18 01:57:41.069578: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-18 01:57:41.069618: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-11-18 01:57:41.070558: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-18 01:57:41.070667: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Q3, Q4

In [58]:
# Save the loss and accuracy into two variables; loss, 
loss = history.history['loss']
acc = history.history['accuracy']

print('Median training accuracy: {}'.format(np.median(acc)))
print('Standard deviation of loss: {}'.format(np.std(loss)))

Median training accuracy: 0.8877038955688477
Standard deviation of loss: 0.12980510238129245


### Data Augmentation

In [59]:
# Create the data augmentation function
def augment_img(img_dir, target_size, batch_size, shuffle, params):
    
    # Create image data generator instance
    generator = ImageDataGenerator(
        rescale=1./255,
        rotation_range=params['rotation_range'],
        width_shift_range=params['width_shift_range'],
        height_shift_range=params['height_shift_range'],
        shear_range=params['shear_range'],
        zoom_range=params['zoom_range'],
        horizontal_flip=params['horizontal_flip'],
        fill_mode=params['fill_mode']
    )
    
    # Create the batches of images
    img_ds = generator.flow_from_directory(
        img_dir, 
        target_size=target_size,
        batch_size= batch_size,
        shuffle=shuffle,
        class_mode='binary'
    )
    
    return img_ds

In [61]:
# Create the data augmentation parameters
params = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest'
}

# Create the data augmentation dataset
train_aug_ds = augment_img(
    img_dir='train',
    target_size=(150, 150),
    batch_size=20,
    shuffle=True,
    params=params
)

Found 1594 images belonging to 2 classes.


In [64]:
# Train the model with the augmented training data
history_aug = train_model(
    model=model,
    optimizer=tf.keras.optimizers.SGD(lr=0.002, momentum=0.8),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
    train_ds=train_aug_ds,
    batch_size=20,
    epochs=10,
    with_validation=True,
    test_ds=test_ds
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Q5, Q6

In [71]:
# Save the losses and accuracy to variables
loss_aug = history_aug.history['loss']
test_loss_aug = history_aug.history['val_loss']
acc_aug = history_aug.history['accuracy']
test_acc_aug = history_aug.history['val_accuracy']

print("Mean test loss for all epochs: {}".format(np.mean(test_loss_aug)))
print("Mean test loss for last 5 epochs: {}".format(np.mean(test_acc_aug[6:])))

Mean test loss for all epochs: 0.46223168075084686
Mean test loss for last 5 epochs: 0.7899746298789978
