In [2]:
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/bee-wasp-data/data.zip

--2023-11-19 13:31:22--  https://github.com/SVizor42/ML_Zoomcamp/releases/download/bee-wasp-data/data.zip
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/405934815/e6c56cb7-dce1-463f-865b-01e913c38485?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231119%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231119T133122Z&X-Amz-Expires=300&X-Amz-Signature=33a10121417880150ce8a897d744ffa41791f2a7bf085eedf10fa7409a0ff23c&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405934815&response-content-disposition=attachment%3B%20filename%3Ddata.zip&response-content-type=application%2Foctet-stream [following]
--2023-11-19 13:31:22--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/405934815/e6c56cb7-dce1-463f-865b-01e913c38485?X-Amz-Algor

In [4]:
!unzip -qq data.zip

In [48]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Model architecture

In [49]:
# Small CNN for binary image classification on 150x150 RGB inputs:
# Conv2D(32, 3x3, relu) -> MaxPooling2D(2x2) -> Flatten -> Dense(64, relu) -> Dense(1, sigmoid)

# Input: 150x150 images with 3 colour channels
input_layer = Input(shape=(150, 150, 3))

# Convolutional layer: 32 filters with a 3x3 kernel
conv_layer = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(input_layer)

# Halve the spatial resolution with 2x2 max pooling
maxpool_layer = MaxPooling2D(pool_size=(2, 2))(conv_layer)

# Flatten the feature maps into a single vector for the dense layers
flatten_layer = Flatten()(maxpool_layer)

# Dense layer with 64 neurons and 'relu' activation
dense_layer = Dense(units=64, activation='relu')(flatten_layer)

# Output layer with 1 neuron and 'sigmoid' activation for binary classification
output_layer = Dense(units=1, activation='sigmoid')(dense_layer)

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

# SGD with momentum. `learning_rate` is the supported keyword; the old `lr`
# alias is deprecated and is rejected by newer Keras releases.
sgd_optimizer = SGD(learning_rate=0.002, momentum=0.8)

# Binary cross-entropy matches the single sigmoid output unit
model.compile(optimizer=sgd_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()



Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 150, 150, 3)]     0         
                                                                 
 conv2d_2 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 175232)            0         
                                                                 
 dense_4 (Dense)             (None, 64)                11214912  
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                           

In [50]:
# Plain generator: only rescales pixel values by 1/255, no augmentation.
data_gen = ImageDataGenerator(rescale=1.0 / 255)

In [51]:
train_path = 'data/train/'
test_path = 'data/test/'

# Both directory iterators use identical settings: images resized to the
# model's 150x150 input, batches of 20, binary labels, shuffled order.
flow_options = dict(
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
    shuffle=True,
)

train_generator = data_gen.flow_from_directory(train_path, **flow_options)
test_generator = data_gen.flow_from_directory(test_path, **flow_options)

Found 3677 images belonging to 2 classes.
Found 918 images belonging to 2 classes.


In [52]:
# First training run: 10 epochs on the non-augmented training batches, with the
# test split passed as validation data. Per-epoch metrics are read later from
# `history.history`.
history = model.fit(train_generator, validation_data=test_generator, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Question 3

In [53]:
import numpy as np

In [54]:
# Question 3: median of the per-epoch training accuracy from the first run.
first_run_metrics = history.history
training_accuracy = first_run_metrics['accuracy']
np.median(training_accuracy)

0.8199619352817535

## Question 4

In [55]:
# Question 4: standard deviation of the per-epoch training loss from the first run.
first_run_metrics = history.history
training_loss = first_run_metrics['loss']
np.std(training_loss)

0.20138559953305632

## Data Augmentation

In [56]:
# Augmenting generator for the training images: the same 1/255 rescaling as
# before, plus random geometric transforms.
augmentation_options = dict(
    rescale=1.0 / 255,        # normalize pixel values to the [0, 1] range
    rotation_range=50,        # degree range for random rotations
    width_shift_range=0.1,    # horizontal shift, as a fraction of total width
    height_shift_range=0.1,   # vertical shift, as a fraction of total height
    zoom_range=0.1,           # random zoom range
    horizontal_flip=True,     # randomly mirror images left/right
    fill_mode='nearest',      # how newly created pixels are filled in
)
data_gen2 = ImageDataGenerator(**augmentation_options)

# Same directory and iterator settings as the non-augmented training iterator.
train_generator2 = data_gen2.flow_from_directory(
    directory=train_path,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
    shuffle=True,
)

Found 3677 images belonging to 2 classes.


In [57]:
# `model` is reused rather than rebuilt, so these 10 epochs continue from the
# weights reached by the previous fit, now on augmented training batches.
# Validation still uses the non-augmented test_generator.
history2 = model.fit(train_generator2, validation_data=test_generator, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Question 5

In [58]:
# Question 5: mean validation loss over all epochs of the augmented run.
augmented_val_loss = history2.history['val_loss']
np.mean(augmented_val_loss)

0.5167007327079773

## Question 6

In [59]:
# Question 6: mean validation accuracy over the last 5 epochs of the augmented run.
last_five_val_accuracy = history2.history['val_accuracy'][-5:]
np.mean(last_five_val_accuracy)

0.7799564242362976