<a href="https://colab.research.google.com/github/spencer18001/machine-learning-zoomcamp/blob/main/08/hw_08.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
%%capture
!pip install tensorflow=2.17.1
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
!unzip data.zip

In [5]:
import numpy as np
import tensorflow as tf

# Single seed applied to both NumPy's global RNG and TensorFlow's global seed.
SEED = 42
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(SEED)


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Small CNN for binary image classification on 200x200 RGB inputs:
# one 3x3 convolution (32 filters) -> 2x2 max-pooling -> flatten ->
# 64-unit dense layer -> single sigmoid output unit.
model = Sequential([
    # Declare the input shape with an explicit Input object. Passing
    # `input_shape=` to the first layer is discouraged by Keras 3 (shipped with
    # TF 2.17) and emits a UserWarning there.
    tf.keras.Input(shape=(200, 200, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    # One sigmoid unit pairs with the binary cross-entropy loss below.
    Dense(1, activation='sigmoid')
])

# SGD with momentum; accuracy is tracked alongside the loss.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.002, momentum=0.8),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Both splits get the same preprocessing: pixel values multiplied by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Directory-iterator settings shared by the train and test splits.
flow_options = dict(
    target_size=(200, 200),  # matches the model's 200x200 input
    batch_size=20,
    class_mode='binary',     # one 0/1 label per image
    shuffle=True,
)

train_generator = train_datagen.flow_from_directory('data/train', **flow_options)
test_generator = test_datagen.flow_from_directory('data/test', **flow_options)


Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.


In [8]:
# Baseline run: 10 epochs on the non-augmented training images, evaluating on
# the test generator after every epoch. The returned History object keeps the
# per-epoch metrics in `history.history`.
history = model.fit(x=train_generator, epochs=10, validation_data=test_generator)


Epoch 1/10


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 1s/step - accuracy: 0.5703 - loss: 0.7064 - val_accuracy: 0.6219 - val_loss: 0.6334
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 997ms/step - accuracy: 0.6693 - loss: 0.6066 - val_accuracy: 0.6318 - val_loss: 0.6238
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1s/step - accuracy: 0.6712 - loss: 0.6210 - val_accuracy: 0.6070 - val_loss: 0.6468
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 963ms/step - accuracy: 0.7215 - loss: 0.5619 - val_accuracy: 0.6418 - val_loss: 0.6226
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 959ms/step - accuracy: 0.6785 - loss: 0.5462 - val_accuracy: 0.6716 - val_loss: 0.6109
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 979ms/step - accuracy: 0.7532 - loss: 0.5071 - val_accuracy: 0.6517 - val_loss: 0.6105
Epoch 7/10
[1m40/40[0m [32m━━━━━━━━━

In [9]:
import numpy as np

# Median of the per-epoch training accuracy recorded by the baseline fit
# (`history`).
train_accuracy = history.history['accuracy']
median_train_accuracy = np.median(train_accuracy)

print(f"Median training accuracy: {median_train_accuracy}")


Median training accuracy: 0.7193749845027924


In [10]:
# Standard deviation of the per-epoch training loss recorded by the baseline
# fit (`history`). np.std uses ddof=0 by default, i.e. the population form.
train_loss = history.history['loss']
std_train_loss = np.std(train_loss)

print(f"Standard deviation of training loss: {std_train_loss}")


Standard deviation of training loss: 0.06906639938262656


In [11]:
# Augmentation settings for the training images: the same 1/255 rescaling as
# before, plus random rotation, shifts, zoom and horizontal flips.
augmentation_options = {
    'rescale': 1./255,
    'rotation_range': 50,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
augmented_datagen = ImageDataGenerator(**augmentation_options)

# Reads the same training directory as the baseline generator, now through
# the augmenting pipeline.
augmented_train_generator = augmented_datagen.flow_from_directory(
    'data/train',
    class_mode='binary',
    batch_size=20,
    target_size=(200, 200),
    shuffle=True,
)


Found 800 images belonging to 2 classes.


In [12]:
# Second run: 10 more epochs on the augmented training images. This calls
# fit() on the same `model` object as the baseline run (the model is not
# rebuilt here), and still evaluates on the non-augmented test generator.
history_aug = model.fit(x=augmented_train_generator, epochs=10, validation_data=test_generator)


Epoch 1/10


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - accuracy: 0.6462 - loss: 0.6379 - val_accuracy: 0.7114 - val_loss: 0.5738
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1s/step - accuracy: 0.6462 - loss: 0.6309 - val_accuracy: 0.6965 - val_loss: 0.5859
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.6977 - loss: 0.5779 - val_accuracy: 0.6866 - val_loss: 0.5695
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - accuracy: 0.6537 - loss: 0.6351 - val_accuracy: 0.6816 - val_loss: 0.5547
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.6974 - loss: 0.5744 - val_accuracy: 0.6965 - val_loss: 0.5523
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 1s/step - accuracy: 0.6839 - loss: 0.5845 - val_accuracy: 0.7065 - val_loss: 0.5604
Epoch 7/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━

In [13]:
# Mean of the per-epoch test (validation) loss recorded by the augmented fit
# (`history_aug`).
test_loss = history_aug.history['val_loss']
mean_test_loss = np.mean(test_loss)

print(f"Mean test loss: {mean_test_loss}")


Mean test loss: 0.563255661725998


In [14]:
# Per-epoch test (validation) accuracy recorded by the augmented fit
# (`history_aug`).
test_accuracy = history_aug.history['val_accuracy']

# Average over the final five epochs only.
last_five_test_accuracy = test_accuracy[-5:]
average_last_5_test_accuracy = np.mean(last_five_test_accuracy)
print(f"Average test accuracy for the last 5 epochs: {average_last_5_test_accuracy}")


Average test accuracy for the last 5 epochs: 0.7134328484535217
