In [21]:
import zipfile
import os
import numpy as np
from sklearn.decomposition import PCA
from keras.preprocessing.image import load_img, img_to_array
from keras.models import load_model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score


In [22]:
# Unzip the archive
local_zip = 'D:/Code/python/mat3533/practice08/data/cats_and_dogs_filtered.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall()
zip_ref.close()

In [23]:
base_dir = 'D:/Code/python/mat3533/practice08/exercise/ex01/cats_and_dogs_filtered'
# Change the base_dir to where you put dataset
print("Contents of base directory:")
print(os.listdir(base_dir))
print("\nContents of train directory:")
print(os.listdir(f'{base_dir}\\train'))
print("\nContents of validation directory:")
print(os.listdir(f'{base_dir}\\validation'))

Contents of base directory:
['train', 'validation', 'vectorize.py']

Contents of train directory:
['cats', 'dogs']

Contents of validation directory:
['cats', 'dogs']


In [24]:
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
# Directory with training cat/dog pictures
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
# Directory with validation cat/dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
print("\nContents of train directory:")
print(os.listdir(f'{base_dir}\\train'))
print("\nContents of validation directory:")
print(os.listdir(f'{base_dir}\\validation'))


Contents of train directory:
['cats', 'dogs']

Contents of validation directory:
['cats', 'dogs']


In [27]:
train_cat_fnames = os.listdir( train_cats_dir )
train_dog_fnames = os.listdir( train_dogs_dir )
print(train_cat_fnames[:10])
print(train_dog_fnames[:10])
print('total training cat images :', len(os.listdir( train_cats_dir ) ))
print('total training dog images :', len(os.listdir( train_dogs_dir ) ))
print('total validation cat images :', len(os.listdir( validation_cats_dir ) ))
print('total validation dog images :', len(os.listdir( validation_dogs_dir ) ))

['cat.0.jpg', 'cat.0.npy', 'cat.1.jpg', 'cat.1.npy', 'cat.10.jpg', 'cat.10.npy', 'cat.100.jpg', 'cat.100.npy', 'cat.101.jpg', 'cat.101.npy']
['dog.0.jpg', 'dog.0.npy', 'dog.1.jpg', 'dog.1.npy', 'dog.10.jpg', 'dog.10.npy', 'dog.100.jpg', 'dog.100.npy', 'dog.101.jpg', 'dog.101.npy']
total training cat images : 2000
total training dog images : 2000
total validation cat images : 1000
total validation dog images : 1000


In [28]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# All images will be rescaled by 1./255.
train_datagen = ImageDataGenerator( rescale = 1.0/255. )
test_datagen = ImageDataGenerator( rescale = 1.0/255. )
# --------------------
# Flow training images in batches of 20 using train_datagen generator
# --------------------
train_generator = train_datagen.flow_from_directory(train_dir,
batch_size=20,
class_mode='binary',
target_size=(150, 150))

# --------------------
# Flow validation images in batches of 20 using test_datagen generator
# --------------------
validation_generator = test_datagen.flow_from_directory(validation_dir,
                                                        batch_size=20,
                                                        class_mode = 'binary',
                                                        target_size = (150, 150))

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [49]:
train_images_reshaped = []
for i in range(len(train_generator)):
    batch_images = train_generator[i][0]  # Lấy batch ảnh
    batch_images_reshaped = batch_images.reshape((-1, 150 * 150 * 3))  # Reshape từng ảnh trong batch
    train_images_reshaped.append(batch_images_reshaped)

# Gộp tất cả các ảnh thành một array
train_images_reshaped = np.concatenate(train_images_reshaped, axis=0)

print("Reshaped train images shape:", train_images_reshaped.shape)

# Chuyển tất cả các ảnh từ validation_generator
validation_images_reshaped = []
for i in range(len(validation_generator)):
    batch_images = validation_generator[i][0]  # Lấy batch ảnh
    batch_images_reshaped = batch_images.reshape((-1, 150 * 150 * 3))  # Reshape từng ảnh trong batch
    validation_images_reshaped.append(batch_images_reshaped)

# Gộp tất cả các ảnh thành một array
validation_images_reshaped = np.concatenate(validation_images_reshaped, axis=0)

print("Reshaped validation images shape:", validation_images_reshaped.shape)


Reshaped train images shape: (2000, 67500)
Reshaped validation images shape: (1000, 67500)


In [50]:
# Khởi tạo PCA với số chiều mong muốn
pca = PCA(n_components=225)

# Fit PCA cho dữ liệu huấn luyện
pca.fit(train_images_reshaped)

# Áp dụng PCA cho dữ liệu huấn luyện và dữ liệu validation
train_images_pca = pca.transform(train_images_reshaped)
validation_images_pca = pca.transform(validation_images_reshaped)

print("Shape of train images after PCA:", train_images_pca.shape)
print("Shape of validation images after PCA:", validation_images_pca.shape)


Shape of train images after PCA: (2000, 225)
Shape of validation images after PCA: (1000, 225)


In [51]:
train_labels = train_generator.labels
val_labels = validation_generator.labels

In [53]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

model = Sequential()

model.add(Flatten(input_shape = (225,)))
model.add(Dense(128,activation = 'relu'))
model.add(Dense(64,activation = 'relu'))
model.add(Dense(32,activation = 'relu'))
model.add(Dense(10,activation = 'softmax')) 

model.compile(loss = 'sparse_categorical_crossentropy', 
              optimizer = 'Adam', 
              metrics = ['accuracy'])

model.fit(train_images_pca,train_labels,epochs= 10 , validation_split = .2)

Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.4969 - loss: 1.6418 - val_accuracy: 0.0725 - val_loss: 1.8923
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7245 - loss: 0.5844 - val_accuracy: 0.3225 - val_loss: 1.1473
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8417 - loss: 0.4207 - val_accuracy: 0.3450 - val_loss: 1.2352
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9109 - loss: 0.3044 - val_accuracy: 0.3625 - val_loss: 1.2951
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9580 - loss: 0.2182 - val_accuracy: 0.5175 - val_loss: 1.0348
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9665 - loss: 0.1628 - val_accuracy: 0.2550 - val_loss: 2.0962
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17fac5ad390>

In [56]:
y_pred = model.predict(validation_images_pca)
y_pred = y_pred.argmax(axis = 1)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [57]:
accuracy = accuracy_score(val_labels, y_pred)
print("Accuracy:", accuracy)

precision = precision_score(val_labels, y_pred, average='macro')
print("Precision:", precision)

recall = recall_score(val_labels, y_pred, average='macro')
print("Recall:", recall)

Accuracy: 0.501
Precision: 0.5012539184952978
Recall: 0.501
