In [1]:
import zipfile
import os
import numpy as np
from sklearn.decomposition import PCA
from keras.preprocessing.image import load_img, img_to_array
from keras.models import load_model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score


In [2]:
# Unzip the archive
local_zip = 'D:/Code/python/mat3533/practice08/data/cats_and_dogs_filtered.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall()
zip_ref.close()

In [3]:
base_dir = 'D:/Code/python/mat3533/practice08/exercise/ex01/cats_and_dogs_filtered'
# Change the base_dir to where you put dataset
print("Contents of base directory:")
print(os.listdir(base_dir))
print("\nContents of train directory:")
print(os.listdir(f'{base_dir}\\train'))
print("\nContents of validation directory:")
print(os.listdir(f'{base_dir}\\validation'))

Contents of base directory:
['train', 'validation', 'vectorize.py']

Contents of train directory:
['cats', 'dogs']

Contents of validation directory:
['cats', 'dogs']


In [4]:
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
# Directory with training cat/dog pictures
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
# Directory with validation cat/dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
print("\nContents of train directory:")
print(os.listdir(f'{base_dir}\\train'))
print("\nContents of validation directory:")
print(os.listdir(f'{base_dir}\\validation'))


Contents of train directory:
['cats', 'dogs']

Contents of validation directory:
['cats', 'dogs']


In [5]:
train_cat_fnames = os.listdir( train_cats_dir )
train_dog_fnames = os.listdir( train_dogs_dir )
print(train_cat_fnames[:10])
print(train_dog_fnames[:10])
print('total training cat images :', len(os.listdir( train_cats_dir ) ))
print('total training dog images :', len(os.listdir( train_dogs_dir ) ))
print('total validation cat images :', len(os.listdir( validation_cats_dir ) ))
print('total validation dog images :', len(os.listdir( validation_dogs_dir ) ))

['cat.0.jpg', 'cat.0.npy', 'cat.1.jpg', 'cat.1.npy', 'cat.10.jpg', 'cat.10.npy', 'cat.100.jpg', 'cat.100.npy', 'cat.101.jpg', 'cat.101.npy']
['dog.0.jpg', 'dog.0.npy', 'dog.1.jpg', 'dog.1.npy', 'dog.10.jpg', 'dog.10.npy', 'dog.100.jpg', 'dog.100.npy', 'dog.101.jpg', 'dog.101.npy']
total training cat images : 2000
total training dog images : 2000
total validation cat images : 1000
total validation dog images : 1000


In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# All images will be rescaled by 1./255.
train_datagen = ImageDataGenerator( rescale = 1.0/255. )
test_datagen = ImageDataGenerator( rescale = 1.0/255. )
# --------------------
# Flow training images in batches of 20 using train_datagen generator
# --------------------
train_generator = train_datagen.flow_from_directory(train_dir,
batch_size=20,
class_mode='binary',
target_size=(150, 150))

# --------------------
# Flow validation images in batches of 20 using test_datagen generator
# --------------------
validation_generator = test_datagen.flow_from_directory(validation_dir,
                                                        batch_size=20,
                                                        class_mode = 'binary',
                                                        target_size = (150, 150))

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [7]:
train_images_reshaped = []
for i in range(len(train_generator)):
    batch_images = train_generator[i][0]  # Lấy batch ảnh
    batch_images_reshaped = batch_images.reshape((-1, 150 * 150 * 3))  # Reshape từng ảnh trong batch
    train_images_reshaped.append(batch_images_reshaped)

# Gộp tất cả các ảnh thành một array
train_images_reshaped = np.concatenate(train_images_reshaped, axis=0)

print("Reshaped train images shape:", train_images_reshaped.shape)

# Chuyển tất cả các ảnh từ validation_generator
validation_images_reshaped = []
for i in range(len(validation_generator)):
    batch_images = validation_generator[i][0]  # Lấy batch ảnh
    batch_images_reshaped = batch_images.reshape((-1, 150 * 150 * 3))  # Reshape từng ảnh trong batch
    validation_images_reshaped.append(batch_images_reshaped)

# Gộp tất cả các ảnh thành một array
validation_images_reshaped = np.concatenate(validation_images_reshaped, axis=0)

print("Reshaped validation images shape:", validation_images_reshaped.shape)


Reshaped train images shape: (2000, 67500)
Reshaped validation images shape: (1000, 67500)


Giảm số chiều dữ liệu về 225 chiều

In [29]:
pca = PCA(n_components=225)

pca.fit(train_images_reshaped)

train_images_pca = pca.transform(train_images_reshaped)
validation_images_pca = pca.transform(validation_images_reshaped)

print("Shape of train images after PCA:", train_images_pca.shape)
print("Shape of validation images after PCA:", validation_images_pca.shape)


Shape of train images after PCA: (2000, 225)
Shape of validation images after PCA: (1000, 225)


In [9]:
train_labels = train_generator.labels
val_labels = validation_generator.labels

Huấn luyện mô hình ANN

In [10]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

model = Sequential()

model.add(Flatten(input_shape = (225,)))
model.add(Dense(128,activation = 'relu'))
model.add(Dense(64,activation = 'relu'))
model.add(Dense(32,activation = 'relu'))
model.add(Dense(10,activation = 'softmax')) 

model.compile(loss = 'sparse_categorical_crossentropy', 
              optimizer = 'Adam', 
              metrics = ['accuracy'])

model.fit(train_images_pca,train_labels,epochs= 10 , validation_split = .2)

Epoch 1/10


  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5015 - loss: 1.6430 - val_accuracy: 0.2825 - val_loss: 1.1202
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6838 - loss: 0.5939 - val_accuracy: 0.2450 - val_loss: 1.1964
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7994 - loss: 0.4651 - val_accuracy: 0.1550 - val_loss: 1.4480
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8841 - loss: 0.3582 - val_accuracy: 0.2275 - val_loss: 1.4945
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9284 - loss: 0.2609 - val_accuracy: 0.3625 - val_loss: 1.2649
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9701 - loss: 0.1898 - val_accuracy: 0.3025 - val_loss: 1.5665
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2135524ce90>

Chạy dự đoán cho tập dữ liệu test

In [11]:
y_pred = model.predict(validation_images_pca)
y_pred = y_pred.argmax(axis = 1)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


Độ chính xác của mô hình

In [12]:
accuracy = accuracy_score(val_labels, y_pred)
print("Accuracy:", accuracy)

precision = precision_score(val_labels, y_pred, average='macro')
print("Precision:", precision)

recall = recall_score(val_labels, y_pred, average='macro')
print("Recall:", recall)

Accuracy: 0.535
Precision: 0.5453652289777529
Recall: 0.535


Chuẩn bị dữ liệu test mới

In [31]:
from PIL import Image

# Đường dẫn tới thư mục chứa ảnh
directory = 'D:/Code/python/mat3533/practice08/data/test_dog_cat'

# Tạo một danh sách để lưu trữ các tên file ảnh
image_files = []

# Lặp qua các số từ 1 đến 20 để tìm tên ảnh
for i in range(1, 21):
    # Tạo tên file ảnh dựa trên số thứ tự
    filename = f'pic{i}.jpg'
    # Kiểm tra xem file có tồn tại trong thư mục không
    if os.path.exists(os.path.join(directory, filename)):
        # Nếu tồn tại, thêm vào danh sách
        image_files.append(filename)

# Khởi tạo danh sách để lưu trữ dữ liệu hình ảnh
test_new_images = []

# Lặp qua các tên file ảnh đã tìm được
for filename in image_files:
    # Đọc ảnh từ file
    img = Image.open(os.path.join(directory, filename))
    # Resize ảnh về kích thước cố định, ví dụ (100, 100)
    img = img.resize((150, 150))
    # Chuyển ảnh thành mảng numpy
    img_array = np.array(img)
    # Thêm mảng numpy này vào danh sách dữ liệu
    test_new_images.append(img_array)

# Chuyển danh sách dữ liệu thành mảng numpy
test_new_images = np.array(test_new_images)

# Giảm số chiều về 225 chiều
test_new_images = test_new_images.reshape(20, -1)
test_new_images_pca = pca.transform(test_new_images)

# print(test_new_images.shape)
test_new_images_pca.shape

(20, 225)

Chạy dự đoán dữ liệu test mới

In [32]:
y_pred_new = model.predict(test_new_images_pca)
y_pred_new = y_pred_new.argmax(axis = 1)
y_pred_new

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


array([1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
      dtype=int64)