SET UP

In [None]:
# 1. IMPORT THƯ VIỆN CẦN THIẾT
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import ResNet50, MobileNetV2
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# 2. DATA EXPLORATION
# Root folder of the dataset; every sub-directory is treated as one class.
base_dir = r'F:\tailieu\HỌC_MÁY\datasetriceleaf'

# Keep only sub-directories. A stray file in base_dir (e.g. Thumbs.db, .DS_Store)
# would otherwise be listed as a class and make the per-class os.listdir() below
# raise NotADirectoryError. Sorting gives a deterministic, alphanumeric order
# instead of the arbitrary order returned by os.listdir().
classes = sorted(
    entry
    for entry in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, entry))
)
print("Các lớp bệnh:", classes)

# Number of directory entries found in each class folder.
class_counts = {
    cls: len(os.listdir(os.path.join(base_dir, cls)))
    for cls in classes
}

# Bar chart of the class distribution
plt.figure(figsize=(10, 5))
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()))
plt.title("Phân bố ảnh theo từng lớp")
plt.xlabel("Lớp bệnh")
plt.ylabel("Số lượng ảnh")
plt.xticks(rotation=45)
plt.show()

In [None]:
# 3. PREPROCESSING AND NORMALISATION
img_size = (224, 224)
batch_size = 32

# A single ImageDataGenerator serves both subsets: it applies the ResNet50
# preprocess_input function to every image and holds back a 0.2 fraction of
# the data as the validation split.
datagen = ImageDataGenerator(
    validation_split=0.2,
    preprocessing_function=resnet_preprocess,
)

# The training and validation iterators differ only in `subset`, so both are
# created (training first, then validation) from one parametrised call.
train_generator, val_generator = (
    datagen.flow_from_directory(
        base_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset=subset_name,
    )
    for subset_name in ('training', 'validation')
)

In [None]:
# 4. BASELINE CNN
# Two convolution + max-pooling stages followed by a small dense classifier
# whose softmax output has one unit per class found by train_generator.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(train_generator.num_classes, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the baseline model for 10 epochs, validating on val_generator
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
)

In [None]:
# 5. PRETRAINED MODEL: RESNET50
# ImageNet-weighted ResNet50 without its classification top; it is frozen so
# that only the layers stacked on top of it are trainable.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
base_model.trainable = False

# Classification head placed on top of the frozen backbone.
model_resnet = Sequential()
model_resnet.add(base_model)
model_resnet.add(Flatten())
model_resnet.add(Dense(128, activation='relu'))
model_resnet.add(Dropout(0.5))
model_resnet.add(Dense(train_generator.num_classes, activation='softmax'))

model_resnet.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
resnet_history = model_resnet.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
)

In [None]:
# 6. MODEL EVALUATION
# Predictions and ground-truth labels are gathered batch by batch so that every
# prediction is paired with the label of the image it was computed from.
# Comparing predict(val_generator) against val_generator.classes is only valid
# for an unshuffled iterator: val_generator is created without shuffle=False
# (flow_from_directory shuffles by default), so its batches arrive in shuffled
# order while .classes stays in file order and the two arrays do not line up.
val_generator.reset()
pred_batches = []
true_batches = []
for batch_idx in range(len(val_generator)):
    batch_images, batch_labels = val_generator[batch_idx]
    pred_batches.append(model_resnet.predict(batch_images, verbose=0))
    # class_mode='categorical' yields one-hot rows; convert to class indices
    true_batches.append(np.argmax(batch_labels, axis=1))

pred = np.concatenate(pred_batches)
y_pred = np.argmax(pred, axis=1)
y_true = np.concatenate(true_batches)

# Class names ordered by their integer index. Passing explicit `labels` keeps
# names and rows/columns aligned even if a class has no validation sample.
class_names = [
    name
    for name, _ in sorted(val_generator.class_indices.items(), key=lambda item: item[1])
]
label_ids = list(range(len(class_names)))

print("Classification Report:")
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

# Confusion matrix heatmap
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Dự đoán')
plt.ylabel('Thực tế')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# 7. ACCURACY AND LOSS CURVES
# One figure per metric, each showing the training and validation series
# recorded in resnet_history for every epoch.
curve_specs = (
    ('accuracy', 'Train Acc', 'val_accuracy', 'Val Acc', "Accuracy theo từng epoch"),
    ('loss', 'Train Loss', 'val_loss', 'Val Loss', "Loss theo từng epoch"),
)
for train_key, train_label, val_key, val_label, figure_title in curve_specs:
    plt.plot(resnet_history.history[train_key], label=train_label)
    plt.plot(resnet_history.history[val_key], label=val_label)
    plt.legend()
    plt.title(figure_title)
    plt.show()