In [2]:
import os
import shutil

# Map each split ("train"/"test") and class label to the raw source folders
# (paths relative to the current working directory) whose files belong to it.
source_dirs = {
    "train": {
        "blast": ["_LeafBlast", "Leaf Blast", "leaf_blast"],
        "blight": ["Bacterial leaf blight", "Bacterial Leaf Blight2", "Bacterialblight", "Bacterialblight1", "bacterial_leaf_blight"],
        "brown_spot": ["_BrownSpot", "Brown spot", "Brown Spot2", "Brownspot", "Brownspot1", "brown_spot"],
        "healthy": ["_Healthy", "Healthy Rice Leaf", "healthy"]
    },
    "test": {
        "blast": ["test/leaf_blast"],
        "blight": ["test/bacterial_leaf_blight"],
        "brown_spot": ["test/brown_spot"],
        "healthy": ["test/healthy"]
    }
}

target_base_dir = "dataset"

# Create one target directory per (split, class) pair.
for dataset_type, categories in source_dirs.items():
    for category in categories:
        os.makedirs(os.path.join(target_base_dir, dataset_type, category), exist_ok=True)

# Move files into the target directories.
# NOTE: the loop variable is deliberately not called `dir`; that name would
# shadow the built-in `dir()` for every later cell of the notebook.
for dataset_type, categories in source_dirs.items():
    for category, source_folders in categories.items():
        target_dir = os.path.join(target_base_dir, dataset_type, category)
        for source_folder in source_folders:
            # isdir rather than exists: a regular file that happens to carry a
            # source-folder name would make os.listdir raise NotADirectoryError.
            if not os.path.isdir(source_folder):
                continue
            for file_name in os.listdir(source_folder):
                file_path = os.path.join(source_folder, file_name)
                target_file_path = os.path.join(target_dir, file_name)
                if not os.path.isfile(file_path):
                    continue
                if os.path.exists(target_file_path):
                    # Same file name already moved from another source folder:
                    # keep the first one and leave this one where it is.
                    print(f"Duplicate file found and skipped: {file_path}")
                else:
                    shutil.move(file_path, target_file_path)

# Remove source folders that are now empty (folders still holding skipped
# duplicates or sub-directories are left untouched).
for categories in source_dirs.values():
    for source_folders in categories.values():
        for source_folder in source_folders:
            if os.path.isdir(source_folder) and not os.listdir(source_folder):
                os.rmdir(source_folder)

print("Dataset organized successfully.")

Duplicate file found and skipped: Bacterialblight1/BACTERAILBLIGHT4_132.JPG
Duplicate file found and skipped: Bacterialblight1/BACTERAILBLIGHT4_126.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT1_252.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT1_246.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT1_093.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT2_150.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERAILBLIGHT3_199.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT1_087.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT2_144.JPG
Duplicate file found and skipped: Bacterialblight1/BACTERAILBLIGHT5_123.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT2_178.jpg
Duplicate file found and skipped: Bacterialblight1/BACTERAILBLIGHT5_137.JPG
Duplicate file found and skipped: Bacterialblight1/BACTERIALBLIGHT_131.jpg
Duplicate fil

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import json  # Để lưu lịch sử huấn luyện

# Model factory
def create_model(num_classes=4, input_shape=(224, 224, 3)):
    """Build a VGG16-based classifier whose convolutional base is frozen.

    Args:
        num_classes: Number of units in the final softmax layer. Defaults to
            4, the value that was previously hard-coded.
        input_shape: Shape passed to VGG16 as ``input_shape``. Defaults to
            ``(224, 224, 3)``, the value that was previously hard-coded.

    Returns:
        An uncompiled ``Model`` mapping the VGG16 input to a
        ``num_classes``-way softmax output.
    """
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in base_model.layers:
        layer.trainable = False  # Freeze the pre-trained layers
    x = Flatten()(base_model.output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

# Data preparation
def prepare_data():
    """Create the training, validation and test image iterators.

    Training images are read from 'dataset/train' with augmentation and a
    0.2 validation split; validation images come from the held-out subset
    of the same directory; test images are read from 'dataset/test'.

    Returns:
        Tuple ``(train_generator, validation_generator, test_generator)``.
    """
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2,
        validation_split=0.2
    )
    # Validation images are only rescaled: drawing them from the augmenting
    # generator would randomly distort them and make val_accuracy (which
    # drives checkpointing and early stopping) noisy. The same
    # validation_split is used so the 'validation' subset is selected with
    # the same fraction as the 'training' subset above.
    validation_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        'dataset/train',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        subset='training'
    )
    validation_generator = validation_datagen.flow_from_directory(
        'dataset/train',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        subset='validation'
    )
    # shuffle=False keeps the batch order aligned with `test_generator.classes`,
    # which is what downstream reports compare the predictions against.
    test_generator = test_datagen.flow_from_directory(
        'dataset/test',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )
    return train_generator, validation_generator, test_generator

# Model training
def train_model():
    """Build, train and evaluate the classifier.

    Compiles the model from ``create_model()`` with Adam and categorical
    cross-entropy, trains it for up to 25 epochs on the generators from
    ``prepare_data()``, writes the best checkpoint to 'best_model.h5', dumps
    the training history to 'training_history.json', and prints the test
    accuracy.

    Returns:
        Tuple ``(model, history, test_generator)``.
    """
    model = create_model()
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    train_generator, validation_generator, test_generator = prepare_data()

    # Callbacks: keep the best model on disk and stop once val_accuracy stalls
    checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max')
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

    # Train the model.
    # steps_per_epoch / validation_steps are intentionally not passed: the
    # previous `samples // 32` duplicated the generators' batch size, dropped
    # the final partial batch, and evaluated to 0 for splits smaller than one
    # batch. The iterators report their own length, which fit() uses.
    print("Bắt đầu quá trình huấn luyện...")
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=25,
        callbacks=[checkpoint, early_stopping]
    )

    # Save the training history. Values are coerced to built-in floats so
    # that NumPy scalar metrics cannot make json.dump raise TypeError.
    serializable_history = {
        metric: [float(value) for value in values]
        for metric, values in history.history.items()
    }
    with open('training_history.json', 'w') as f:
        json.dump(serializable_history, f)

    test_loss, test_accuracy = model.evaluate(test_generator)
    print(f"Test accuracy: {test_accuracy:.2%}")
    return model, history, test_generator

# Model evaluation
def evaluate_model(model, test_generator):
    """Print a classification report and confusion matrix for the test set.

    Labels and predictions are gathered batch by batch from the same
    iterator index, so each prediction is compared with the label of the
    image it was computed from even when the iterator shuffles its samples.
    (Comparing predictions with ``test_generator.classes`` is only valid for
    an unshuffled iterator.)

    Args:
        model: Object exposing ``predict_on_batch(images)`` that returns
            per-class scores with one row per image.
        test_generator: Indexable batch iterator yielding ``(images, labels)``
            pairs and exposing ``reset()``, ``__len__`` and ``class_indices``.
    """
    test_generator.reset()

    true_batches = []
    predicted_batches = []
    for batch_index in range(len(test_generator)):
        images, labels = test_generator[batch_index]
        labels = np.asarray(labels)
        # One-hot labels (2-D) are reduced to class ids; 1-D labels are
        # taken to be class ids already.
        true_batches.append(np.argmax(labels, axis=1) if labels.ndim > 1 else labels.astype(int))
        predicted_batches.append(np.argmax(model.predict_on_batch(images), axis=1))
    y_true = np.concatenate(true_batches)
    y_pred = np.concatenate(predicted_batches)

    # Class names ordered by their integer id; passing the ids explicitly
    # keeps the report well-formed when a class is absent from the test set.
    class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
    label_ids = [test_generator.class_indices[name] for name in class_names]

    print('Classification Report')
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))
    print('Confusion Matrix')
    print(confusion_matrix(y_true, y_pred, labels=label_ids))

# Predict the disease class of a single image
def predict_disease(model, image_path):
    """Classify one image file and report the winning class and its score.

    The image is loaded at 224x224, scaled to [0, 1] and passed to
    ``model.predict`` as a batch of one.

    Args:
        model: Object exposing ``predict(batch)``.
        image_path: Path of the image file to classify.

    Returns:
        ``(class_name, confidence)`` on success. If anything raises, the
        error is printed and ``(None, None)`` is returned.
    """
    # NOTE(review): order is assumed to match the model's output indices —
    # confirm against the training generator's class_indices.
    class_labels = ('Blast', 'Blight', 'Brown Spot', 'Healthy')
    try:
        keras_image = tf.keras.preprocessing.image
        loaded = keras_image.load_img(image_path, target_size=(224, 224))
        batch = np.expand_dims(keras_image.img_to_array(loaded), 0)
        batch /= 255.
        scores = model.predict(batch)
        return class_labels[np.argmax(scores)], np.max(scores)
    except Exception as e:
        print(f"Error during prediction: {e}")
        return None, None

if __name__ == "__main__":
    # End-to-end run: train, report, persist, then classify one sample image.
    # Any exception raised along the way is printed instead of propagated.
    try:
        print("Khởi chạy chương trình...")
        model, history, test_generator = train_model()
        evaluate_model(model, test_generator)

        # Persist the trained model
        model.save('rice_disease_model.h5')
        print("Mô hình đã được lưu thành công dưới dạng 'rice_disease_model.h5'.")

        # Sample prediction
        sample_image = 'sample_image.jpg'  # Path to the sample image
        predicted_class, confidence = predict_disease(model, sample_image)
        if not predicted_class:
            print("Không thể dự đoán ảnh mẫu.")
        else:
            print(f"Predicted Class: {predicted_class}, Confidence: {confidence:.2%}")
    except Exception as e:
        print(f"Lỗi xảy ra: {e}")

In [1]:
import os

dataset_dir = '/Users/quanglekim/Downloads/Zipped/dataset/train'

# Report how many directory entries each class sub-folder contains.
for category in os.listdir(dataset_dir):
    category_path = os.path.join(dataset_dir, category)
    if not os.path.isdir(category_path):
        continue  # ignore loose files next to the class folders
    entry_count = len(os.listdir(category_path))
    print(f"{category}: {entry_count} images")

healthy: 2573 images
brown_spot: 3729 images
blast: 2527 images
blight: 2841 images


In [1]:
import os
import random
import shutil

dataset_dir = '/Users/quanglekim/Downloads/Zipped/dataset/train'
output_dir = '/Users/quanglekim/Downloads/Zipped/dataset/balanced_train'

# Fixed seed: re-running the cell selects the same images, so a second run
# overwrites the same files instead of adding a different random subset on
# top of the previous one (which would unbalance the output again).
SEED = 42

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Collect the candidate files of every class once. Only regular, non-hidden
# files are kept: sub-directories cannot be copied with shutil.copy and
# dot-files (e.g. OS metadata) are not images. Names are sorted so the seeded
# shuffle below does not depend on the directory listing order.
images_by_category = {}
for category in os.listdir(dataset_dir):
    category_path = os.path.join(dataset_dir, category)
    if os.path.isdir(category_path):
        images_by_category[category] = sorted(
            name for name in os.listdir(category_path)
            if not name.startswith('.') and os.path.isfile(os.path.join(category_path, name))
        )

if not images_by_category:
    raise ValueError(f"No class directories found in {dataset_dir}")

# Get the minimum number of images in any class
min_images = min(len(images) for images in images_by_category.values())

for category, images in images_by_category.items():
    category_path = os.path.join(dataset_dir, category)
    # A fresh generator per class keeps each class's selection independent of
    # the order in which the classes are visited.
    random.Random(SEED).shuffle(images)
    selected_images = images[:min_images]

    # Create category directory in the output directory
    output_category_path = os.path.join(output_dir, category)
    os.makedirs(output_category_path, exist_ok=True)

    for image in selected_images:
        src = os.path.join(category_path, image)
        dst = os.path.join(output_category_path, image)
        shutil.copy(src, dst)

    print(f"{category}: {len(selected_images)} images copied to balanced dataset")

healthy: 2527 images copied to balanced dataset
brown_spot: 2527 images copied to balanced dataset
blast: 2527 images copied to balanced dataset
blight: 2527 images copied to balanced dataset


In [3]:
import os

def clean_filenames(directory, extensions=('.jpg',)):
    """Strip stray whitespace from image file names under ``directory``.

    For every file in the tree whose extension matches ``extensions``
    (case-insensitively), whitespace at the start or end of the name and
    any run of whitespace directly before the extension is removed, e.g.
    ``'shape 540 .jpg' -> 'shape 540.jpg'``. The extension keeps its
    original letter case.

    A file is never renamed onto an existing path: ``os.rename`` can
    silently replace the destination, which would destroy an image, so
    such files are reported and left untouched.

    Args:
        directory: Root of the directory tree to walk.
        extensions: Extensions (with leading dot) to process. Defaults to
            ``('.jpg',)``, which covers both '.jpg' and '.JPG'.

    Returns:
        None. Each rename and the final total are printed.
    """
    wanted = tuple(ext.lower() for ext in extensions)
    count = 0
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Strip first so a name with trailing whitespace after the
            # extension is still recognised as an image.
            stem, ext = os.path.splitext(filename.strip())
            if ext.lower() not in wanted:
                continue
            new_filename = stem.rstrip() + ext
            if new_filename == filename:
                continue
            old_path = os.path.join(root, filename)
            new_path = os.path.join(root, new_filename)
            if os.path.exists(new_path):
                print(f"Skipped (target exists): {filename} -> {new_filename}")
                continue
            os.rename(old_path, new_path)
            print(f"Renamed: {filename} -> {new_filename}")
            count += 1
    print(f"\nTotal files renamed: {count}")

# Clean the file names under the relative path 'dataset' (resolved against the current working directory)
clean_filenames('dataset')

Renamed: shape 540 .jpg -> shape 540.jpg
Renamed: shape 794 .jpg -> shape 794.jpg
Renamed: shape 1308 .jpg -> shape 1308.jpg
Renamed: shape 48 .jpg -> shape 48.jpg
Renamed: shape 281 .jpg -> shape 281.jpg
Renamed: shape 1164 .jpg -> shape 1164.jpg
Renamed: shape 901 .jpg -> shape 901.jpg
Renamed: shape 452 .jpg -> shape 452.jpg
Renamed: shape 517 .jpg -> shape 517.jpg
Renamed: shape 147 .jpg -> shape 147.jpg
Renamed: shape 1021 .jpg -> shape 1021.jpg
Renamed: shape 844 .jpg -> shape 844.jpg
Renamed: shape 1471 .jpg -> shape 1471.jpg
Renamed: shape 628 .jpg -> shape 628.jpg
Renamed: shape 782 .jpg -> shape 782.jpg
Renamed: shape 278 .jpg -> shape 278.jpg
Renamed: shape 297 .jpg -> shape 297.jpg
Renamed: shape 444 .jpg -> shape 444.jpg
Renamed: shape 917 .jpg -> shape 917.jpg
Renamed: shape 1172 .jpg -> shape 1172.jpg
Renamed: shape 1488 .jpg -> shape 1488.jpg
Renamed: shape 1467 .jpg -> shape 1467.jpg
Renamed: shape 852 .jpg -> shape 852.jpg
Renamed: shape 1037 .jpg -> shape 1037.jpg
Re

In [7]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Restore the trained model from disk
model = load_model('improved_rice_disease_model.keras')

# Test iterator: rescaling only, and unshuffled so that the prediction order
# matches `test_generator.classes`
test_datagen = ImageDataGenerator(rescale=1./255)
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
)
test_generator = test_datagen.flow_from_directory('dataset/test', **flow_options)

# Overall loss / accuracy
print("Evaluating the model...")
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Per-class metrics: predicted class = index of the highest score per image
y_true = test_generator.classes
class_scores = model.predict(test_generator)
y_pred = np.argmax(class_scores, axis=1)
class_names = list(test_generator.class_indices.keys())
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=class_names))

# Confusion matrix (rows: true class, columns: predicted class)
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))

Found 1509 images belonging to 4 classes.


  saveable.load_own_variables(weights_store.get(inner_path))
  self._warn_if_super_not_called()


Evaluating the model...


I0000 00:00:1733562988.021954 4854212 service.cc:148] XLA service 0x32e1ee050 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1733562988.021985 4854212 service.cc:156]   StreamExecutor device (0): Host, Default Version
2024-12-07 16:16:28.047303: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1733562988.248348 4854212 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 3s/step - accuracy: 0.7047 - loss: 1.7025
Test Accuracy: 77.07%
[1m 5/48[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2:11[0m 3s/step

KeyboardInterrupt: 