### Bước 1: Cài đặt các thư viện cần thiết

In [None]:
!pip install tensorflow
!pip install split-folders
!pip install albumentations
!pip install tqdm
!pip install opencv-python-headless
!pip install torch torchvision torchaudio

import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import cv2
import datetime
import platform

### Bước 2: Đặt đường dẫn home chứa folder project hiện tại

In [None]:
import platform

# Pick the project root for the current operating system.
# The Linux path lives under /content/drive, i.e. it presumes a Google Colab
# runtime with Google Drive mounted — adjust it for other Linux hosts.
if platform.system() == 'Linux':
    home_dir = '/content/drive/MyDrive/ClassificationofMangoDiseases'
elif platform.system() == 'Windows':
    # Raw string: '\P' and '\C' are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Python versions.
    home_dir = r'D:\Projects\ClassificationofMangoDiseases'
else:
    raise ValueError('Unsupported platform')

print(f'Home directory: {home_dir}')

### Bước 3: Kết nối với Google Drive nếu đang chạy trên Google Colab

In [None]:
# Mount Google Drive when the notebook runs inside Google Colab.
# Linux is only a proxy for "Colab", so the import is guarded: on a plain
# Linux host the google.colab package does not exist and the mount is skipped
# instead of aborting the notebook with an ImportError.
if platform.system() == 'Linux':
    try:
        from google.colab import drive
    except ImportError:
        print('google.colab is not available; skipping Google Drive mount.')
    else:
        drive.mount('/content/drive')

### Bước 4: Đặt đường dẫn tới tập dữ liệu

In [None]:
# Resolve both dataset locations relative to the project root:
#   data_dir   - the selected source dataset (background-removed variant here;
#                swap in 'SenMangoFruitDDS_original' to use the raw images)
#   output_dir - destination of the train/val/test split
data_dir, output_dir = (
    os.path.join(home_dir, relative_path)
    for relative_path in (
        'data/MangoFruitDDS/SenMangoFruitDDS_bgremoved',
        'data/processed',
    )
)

print(
    f'Data directory: {data_dir}',
    f'Output directory: {output_dir}',
    sep='\n',
)

### Bước 5: Tiền xử lý dữ liệu và phân chia tập dữ liệu

Sử dụng thư viện `split-folders` để phân chia tập dữ liệu thành các tập `train`, `val` và `test` theo tỉ lệ 70/20/10.

In [None]:
import shutil

# `splitfolders` is the import name of the `split-folders` package installed
# in the first cell; it was used here without ever being imported.
import splitfolders

# Remove any previous split so stale files do not mix with the new one.
# WARNING: this deletes everything under output_dir.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

# Split each class folder 70% / 20% / 10%; the fixed seed keeps the split
# reproducible between runs.
splitfolders.ratio(data_dir, output=output_dir, seed=42, ratio=(.7, .2, .1), group_prefix=None)

### Bước 6: Hiển thị thông tin thư mục sau khi phân chia

In [None]:
# Print the directory tree created by the split, indenting four spaces per
# nesting level below output_dir; files are listed one level deeper than the
# directory that contains them.
for current_dir, _subdirs, file_names in os.walk(output_dir):
    depth = current_dir.replace(output_dir, '').count(os.sep)
    print(f"{' ' * 4 * depth}{os.path.basename(current_dir)}/")
    file_indent = ' ' * 4 * (depth + 1)
    for file_name in file_names:
        print(f'{file_indent}{file_name}')

### Bước 7: Xác nhận kết quả

Kiểm tra lại thư mục đã được phân chia đúng cách.

In [None]:
import matplotlib.pyplot as plt
import random
from PIL import Image

# Collect the class names from the training split.
# Keras' flow_from_directory assigns label indices to the class
# sub-directories in alphanumeric order, and class_names is later used to
# label those indices in the reports, so the list must be sorted the same way.
# os.listdir() returns entries in arbitrary order and may include stray files
# (e.g. notebook checkpoints), hence the sort and the directory filter.
train_dir = os.path.join(output_dir, 'train')
class_names = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
print(f'Classes: {class_names}')

# Pick a random class
random_class = random.choice(class_names)
print(f'Random class: {random_class}')

# Pick a random image from that class
random_image_path = os.path.join(train_dir, random_class, random.choice(os.listdir(os.path.join(train_dir, random_class))))
print(f'Random image path: {random_image_path}')

# Display the image as a visual sanity check of the split
image = Image.open(random_image_path)
plt.imshow(image)
plt.axis('off')
plt.show()

### Bước 8: Thực hiện tăng cường dữ liệu

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import cv2

# Augmentation pipeline. Each OneOf group applies at most one of its members,
# and the group itself only fires with the probability given by its `p`.
# NOTE(review): some of these transform names are reported as deprecated in
# recent albumentations releases — confirm against the installed version.
noise_ops = [
    A.GaussNoise(),
    A.MultiplicativeNoise(),
]
blur_ops = [
    A.MotionBlur(p=0.2),
    A.MedianBlur(blur_limit=3, p=0.1),
    A.Blur(blur_limit=3, p=0.1),
]
distortion_ops = [
    A.OpticalDistortion(p=0.3),
    A.GridDistortion(p=0.1),
    A.PiecewiseAffine(p=0.3),
]
tone_ops = [
    A.CLAHE(clip_limit=2),
    A.Sharpen(),
    A.Emboss(),
    A.RandomBrightnessContrast(),
]

augmentations = A.Compose([
    # Geometric transforms that keep the full image content
    A.RandomRotate90(),
    A.Flip(),
    A.Transpose(),
    A.OneOf(noise_ops, p=0.2),
    A.OneOf(blur_ops, p=0.2),
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=20, p=0.2),
    A.OneOf(distortion_ops, p=0.2),
    A.OneOf(tone_ops, p=0.3),
    A.HueSaturationValue(p=0.3),
])

# Output root for the augmented copies of the training images
augmented_dir = os.path.join(home_dir, 'data/augmented')
os.makedirs(augmented_dir, exist_ok=True)

# Write one augmented copy ('aug_<original name>') of every training image,
# mirroring the per-class folder layout of the training split.
for class_name in class_names:
    class_dir = os.path.join(train_dir, class_name)
    augmented_class_dir = os.path.join(augmented_dir, class_name)
    os.makedirs(augmented_class_dir, exist_ok=True)
    for img_name in tqdm(os.listdir(class_dir)):
        img_path = os.path.join(class_dir, img_name)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded (non-image or corrupt file); skip it rather than
            # crash in cvtColor below.
            print(f'Skipping unreadable image: {img_path}')
            continue
        # OpenCV loads BGR; the pipeline is applied on RGB and the result is
        # converted back to BGR before writing with OpenCV.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        augmented = augmentations(image=image)
        augmented_image = augmented['image']
        augmented_image = cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)
        augmented_img_name = f'aug_{img_name}'
        cv2.imwrite(os.path.join(augmented_class_dir, augmented_img_name), augmented_image)

### Bước 9: Xây dựng và biên dịch các mô hình

In [None]:
import numpy as np
import tensorflow as tf
import random
import os

# Seed Python, NumPy and TensorFlow RNGs with the same value for reproducibility
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Hyperparameters shared by every model
batch_size, epochs, learning_rate = 32, 10, 0.001

# Input geometry: square RGB images
img_height = img_width = 224
input_shape = (img_height, img_width, 3)

# Shared loss (integer class labels) and a default Adam optimizer
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

def create_cnn_model(input_shape, num_classes):
    """Build the small custom CNN classifier (uncompiled).

    Architecture: three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, then Flatten, Dense(512, ReLU), Dropout(0.5) and
    a softmax Dense layer with ``num_classes`` units.

    Args:
        input_shape: Shape passed to the first convolution as ``input_shape``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    keras_layers = tf.keras.layers

    # The first stage carries the input shape; later stages differ only in
    # their filter count.
    stack = [
        keras_layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        keras_layers.MaxPooling2D((2, 2)),
    ]
    for filters in (64, 128):
        stack.append(keras_layers.Conv2D(filters, (3, 3), activation='relu'))
        stack.append(keras_layers.MaxPooling2D((2, 2)))

    # Classification head
    stack.extend([
        keras_layers.Flatten(),
        keras_layers.Dense(512, activation='relu'),
        keras_layers.Dropout(0.5),
        keras_layers.Dense(num_classes, activation='softmax'),
    ])
    return tf.keras.Sequential(stack)

def create_resnet_model(input_shape, num_classes):
    """Build an uncompiled classifier on a frozen ImageNet ResNet50 backbone.

    The backbone is loaded without its top layers and marked non-trainable;
    a GlobalAveragePooling2D, Dense(512, ReLU) and softmax Dense head is
    stacked on top of it.

    NOTE(review): the data generators in this file only rescale pixels by
    1/255, while ResNet50's ImageNet weights are normally paired with
    ``tf.keras.applications.resnet50.preprocess_input`` — confirm this is
    intended.

    Args:
        input_shape: Input shape handed to ``ResNet50``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    backbone = tf.keras.applications.ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False  # only the new head is trained

    classifier = tf.keras.Sequential()
    classifier.add(backbone)
    classifier.add(tf.keras.layers.GlobalAveragePooling2D())
    classifier.add(tf.keras.layers.Dense(512, activation='relu'))
    classifier.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return classifier

def create_inception_model(input_shape, num_classes):
    """Build an uncompiled classifier on a frozen ImageNet InceptionV3 backbone.

    The backbone is loaded without its top layers and marked non-trainable;
    a GlobalAveragePooling2D, Dense(512, ReLU) and softmax Dense head is
    stacked on top of it.

    NOTE(review): the data generators in this file only rescale pixels by
    1/255, while InceptionV3's ImageNet weights are normally paired with
    ``tf.keras.applications.inception_v3.preprocess_input`` — confirm this
    is intended.

    Args:
        input_shape: Input shape handed to ``InceptionV3``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    backbone = tf.keras.applications.InceptionV3(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False  # only the new head is trained

    classifier = tf.keras.Sequential()
    for layer in (
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ):
        classifier.add(layer)
    return classifier

def create_densenet_model(input_shape, num_classes):
    """Build an uncompiled classifier on a frozen ImageNet DenseNet121 backbone.

    The backbone is loaded without its top layers and marked non-trainable;
    a GlobalAveragePooling2D, Dense(512, ReLU) and softmax Dense head is
    stacked on top of it.

    NOTE(review): the data generators in this file only rescale pixels by
    1/255, while DenseNet121's ImageNet weights are normally paired with
    ``tf.keras.applications.densenet.preprocess_input`` — confirm this is
    intended.

    Args:
        input_shape: Input shape handed to ``DenseNet121``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    backbone = tf.keras.applications.DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False  # only the new head is trained

    pooling = tf.keras.layers.GlobalAveragePooling2D()
    hidden = tf.keras.layers.Dense(512, activation='relu')
    output = tf.keras.layers.Dense(num_classes, activation='softmax')
    return tf.keras.Sequential([backbone, pooling, hidden, output])

def create_transfer_learning_model(input_shape, num_classes):
    """Build an uncompiled classifier on a frozen ImageNet VGG16 backbone.

    The backbone is loaded without its top layers and marked non-trainable;
    a GlobalAveragePooling2D, Dense(512, ReLU) and softmax Dense head is
    stacked on top of it.

    NOTE(review): the data generators in this file only rescale pixels by
    1/255, while VGG16's ImageNet weights are normally paired with
    ``tf.keras.applications.vgg16.preprocess_input`` — confirm this is
    intended.

    Args:
        input_shape: Input shape handed to ``VGG16``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    backbone = tf.keras.applications.VGG16(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False  # only the new head is trained

    head = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ]
    classifier = tf.keras.Sequential()
    classifier.add(backbone)
    for layer in head:
        classifier.add(layer)
    return classifier

# Build one instance of every architecture; the output layer size follows the
# number of class folders found in the training split.
num_classes = len(class_names)
cnn_model = create_cnn_model(input_shape, num_classes)
resnet_model = create_resnet_model(input_shape, num_classes)
inception_model = create_inception_model(input_shape, num_classes)
densenet_model = create_densenet_model(input_shape, num_classes)
transfer_learning_model = create_transfer_learning_model(input_shape, num_classes)

# Compile the models.
# Each model gets its own Adam instance: a Keras optimizer keeps per-variable
# state for the model it is first used with, so one shared instance must not
# be reused across different models.
models = [cnn_model, resnet_model, inception_model, densenet_model, transfer_learning_model]
for model in models:
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss_function,
        metrics=['accuracy'],
    )

### Bước 10: Huấn luyện, lưu và đánh giá mô hình

In [None]:
import datetime
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, precision_recall_curve
from sklearn.preprocessing import label_binarize
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Timestamp shared by every artifact written during this notebook run
date_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

def get_model_save_path(model_name, augmented=False, models_root=None):
    """Create and return the output directory for one model.

    The directory is ``<models_root>/<model name with suffix>/<date_time>``,
    where the suffix is ``'Aug'`` when ``augmented`` is true and ``'Ori'``
    otherwise. The suffix is appended only when ``model_name`` does not
    already end with it, so keys such as ``'CNNCustomOri'`` no longer produce
    ``'CNNCustomOriOri'``.

    Args:
        model_name: Model identifier, with or without the Ori/Aug suffix.
        augmented: Selects the ``'Aug'`` suffix instead of ``'Ori'``.
        models_root: Directory that holds all model folders. When ``None``
            the platform-specific project ``models`` directory is used.

    Returns:
        The path of the (now existing) output directory.

    Raises:
        ValueError: If ``models_root`` is ``None`` and the platform is
            neither Linux nor Windows.
    """
    suffix = 'Aug' if augmented else 'Ori'
    if model_name.endswith(suffix):
        model_name_with_suffix = model_name
    else:
        model_name_with_suffix = f"{model_name}{suffix}"

    if models_root is None:
        if platform.system() == 'Linux':
            models_root = '/content/drive/MyDrive/ClassificationofMangoDiseases/models'
        elif platform.system() == 'Windows':
            models_root = 'D:\\Projects\\ClassificationofMangoDiseases\\models'
        else:
            raise ValueError('Unsupported platform')

    base_path = os.path.join(models_root, model_name_with_suffix, date_time)
    os.makedirs(base_path, exist_ok=True)

    return base_path

def create_optimizer():
    """Return a new Adam optimizer using the module-level ``learning_rate``.

    A new instance is created on every call so each model can be compiled
    with its own optimizer.
    """
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    return adam

def _save_metric_plot(history, metric, title, ylabel, out_path):
    """Plot one training metric (and its validation curve, if recorded) to a file."""
    plt.figure(figsize=(8, 6))
    plt.plot(history.history[metric])
    legend_labels = ['Train']
    validation_key = f'val_{metric}'
    # The validation series only exists when fit() was given validation data
    if validation_key in history.history:
        plt.plot(history.history[validation_key])
        legend_labels.append('Validation')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(legend_labels, loc='upper left')
    plt.savefig(out_path)
    plt.close()


def plot_history(history, base_path):
    """Save the accuracy and loss curves of a training run as PNG files.

    Writes ``accuracy.png`` and ``loss.png`` into ``base_path``. Each curve
    is drawn on its own figure: previously the first figure was closed
    between the two subplots, which left ``accuracy.png`` half empty and put
    the loss plot on an implicitly created default-sized figure.

    Args:
        history: Object with a ``history`` dict containing ``'accuracy'`` and
            ``'loss'`` (and optionally ``'val_accuracy'`` / ``'val_loss'``),
            as returned by ``model.fit``.
        base_path: Existing directory that receives the two images.
    """
    _save_metric_plot(history, 'accuracy', 'Model accuracy', 'Accuracy',
                      os.path.join(base_path, 'accuracy.png'))
    _save_metric_plot(history, 'loss', 'Model loss', 'Loss',
                      os.path.join(base_path, 'loss.png'))

def save_classification_report(y_true, y_pred, class_names, results_path):
    """Write the per-class classification report to ``classification_report.csv``.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        class_names: Display names; entry ``i`` labels class index ``i``
            (assumes labels are the integers ``0 .. len(class_names) - 1``).
        results_path: Output directory, created when missing.
    """
    os.makedirs(results_path, exist_ok=True)
    # Pass the full label set explicitly: otherwise a class that is absent
    # from both y_true and y_pred makes the number of detected labels differ
    # from len(class_names) and the report raises.
    report = classification_report(
        y_true,
        y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
        output_dict=True,
    )
    df = pd.DataFrame(report).transpose()
    df.to_csv(os.path.join(results_path, 'classification_report.csv'), index=True)

def save_confusion_matrix(y_true, y_pred, class_names, results_path):
    """Save the confusion matrix as an annotated heatmap (``confusion_matrix.png``).

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        class_names: Tick labels; entry ``i`` labels class index ``i``
            (assumes labels are the integers ``0 .. len(class_names) - 1``).
        results_path: Output directory, created when missing.
    """
    os.makedirs(results_path, exist_ok=True)
    # Fix the label set so the matrix always has one row/column per class
    # name, even when a class never occurs in y_true or y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.savefig(os.path.join(results_path, 'confusion_matrix.png'))
    plt.close()

def save_roc_curve(y_true, y_score, results_path, num_classes):
    """Save one-vs-rest ROC curves for every class to ``roc_curve.png``.

    Args:
        y_true: True class indices in ``0 .. num_classes - 1``.
        y_score: Array indexed as ``y_score[:, i]`` giving the score of
            class ``i`` for each sample.
        results_path: Output directory, created when missing.
        num_classes: Number of classes / score columns.
    """
    os.makedirs(results_path, exist_ok=True)
    # One indicator column per class
    y_true_binarized = label_binarize(y_true, classes=list(range(num_classes)))
    if num_classes == 2:
        # For two classes label_binarize returns a single column (indicator
        # of class 1); expand it so column i again refers to class i.
        y_true_binarized = np.hstack([1 - y_true_binarized, y_true_binarized])

    fpr = {}
    tpr = {}
    roc_auc = {}
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true_binarized[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Plot ROC curve for each class
    plt.figure()
    for i in range(num_classes):
        plt.plot(fpr[i], tpr[i], lw=2, label='Class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))

    # Diagonal = chance level
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc='lower right')
    plt.savefig(os.path.join(results_path, 'roc_curve.png'))
    plt.close()

def save_precision_recall_curve(y_true, y_score, results_path, num_classes):
    """Save one-vs-rest precision-recall curves to ``precision_recall_curve.png``.

    Args:
        y_true: True class indices in ``0 .. num_classes - 1``.
        y_score: Array indexed as ``y_score[:, i]`` giving the score of
            class ``i`` for each sample.
        results_path: Output directory, created when missing.
        num_classes: Number of classes / score columns.
    """
    os.makedirs(results_path, exist_ok=True)
    y_true_binarized = label_binarize(y_true, classes=list(range(num_classes)))
    if num_classes == 2:
        # For two classes label_binarize returns a single column (indicator
        # of class 1); expand it so column i again refers to class i.
        y_true_binarized = np.hstack([1 - y_true_binarized, y_true_binarized])

    precision = {}
    recall = {}
    for i in range(num_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_true_binarized[:, i], y_score[:, i])

    # Plot Precision-Recall curve for each class
    plt.figure()
    for i in range(num_classes):
        plt.plot(recall[i], precision[i], lw=2, label='Class {0}'.format(i))

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc='lower right')
    plt.savefig(os.path.join(results_path, 'precision_recall_curve.png'))
    plt.close()

def evaluate_model(model, valid_generator, class_names, base_path, results_path):
    """Evaluate ``model`` on a generator and save all reports to ``results_path``.

    Labels and predictions are collected batch by batch from the same
    generator batches. Reading ``valid_generator.classes`` (file order) next
    to ``model.predict(valid_generator)`` misaligns labels and predictions
    whenever the generator shuffles, which is its default.

    Args:
        model: Trained Keras model whose output has one score column per class.
        valid_generator: Indexable batch generator; ``len()`` gives the number
            of batches and ``generator[i]`` returns ``(images, labels)``.
            Labels may be class indices or one-hot rows.
        class_names: Class display names, index-aligned with the labels.
        base_path: Unused; kept for interface compatibility.
        results_path: Directory that receives the report files.
    """
    true_batches = []
    score_batches = []
    for batch_index in range(len(valid_generator)):
        images, labels = valid_generator[batch_index]
        labels = np.asarray(labels)
        if labels.ndim > 1:
            # One-hot labels (e.g. class_mode='categorical') -> class indices
            labels = labels.argmax(axis=1)
        true_batches.append(labels)
        score_batches.append(np.asarray(model.predict_on_batch(images)))

    y_true = np.concatenate(true_batches).astype(int)
    y_pred = np.concatenate(score_batches)
    y_pred_classes = np.argmax(y_pred, axis=1)

    save_classification_report(y_true, y_pred_classes, class_names, results_path)
    save_confusion_matrix(y_true, y_pred_classes, class_names, results_path)
    save_roc_curve(y_true, y_pred, results_path, num_classes=len(class_names))
    save_precision_recall_curve(y_true, y_pred, results_path, num_classes=len(class_names))

# Generators for the split dataset; pixel values are rescaled to [0, 1]
train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    os.path.join(output_dir, 'train'),
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse'
)

# shuffle=False keeps the batches in the same order as the generator's
# `classes` attribute, so predictions can be compared with the true labels.
# flow_from_directory shuffles by default, which is only wanted for training.
valid_generator = valid_datagen.flow_from_directory(
    os.path.join(output_dir, 'val'),
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse',
    shuffle=False
)

# Baseline models, keyed by the name used for their output folder
original_models = dict(
    CNNCustomOri=cnn_model,
    ResNet50Ori=resnet_model,
    InceptionV3Ori=inception_model,
    DenseNet121Ori=densenet_model,
    VGG16Ori=transfer_learning_model,
)

def train_and_evaluate(models, train_generator, valid_generator, class_names, augmented=False, test_generator=None):
    """Train, save and evaluate every model in ``models``.

    For each model: compile with a fresh optimizer, fit for ``epochs`` epochs,
    save ``model.h5`` and the training curves in the model's output
    directory, then write two evaluation reports below it:

    * ``results1`` - evaluation on ``valid_generator``
    * ``results2`` - evaluation on ``test_generator`` when one is given;
      otherwise on ``valid_generator`` again (the previous behaviour, kept so
      existing callers still get both folders).

    Args:
        models: Mapping of model name to Keras model.
        train_generator: Training data passed to ``model.fit``.
        valid_generator: Validation data for ``fit`` and for ``results1``.
        class_names: Class display names, index-aligned with the labels.
        augmented: Forwarded to ``get_model_save_path`` to pick the folder suffix.
        test_generator: Optional held-out data used for ``results2``.
    """
    second_eval_generator = valid_generator if test_generator is None else test_generator

    for model_name, model in models.items():
        model.compile(optimizer=create_optimizer(), loss=loss_function, metrics=['accuracy'])
        base_path = get_model_save_path(model_name, augmented)
        history = model.fit(train_generator, validation_data=valid_generator, epochs=epochs)
        model.save(os.path.join(base_path, 'model.h5'))
        plot_history(history, base_path)

        results1_path = os.path.join(base_path, 'results1')
        results2_path = os.path.join(base_path, 'results2')
        evaluate_model(model, valid_generator, class_names, base_path, results1_path)
        evaluate_model(model, second_eval_generator, class_names, base_path, results2_path)

# Fit and evaluate every baseline model on the un-augmented training split
train_and_evaluate(original_models, train_generator, valid_generator, class_names, augmented=False)

# Generator over the augmented images written to data/augmented
augmented_train_dir = os.path.join(home_dir, 'data/augmented')
train_generator_aug = train_datagen.flow_from_directory(
    augmented_train_dir,
    class_mode='sparse',
    batch_size=batch_size,
    target_size=(img_height, img_width),
)

# Fresh, untrained instances of every architecture for the augmented run
augmented_models = {
    model_key: build_model(input_shape, num_classes)
    for model_key, build_model in (
        ('CNNCustomAug', create_cnn_model),
        ('ResNet50Aug', create_resnet_model),
        ('InceptionV3Aug', create_inception_model),
        ('DenseNet121Aug', create_densenet_model),
        ('VGG16Aug', create_transfer_learning_model),
    )
}

# Fit and evaluate the second model set on the augmented images
train_and_evaluate(augmented_models, train_generator_aug, valid_generator, class_names, augmented=True)

# Generator over the complete 'SenMangoFruitDDS_original' folder (not the
# train/val/test split under output_dir).
# NOTE(review): the original comment said "Evaluate on ...", but this
# generator is passed below as the *training* data of train_and_evaluate —
# confirm whether evaluation-only was intended.
original_data_dir = os.path.join(home_dir, 'data/MangoFruitDDS/SenMangoFruitDDS_original')
original_train_generator = train_datagen.flow_from_directory(
    original_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse'
)

# Second training round for the baseline models, now fed from the original
# dataset folder.
# NOTE(review): original_models holds the same model objects that were already
# fitted earlier in this cell, so fit() continues from their current weights.
# get_model_save_path builds the folder only from the model name, the Ori/Aug
# suffix and the module-level date_time, so this run writes into the same
# directories as the earlier run and replaces its files.
train_and_evaluate(original_models, original_train_generator, valid_generator, class_names, augmented=False)

# Same second round for the models previously trained on the augmented data;
# the notes above about reused weights and reused output folders apply here too.
train_and_evaluate(augmented_models, original_train_generator, valid_generator, class_names, augmented=True)
