# **(Modeling & Evaluation)**

### Number of images in train, test, validation data

In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Class labels, defined explicitly.
labels = ['Healthy', 'Infected']

# Dataset split folders that are looked up under my_data_dir.
folders = ['train', 'validation', 'test']

# Column-oriented accumulator that becomes the frequency DataFrame below.
data = {'Set': [], 'Label': [], 'Frequency': []}

# Count the directory entries for every (split, label) combination.
for folder, label in [(split, name) for split in folders for name in labels]:
    label_path = os.path.join(my_data_dir, folder, label)

    # A missing folder is reported and skipped instead of raising.
    if not os.path.exists(label_path):
        print(f"⚠️ WARNING: {label_path} not found!")
        continue

    # Every entry in the folder is counted; nothing filters by file type.
    num_images = len(os.listdir(label_path))
    data['Set'].append(folder)
    data['Label'].append(label)
    data['Frequency'].append(num_images)
    print(f"✅ {folder} - {label}: {num_images} images")

# Convert the collected counts into a DataFrame for plotting.
df_freq = pd.DataFrame(data)

# Grouped bar chart of the class distribution per split, saved under file_path.
sns.set_style("whitegrid")
plt.figure(figsize=(8, 5))
sns.barplot(data=df_freq, x='Set', y='Frequency', hue='Label')
plt.title("Class Distribution in Train, Validation, and Test Sets")
plt.savefig(f'{file_path}/labels_distribution.png', bbox_inches='tight', dpi=150)
plt.show()


### Image Data Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

Initialize image data generator

In [None]:
# Random geometric transformations applied to each generated image, plus a
# rescale of pixel values by 1/255.
augmentation_settings = {
    "rotation_range": 20,
    "width_shift_range": 0.10,
    "height_shift_range": 0.10,
    "shear_range": 0.1,
    "zoom_range": 0.1,
    "horizontal_flip": True,
    "vertical_flip": True,
    "fill_mode": "nearest",
    "rescale": 1.0 / 255,
}
augmented_image_data = ImageDataGenerator(**augmentation_settings)

### Augment train dataset

In [None]:
# Stream augmented training batches from train_path.
batch_size = 20  # number of images per batch
train_flow_options = {
    "target_size": image_shape[:2],
    "color_mode": "rgb",
    "batch_size": batch_size,
    "class_mode": "binary",
    "shuffle": True,
}
train_set = augmented_image_data.flow_from_directory(train_path, **train_flow_options)

# Report what the iterator found on disk.
print("Class indices:", train_set.class_indices)  # label name -> integer index
print("Number of classes:", len(train_set.class_indices))
# Number of image files found, i.e. before any augmentation is applied.
print("Total number of images in dataset:", train_set.samples)

In [None]:
# Validation images are only rescaled by 1/255; no augmentation is applied
# and the order is kept fixed (shuffle=False).
validation_rescaler = ImageDataGenerator(rescale=1.0 / 255)
validation_set = validation_rescaler.flow_from_directory(
    val_path,
    batch_size=batch_size,
    class_mode="binary",
    color_mode="rgb",
    shuffle=False,
    target_size=image_shape[:2],
)

# Show the label name -> index mapping.
print(validation_set.class_indices)

In [None]:
# Test images are only rescaled by 1/255; no augmentation is applied and the
# order is kept fixed (shuffle=False).
test_rescaler = ImageDataGenerator(rescale=1.0 / 255)
test_set = test_rescaler.flow_from_directory(
    test_path,
    batch_size=batch_size,
    class_mode="binary",
    color_mode="rgb",
    shuffle=False,
    target_size=image_shape[:2],
)

# Show the label name -> index mapping.
print(test_set.class_indices)

Plot augmented training images

In [None]:
# Preview the first image of three consecutive training batches.
for _ in range(3):
    img, label = next(train_set)
    # Presumably (batch, height, width, 3) with batch up to batch_size (20)
    # and height/width taken from image_shape[:2] -- confirm from the output.
    print(img.shape)
    first_image = img[0]
    plt.imshow(first_image)
    plt.show()

Plot validation images (rescaled only, not augmented)

In [None]:
# Preview the first image of three consecutive validation batches.
for _ in range(3):
    img, label = next(validation_set)
    # Presumably (batch, height, width, 3) with batch up to batch_size (20)
    # and height/width taken from image_shape[:2] -- confirm from the output.
    print(img.shape)
    first_image = img[0]
    plt.imshow(first_image)
    plt.show()

In [None]:
# Preview the first image of three consecutive test batches.
for _ in range(3):
    img, label = next(test_set)
    # Presumably (batch, height, width, 3) with batch up to batch_size (20)
    # and height/width taken from image_shape[:2] -- confirm from the output.
    print(img.shape)
    first_image = img[0]
    plt.imshow(first_image)
    plt.show()

Save class_indices

In [None]:
# Persist the label name -> index mapping found by the training iterator.
# NOTE(review): joblib is not imported in the visible part of this file --
# confirm it is imported in an earlier cell.
class_indices_file = f"{file_path}/class_indices.pkl"
joblib.dump(value=train_set.class_indices, filename=class_indices_file)

Model Creation

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Input size of the model.
# NOTE(review): the data generators read image_shape[:2] in cells above this
# one -- confirm the value they saw matches this definition.
image_shape = (128, 128, 3)

# Directory where the CNN model artifacts are saved.
output_path_cnn = "outputs/v1.0"
os.makedirs(output_path_cnn, exist_ok=True)  # create `outputs/v1.0/` if it does not exist


# ✅ 初期の CNN モデルを再構築
def create_tf_model():
    """
    Build and compile the baseline CNN binary classifier.

    The network stacks four Conv2D + MaxPooling2D stages (32, 64, 128 and 128
    filters, 3x3 kernels, ReLU activation, 2x2 pooling), followed by Flatten,
    a 128-unit ReLU Dense layer, Dropout(0.3) and a single sigmoid output
    unit. The input shape is read from the module-level ``image_shape``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        "adam" optimizer (default settings) and the accuracy metric.
    """
    stack = []

    # Convolutional feature extractor; only the first Conv2D declares the
    # input shape.
    for stage, n_filters in enumerate((32, 64, 128, 128)):
        conv_kwargs = {
            "filters": n_filters,
            "kernel_size": (3, 3),
            "activation": "relu",
        }
        if stage == 0:
            conv_kwargs["input_shape"] = image_shape
        stack.append(Conv2D(**conv_kwargs))
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head ending in one sigmoid unit (two-class problem).
    stack.append(Flatten())
    stack.append(Dense(128, activation="relu"))
    stack.append(Dropout(0.3))
    stack.append(Dense(1, activation="sigmoid"))

    model = Sequential(stack)

    # The optimizer is given by name, so Adam runs with its default learning rate.
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

    return model


# Build the `cnn_model` instance.
cnn_model = create_tf_model()

# Print the layer-by-layer summary of the model.
cnn_model.summary()

print("✅ `cnn_model` の修正 & 再作成が完了しました！")

In [None]:
# Early stopping
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once val_loss has not improved for 5 consecutive epochs and
# restore the weights from the best epoch.
early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

### モデルの学習

In [None]:
# Train a freshly built CNN. Note that the instance fitted here is `model`,
# which is a different object from the `cnn_model` built earlier.
model = create_tf_model()

# Number of full batches in the training set.
train_steps = len(train_set.classes) // batch_size

model.fit(
    train_set,
    validation_data=validation_set,
    steps_per_epoch=train_steps,
    epochs=25,
    callbacks=[early_stop],
    verbose=1,
)

In [None]:
# Save the trained CNN model.
#
# `model` is the instance that was fitted in the training cell. `cnn_model`
# was only built and summarised and never trained, so saving it would write
# untrained weights to disk and every later evaluation of the saved file would
# measure an untrained network.

output_path_cnn = "outputs/v1.0"
os.makedirs(output_path_cnn, exist_ok=True)  # create `outputs/v1.0/` if it does not exist

model.save(f"{output_path_cnn}/mildew_detector_model.keras")
print(
    f"✅ CNN モデルのトレーニングと保存が完了しました！\nモデルの保存先: {output_path_cnn}/mildew_detector_model.keras"
)

### Model Learning Curve

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Directory that receives the learning-curve figures (created when missing).
save_dir = "outputs/v1.0"
os.makedirs(save_dir, exist_ok=True)

# Per-epoch metrics recorded on `model` by its most recent fit() call.
losses = pd.DataFrame(model.history.history)

sns.set_style("whitegrid")

# (columns to plot, figure title, output file name) for each curve.
curve_specs = (
    (["loss", "val_loss"], "Loss", "model_training_losses.png"),
    (["accuracy", "val_accuracy"], "Accuracy", "model_training_acc.png"),
)

for curve_number, (columns, title, file_name) in enumerate(curve_specs):
    if curve_number > 0:
        print("\n")  # spacing in the cell output between the two figures

    losses[columns].plot(style=".-")
    plt.title(title)
    plt.savefig(f"{save_dir}/{file_name}", bbox_inches="tight", dpi=150)
    plt.show()

### Model Evaluation

In [None]:
# Import load_model from tensorflow.keras, the same namespace the other cells
# in this file use to build, save and reload models, instead of the standalone
# `keras` package, so that saving and loading go through one implementation.
from tensorflow.keras.models import load_model

# Reload the saved CNN model from disk for evaluation.
model = load_model("outputs/v1.0/mildew_detector_model.keras")

### Evaluate Model on Test Set

In [None]:
# Evaluate `model` on the test iterator and keep the returned metric values.
evaluation = model.evaluate(test_set)

In [None]:
# Persist the values returned by model.evaluate().
# NOTE(review): joblib is not imported in the visible part of this file --
# confirm it is imported in an earlier cell.
evaluation_file = "outputs/v1.0/evaluation.pkl"
joblib.dump(value=evaluation, filename=evaluation_file)

### Confusion Matrix & Classification Report

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Output directory for the confusion-matrix figure (created when missing).
save_dir = "outputs/v1.0"
os.makedirs(save_dir, exist_ok=True)

# Display names used for the report rows and the heatmap tick labels.
class_names = ["Healthy", "Infected"]

# Raw model outputs for the test iterator.
raw_predictions = model.predict(test_set)

# A single output column is thresholded at 0.5 (binary case); several output
# columns are reduced with argmax (multi-class case).
y_pred = (
    (raw_predictions > 0.5).astype(int).flatten()
    if raw_predictions.shape[1] == 1
    else np.argmax(raw_predictions, axis=1)
)

# Ground-truth class indices as stored on the test iterator.
y_true = test_set.classes

# Classification report.
print("Classification Report:\n")
print(classification_report(y_true, y_pred, target_names=class_names))

# Confusion matrix and its heatmap.
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")

# Save the figure before showing it.
plt.savefig(f"{save_dir}/confusion_matrix.png", bbox_inches="tight", dpi=150)
plt.show()

モデルの構築　CNN-MobileNetV2

In [None]:
import os
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Versioned output folder for the MobileNetV2 run (change `version` to bump it).
version = "v1.1"
output_path = f"outputs/{version}"
os.makedirs(output_path, exist_ok=True)  # created when it does not exist

# MobileNetV2 backbone with pretrained ImageNet weights and no classifier top.
base_model = MobileNetV2(
    input_shape=(128, 128, 3), include_top=False, weights="imagenet"
)

# Freeze every backbone layer except the last 20, which stay trainable.
first_trainable = len(base_model.layers) - 20
for position, layer in enumerate(base_model.layers):
    layer.trainable = position >= first_trainable

# New classification head: global average pooling, two ReLU Dense layers and
# a single sigmoid unit (two-class problem).
x = GlobalAveragePooling2D()(base_model.output)
for units in (128, 64):
    x = Dense(units, activation="relu")(x)
output_layer = Dense(1, activation="sigmoid")(x)

# Assemble the full model from the backbone input to the new head.
mobilenet_model = Model(inputs=base_model.input, outputs=output_layer)

# Compile with Adam at a learning rate of 0.0001.
mobilenet_model.compile(
    loss="binary_crossentropy",
    optimizer=Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

# Print the model summary.
mobilenet_model.summary()

print("✅ MobileNetV2 モデルを新しい学習率で再コンパイルしました！")

# Stop training once val_loss has not improved for 5 consecutive epochs and
# restore the weights from the best epoch.
early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Train the MobileNetV2 model.
# len(train_set) is already the number of batches per epoch, so it is used
# directly as steps_per_epoch. Dividing it by the batch size a second time
# would cut every epoch down to a small fraction of the training data.
history_mobilenet = mobilenet_model.fit(
    train_set,
    validation_data=validation_set,
    epochs=25,
    steps_per_epoch=len(train_set),
    callbacks=[early_stop],
    verbose=1,
)

In [None]:
# Write the MobileNetV2 model to `outputs/v1.1/` (created when missing).
output_path_mobilenet = "outputs/v1.1"
os.makedirs(output_path_mobilenet, exist_ok=True)

mobilenet_model_file = f"{output_path_mobilenet}/mildew_mobilenet_model.keras"
mobilenet_model.save(mobilenet_model_file)
print(
    f"✅ MobileNetV2 モデルのトレーニングと保存が完了しました！\nモデルの保存先: {mobilenet_model_file}"
)

### モデルの評価（Accuracy & Loss）

In [None]:
from tensorflow.keras.models import load_model

# Reload both saved models from their versioned output folders.
cnn_model = load_model("outputs/v1.0/mildew_detector_model.keras")
mobilenet_model = load_model("outputs/v1.1/mildew_mobilenet_model.keras")

# Evaluate both models on the same test iterator.
cnn_eval = cnn_model.evaluate(test_set, verbose=1)
mobilenet_eval = mobilenet_model.evaluate(test_set, verbose=1)

# Index 0 is reported as the loss and index 1 as the accuracy.
cnn_loss, cnn_accuracy = cnn_eval[0], cnn_eval[1]
mobilenet_loss, mobilenet_accuracy = mobilenet_eval[0], mobilenet_eval[1]

print(f"✅ CNN Model (v1.0) - Loss: {cnn_loss:.4f}, Accuracy: {cnn_accuracy:.4f}")
print(
    f"✅ MobileNetV2 Model (v1.1) - Loss: {mobilenet_loss:.4f}, Accuracy: {mobilenet_accuracy:.4f}"
)

### Confusion Matrix & Classification Report

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

# Display names used for the heatmap tick labels and the report rows.
class_names = ["Healthy", "Infected"]

# Threshold each model's outputs at 0.5 to obtain 0/1 predictions.
cnn_preds = (cnn_model.predict(test_set) > 0.5).astype(int)
mobilenet_preds = (mobilenet_model.predict(test_set) > 0.5).astype(int)

# Ground-truth class indices as stored on the test iterator.
y_true = test_set.classes

# Confusion matrices for both models.
cnn_cm = confusion_matrix(y_true, cnn_preds)
mobilenet_cm = confusion_matrix(y_true, mobilenet_preds)

# One heatmap per model: CNN first, then MobileNetV2.
heatmap_specs = (
    (cnn_cm, "Confusion Matrix - CNN (v1.0)"),
    (mobilenet_cm, "Confusion Matrix - MobileNetV2 (v1.1)"),
)
for matrix, title in heatmap_specs:
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(title)
    plt.show()

# Classification reports, printed after both figures in the same model order.
report_specs = (
    ("Classification Report - CNN Model:\n", cnn_preds),
    ("\nClassification Report - MobileNetV2 Model:\n", mobilenet_preds),
)
for heading, predictions in report_specs:
    print(heading)
    print(classification_report(y_true, predictions, target_names=class_names))

### モデルの可視化（Loss & Accuracyの推移）