In [1]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, VGG16, InceptionV3
from tensorflow.keras.layers import Input, Dense, Dropout, concatenate, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt

In [2]:

# Enhanced Data Preparation
train_dir = "../../data5/train"
validation_dir = "../../data5/test"

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2, 
    fill_mode="nearest"
)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir, target_size=(224, 224), batch_size=32, class_mode='categorical')
validation_generator = validation_datagen.flow_from_directory(
    validation_dir, target_size=(224, 224), batch_size=32, class_mode='categorical')

num_classes = train_generator.num_classes
    

Found 5999 images belonging to 4 classes.
Found 2576 images belonging to 4 classes.


In [3]:

def build_improved_hybrid_model(num_classes, input_shape=(224, 224, 3)):
    resnet_base = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    inception_base = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)

    for base_model in [resnet_base, vgg_base, inception_base]:
        base_model.trainable = False

    input_tensor = Input(shape=input_shape)
    resnet_output = GlobalAveragePooling2D()(resnet_base(input_tensor))
    vgg_output = GlobalAveragePooling2D()(vgg_base(input_tensor))
    inception_output = GlobalAveragePooling2D()(inception_base(input_tensor))

    # Add normalization before concatenation
    resnet_output = BatchNormalization()(resnet_output)
    vgg_output = BatchNormalization()(vgg_output)
    inception_output = BatchNormalization()(inception_output)

    merged = concatenate([resnet_output, vgg_output, inception_output])
    x = Dense(512, activation='relu')(merged)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)  # Additional trainable layer
    output_tensor = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=input_tensor, outputs=output_tensor)
    return model

model = build_improved_hybrid_model(num_classes)
model.compile(optimizer=Adam(learning_rate=1e-3), loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])
    

In [4]:
# Model Training
def train_model(model, train_generator, validation_generator, learning_rate=1e-3, epochs=10):
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])

    early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3, min_lr=1e-6)

    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks=[early_stopping, lr_scheduler]
    )
    return history

 # Build and train model
model = build_improved_hybrid_model(num_classes)
train_history = train_model(model, train_generator, validation_generator, learning_rate=1e-3, epochs=10)


Epoch 1/10


  self._warn_if_super_not_called()


[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1510s[0m 8s/step - AUC: 0.8171 - accuracy: 0.5769 - loss: 1.1652 - val_AUC: 0.8985 - val_accuracy: 0.6883 - val_loss: 0.8060 - learning_rate: 0.0010
Epoch 2/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1283s[0m 7s/step - AUC: 0.8966 - accuracy: 0.6816 - loss: 0.7819 - val_AUC: 0.9094 - val_accuracy: 0.6976 - val_loss: 0.7484 - learning_rate: 0.0010
Epoch 3/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1296s[0m 7s/step - AUC: 0.9072 - accuracy: 0.6976 - loss: 0.7381 - val_AUC: 0.9166 - val_accuracy: 0.7038 - val_loss: 0.6971 - learning_rate: 0.0010
Epoch 4/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1305s[0m 7s/step - AUC: 0.9153 - accuracy: 0.7130 - loss: 0.7011 - val_AUC: 0.9254 - val_accuracy: 0.7372 - val_loss: 0.6770 - learning_rate: 0.0010
Epoch 5/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1296s[0m 7s/step - AUC: 0.9179 - accuracy: 0.7110 - loss: 0.

In [5]:
from sklearn.metrics import classification_report, confusion_matrix

# Classification report and confusion matrix
val_preds = np.argmax(model.predict(validation_generator), axis=1)
true_labels = validation_generator.classes
print(classification_report(true_labels, val_preds, target_names=validation_generator.class_indices.keys()))
print(confusion_matrix(true_labels, val_preds))
# Classification report and confusion matrix
val_preds = np.argmax(model.predict(train_generator), axis=1)
true_labels = train_generator.classes
print(classification_report(true_labels, val_preds, target_names=train_generator.class_indices.keys()))
print(confusion_matrix(true_labels, val_preds))

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m371s[0m 5s/step
                 precision    recall  f1-score   support

1. Enfeksiyonel       0.24      0.27      0.25       650
      2. Ekzama       0.26      0.30      0.28       650
        3. Akne       0.24      0.21      0.22       626
      4. Malign       0.28      0.24      0.26       650

       accuracy                           0.25      2576
      macro avg       0.25      0.25      0.25      2576
   weighted avg       0.25      0.25      0.25      2576

[[175 186 147 142]
 [188 194 146 122]
 [186 175 132 133]
 [180 183 134 153]]
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m840s[0m 4s/step
                 precision    recall  f1-score   support

1. Enfeksiyonel       0.24      0.23      0.23      1500
      2. Ekzama       0.24      0.25      0.24      1500
        3. Akne       0.26      0.28      0.27      1499
      4. Malign       0.27      0.25      0.26      1500

       accuracy            

In [None]:
import matplotlib.pyplot as plt

def plot_training_results(history, metrics=["accuracy", "loss"]):
    """
    Plots training and validation metrics for each metric in the list.

    Parameters:
    - history: The history object returned by model.fit().
    - metrics: A list of metric names to plot (e.g., ["accuracy", "loss"]).
    """
    for metric in metrics:
        plt.figure()
        plt.plot(history.history[metric], label=f"Training {metric}")
        plt.plot(history.history[f"val_{metric}"], label=f"Validation {metric}")
        plt.title(f"Training vs Validation {metric.capitalize()}")
        plt.xlabel("Epochs")
        plt.ylabel(metric.capitalize())
        plt.legend()
        plt.show()

# Plot accuracy and loss for the initial training
plot_training_results(train_history, metrics=["accuracy", "loss"])


In [None]:

# Fine-Tuning the Model
def fine_tune_model(model, train_generator, validation_generator, base_models, fine_tune_layers, learning_rate=1e-5, epochs=10):
    for base_model, layers_to_unfreeze in zip(base_models, fine_tune_layers):
        for layer in base_model.layers[-layers_to_unfreeze:]:
            layer.trainable = True

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])

    early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3, min_lr=1e-6)

    fine_tune_history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks=[early_stopping, lr_scheduler]
    )
    return fine_tune_history

resnet_base = model.get_layer("resnet50")
vgg_base = model.get_layer("vgg16")
inception_base = model.get_layer("inception_v3")
fine_tune_layers = [20, 10, 15]  # Increased unfreezing

fine_tune_history = fine_tune_model(
    model,
    train_generator,
    validation_generator,
    base_models=[resnet_base, vgg_base, inception_base],
    fine_tune_layers=fine_tune_layers,
    learning_rate=1e-5,
    epochs=20
)
    

In [None]:
import matplotlib.pyplot as plt

def plot_training_results(history, metrics=["accuracy", "loss"]):
    """
    Plots training and validation metrics for each metric in the list.

    Parameters:
    - history: The history object returned by model.fit().
    - metrics: A list of metric names to plot (e.g., ["accuracy", "loss"]).
    """
    for metric in metrics:
        plt.figure()
        plt.plot(history.history[metric], label=f"Training {metric}")
        plt.plot(history.history[f"val_{metric}"], label=f"Validation {metric}")
        plt.title(f"Training vs Validation {metric.capitalize()}")
        plt.xlabel("Epochs")
        plt.ylabel(metric.capitalize())
        plt.legend()
        plt.show()

# Plot accuracy and loss for the initial training
plot_training_results(fine_tune_history, metrics=["accuracy", "loss"])


In [7]:
model.save("../../models/improved_hybrid_model4060.keras")

In [None]:

# Evaluate the Improved Model
results = model.evaluate(validation_generator)
print(f"Validation Loss: {results[0]:.2f}")
print(f"Validation Accuracy: {results[1]:.2f}")

    

In [None]:
import joblib

# Improved Feature Extraction for KNN and RF
# feature_extractor = Model(inputs=model.input, outputs=model.layers[-3].output)
feature_extractor = Model(inputs=model.input, outputs=model.layers[-2].output)

def extract_features(model, generator):
    features = model.predict(generator, verbose=1)
    labels = generator.classes
    return features, labels

train_features, train_labels = extract_features(feature_extractor, train_generator)
val_features, val_labels = extract_features(feature_extractor, validation_generator)

pca = PCA(n_components=128)  # Dimensionality reduction
train_features_pca = pca.fit_transform(train_features)
val_features_pca = pca.transform(val_features)

pca_model_path = "../../models/pca_model4060.pkl"
joblib.dump(pca, pca_model_path)

print(f"PCA model saved at {pca_model_path}")

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# KNN
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(train_features_pca, train_labels)
knn_accuracy = accuracy_score(val_labels, knn.predict(val_features_pca))
print(f"KNN Validation Accuracy: {knn_accuracy:.2f}")

# Random Forest
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(train_features_pca, train_labels)
rf_accuracy = accuracy_score(val_labels, rf.predict(val_features_pca))
print(f"Random Forest Validation Accuracy: {rf_accuracy:.2f}")
    

In [None]:

# Save the KNN model
joblib.dump(knn, '../../models/improved_knn_model4060.pkl')

# Save the Random Forest model
joblib.dump(rf, '../../models/improved_random_forest_model6040.pkl')

print("Models saved successfully!")

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np

# Plotting ROC Curve
def plot_roc_curve(true_labels, predicted_probabilities, class_names):
    if len(true_labels.shape) == 1:
        true_labels = label_binarize(true_labels, classes=range(len(class_names)))

    n_classes = len(class_names)
    fpr = {}
    tpr = {}
    roc_auc = {}

    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(true_labels[:, i], predicted_probabilities[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    fpr["micro"], tpr["micro"], _ = roc_curve(true_labels.ravel(), predicted_probabilities.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plt.figure(figsize=(10, 8))
    for i in range(n_classes):
        plt.plot(fpr[i], tpr[i], label=f"Class {class_names[i]} (AUC = {roc_auc[i]:.2f})")
    plt.plot(fpr["micro"], tpr["micro"], linestyle='--', label=f"Micro-average (AUC = {roc_auc['micro']:.2f})")
    plt.plot([0, 1], [0, 1], color="gray", linestyle="--")

    plt.title("Receiver Operating Characteristic (ROC) Curve")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")
    plt.grid()
    plt.show()

# Get Validation Results
def get_validation_results(model, validation_generator):
    validation_steps = validation_generator.samples // validation_generator.batch_size
    true_labels = []
    predicted_probabilities = []

    for i in range(validation_steps):
        x_batch, y_batch = next(validation_generator)
        true_labels.extend(np.argmax(y_batch, axis=1))
        predicted_probabilities.extend(model.predict(x_batch))

    true_labels = np.array(true_labels)
    predicted_probabilities = np.array(predicted_probabilities)
    return true_labels, predicted_probabilities

# Evaluate and plot ROC Curve
class_names = list(validation_generator.class_indices.keys())
true_labels, predicted_probabilities = get_validation_results(model, validation_generator)
plot_roc_curve(true_labels, predicted_probabilities, class_names)
