In [4]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Flatten
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import pickle

# Step 1: Data Preprocessing and Augmentation
image_size = (224, 224)
batch_size = 32
train_dir = '../../data/train'
test_dir = '../../data/test'

# 2. Data Generators
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
test_datagen  = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

num_classes = train_generator.num_classes
class_labels = list(train_generator.class_indices.keys())
print("Classes:", class_labels)

# Step 2: Load Pre-Trained CNN (VGG16) and Fine-Tune
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(*image_size, 3))
base_model.trainable = False  # Freeze base model layers

model = tf.keras.Sequential([
    base_model,
    Flatten(),
])

# Step 3: Extract Features
features = []
labels = []

for batch_images, batch_labels in test_generator:
    features.append(model.predict(batch_images))
    labels.append(batch_labels)
    if len(features) * batch_size >= test_generator.samples:
        break

features = np.vstack(features)
labels = np.vstack(labels)

# Step 4: Train Random Forest Classifier
labels = np.argmax(labels, axis=1)  # Convert one-hot to integer labels
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

rf = RandomForestClassifier()
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Step 5: Evaluate Model
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Save Model
with open('cnn_rf_model_large.pkl', 'wb') as file:
    pickle.dump(best_rf, file)

print("Model saved successfully.")


Found 19001 images belonging to 4 classes.
Found 2431 images belonging to 4 classes.
Classes: ['1. Enfeksiyonel', '2. Ekzama', '3. Akne', '4. Malign']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step


KeyboardInterrupt: 