In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import os

In [2]:
# Load your dataset into a DataFrame
base_dir = '../data-collection/dataset/data'

# Load the disease classification DataFrame
classification_df = pd.read_csv('../data-collection/dataset/disease_classification.csv')
classification_df.columns = ['disease_name', 'effect']

In [3]:
# Prepare dataset function
def prepare_dataset(base_dir):
    records = []
    for disease_name in os.listdir(base_dir):
        disease_dir = os.path.join(base_dir, disease_name)
        if os.path.isdir(disease_dir):
            for body_part in os.listdir(disease_dir):
                body_part_dir = os.path.join(disease_dir, body_part)
                if os.path.isdir(body_part_dir):
                    for image_name in os.listdir(body_part_dir):
                        image_path = os.path.join(body_part_dir, image_name)
                        if os.path.isfile(image_path):
                            records.append([disease_name, body_part, image_name, image_path])
    df = pd.DataFrame(records, columns=['disease_name', 'body_part', 'image_name', 'image_path'])
    return df

# Load the dataset
df = prepare_dataset(base_dir)

In [4]:
# Merge the datasets to add the 'effect' column
df = df.merge(classification_df, on='disease_name', how='left')

In [5]:
# Add 'disease_present' column
df['disease_present'] = df['disease_name'] != 'normal_skin'

In [6]:
# Encode labels
le_disease = LabelEncoder()
df['disease_label'] = le_disease.fit_transform(df['disease_name'])

le_effect = LabelEncoder()
df['effect_label'] = le_effect.fit_transform(df['effect'])

In [7]:
# Split data
train_df = df.sample(frac=0.8, random_state=42)
val_df = df.drop(train_df.index)

In [8]:
# Image normalization and augmentation
def preprocess_input(img):
    mean = np.array([123.68, 116.779, 103.939])  # Mean RGB values for ImageNet
    return (img - mean) / 255.0

In [9]:
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],  # Brightness adjustment
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [10]:
# Prepare data generators
train_generator = train_datagen.flow_from_dataframe(
    train_df, x_col='image_path', y_col='disease_label',
    target_size=(224, 224), batch_size=32, class_mode='raw'
)

val_generator = val_datagen.flow_from_dataframe(
    val_df, x_col='image_path', y_col='disease_label',
    target_size=(224, 224), batch_size=32, class_mode='raw'
)

Found 3137 validated image filenames.
Found 784 validated image filenames.


In [11]:
# Create a simple CNN model for demonstration
model = Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.GlobalAveragePooling2D(),
    
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    
    tf.keras.layers.Dense(len(le_disease.classes_), activation='softmax')
])

In [12]:
# Compile the model
model.compile(optimizer=Adam(lr=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(name, **kwargs)


In [13]:
# Define a ModelCheckpoint callback to save the best model during training
checkpoint = ModelCheckpoint(filepath='best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

In [14]:
# Train the model
history = model.fit(train_generator, validation_data=val_generator, epochs=10, batch_size=32, callbacks=[checkpoint])

Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.02551, saving model to best_model.h5
Epoch 2/10
Epoch 2: val_accuracy improved from 0.02551 to 0.03571, saving model to best_model.h5
Epoch 3/10
Epoch 3: val_accuracy improved from 0.03571 to 0.04209, saving model to best_model.h5
Epoch 4/10
Epoch 4: val_accuracy improved from 0.04209 to 0.05995, saving model to best_model.h5
Epoch 5/10
Epoch 5: val_accuracy improved from 0.05995 to 0.07398, saving model to best_model.h5
Epoch 6/10
Epoch 6: val_accuracy improved from 0.07398 to 0.08163, saving model to best_model.h5
Epoch 7/10
Epoch 7: val_accuracy improved from 0.08163 to 0.08929, saving model to best_model.h5
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.08929
Epoch 9/10
Epoch 9: val_accuracy improved from 0.08929 to 0.09566, saving model to best_model.h5
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.09566


In [15]:
# Evaluate the model
y_pred = model.predict(val_generator)
y_pred_classes = np.argmax(y_pred, axis=1)



In [16]:
# Calculate metrics
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

y_true = val_df['disease_label'].values

print("Accuracy:", accuracy_score(y_true, y_pred_classes))
print("Precision:", precision_score(y_true, y_pred_classes, average='weighted'))
print("Recall:", recall_score(y_true, y_pred_classes, average='weighted'))
print("F1 Score:", f1_score(y_true, y_pred_classes, average='weighted'))
print("Classification Report:\n", classification_report(y_true, y_pred_classes))

Accuracy: 0.021683673469387755
Precision: 0.010656838499653902
Recall: 0.021683673469387755
F1 Score: 0.0113566972000564
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         7
           3       0.00      0.00      0.00         6
           4       0.00      0.00      0.00         8
           5       0.00      0.00      0.00        14
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         4
          12       0.00      0.00      0.00        14
          13       0.00      0.00      0.00         3
          14       0.00      0.00      0.00         1
          15       0.25      0.08      0.12        12
          17       0.00      0.00      0.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
