In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import PNASNetLarge, InceptionResNetV2, DenseNet201, InceptionV3
from tensorflow.keras.layers import Concatenate, Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# Read the skin-disease metadata table from CSV into a DataFrame.
# Later cells read its 'disease_name' and 'image_path' columns.
dataset_csv = '../data-collection/dataset/skin_disease_dataset.csv'
df = pd.read_csv(dataset_csv)

In [None]:
# Quantify class imbalance: rows per disease, and each class's share of all rows.
total_instances = df.shape[0]
class_counts = df['disease_name'].value_counts()
class_proportions = class_counts.div(total_instances)

In [None]:
# Bar chart of how many rows each disease class has.
fig, ax = plt.subplots(figsize=(10, 6))
class_counts.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Class Distribution of Skin Diseases')
ax.set_xlabel('Disease Name')
ax.set_ylabel('Number of Instances')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Balance the classes by random oversampling (sampling with replacement) so
# that every class ends up with exactly as many rows as the largest class.
#
# Each class contributes exactly one frame to the final concat. The previous
# version concatenated the *whole* original `df` plus the resampled minority
# frames, which left every minority class with (original + majority) rows,
# i.e. larger than the majority class rather than equal to it.
majority_class = class_counts.idxmax()
majority_count = class_counts[majority_class]

resampled_dfs = []
for disease_name, count in class_counts.items():
    df_class = df[df['disease_name'] == disease_name]
    if count >= majority_count:
        # Already at the target size (the majority class, or a tie with it):
        # keep the original rows untouched instead of bootstrapping them.
        resampled_dfs.append(df_class)
        continue
    df_resampled = resample(df_class, replace=True, n_samples=majority_count, random_state=42)
    resampled_dfs.append(df_resampled)

# ignore_index gives the balanced frame a clean 0..n-1 index; oversampling
# would otherwise leave duplicated index labels behind.
# Rows whose 'disease_name' is missing are not in class_counts and are dropped here.
df_balanced = pd.concat(resampled_dfs, ignore_index=True)

In [None]:
# Image generator with on-the-fly augmentation.
# The geometric settings randomise each image; `rescale` multiplies pixel
# values by 1/255.
augmentation_options = dict(
    rescale=1.0 / 255,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Create training and testing datasets.
#
# `df_balanced` contains oversampled (duplicated) rows, so a plain row-level
# split would put copies of the same image in both the training and the test
# set and inflate every test metric. The split is therefore made over *unique*
# images, and each image is then assigned wholesale to one side.
# NOTE(review): this assumes 'image_path' uniquely identifies an image - confirm.
X = df_balanced['image_path']
y = df_balanced['disease_name']

unique_images = df_balanced.drop_duplicates(subset='image_path')

# Stratify so both sides keep the same class proportions. This requires every
# class to have at least two distinct images; train_test_split raises a
# ValueError otherwise.
train_paths, test_paths = train_test_split(
    unique_images['image_path'],
    test_size=0.2,
    random_state=42,
    stratify=unique_images['disease_name'],
)

# Training side: every (possibly duplicated) balanced row whose image was
# assigned to train, so the oversampling is kept for training.
in_train = X.isin(train_paths)
X_train, y_train = X[in_train], y[in_train]

# Test side: each held-out image exactly once, so the metrics are not
# re-weighted by duplicated rows.
held_out = unique_images[unique_images['image_path'].isin(test_paths)]
X_test, y_test = held_out['image_path'], held_out['disease_name']

In [None]:
# Shared input tensor for the model: 224x224 images with 3 channels.
image_side = 224
ip_shape = (image_side, image_side, 3)
inputs = Input(shape=ip_shape)

In [None]:
# Instantiate the three ImageNet-pretrained backbones without their
# classification heads, all with the same input shape.
backbone_options = dict(weights='imagenet', include_top=False, input_shape=ip_shape)
base_model1 = InceptionResNetV2(**backbone_options)
base_model2 = DenseNet201(**backbone_options)
base_model3 = InceptionV3(**backbone_options)

In [None]:
# Mark every backbone as non-trainable so its pretrained weights are not
# updated during training.
for backbone in (base_model1, base_model2, base_model3):
    backbone.trainable = False

In [None]:
# Run the shared input tensor through each backbone, in order, to get its
# output feature tensor.
x1, x2, x3 = [backbone(inputs) for backbone in (base_model1, base_model2, base_model3)]

In [None]:
# Apply a separate GlobalAveragePooling2D layer to each backbone's output.
x1, x2, x3 = [GlobalAveragePooling2D()(feature_map) for feature_map in (x1, x2, x3)]

In [None]:
# Join the three pooled tensors into a single tensor with a Concatenate layer.
pooled_features = [x1, x2, x3]
x = Concatenate()(pooled_features)

In [None]:
# Classification head: one softmax unit per distinct label in the training set.
num_classes = len(set(y_train))
classifier = Dense(num_classes, activation='softmax')
outputs = classifier(x)

In [None]:
# Assemble the ensemble: from the shared input tensor to the softmax output.
model = Model(
    inputs=inputs,
    outputs=outputs,
)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the summary of the assembled ensemble model.
model.summary()

In [None]:
# Train the model on the training split only.
#
# The previous version fed all of `df_balanced` to the generator, so the rows
# held out in X_test/y_test were also trained on, and `steps_per_epoch` was
# computed from a different row count than the generator actually held.

# Rebuild a frame from the split Series. Values are taken positionally
# because X_train and y_train are row-aligned and the index labels may contain
# duplicates after oversampling.
train_df = pd.DataFrame({
    'image_path': X_train.to_numpy(),
    'disease_name': y_train.to_numpy(),
})

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='disease_name',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# steps_per_epoch is left to Keras: one epoch is one full pass over
# train_generator, including the final partial batch.
history = model.fit(train_generator, epochs=20)

In [None]:
# Evaluate the model on the held-out test split.
#
# The previous version predicted on all of `df_balanced` through a shuffled,
# augmenting generator and compared the integer predictions with the string
# labels in `y_test`. The lengths, order and label type all differed, so the
# report was invalid.

# Values are taken positionally; X_test and y_test are row-aligned.
test_df = pd.DataFrame({
    'image_path': X_test.to_numpy(),
    'disease_name': y_test.to_numpy(),
})

# Pin the label -> index mapping to the sorted training labels. This is the
# order flow_from_dataframe infers when no `classes` argument is given, so the
# indices line up with the model's output units.
class_names = sorted(set(y_train))

# Evaluation uses rescaling only: no random augmentation, and no shuffling so
# that predictions stay in the same order as `test_generator.classes`.
eval_datagen = ImageDataGenerator(rescale=1./255)
test_generator = eval_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col='disease_name',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    classes=class_names,
    shuffle=False,
)

y_pred = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred, axis=1)

# Take the ground truth from the generator rather than from y_test: the
# generator may drop rows (e.g. unreadable file paths), and it stores labels
# as the same integer indices the model predicts.
y_true = np.asarray(test_generator.classes)

# `labels` keeps the report aligned with `target_names` even if some class
# has no rows in the test split.
print(classification_report(
    y_true,
    y_pred_classes,
    labels=list(range(len(class_names))),
    target_names=class_names,
))

In [None]:
# Write the trained model to an .h5 file in the current working directory.
model_output_path = 'skin_disease_detection_model.h5'
model.save(model_output_path)