### Downloading the dataset of images
#### Source: Kaggle

In [None]:
import pandas as pd
# Read the metadata CSV into a DataFrame; later cells use its 'image_id'
# column to locate image files and its 'dx' column as the class label.
df = pd.read_csv("../data/HAM10000_metadata.csv")
# Show the first rows as a quick sanity check of what was loaded.
print(df.head())

In [None]:
import cv2
import numpy as np
import os

def load_images(image_dir, df, size=(64,64)):
    """Read, resize and rescale the images listed in a metadata table.

    Args:
        image_dir: Directory that holds the ``<image_id>.jpg`` files.
        df: DataFrame with an ``image_id`` column (file name without
            extension) and a ``dx`` column (label).
        size: Target size handed straight to ``cv2.resize``.

    Returns:
        A ``(images, labels)`` pair. ``images`` is a NumPy array built from
        the resized pixel data divided by 255.0; ``labels`` is a plain list
        with the ``dx`` value of each loaded image, in the same order. Rows
        whose file ``cv2.imread`` cannot load are left out of both.

    Note:
        The channel order is kept exactly as ``cv2.imread`` returns it
        (BGR according to the OpenCV documentation); no conversion is done.
    """
    pixel_arrays = []
    targets = []
    for _, record in df.iterrows():
        jpg_path = os.path.join(image_dir, record['image_id'] + ".jpg")
        raw = cv2.imread(jpg_path)
        if raw is None:
            # imread returns None for a missing or unreadable file: skip it.
            continue
        resized = cv2.resize(raw, size)
        pixel_arrays.append(resized / 255.0)  # scale pixel values by 1/255
        targets.append(record['dx'])
    return np.array(pixel_arrays), targets


In [None]:
# Load every image listed in df that can be read from the part_1 directory,
# resized to 64x64. load_images skips rows whose file cannot be read, so the
# count printed below can be smaller than len(df).
images, labels = load_images("../data/HAM10000_images_part_1", df, size=(64,64))
print(f"Loaded {len(images)} images and {len(labels)} labels.")


In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Map the string labels to integer class indices; the fitted encoder is kept
# because later cells use label_encoder.classes_ to turn indices back into names.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
# One-hot encode the integer indices to match the categorical_crossentropy
# loss used when the model is compiled.
labels_categorical = to_categorical(labels_encoded)

### Model Training

In [None]:

import os

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

# Split data into train and test: 20% is held out, and the fixed seed keeps
# the split reproducible between runs.
# NOTE(review): the split is not stratified; with imbalanced classes the
# rare ones may end up unevenly represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(images, labels_categorical, test_size=0.2, random_state=42)

# Small CNN: two convolution/pooling stages followed by a dense classifier.
# The input shape is read from the data so it always matches the image size
# that was passed to load_images, instead of a hard-coded (64, 64, 3).
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per class (width of the one-hot label matrix).
    Dense(y_train.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model training.
# NOTE(review): the held-out test split is also used as validation data, so
# val_accuracy is not an independent estimate if it is used for tuning.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Save the model in the recommended Keras format. The target directory is
# created first so saving does not fail on a fresh checkout.
model_path = '../model/model.keras'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)


### This is the output block


#### Training Accuracy
The following plot shows the training and validation accuracy over epochs.

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch accuracy recorded by model.fit for both the training
# data and the validation data on one set of axes.
plt.figure(figsize=(8, 5))
for _metric, _legend in (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[_metric], label=_legend)
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

#### Confusion Matrix
The confusion matrix below shows the performance of the model on the test set.

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Convert the softmax outputs and the one-hot targets to integer class indices.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Pass every encoder class explicitly: without `labels`, a class that appears
# in neither y_true nor y_pred_classes is dropped from the matrix, which would
# no longer line up with the label_encoder.classes_ tick labels below.
class_indices = np.arange(len(label_encoder.classes_))
cm = confusion_matrix(y_true, y_pred_classes, labels=class_indices)

# Rows are true classes, columns are predicted classes.
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
pip install seaborn

#### Predictions
Below are sample predictions from the test set, showing the true and predicted labels.

In [None]:
# Display a few test images with their predicted and true labels.
# At most five are shown, so a very small test split does not raise IndexError.
for i in range(min(5, len(X_test))):
    # The images were read with cv2.imread, which yields BGR channel order,
    # while matplotlib expects RGB: reverse the last axis for display only.
    plt.imshow(X_test[i][..., ::-1])
    plt.title(f"True: {label_encoder.classes_[y_true[i]]}, Pred: {label_encoder.classes_[y_pred_classes[i]]}")
    plt.axis('off')
    plt.show()

# Example output image (replace with your own if needed)
from PIL import Image
import io
import base64
from IPython.display import display

# Provided example image (replace with actual file path if available)
img_path = 'example_prediction.png'  # Save the provided image as this file in the notebook directory
try:
    img = Image.open(img_path)
    display(img)
except FileNotFoundError:
    # The example image is optional; tell the reader how to supply it.
    print('Example prediction image not found. Please add it as example_prediction.png.')

In [None]:

import numpy as np
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Predict class probabilities and get class indices
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Report precision/recall/F1 per class. `labels` lists every encoder class so
# the report always has one row per entry of target_names, even when a class
# is absent from the test split (otherwise the two lengths can disagree).
class_indices = np.arange(len(label_encoder.classes_))
print(classification_report(y_true, y_pred_classes, labels=class_indices, target_names=label_encoder.classes_))

# Display a few test images with their predicted and true labels.
# At most five are shown, so a very small test split does not raise IndexError.
for i in range(min(5, len(X_test))):
    # The images were read with cv2.imread, which yields BGR channel order,
    # while matplotlib expects RGB: reverse the last axis for display only.
    plt.imshow(X_test[i][..., ::-1])
    plt.title(f"True: {label_encoder.classes_[y_true[i]]}, Pred: {label_encoder.classes_[y_pred_classes[i]]}")
    plt.axis('off')
    plt.show()
