<a href="https://colab.research.google.com/github/kranthi1525/deeplearning/blob/main/HAM10000.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Copy the Kaggle API token (kaggle.json, expected in the current working
# directory) into ~/.kaggle so the `kaggle` command used later can find it.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the HAM10000 dataset archive from Kaggle into the current directory
# (the logged run shows a ~5.2 GB zip landing in /content).
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
Downloading skin-cancer-mnist-ham10000.zip to /content
100% 5.19G/5.20G [03:18<00:00, 19.3MB/s]
100% 5.20G/5.20G [03:20<00:00, 27.8MB/s]


In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
# NOTE(review): none of the cells visible in this section read from or write
# to the mount — confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import zipfile

# Unpack the downloaded archive into /content. The context manager closes the
# archive handle even if extraction fails part-way (e.g. disk full).
with zipfile.ZipFile('/content/skin-cancer-mnist-ham10000.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
!mkdir HAM10000_images_all
!cp HAM10000_images_part_1/* HAM10000_images_all/
!cp HAM10000_images_part_2/* HAM10000_images_all/


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.layers import GlobalAveragePooling2D,Dense


In [None]:
# Directory that holds the merged image files (relative to the working dir).
image_dir = "HAM10000_images_all"

# Read the metadata table, turn each image id into a file name by appending
# the extension, and derive the relative path of every image from it.
df = pd.read_csv('/content/HAM10000_metadata.csv').assign(
    image_id=lambda meta: meta['image_id'] + ".jpg",
    path=lambda meta: image_dir + "/" + meta['image_id'],
)



In [None]:

# Give every distinct diagnosis code in `dx` an integer id, numbered in order
# of first appearance in the metadata table.
diagnoses = df['dx'].unique()
labelled_data = dict(zip(diagnoses, range(len(diagnoses))))
df['label'] = df['dx'].map(labelled_data)

# 80/20 train/validation split, stratified on the label so both subsets keep
# the same class proportions; fixed seed for a repeatable split.
# NOTE(review): the split is per image row — if the metadata contains several
# images of the same lesion, a group-aware split may be needed; confirm.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)


In [None]:
# flow_from_dataframe with class_mode='sparse' expects the class column to
# hold strings, so cast the integer labels.
# Rebind via .assign() rather than assigning a column in place: the frames
# returned by train_test_split are derived from `df`, and in-place column
# assignment on them triggers pandas' SettingWithCopyWarning.
train_df = train_df.assign(label=train_df['label'].astype(str))
val_df = val_df.assign(label=val_df['label'].astype(str))


In [None]:
img_size = 224
batch_size = 32

# The ImageNet-pretrained ResNet50 backbone expects inputs prepared by its own
# preprocess_input (RGB->BGR plus per-channel ImageNet mean subtraction, no
# 0-1 scaling). Feeding it images rescaled by 1/255 puts them far outside the
# range the frozen weights were trained on; the previously logged run sat at
# ~0.67 validation accuracy for every epoch.
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

# Training images: light augmentation, then ResNet preprocessing.
train_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess,
                                   horizontal_flip=True,
                                   zoom_range=0.2,
                                   rotation_range=10)

# Validation images: preprocessing only, no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

train_gen = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='path',
    y_col='label',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='sparse',  # yields integer class indices (pairs with sparse_categorical_crossentropy)
    seed=42               # repeatable shuffling / augmentation order
)

val_gen = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='path',
    y_col='label',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='sparse',  # yields integer class indices (pairs with sparse_categorical_crossentropy)
    shuffle=False         # fixed order so predictions can be lined up with val_df rows
)


Found 8012 validated image filenames belonging to 7 classes.
Found 2003 validated image filenames belonging to 7 classes.


In [None]:
# ImageNet-pretrained ResNet50 without its classification head. The input
# shape is derived from `img_size` (the same constant the data generators use
# for target_size) so the two cannot drift apart.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(img_size, img_size, 3),
)
# Freeze the backbone so only the layers stacked on top of it are trained.
base_model.trainable = False

In [None]:
# Classification head stacked on the backbone: global average pooling over the
# feature maps, a 128-unit ReLU layer, then a 7-way softmax output.
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dense(7, activation='softmax'),
])

In [None]:
# Training configuration: Adam optimizer with its default settings, a sparse
# cross-entropy loss (targets are integer class indices), accuracy as metric.
compile_options = dict(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

In [None]:
# Fit for 10 epochs, evaluating on the validation generator after each epoch;
# the returned History object keeps the per-epoch metrics.
history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
)

Epoch 1/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 730ms/step - accuracy: 0.6396 - loss: 1.2239 - val_accuracy: 0.6695 - val_loss: 1.1355
Epoch 2/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 688ms/step - accuracy: 0.6730 - loss: 1.1238 - val_accuracy: 0.6705 - val_loss: 1.1083
Epoch 3/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 647ms/step - accuracy: 0.6659 - loss: 1.1324 - val_accuracy: 0.6700 - val_loss: 1.1032
Epoch 4/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 638ms/step - accuracy: 0.6683 - loss: 1.1067 - val_accuracy: 0.6725 - val_loss: 1.0891
Epoch 5/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 641ms/step - accuracy: 0.6780 - loss: 1.0865 - val_accuracy: 0.6715 - val_loss: 1.1005
Epoch 6/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 625ms/step - accuracy: 0.6687 - loss: 1.0861 - val_accuracy: 0.6725 - val_loss: 1.0663
Epoc