<a href="https://colab.research.google.com/github/koyarekoikoi/tkpj/blob/main/skin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [None]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Define paths
metadata_path = "/content/drive/MyDrive/SKIN/HAM10000_metadata.csv"
images_path_1 = "/content/drive/MyDrive/SKIN/HAM10000_images_part_1"
images_path_2 = "/content/drive/MyDrive/SKIN/HAM10000_images_part_2"

In [5]:
# Load metadata
df = pd.read_csv(metadata_path)

In [6]:
# Image processing function
def load_images(image_ids, image_folder, target_size=(128, 128)):
    images = []
    for img_id in tqdm(image_ids):
        img_path = os.path.join(image_folder, img_id + ".jpg")
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            img = cv2.resize(img, target_size)
            img = img / 255.0  # Normalize
            images.append(img)
    return np.array(images)

In [7]:
# Load images from both folders
X_images = load_images(df['image_id'], images_path_1)
X_images2 = load_images(df['image_id'], images_path_2)
X = np.concatenate([X_images, X_images2])

100%|██████████| 10015/10015 [29:18<00:00,  5.69it/s]
100%|██████████| 10015/10015 [32:47<00:00,  5.09it/s]


In [8]:
# Remaining Imports to load model, can be done on init import
import pickle
from tensorflow.keras.models import load_model


In [66]:
# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['dx'])

In [67]:
# Train-test split
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [69]:
from tensorflow.keras.utils import to_categorical

# One-hot encode y_train and y_test
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Check the new shapes
print("One-hot encoded y_train shape:", y_train.shape)
print("One-hot encoded y_test shape:", y_test.shape)



One-hot encoded y_train shape: (8012, 10)
One-hot encoded y_test shape: (2003, 10)


In [70]:
def build_cnn_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))  # Example input shape
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))  # Output layer for 10 classes
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [76]:
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # Detect TPU
tf.config.experimental_connect_to_cluster(tpu)  # Connect to TPU
tf.tpu.experimental.initialize_tpu_system(tpu)  # Initialize the TPU


<tensorflow.python.tpu.topology.Topology at 0x7a6fe29be150>

In [89]:
# Create TPU strategy
strategy = tf.distribute.TPUStrategy(tpu)

# Define and compile your model inside the TPU strategy scope
with strategy.scope():
    cnn_model = build_cnn_model()
    cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Now train the model
#cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
# Train the model
cnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model on test data
test_loss, test_accuracy = cnn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 70ms/step - accuracy: 0.6431 - loss: 1.1994 - val_accuracy: 0.6614 - val_loss: 1.0988
Epoch 2/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 35ms/step - accuracy: 0.6832 - loss: 1.0493 - val_accuracy: 0.6614 - val_loss: 1.0562
Epoch 3/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 35ms/step - accuracy: 0.6826 - loss: 1.0299 - val_accuracy: 0.6614 - val_loss: 1.0841
Epoch 4/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 36ms/step - accuracy: 0.6781 - loss: 0.9745 - val_accuracy: 0.6614 - val_loss: 1.0434
Epoch 5/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.6799 - loss: 0.9483 - val_accuracy: 0.6614 - val_loss: 1.0615
Epoch 6/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.7076 - loss: 0.9007 - val_accuracy: 0.6614 - val_loss: 1.0301
Epoch 7/100
[

In [90]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

cnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])


Epoch 1/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.9579 - loss: 0.1103 - val_accuracy: 0.6175 - val_loss: 6.1753
Epoch 2/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 37ms/step - accuracy: 0.9668 - loss: 0.0911 - val_accuracy: 0.6175 - val_loss: 7.2114
Epoch 3/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9696 - loss: 0.0822 - val_accuracy: 0.6255 - val_loss: 6.2593
Epoch 4/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9600 - loss: 0.0879 - val_accuracy: 0.6494 - val_loss: 7.3791
Epoch 5/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 35ms/step - accuracy: 0.9585 - loss: 0.1283 - val_accuracy: 0.6016 - val_loss: 6.4844
Epoch 6/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 36ms/step - accuracy: 0.9766 - loss: 0.0745 - val_accuracy: 0.6295 - val_loss: 6.3555


<keras.src.callbacks.history.History at 0x7a6830b46a50>

In [91]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

cnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[lr_scheduler])


Epoch 1/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9527 - loss: 0.1206 - val_accuracy: 0.6096 - val_loss: 5.8894 - learning_rate: 0.0010
Epoch 2/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 36ms/step - accuracy: 0.9715 - loss: 0.0980 - val_accuracy: 0.6096 - val_loss: 6.6522 - learning_rate: 0.0010
Epoch 3/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 35ms/step - accuracy: 0.9686 - loss: 0.0770 - val_accuracy: 0.5857 - val_loss: 5.9376 - learning_rate: 0.0010
Epoch 4/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 35ms/step - accuracy: 0.9812 - loss: 0.0671 - val_accuracy: 0.6215 - val_loss: 5.8252 - learning_rate: 0.0010
Epoch 5/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.9497 - loss: 0.1662 - val_accuracy: 0.6016 - val_loss: 7.3730 - learning_rate: 0.0010
Epoch 6/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7a6f7a416a50>

In [94]:
# Evaluate the model on test data
test_loss, test_accuracy = cnn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.6442 - loss: 5.8951
Test Accuracy: 62.55%


In [95]:
from tensorflow.keras.layers import BatchNormalization

def build_cnn_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))  # Output layer for 10 classes
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

datagen.fit(X_train)

cnn_model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=100, validation_data=(X_test, y_test))


Epoch 1/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 117ms/step - accuracy: 0.6149 - loss: 6.9963 - val_accuracy: 0.6215 - val_loss: 7.7320
Epoch 2/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 115ms/step - accuracy: 0.6137 - loss: 8.0783 - val_accuracy: 0.6255 - val_loss: 7.5335
Epoch 3/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 114ms/step - accuracy: 0.6287 - loss: 6.9704 - val_accuracy: 0.6215 - val_loss: 7.3432
Epoch 4/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 115ms/step - accuracy: 0.5767 - loss: 6.5886 - val_accuracy: 0.6175 - val_loss: 7.1841
Epoch 5/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 115ms/step - accuracy: 0.5964 - loss: 6.4870 - val_accuracy: 0.6175 - val_loss: 7.0526
Epoch 6/100
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 115ms/step - accuracy: 0.6168 - loss: 7.2949 - val_accuracy: 0.6175 - val_loss: 6.9093
Epoc

In [None]:
# Evaluate this model on test data
test_loss, test_accuracy = cnn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Save CNN model
cnn_model.save("/content/drive/MyDrive/SKIN/cnn_skin_cancer.h5")

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D

base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
base_model.trainable = False  # Freeze the base model layers

model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))  # Output layer for 10 classes
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))


In [None]:
# Evaluate the model on test data
test_loss, test_accuracy = cnn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Save CNN model
cnn_model.save("/content/drive/MyDrive/SKIN/cnn_skin_cancer.h5")

In [None]:
# Extract features for ML models
X_flat = X.reshape(X.shape[0], -1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_flat)
pca = PCA(n_components=100)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Train ML models
rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_pca[:len(y_train)], y_train)
y_pred_rf = rf_model.predict(X_pca[len(y_train):])
rf_accuracy = accuracy_score(y_test, y_pred_rf)

svm_model = SVC(kernel='linear')
svm_model.fit(X_pca[:len(y_train)], y_train)
y_pred_svm = svm_model.predict(X_pca[len(y_train):])
svm_accuracy = accuracy_score(y_test, y_pred_svm)


In [None]:
# Save ML models
with open("rf_model.pkl", "wb") as f:
    pickle.dump(rf_model, f)
with open("svm_model.pkl", "wb") as f:
    pickle.dump(svm_model, f)
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)
with open("pca.pkl", "wb") as f:
    pickle.dump(pca, f)

In [None]:
# results
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(f"SVM Accuracy: {svm_accuracy:.4f}")

In [None]:
# Take image path input at runtime
image_path = input("Enter the path of the image: ")

# Choose model type (CNN, RF, or SVM)
model_type = input("Enter model type (cnn/rf/svm): ").lower()

# Make prediction
predict_custom_image(image_path, model_type)


In [None]:
# Function to load a custom image and make a prediction
def predict_custom_image(image_path, model_type="cnn"):
    img = cv2.imread(image_path)
    img = cv2.resize(img, (128, 128))
    img = img / 255.0  # Normalize
    img = np.expand_dims(img, axis=0)

    if model_type == "cnn":
        model = load_model("cnn_skin_cancer.h5")
        prediction = np.argmax(model.predict(img))
    else:
        with open("rf_model.pkl", "rb") as f:
            rf_model = pickle.load(f)
        with open("svm_model.pkl", "rb") as f:
            svm_model = pickle.load(f)
        with open("scaler.pkl", "rb") as f:
            scaler = pickle.load(f)
        with open("pca.pkl", "rb") as f:
            pca = pickle.load(f)

        img_flat = img.reshape(1, -1)
        img_scaled = scaler.transform(img_flat)
        img_pca = pca.transform(img_scaled)
        prediction = rf_model.predict(img_pca)[0] if model_type == "rf" else svm_model.predict(img_pca)[0]

    label = label_encoder.inverse_transform([prediction])[0]
    print(f"Predicted class: {label}")
    return label

In [None]:
import gradio as gr

def predict_uploaded_image(image, model_type="cnn"):
    if image is None:
        return "No image uploaded!"

    img = cv2.imread(image)
    img = cv2.resize(img, (128, 128))
    img = img / 255.0  # Normalize
    img = np.expand_dims(img, axis=0)

    if model_type == "cnn":
        model = load_model("cnn_skin_cancer.h5")
        prediction = np.argmax(model.predict(img))
    else:
        with open("rf_model.pkl", "rb") as f:
            rf_model = pickle.load(f)
        with open("svm_model.pkl", "rb") as f:
            svm_model = pickle.load(f)
        with open("scaler.pkl", "rb") as f:
            scaler = pickle.load(f)
        with open("pca.pkl", "rb") as f:
            pca = pickle.load(f)

        img_flat = img.reshape(1, -1)
        img_scaled = scaler.transform(img_flat)
        img_pca = pca.transform(img_scaled)
        prediction = rf_model.predict(img_pca)[0] if model_type == "rf" else svm_model.predict(img_pca)[0]

    label = label_encoder.inverse_transform([prediction])[0]
    return f"Predicted class: {label}"

# Create Gradio UI
iface = gr.Interface(
    fn=predict_uploaded_image,
    inputs=[
        gr.Image(type="filepath"),
        gr.Radio(["cnn", "rf", "svm"], label="Choose Model Type", value="cnn")
    ],
    outputs="text"
)

iface.launch()
