<a href="https://colab.research.google.com/github/koyarekoikoi/tkpj/blob/main/skin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Define paths
metadata_path = "/content/drive/MyDrive/SD dataset /HAM10000_metadata.csv"
images_path_1 = "/content/drive/MyDrive/SD dataset /HAM10000_images_part_1"
images_path_2 = "/content/drive/MyDrive/SD dataset /HAM10000_images_part_2"

In [8]:
# Load metadata
df = pd.read_csv(metadata_path)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/SD dataset /HAM10000_metadata.csv'

In [None]:
# Image processing function
def load_images(image_ids, image_folder, target_size=(128, 128)):
    images = []
    for img_id in tqdm(image_ids):
        img_path = os.path.join(image_folder, img_id + ".jpg")
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            img = cv2.resize(img, target_size)
            img = img / 255.0  # Normalize
            images.append(img)
    return np.array(images)

In [None]:
# Load images from both folders
X_images = load_images(df['image_id'], images_path_1)
X_images2 = load_images(df['image_id'], images_path_2)
X = np.concatenate([X_images, X_images2])

100%|██████████| 10015/10015 [50:36<00:00,  3.30it/s]
100%|██████████| 10015/10015 [50:32<00:00,  3.30it/s]


In [None]:
# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['dx'])

In [None]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# Define CNN model
def build_cnn_model():
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(len(np.unique(y)), activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [None]:
# Train CNN
cnn_model = build_cnn_model()
cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
cnn_accuracy = cnn_model.evaluate(X_test, y_test)[1]

In [None]:
# Extract features for ML models
X_flat = X.reshape(X.shape[0], -1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_flat)
pca = PCA(n_components=100)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Train ML models
rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_pca[:len(y_train)], y_train)
y_pred_rf = rf_model.predict(X_pca[len(y_train):])
rf_accuracy = accuracy_score(y_test, y_pred_rf)

svm_model = SVC(kernel='linear')
svm_model.fit(X_pca[:len(y_train)], y_train)
y_pred_svm = svm_model.predict(X_pca[len(y_train):])
svm_accuracy = accuracy_score(y_test, y_pred_svm)


In [None]:
# results
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(f"SVM Accuracy: {svm_accuracy:.4f}")