# **STEP 1: Install and Import Libraries**

In [None]:
import torch
from torch import nn
from torchvision import transforms
from torch.utils.data import DataLoader
import timm
import os
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder

# **STEP 2: Prepare the Dataset**

In [None]:
# Steps shared by both pipelines: convert the image to a tensor, then apply
# per-channel mean/std normalization (these are the usual ImageNet statistics).
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Random augmentation used for training only: horizontal flip, rotation of up
# to 30 degrees, random crop resized to 224x224, and mild colour jitter.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop((224, 224)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
]

# Training pipeline: augmentation followed by the shared tensor steps.
train_transform = transforms.Compose(augmentation_steps + tensor_steps)

# Validation pipeline: deterministic resize only, then the shared tensor steps.
val_transform = transforms.Compose([transforms.Resize((224, 224))] + tensor_steps)

In [None]:
import os
import shutil
import pandas as pd

# Locate the Kaggle input: two image folders plus one metadata CSV, all under
# the same dataset root.
dataset_path = '/kaggle/input/skin-cancer-mnist-ham10000/'
image_dirs = [
    os.path.join(dataset_path, part_name)
    for part_name in ('HAM10000_images_part_1', 'HAM10000_images_part_2')
]
metadata_path = os.path.join(dataset_path, 'HAM10000_metadata.csv')

# Read the metadata table and print its first rows to inspect the columns.
metadata = pd.read_csv(metadata_path)
print(metadata.head())

In [None]:
# Destination root: one sub-folder per diagnosis label (the 'dx' column), which
# is the class-per-directory layout that ImageFolder reads.
organized_dataset_path = '/kaggle/working/organized_dataset'
os.makedirs(organized_dataset_path, exist_ok=True)

# Create class-specific folders
for label in metadata['dx'].unique():
    os.makedirs(os.path.join(organized_dataset_path, label), exist_ok=True)

# Copy (not move) each image listed in the metadata into its class folder.
# The source images are spread over several directories, so probe each one and
# take the first hit. Iterating the two columns directly avoids the per-row
# Series construction of DataFrame.iterrows().
missing_image_ids = []
for image_id, label in zip(metadata['image_id'], metadata['dx']):
    file_name = image_id + '.jpg'
    candidates = (os.path.join(image_dir, file_name) for image_dir in image_dirs)
    src_path = next((path for path in candidates if os.path.exists(path)), None)

    if src_path is None:
        # Remember the id so the gap is reported instead of silently ignored.
        missing_image_ids.append(image_id)
        continue

    shutil.copy(src_path, os.path.join(organized_dataset_path, label, file_name))

# Surface metadata rows that had no image file; these samples are absent from
# the organized dataset and therefore from training.
if missing_image_ids:
    print(
        f"Warning: {len(missing_image_ids)} image(s) listed in the metadata were not found "
        f"(first few: {missing_image_ids[:5]})"
    )

# **STEP 3: Load the Dataset**

In [None]:
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split

# Index every image under the organized folder tree (no transform attached).
full_dataset = ImageFolder(root=organized_dataset_path)

# Hold out 20% of the sample indices for validation. Stratifying on the class
# index keeps the class proportions equal in both parts; the fixed seed makes
# the split repeatable.
sample_indices = list(range(len(full_dataset)))
sample_labels = [class_idx for _, class_idx in full_dataset.samples]
train_indices, val_indices = train_test_split(
    sample_indices,
    test_size=0.2,
    random_state=42,
    stratify=sample_labels,
)

# Index-based views over the full dataset, one per split.
train_subset = torch.utils.data.Subset(full_dataset, train_indices)
val_subset = torch.utils.data.Subset(full_dataset, val_indices)


In [None]:
from torchvision import transforms

# Tensor conversion and per-channel normalization, appended to both pipelines.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Training pipeline: random flip / rotation / resized crop / colour jitter,
# followed by tensor conversion and normalization.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop((224, 224)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    ]
    + to_normalized_tensor
)

# Validation pipeline: fixed 224x224 resize, no randomness.
val_transform = transforms.Compose(
    [transforms.Resize((224, 224))] + to_normalized_tensor
)

# Each split gets its own ImageFolder so it can carry its own transform, then
# is restricted to that split's indices.
train_dataset = torch.utils.data.Subset(
    ImageFolder(organized_dataset_path, transform=train_transform),
    train_indices,
)
val_dataset = torch.utils.data.Subset(
    ImageFolder(organized_dataset_path, transform=val_transform),
    val_indices,
)


In [None]:
from torch.utils.data import DataLoader

# Both loaders share the batch size and worker count; only the training loader
# shuffles its samples.
loader_options = dict(batch_size=32, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

# Report how many samples each split contains.
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")


# **STEP 4: Load the Model**

In [None]:
import timm
from torch import nn

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build EfficientNet-B0 with pretrained weights.
model = timm.create_model('efficientnet_b0', pretrained=True)

# Swap the classifier for a fresh linear layer with one output per dataset
# class, keeping the input width of the layer it replaces.
model.classifier = nn.Linear(
    in_features=model.classifier.in_features,
    out_features=len(full_dataset.classes),
)

# Place the model on the selected device.
model = model.to(device)


# **STEP 5: Define Loss and Optimizer**

In [None]:
import torch

# Tally how many training samples belong to each class index.
class_counts = [0] * len(full_dataset.classes)
for sample_idx in train_indices:
    class_counts[full_dataset.samples[sample_idx][1]] += 1

# Inverse-frequency weights, rescaled so they sum to 1, placed on the same
# device as the model.
inverse_frequency = 1.0 / torch.tensor(class_counts, dtype=torch.float)
class_weights = (inverse_frequency / inverse_frequency.sum()).to(device)

# Cross-entropy that weights each class by the values computed above.
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Adam at lr=0.001; the scheduler multiplies the rate by 0.1 every 5 scheduler steps.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.1)


# **STEP 6: Train the Model**

In [None]:
from tqdm import tqdm

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                num_epochs=20, patience=3, device=None,
                save_path='/kaggle/working/efficientnet_best.pth'):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Args:
        model: Network to train; updated in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per completed epoch.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without a new best validation
            loss after which training stops.
        device: Device the batches are moved to. When ``None`` the device of
            the model's first parameter is used, so batches and model agree.
        save_path: File that receives the ``state_dict`` each time the
            validation loss improves.

    Returns:
        None. Progress is printed and the best weights are written to
        ``save_path``.
    """
    if device is None:
        # Follow the model instead of depending on a module-level variable.
        device = next(model.parameters()).device

    best_val_loss = float('inf')
    early_stop_counter = 0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Training phase
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss = running_loss / max(len(train_loader), 1)
        train_accuracy = correct / max(total, 1)
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

        # Validation phase (no gradient tracking)
        model.eval()
        val_loss, correct, total = 0.0, 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        val_loss = val_loss / max(len(val_loader), 1)
        val_accuracy = correct / max(total, 1)
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

        # Early stopping: checkpoint on improvement, otherwise count the stall.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), save_path)
            print("Saved best model.")
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print("Early stopping triggered.")
                break

        # Step scheduler (skipped on the epoch that triggers early stopping)
        scheduler.step()

# Train the model on the device it was moved to
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10, device=device)


# **STEP 7: Evaluate the Model**

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Load the best checkpoint. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU also loads on a CPU-only
# session.
model.load_state_dict(torch.load('/kaggle/working/efficientnet_best.pth', map_location=device))
model.eval()

# Collect predicted and true class indices over the validation loader
all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Classification report and confusion matrix
print(classification_report(all_labels, all_preds, target_names=full_dataset.classes))
print(confusion_matrix(all_labels, all_preds))

In [None]:
from torchvision import transforms
from PIL import Image

# Deterministic preprocessing for a single test image: resize to 224x224,
# convert to a tensor, then normalize each channel.
test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
test_transform = transforms.Compose(test_steps)


In [None]:
import torch
import timm
from torch import nn

# Decide where inference runs before reading the checkpoint.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Rebuild the EfficientNet-B0 architecture without pretrained weights and give
# it a 7-output linear classifier so the layer shapes match the checkpoint.
model = timm.create_model('efficientnet_b0', pretrained=False)
model.classifier = nn.Linear(model.classifier.in_features, 7)

# Read the saved weights (remapped onto `device`) and install them.
checkpoint_path = '/kaggle/input/effnet-for-sc/pytorch/default/1/efficientnet_best.pth'
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

# Move to the device and switch to evaluation mode.
model = model.to(device)
model.eval()


In [None]:
def predict_image(image_path, model, transform, device=None, class_names=None):
    """Predict the class label of a single image file.

    Args:
        image_path: Path of the image to classify.
        model: Classifier that maps a batch of image tensors to class scores.
        transform: Callable turning a PIL image into a tensor.
        device: Device for the input tensor. When ``None`` the device of the
            model's first parameter is used, so input and model always agree.
        class_names: Sequence mapping class index to label. When ``None`` the
            module-level ``full_dataset.classes`` is used.

    Returns:
        The label at the index of the highest model output.
    """
    if device is None:
        device = next(model.parameters()).device
    if class_names is None:
        class_names = full_dataset.classes

    # Load and preprocess the image; the context manager closes the file
    # handle, and convert() returns an independent RGB copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    input_tensor = transform(image).unsqueeze(0).to(device)  # Add batch dimension

    # Predict in evaluation mode, then restore whichever mode the caller had.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(input_tensor)
    finally:
        model.train(was_training)

    # Map the predicted class index to the class label
    class_idx = outputs.argmax(dim=1).item()
    return class_names[class_idx]


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Run the model over the validation loader in evaluation mode, gathering the
# predicted and true class indices.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_images, batch_labels in val_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # The predicted class is the index of the largest output per row.
        batch_preds = model(batch_images).max(dim=1).indices

        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

# Per-class precision / recall / F1, labelled with the dataset's class names.
class_names = full_dataset.classes
print("Classification Report:")
report_text = classification_report(all_labels, all_preds, target_names=class_names)
print(report_text)

# Counts of true class versus predicted class.
print("Confusion Matrix:")
matrix = confusion_matrix(all_labels, all_preds)
print(matrix)


In [None]:
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
import numpy as np

# ROC/AUC needs continuous scores: feeding binarized argmax predictions
# collapses every curve to a single operating point. Collect the softmax
# probabilities (and the matching labels) from the validation loader instead.
model.eval()
roc_label_batches, roc_prob_batches = [], []
with torch.no_grad():
    for images, labels in val_loader:
        probs = torch.softmax(model(images.to(device)), dim=1)
        roc_prob_batches.append(probs.cpu().numpy())
        roc_label_batches.append(labels.cpu().numpy())
all_probs = np.concatenate(roc_prob_batches)
roc_labels = np.concatenate(roc_label_batches)

# Binarize the labels for one-vs-rest ROC/AUC
num_classes = len(class_names)
all_labels_bin = label_binarize(roc_labels, classes=list(range(num_classes)))

# Compute ROC curve and AUC for each class from its predicted probability
fpr, tpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(all_labels_bin[:, i], all_probs[:, i])
    roc_auc[i] = roc_auc_score(all_labels_bin[:, i], all_probs[:, i])

# Plot ROC curves
plt.figure(figsize=(10, 8))
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label=f"{class_names[i]} (AUC = {roc_auc[i]:.2f})")

plt.plot([0, 1], [0, 1], color="gray", linestyle="--")  # Diagonal line
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Each Class")
plt.legend(loc="lower right")
plt.show()


In [None]:
# Fraction of validation samples whose predicted class equals the true class.
is_correct = np.asarray(all_preds) == np.asarray(all_labels)
accuracy = is_correct.sum() / len(all_labels)
print(f"Overall Accuracy: {accuracy:.2f}")


Next: load the trained model, fine-tune it further, and then build a GUI-based app around it so it is easy to use.

In [None]:
# Report the size of the full dataset and of each split.
total_count = len(full_dataset)
train_count = len(train_subset)
val_count = len(val_subset)
print(f"Total samples in full_dataset: {total_count}")
print(f"Training samples: {train_count}")
print(f"Validation samples: {val_count}")

# **STEP 8: Fine-Tuning the Model**

In [None]:
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

# Root folder holding one sub-directory of images per class.
organized_dataset_path = '/kaggle/working/organized_dataset'

# Index every image under that root (no transform attached).
full_dataset = ImageFolder(root=organized_dataset_path)

# 80/20 split of the sample indices, stratified on the class index so both
# parts keep the same class distribution; seeded for repeatability.
candidate_indices = list(range(len(full_dataset)))
candidate_labels = [class_idx for _, class_idx in full_dataset.samples]
train_indices, val_indices = train_test_split(
    candidate_indices,
    test_size=0.2,
    random_state=42,
    stratify=candidate_labels,
)

# Index-based views of the full dataset for each split.
train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)


In [None]:
from torchvision import transforms

# Training transformations with augmentation
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop((224, 224)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation transformations (no augmentation)
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def _split_view(transform, indices):
    """Return the image folder with ``transform``, restricted to ``indices``.

    The returned Subset also carries ``samples`` (aligned with the subset's own
    ordering) and ``classes`` so that code reading those ImageFolder attributes
    keeps working on the split.
    """
    base = ImageFolder(root=organized_dataset_path, transform=transform)
    view = Subset(base, indices)
    view.samples = [base.samples[i] for i in indices]
    view.classes = base.classes
    return view


# Apply the transformations per split. Wrapping the whole ImageFolder without
# the split indices would put every image in both datasets, so the validation
# images would also be trained on and the validation metrics would be inflated.
train_dataset = _split_view(train_transform, train_indices)
val_dataset = _split_view(val_transform, val_indices)


In [None]:
from torch.utils.data import WeightedRandomSampler, DataLoader
import torch

# Count samples per class from the training dataset itself rather than relying
# on hard-coded numbers, so the weights always match the data being sampled.
train_labels = [class_idx for _, class_idx in train_dataset.samples]
class_counts = [0] * len(train_dataset.classes)
for class_idx in train_labels:
    class_counts[class_idx] += 1

# Inverse of class counts; clamp keeps a class with no samples from producing
# an infinite weight (such a class contributes no samples anyway).
class_weights = 1.0 / torch.tensor(class_counts, dtype=torch.float).clamp(min=1)

# One weight per sample, looked up by that sample's class index.
weights = class_weights[torch.tensor(train_labels, dtype=torch.long)]

# Draw len(weights) samples per epoch, with replacement, in proportion to the
# weights, so rare classes are drawn more often.
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

# DataLoader with the sampler for training and without for validation
train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


    FROM HERE
    

In [None]:
import torch
import timm
from torch import nn

# Path to the trained model
model_path = '/kaggle/input/effnet-for-sc/pytorch/default/1/efficientnet_best.pth'

# Choose the device first so the checkpoint can be mapped straight onto it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Rebuild the EfficientNet-B0 architecture (no pretrained download) and give it
# a 7-output classifier so the layer shapes match the saved weights.
model = timm.create_model('efficientnet_b0', pretrained=False)
model.classifier = nn.Linear(model.classifier.in_features, 7)

# Load the saved weights. map_location remaps the stored tensors onto `device`,
# so a checkpoint written on a GPU also loads on a CPU-only session.
model.load_state_dict(torch.load(model_path, map_location=device))

# Move the model to the selected device and switch to evaluation mode
model = model.to(device)
model.eval()

print("Model loaded successfully.")

In [None]:
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """Focal loss built on top of cross-entropy.

    Each sample's loss is ``alpha * (1 - pt) ** gamma * ce``, where ``ce`` is
    its cross-entropy and ``pt = exp(-ce)``; the result is the mean over the
    batch.

    Args:
        alpha: Either a scalar applied to every sample, or a per-class
            sequence/tensor of weights indexed by each sample's target class.
        gamma: Exponent of the ``(1 - pt)`` modulating factor; ``0`` reduces
            the loss to (alpha-scaled) cross-entropy.
    """

    def __init__(self, alpha=1.0, gamma=2.0):
        super().__init__()
        self.gamma = gamma
        if isinstance(alpha, (list, tuple)):
            alpha = torch.tensor(alpha, dtype=torch.float)
        if torch.is_tensor(alpha):
            # A buffer follows the module through .to(device) / .cuda().
            self.register_buffer('alpha', alpha.float())
        else:
            self.alpha = alpha

    def forward(self, inputs, targets):
        """Return the mean focal loss for ``inputs`` (logits) and ``targets`` (class indices)."""
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)

        alpha = self.alpha
        if torch.is_tensor(alpha) and alpha.dim() > 0:
            # Per-class weights: pick the weight of each sample's target class.
            alpha = alpha.to(inputs.device)[targets]

        focal_loss = alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()

# Adam over the model's parameters with a small learning rate (1e-5); the step
# scheduler multiplies that rate by 0.1 after every 3 scheduler steps.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)


In [None]:
# Train only the classification head: switch gradients off for every parameter
# of the network, then back on for the classifier layer.
model.requires_grad_(False)
model.classifier.requires_grad_(True)

# Per-class sample counts and the inverse-frequency weights derived from them,
# rescaled to sum to 1 and moved to the model's device.
class_counts = [327, 514, 1099, 115, 1113, 6705, 142]  # As per the dataset
inverse_counts = 1.0 / torch.tensor(class_counts, dtype=torch.float)
class_weights = (inverse_counts / inverse_counts.sum()).to(device)

# Focal loss with a scalar alpha. Note that `class_weights` is not passed to
# the loss here.
criterion = FocalLoss(alpha=1.0, gamma=2.0)


In [None]:
from torch.utils.data import WeightedRandomSampler, DataLoader

# One sampling weight per training sample: the class weight of that sample's
# class index.
weights = [class_weights[class_idx] for _, class_idx in train_dataset.samples]

# Draw as many samples per epoch as there are weights, with replacement.
sampler = WeightedRandomSampler(weights=weights, num_samples=len(weights), replacement=True)

# The training loader draws through the sampler; the validation loader reads
# its dataset in order.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
from tqdm import tqdm

def fine_tune_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                    num_epochs=15, patience=3, device=None,
                    save_path='/kaggle/working/fine_tuned_model.pth'):
    """Fine-tune ``model`` with per-epoch validation, checkpointing and early stopping.

    Args:
        model: Network to fine-tune; updated in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per completed epoch.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without a new best validation
            loss after which fine-tuning stops.
        device: Device the batches are moved to. When ``None`` the device of
            the model's first parameter is used, so the function also works on
            a CPU-only session without the caller passing anything.
        save_path: File that receives the ``state_dict`` each time the
            validation loss improves.

    Returns:
        None. Progress is printed and the best weights are written to
        ``save_path``.
    """
    if device is None:
        device = next(model.parameters()).device

    best_val_loss = float('inf')
    early_stop_counter = 0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Training phase
        # NOTE(review): model.train() puts every layer in training mode, which
        # presumably includes normalization layers of any frozen part of the
        # network — confirm that is intended.
        model.train()
        running_loss, correct_preds, total_samples = 0.0, 0, 0

        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct_preds += (preds == labels).sum().item()
            total_samples += labels.size(0)

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss = running_loss / max(len(train_loader), 1)
        train_accuracy = correct_preds / max(total_samples, 1)
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

        # Validation phase (no gradient tracking)
        model.eval()
        val_loss, correct_preds, total_samples = 0.0, 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                val_loss += criterion(outputs, labels).item()
                preds = outputs.argmax(dim=1)
                correct_preds += (preds == labels).sum().item()
                total_samples += labels.size(0)

        val_loss = val_loss / max(len(val_loader), 1)
        val_accuracy = correct_preds / max(total_samples, 1)
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

        # Early stopping: checkpoint on improvement, otherwise count the stall.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), save_path)
            print("Saved best model.")
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print("Early stopping triggered.")
                break

        # Skipped on the epoch that triggers early stopping.
        scheduler.step()

# Fine-tune the model on the device it was moved to
fine_tune_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=15, device=device)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt
import numpy as np

# Evaluation mode for inference over the validation loader.
model.eval()

# Accumulators for predicted and true class indices.
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_images, batch_labels in val_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Predicted class = index of the largest output in each row.
        batch_preds = model(batch_images).max(dim=1).indices

        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

# Turn the accumulated values into numpy arrays for the metric functions.
all_preds, all_labels = np.array(all_preds), np.array(all_labels)

# Per-class report, labelled with the class names taken from the validation dataset.
class_names = val_dataset.classes
print("Classification Report:")
report_text = classification_report(all_labels, all_preds, target_names=class_names)
print(report_text)

# Counts of true class versus predicted class.
print("Confusion Matrix:")
cm = confusion_matrix(all_labels, all_preds)
print(cm)


In [None]:
# ROC/AUC needs continuous scores: feeding binarized argmax predictions
# collapses every curve to a single operating point. Collect the softmax
# probabilities (and the matching labels) from the validation loader instead.
model.eval()
roc_label_batches, roc_prob_batches = [], []
with torch.no_grad():
    for images, labels in val_loader:
        probs = torch.softmax(model(images.to(device)), dim=1)
        roc_prob_batches.append(probs.cpu().numpy())
        roc_label_batches.append(labels.cpu().numpy())
all_probs = np.concatenate(roc_prob_batches)
roc_labels = np.concatenate(roc_label_batches)

# Binarize the labels for one-vs-rest ROC/AUC
num_classes = len(class_names)
all_labels_bin = label_binarize(roc_labels, classes=range(num_classes))

# Compute the ROC curve and AUC for each class from its predicted probability
fpr, tpr, roc_auc = {}, {}, {}

for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(all_labels_bin[:, i], all_probs[:, i])
    roc_auc[i] = roc_auc_score(all_labels_bin[:, i], all_probs[:, i])

# Plot ROC curve for each class
plt.figure(figsize=(10, 8))
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

# Plot the diagonal line (random classifier line)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Each Class')
plt.legend(loc='lower right')
plt.show()


In [None]:
# Share of validation samples whose predicted class equals the true class.
matches = all_preds == all_labels
accuracy = matches.sum() / len(all_labels)
print(f"Accuracy: {accuracy:.4f}")


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# The three scoring functions, in the order their results are unpacked below.
metric_functions = (precision_score, recall_score, f1_score)

# average=None yields one value per class for each metric.
precision, recall, f1 = (
    metric(all_labels, all_preds, average=None) for metric in metric_functions
)

# Per-class breakdown, one section per class name.
separator = "-" * 30
for i, class_name in enumerate(class_names):
    print(f"Class: {class_name}")
    print(f"  Precision: {precision[i]:.4f}")
    print(f"  Recall: {recall[i]:.4f}")
    print(f"  F1-Score: {f1[i]:.4f}")
    print(separator)

# Macro averages (requested with average='macro').
macro_precision, macro_recall, macro_f1 = (
    metric(all_labels, all_preds, average='macro') for metric in metric_functions
)

# Weighted averages (requested with average='weighted').
weighted_precision, weighted_recall, weighted_f1 = (
    metric(all_labels, all_preds, average='weighted') for metric in metric_functions
)

print(f"Macro Average Precision: {macro_precision:.4f}")
print(f"Macro Average Recall: {macro_recall:.4f}")
print(f"Macro Average F1-Score: {macro_f1:.4f}")

print(f"Weighted Average Precision: {weighted_precision:.4f}")
print(f"Weighted Average Recall: {weighted_recall:.4f}")
print(f"Weighted Average F1-Score: {weighted_f1:.4f}")


In [None]:
def predict_image(image_path, model, transform, device=None, class_names=None):
    """Predict the class label of a single image file.

    Args:
        image_path: Path of the image to classify.
        model: Classifier that maps a batch of image tensors to class scores.
        transform: Callable turning a PIL image into a tensor.
        device: Device for the input tensor. When ``None`` the device of the
            model's first parameter is used, so input and model always agree.
        class_names: Sequence mapping class index to label. When ``None`` the
            module-level ``full_dataset.classes`` is used.

    Returns:
        The label at the index of the highest model output.
    """
    if device is None:
        device = next(model.parameters()).device
    if class_names is None:
        class_names = full_dataset.classes

    # Load and preprocess the image; the context manager closes the file
    # handle, and convert() returns an independent RGB copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    input_tensor = transform(image).unsqueeze(0).to(device)  # Add batch dimension

    # Predict in evaluation mode, then restore whichever mode the caller had.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(input_tensor)
    finally:
        model.train(was_training)

    # Map the predicted class index to the class label
    class_idx = outputs.argmax(dim=1).item()
    return class_names[class_idx]


In [None]:
# Image file to classify.
test_image_path = '/kaggle/input/skin-disease-test-data/test data/7.jpg'

# Run on the GPU when one is available, otherwise on the CPU.
inference_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Classify the image and report the resulting label.
predicted_class = predict_image(test_image_path, model, test_transform, device=inference_device)
print(f"The predicted class for the test image is: {predicted_class}")

APP