In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
from PIL import Image

In [3]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Path to the directory containing all images
image_dir = 'HAM10000_images'

# Path to the CSV file containing image labels
metadata_path = 'HAM10000_metadata.csv'

# Directories for the split datasets
train_dir = 'train'
test_dir = 'test'
validation_dir = 'valid'

# Creating directories if they don't exist
for directory in [train_dir, test_dir, validation_dir]:
    if not os.path.exists(directory):
        os.makedirs(directory)


In [4]:
import pandas as pd

# Load metadata
metadata = pd.read_csv(metadata_path)

target=list(map(lambda x: 1 if x=='mel' else 0,metadata['dx']))
metadata['target']=target
print(metadata)
# Splitting the dataset into train, validation, and test sets
#train_val, test = train_test_split(metadata, test_size=0.2, random_state=42)
#train, validation = train_test_split(train_val, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2


         lesion_id      image_id     dx dx_type   age     sex localization  \
0      HAM_0000118  ISIC_0027419    bkl   histo  80.0    male        scalp   
1      HAM_0000118  ISIC_0025030    bkl   histo  80.0    male        scalp   
2      HAM_0002730  ISIC_0026769    bkl   histo  80.0    male        scalp   
3      HAM_0002730  ISIC_0025661    bkl   histo  80.0    male        scalp   
4      HAM_0001466  ISIC_0031633    bkl   histo  75.0    male          ear   
...            ...           ...    ...     ...   ...     ...          ...   
10010  HAM_0002867  ISIC_0033084  akiec   histo  40.0    male      abdomen   
10011  HAM_0002867  ISIC_0033550  akiec   histo  40.0    male      abdomen   
10012  HAM_0002867  ISIC_0033536  akiec   histo  40.0    male      abdomen   
10013  HAM_0000239  ISIC_0032854  akiec   histo  80.0    male         face   
10014  HAM_0003521  ISIC_0032258    mel   histo  70.0  female         back   

       target  
0           0  
1           0  
2           0  

In [None]:
def move_images(df, source_dir, target_dir):
    for _, row in df.iterrows():
        filename = row['image_id'] + '.jpg'  # Assuming image IDs in the CSV and filenames match
        source_path = os.path.join(source_dir, filename)
        target_path = os.path.join(target_dir, filename)
        
        # Move the image
        shutil.move(source_path, target_path)

# Moving the images
move_images(train, image_dir, train_dir)
move_images(validation, image_dir, validation_dir)
move_images(test, image_dir, test_dir)


In [None]:
import os
train_images = [f for f in os.listdir(train_dir) if os.path.isfile(os.path.join(train_dir, f))]
test_images = [f for f in os.listdir(test_dir) if os.path.isfile(os.path.join(test_dir, f))]
validation_images = [f for f in os.listdir(validation_dir) if os.path.isfile(os.path.join(validation_dir, f))]


In [None]:
def append_target_to_images(images):    
    metadata_dict = pd.Series(metadata['dx'].values,index=metadata['image_id']).to_dict()
    images_with_target = []
    for image in images:
        image_id = image.split('.')[0]  # Assuming image_id does not contain '.'
        target = 1 if metadata_dict.get(image_id) == 'mel' else 0
        train_image_with_label=(image,target)
        images_with_target.append(train_image_with_label)
    return images_with_target



In [None]:
train_images_with_targets=append_target_to_images(train_images)
test_images_with_targets=append_target_to_images(test_images)
validation_images_with_targets=append_target_to_images(validation_images)

In [None]:
melanoma_count_in_train=sum(map(lambda x:x[1],train_images_with_targets))
melanoma_count_in_test=sum(map(lambda x:x[1],test_images_with_targets))
melanoma_count_in_validation=sum(map(lambda x:x[1],validation_images_with_targets))

In [None]:
print(f"Count of people with melanoma in train_images_with_targets:{melanoma_count_in_train}")
print(f"Count of people without melanoma in train_images_with_targets:{len(train_images_with_targets)-melanoma_count_in_train}")

In [None]:
print(f"Count of people with melanoma in test_images_with_targets:{melanoma_count_in_test}")
print(f"Count of people without melanoma in test_images_with_targets:{len(test_images_with_targets)-melanoma_count_in_test}")

In [None]:
print(f"Count of people with melanoma in validation_images_with_targets:{melanoma_count_in_validation}")
print(f"Count of people without melanoma in validation_images_with_targets:{len(validation_images_with_targets)-melanoma_count_in_validation}")

In [None]:
import os

def move_on_class(imagelist,dirs):
    for image_name, label in imagelist:
        mel_dir = os.path.join(dirs, 'mel')
        no_mel_dir = os.path.join(dirs, 'no_mel')
        os.makedirs(mel_dir, exist_ok=True)
        os.makedirs(no_mel_dir, exist_ok=True)
        source_dir=dirs
        # Determine the source path of the image
        source_path = os.path.join(source_dir, image_name)

        # Determine the destination path based on the label
        if label == 1:  # Melanoma
            dest_path = os.path.join(mel_dir, image_name)
        else:  # Non-melanoma
            dest_path = os.path.join(no_mel_dir, image_name)
            

        # Move the image from source to destination
        shutil.move(source_path, dest_path)



In [None]:
#move_on_class(train_images_with_targets,'train')

In [None]:
#move_on_class(test_images_with_targets,'test')

In [None]:
#move_on_class(validation_images_with_targets,'valid')

In [5]:
from torchvision import transforms

# Custom Gaussian Noise Transform
class AddGaussianNoise(object):
    def __init__(self, mean=0., std=1.):
        self.mean = mean
        self.std = std
        
    def __call__(self, tensor):
        return tensor + torch.randn(tensor.size()) * self.std + self.mean
    
    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

# Correctly ordered transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Adjust brightness and contrast
    transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
    transforms.RandomRotation(20),  # Randomly rotate images by 20 degrees
    transforms.ToTensor(),  # Convert PIL Image to Tensor
    AddGaussianNoise(0., 0.1),  # Add custom Gaussian Noise
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the tensor
])


In [6]:
from torchvision import datasets
from torch.utils.data import DataLoader
# Create datasets using ImageFolder

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
valid_dataset = datasets.ImageFolder(root=validation_dir, transform=transform)

# Create data loaders
batch_size =12  # Set this to something appropriate for your hardware

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x00000229CBB78970>


In [7]:
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of batches in train_loader: {len(train_loader)}")


Number of training samples: 6009
Number of batches in train_loader: 501


In [8]:
import torch

# Check if CUDA is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA not available. Using CPU.")





CUDA is available. Using GPU.


In [9]:
from torchvision import models

# Load the pre-trained DenseNet201 and MobileNetV2 models
densenet = models.densenet201(pretrained=True)
mobilenet = models.mobilenet_v2(pretrained=True)




In [10]:
import torch
# DenseNet201: Replace classifier with an identity layer to keep features
densenet.classifier = torch.nn.Identity()

# MobileNetV2: Replace classifier with an identity layer as well
mobilenet.classifier = torch.nn.Identity()


In [11]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.checkpoint import checkpoint
import torch.nn.utils.prune as prune


class CustomClassificationHead(nn.Module):
    def __init__(self, num_features, num_classes):
        super(CustomClassificationHead, self).__init__()
        # No changes here as this part is not as memory intensive
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.batch_norm = nn.BatchNorm1d(num_features)
        self.dense1 = nn.Linear(num_features, 512)  # Consider reducing size if necessary
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(512, num_classes)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the features
        x = self.batch_norm(x)
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        x = self.log_softmax(x)
        return x

class CombinedModel(nn.Module):
    def __init__(self):
        super(CombinedModel, self).__init__()
        densenet = models.densenet201(pretrained=True)
        mobilenet = models.mobilenet_v2(pretrained=True)
        
        # Remove classification layers
        densenet.classifier = nn.Identity()
        mobilenet.classifier = nn.Sequential(*list(mobilenet.classifier.children())[:-1], nn.Identity())
        
        self.densenet = densenet
        self.mobilenet = mobilenet
        
        # Feature reduction layer
        self.feature_reduction = nn.Linear(1920 + 1280, 1024)  # Example: Reducing to 1024 features
        
        # Adjusted classification head for reduced feature size
        self.classification_head = CustomClassificationHead(num_features=1024, num_classes=2)

    def forward(self, x):
        # Using checkpointing for memory efficiency
        features_densenet = checkpoint(self.densenet, x)
        features_mobilenet = checkpoint(self.mobilenet, x)
        
        # Concatenate features along the feature dimension
        features_combined = torch.cat((features_densenet, features_mobilenet), dim=1)
        
        # Reduce feature size
        reduced_features = self.feature_reduction(features_combined)
        
        # Forward pass through the classification head
        x = self.classification_head(reduced_features)
        return x

# Initialize the combined model
combined_model = CombinedModel()

# Assuming 'device' is defined (e.g., cuda or cpu)
combined_model=combined_model.to(device)


# Example: Pruning 20% of connections in the dense1 layer of the classification head by weight magnitude
prune.l1_unstructured(combined_model.classification_head.dense1, name='weight', amount=0.2)


Linear(in_features=1024, out_features=512, bias=True)

In [12]:
import torch

torch.cuda.empty_cache()  # Clear unused memory

inputs, labels = next(iter(train_loader))
inputs, labels = inputs.to(device), labels.to(device)
outputs = combined_model(inputs)  # Check if this line hangs



In [13]:
optimizer = torch.optim.AdamW(combined_model.parameters(), lr=0.001, weight_decay=0.01)
criterion = torch.nn.NLLLoss()

In [15]:
from tqdm import tqdm

epochs = 10

for epoch in range(epochs):
    combined_model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        
        outputs = combined_model(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    train_accuracy = correct / total
    
    # Validation phase
    combined_model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in tqdm(valid_loader, desc="Validating"):
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = combined_model(inputs)
            loss = criterion(outputs, labels)
            
            valid_loss += loss.item()
            
            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    valid_accuracy = correct / total
    
    print(f"Epoch {epoch+1}: Train Loss: {running_loss / len(train_loader):.4f}, "
          f"Train Acc: {train_accuracy * 100:.2f}%, "
          f"Valid Loss: {valid_loss / len(valid_loader):.4f}, "
          f"Valid Acc: {valid_accuracy * 100:.2f}%")


Epoch 1/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:28<00:00,  3.38it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.61it/s]


Epoch 1: Train Loss: 0.2847, Train Acc: 89.03%, Valid Loss: 0.2861, Valid Acc: 89.17%


Epoch 2/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:24<00:00,  3.46it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.60it/s]


Epoch 2: Train Loss: 0.2731, Train Acc: 89.28%, Valid Loss: 0.3110, Valid Acc: 88.77%


Epoch 3/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:25<00:00,  3.44it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.57it/s]


Epoch 3: Train Loss: 0.2740, Train Acc: 89.42%, Valid Loss: 0.2738, Valid Acc: 89.27%


Epoch 4/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:26<00:00,  3.43it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.57it/s]


Epoch 4: Train Loss: 0.2692, Train Acc: 89.53%, Valid Loss: 0.2635, Valid Acc: 89.37%


Epoch 5/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:25<00:00,  3.44it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.56it/s]


Epoch 5: Train Loss: 0.2640, Train Acc: 89.63%, Valid Loss: 0.2644, Valid Acc: 89.07%


Epoch 6/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:26<00:00,  3.43it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.57it/s]


Epoch 6: Train Loss: 0.2701, Train Acc: 89.53%, Valid Loss: 0.2528, Valid Acc: 89.87%


Epoch 7/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:26<00:00,  3.43it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.56it/s]


Epoch 7: Train Loss: 0.2619, Train Acc: 89.50%, Valid Loss: 0.2496, Valid Acc: 89.52%


Epoch 8/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:26<00:00,  3.42it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.58it/s]


Epoch 8: Train Loss: 0.2533, Train Acc: 89.95%, Valid Loss: 0.2516, Valid Acc: 89.67%


Epoch 9/10 - Training: 100%|█████████████████████████████████████████████████████████| 501/501 [02:26<00:00,  3.41it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.58it/s]


Epoch 9: Train Loss: 0.2575, Train Acc: 89.77%, Valid Loss: 0.2710, Valid Acc: 89.72%


Epoch 10/10 - Training: 100%|████████████████████████████████████████████████████████| 501/501 [02:25<00:00,  3.43it/s]
Validating: 100%|████████████████████████████████████████████████████████████████████| 167/167 [00:46<00:00,  3.56it/s]

Epoch 10: Train Loss: 0.2557, Train Acc: 89.95%, Valid Loss: 0.2573, Valid Acc: 89.07%



