### Install the ISIC CLI and download the dataset


In [1]:
!pip install isic-cli

Collecting isic-cli
  Downloading isic_cli-11.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting django-s3-file-field-client>=1.0.0 (from isic-cli)
  Downloading django_s3_file_field_client-1.0.1-py3-none-any.whl.metadata (2.7 kB)
Collecting girder-cli-oauth-client<1.0.0 (from isic-cli)
  Downloading girder_cli_oauth_client-0.4.0-py3-none-any.whl.metadata (2.6 kB)
Collecting isic-metadata>=1.2.0 (from isic-cli)
  Downloading isic_metadata-4.0.0-py3-none-any.whl.metadata (1.3 kB)
Collecting retryable-requests (from isic-cli)
  Downloading retryable_requests-0.1.2-py3-none-any.whl.metadata (2.7 kB)
Collecting authlib (from girder-cli-oauth-client<1.0.0->isic-cli)
  Downloading Authlib-1.3.2-py2.py3-none-any.whl.metadata (3.9 kB)
Collecting pyxdg (from girder-cli-oauth-client<1.0.0->isic-cli)
  Downloading pyxdg-0.28-py2.py3-none-any.whl.metadata (567 bytes)
Downloading isic_cli-11.0.0-py3-none-any.whl (30 kB)
Downloading django_s3_file_field_client-1.0.1-py3-none-any.whl (3.2 kB)
Download

In [None]:
# Download up to 100,000 images (and their metadata) into ./images/ with the ISIC CLI.
# NOTE(review): this looks long-running -- the recorded progress bar shows 61% next to 0:41:36.
!isic image download images/ --limit 100000

If you have been granted special permissions, logging in with `isic user login` might return more data.

[2KDownloading images (and metadata) (100,000 total) [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [35m 61%[0m [36m0:41:36[0m

### Load metadata

In [None]:
import pandas as pd

In [None]:
# Read the metadata table from the images/ download directory,
# then preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='images/metadata.csv')
df.head(n=5)

In [None]:
import torch

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Report whether CUDA is usable and, if so, which device PyTorch currently targets.
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # One print call, two lines of output (index first, then the device's name).
    print(
        f"Current device: {torch.cuda.current_device()}",
        f"Device name: {torch.cuda.get_device_name()}",
        sep="\n",
    )

### Load initial weights

We use transfer learning: start from a pretrained ResNet-50, freeze its weights, and train only a new final classification layer.

In [None]:
from torchvision.models import resnet50, ResNet50_Weights

# Using the latest weights: https://pytorch.org/vision/stable/models.html#initializing-pre-trained-models
weights = ResNet50_Weights.DEFAULT

model = resnet50(weights=weights).to(device)

# Preprocessing pipeline that ships with the chosen weights; the datasets below
# apply it to every image so inputs match what the pretrained network expects.
transform = weights.transforms()

# Freeze the pretrained parameters so only the new head is trained.
for param in model.parameters():
    param.requires_grad = False

# One output per entry in CustomDataset.class_labels.
NUM_CLASSES = 4

# Replace the final layer with a fresh, trainable one. Its input width is read from
# the layer being replaced instead of hard-coding 2048, so changing the backbone
# cannot silently produce a shape mismatch.
model.fc = torch.nn.Linear(model.fc.in_features, NUM_CLASSES).to(device)

### Load into train & test datasets

In [None]:
from torch.utils.data import Dataset
from PIL import Image 

class CustomDataset(Dataset):
    """Map-style dataset that pairs each ISIC image with its `benign_malignant` label.

    Args:
        df: Metadata table with one row per image. It must provide an `isic_id`
            column (used as the image file stem) and a `benign_malignant` column.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
        image_dir: Directory that holds the `<isic_id>.jpg` files. The default
            keeps the location that was previously hard-coded.
    """

    def __init__(self, df, transform = None, image_dir = "/kaggle/working/images"):
        self.df = df
        self.transform = transform
        self.image_dir = image_dir
        # String label -> integer class index used as the training target.
        self.class_labels = {
            'benign': 0,
            'malignant': 1,
            'indeterminate/benign': 2,
            'indeterminate/malignant': 3
        }

    def __getitem__(self, idx):
        """Return `(image, label)` for the row at position `idx`.

        Returns:
            The (optionally transformed) RGB image and a 0-dim `torch.long`
            tensor holding the class index.

        Raises:
            KeyError: if the row's `benign_malignant` value is missing or is not
                one of the keys of `class_labels`.
        """
        row = self.df.iloc[idx]
        image_id = row["isic_id"]

        # Resolve the label first so a bad row fails fast -- before any file I/O --
        # and with a message that names the offending image.
        label_str = row['benign_malignant']
        label = self.class_labels.get(label_str)
        if label is None:
            raise KeyError(
                f"Unsupported benign_malignant value {label_str!r} for image {image_id}; "
                f"expected one of {sorted(self.class_labels)}"
            )

        # Image.open() is lazy and keeps the file handle open; convert() loads the
        # pixels, and the context manager then closes the handle deterministically.
        img_path = f"{self.image_dir}/{image_id}.jpg"
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.long)

    def __len__(self):
        """Number of rows (and therefore samples) in the metadata table."""
        return len(self.df)

In [None]:
from sklearn.model_selection import train_test_split
import os

# Hold out 10% of the rows for evaluation. The split is seeded so the same rows
# land in the test set on every run, keeping reported metrics comparable.
train_df, test_df = train_test_split(
    df,
    train_size=0.9,
    random_state=42,
)

# Both datasets share the preprocessing that ships with the pretrained weights.
train_dataset = CustomDataset(df=train_df, transform=transform)
test_dataset = CustomDataset(df=test_df, transform=transform)

### Load data into mini batches

In [None]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# os.cpu_count() may return None when the count cannot be determined; DataLoader
# needs an integer, so fall back to 0 (load in the main process).
NUM_WORKERS = os.cpu_count() or 0

# Shuffle only the training data. The evaluation metrics are sums over the whole
# test set, so its order is irrelevant and is kept deterministic.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

### Loss function & optimizer

In [None]:
# Multi-class classification objective computed on the raw logits.
loss_fn = torch.nn.CrossEntropyLoss()
# Adam is handed every model parameter; the frozen ones have requires_grad=False.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

### Train the model!!!
The fun part :)

In [None]:
from timeit import default_timer as timer
start_time = timer()

epochs = 3

# Per-epoch history; consumed later by plot_loss_curves().
results = {"train_loss": [],
           "train_acc": [],
           "test_loss": [],
           "test_acc": []}

for epoch in range(epochs):
    # ---- Training pass ----
    model.train()

    # Running sums for this epoch: summed batch losses, number of correct
    # predictions, and number of samples actually seen.
    train_loss, train_acc, train_seen = 0, 0, 0

    for batch, (X, y) in enumerate(train_dataloader):
        # Device-agnostic code
        X, y = X.to(device), y.to(device)

        # Forward pass
        y_logits = model(X)

        # Calculate loss
        loss = loss_fn(y_logits, y)
        train_loss += loss.item()

        pred_label = y_logits.argmax(dim=1)
        train_acc += (pred_label == y).sum().item()
        train_seen += y.size(0)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep track of the current batch
        if batch % 100 == 0:
            print(f"Epoch {epoch+1}/{epochs} | Batch {batch}/{len(train_dataloader)}")

    # ---- Evaluation pass ----
    test_loss, test_acc, test_seen = 0, 0, 0

    model.eval()
    with torch.inference_mode():
        for batch, (X, y) in enumerate(test_dataloader):
            X, y = X.to(device), y.to(device)

            y_logits = model(X)
            loss = loss_fn(y_logits, y)

            test_loss += loss.item()

            pred_label = y_logits.argmax(dim=1)
            test_acc += (pred_label == y).sum().item()
            test_seen += y.size(0)

    # Loss: mean of the per-batch losses.
    train_loss = train_loss / len(train_dataloader)
    test_loss = test_loss / len(test_dataloader)

    # Accuracy: correct predictions / samples actually seen. Dividing by
    # len(dataloader) * batch_size over-counts whenever the last batch is
    # partial, which under-reports accuracy.
    train_acc = train_acc / train_seen
    test_acc = test_acc / test_seen

    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_acc"].append(test_acc)

    # Report the epoch 1-based, matching the per-batch progress line above.
    print(f"Epoch: {epoch+1} | Train loss: {train_loss} | Test loss: {test_loss} | Train accuracy: {train_acc} | Test accuracy: {test_acc}")

end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

In [None]:
def plot_loss_curves(results):
    """Plots training curves of a results dictionary.

    Draws one figure with two side-by-side panels: train/test loss on the left
    and train/test accuracy on the right, each against the epoch index.

    Args:
        results (dict): dictionary containing list of values, e.g.
            {"train_loss": [...],
             "train_acc": [...],
             "test_loss": [...],
             "test_acc": [...]}

    Returns:
        None. The figure is drawn through matplotlib's pyplot interface.
    """
    # Imported here because no other cell in the notebook brings `plt` into
    # scope; without it the function raises NameError on a fresh kernel.
    import matplotlib.pyplot as plt

    loss = results["train_loss"]
    test_loss = results["test_loss"]

    accuracy = results["train_acc"]
    test_accuracy = results["test_acc"]

    # One x position per recorded epoch (0-based).
    epochs = range(len(results["train_loss"]))

    plt.figure(figsize=(15, 7))

    # Plot loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label="train_loss")
    plt.plot(epochs, test_loss, label="test_loss")
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.legend()

    # Plot accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, accuracy, label="train_accuracy")
    plt.plot(epochs, test_accuracy, label="test_accuracy")
    plt.title("Accuracy")
    plt.xlabel("Epochs")
    plt.legend()

In [None]:
# Draw the loss and accuracy curves from the history recorded by the training loop.
plot_loss_curves(results=results)

### Save model
So we can do inference later

In [None]:
from pathlib import Path 

# Only the state_dict (the model's parameters and buffers) is written, not the
# model object, so loading it later requires rebuilding the same architecture.
model_name = "model.pth"
models = Path("models")

# Create the output directory if needed; an existing one is not an error.
models.mkdir(parents=True, exist_ok=True)

PATH = models.joinpath(model_name)

torch.save(model.state_dict(), PATH)