# Resnet on Raw ISIC 
In this notebook we will train a ResNet on the ISIC dataset. We will use the raw dataset without augmentation. This gives us a valuable baseline, so we can later compare against it and see whether augmenting the data brings any benefit.

We will first separate the data into different buckets:
    - train, validate, test 



In [8]:
## Imports
# torch 
import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torch.nn.functional as F
# vision
import torchvision ## Contains some utilities for working with the image data
from torchvision import datasets
from torchvision import transforms
from torchvision.models import ResNet
# math tools
import numpy as np
import matplotlib.pyplot as plt

# Select the compute device once: CUDA when torch reports it as available, otherwise the CPU.
# Models and batches are moved onto this device in the training/validation cells.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [37]:
#Imports 
import os
import h5py
import numpy as np
from PIL import Image
import random

# Bare file / directory names inside the ISIC 2024 challenge folder,
# keyed by the short handle the rest of the notebook uses.
relative_names = {
    "sample_sub": "sample_submission.csv",
    "test_images": "test-image.hdf5",
    "test_metadata": "test-metadata.csv",
    "train_images": "train-image.hdf5",
    "train_metadata": "train-metadata.csv",
    "train_labels": "train-labels.csv",
    "train_malig_cases": "train_malig-labels.csv",
    "train_nonmalig_cases": "train_nonmalig-labels.csv",
    #
    "train_dir": "train-image",
}

# Resolve each entry to a normalised path under <cwd>/../data/isic-2024-challenge.
file_dict = {
    handle: os.path.normpath(
        os.path.join(os.getcwd(), '..', 'data/isic-2024-challenge', filename)
    )
    for handle, filename in relative_names.items()
}
file_dict

{'sample_sub': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/sample_submission.csv',
 'test_images': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/test-image.hdf5',
 'test_metadata': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/test-metadata.csv',
 'train_images': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train-image.hdf5',
 'train_metadata': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train-metadata.csv',
 'train_labels': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train-labels.csv',
 'train_malig_cases': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train_malig-labels.csv',
 'train_nonmalig_cases': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train_nonmalig-labels.csv',
 'train_dir': '/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train-image'}

In [24]:
# Lets quickly create a smaller csv with the images and labels
import pandas as pd
input_csv = file_dict["train_metadata"]

# Only the first two metadata columns are read; one of them is the "target" column
# that is used below to split the rows into the 1-labelled and 0-labelled cases.
df = pd.read_csv(input_csv, usecols=[0, 1])
is_target_one = df["target"] == 1
is_target_zero = df["target"] == 0
postitive_df = df[is_target_one]
negative_df = df[is_target_zero]

# Write the full table and both subsets. to_csv keeps its default of writing the
# row index as the first column, so the data columns land at positions 1 and 2.
for out_key, frame in (
    ("train_labels", df),
    ("train_malig_cases", postitive_df),
    ("train_nonmalig_cases", negative_df),
):
    frame.to_csv(file_dict[out_key])

In [4]:
# Load the ResNet-18 definition from the torchvision hub repo, passing weights=None
# (i.e. no pretrained weights are requested).
# NOTE(review): the training cell further down rebinds `model` to a ResNet2 instance, so this
# hub model does not appear to be used by the visible training loop — confirm it is still needed.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', weights=None)

Using cache found in /usr4/cs640/rakin374/.cache/torch/hub/pytorch_vision_v0.10.0


In [50]:
# We must create a custom Dataset class for ISIC data
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset

class ISIC(Dataset):
    """Map-style dataset that serves ISIC images together with their labels.

    The annotations CSV is read positionally: column 1 holds the image id
    (the file name without extension) and column 2 holds the label. Column 0
    is expected to be the row index written by ``DataFrame.to_csv``.

    Args:
        annotations_file: Path to the CSV with image ids and labels.
        img_dir: Directory containing one ``<image id>.jpg`` file per row.
        transform: Optional callable applied to the decoded image tensor.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the annotations table."""
        image_id = self.img_labels.iloc[idx, 1]
        # The extension must be appended to the file name itself. Passing '.jpg'
        # as a separate os.path.join component yields "<img_dir>/<id>/.jpg",
        # which is not where the images live.
        img_path = os.path.join(self.img_dir, f"{image_id}.jpg")
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 2]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [42]:
# img_dir = file_dict["train_dir"]
# img_labels = pd.read_csv(file_dict["train_labels"])
# os.path.join(img_dir, img_labels.iloc[0, 1],'.jpg')

'/projectnb/cs640grp/projects/ao/Agent-O/data/isic-2024-challenge/train-image/ISIC_0015670/.jpg'

In [51]:
def data_loader(data_dir,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):
    """Build DataLoaders over the ISIC training images.

    Args:
        data_dir: Not used; kept so existing calls keep working. All paths are
            taken from the notebook-level ``file_dict``.
        batch_size: Number of samples per batch.
        random_seed: Seed for the shuffle that precedes the train/validation split.
        valid_size: Fraction (0-1) of the samples assigned to the validation split.
        shuffle: When ``test`` is False, shuffle the indices before splitting;
            when ``test`` is True, passed to the DataLoader as its ``shuffle`` flag.
        test: If True, return one loader over the whole dataset instead of a
            train/validation pair.

    Returns:
        A single ``DataLoader`` when ``test`` is True, otherwise the tuple
        ``(train_loader, valid_loader)``.
    """
    # NOTE(review): these look like the commonly quoted CIFAR-10 channel statistics
    # rather than values computed from ISIC — confirm they are intended.
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # ISIC decodes images with read_image, which already returns a uint8 tensor.
    # ToTensor only accepts PIL images / ndarrays, so the dtype conversion (with
    # rescaling to [0, 1]) is done with ConvertImageDtype instead.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Grayscale(num_output_channels=3),
        transforms.ConvertImageDtype(torch.float),
        normalize,
    ])

    # "train_dir" is the image-folder entry of file_dict; there is no "train_image" key.
    dataset = ISIC(file_dict["train_labels"],
                   file_dict["train_dir"],
                   transform=transform,
                   target_transform=None)

    if test:
        # NOTE(review): file_dict has no JPEG folder / label CSV for a held-out set,
        # so this loader iterates over the training annotations — confirm intent.
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

    num_samples = len(dataset)
    indices = list(range(num_samples))
    split = int(np.floor(valid_size * num_samples))

    if shuffle:
        # A private RandomState honours random_seed without reseeding the global
        # NumPy generator; with the default seed it yields the same permutation
        # as np.random.seed(42) followed by np.random.shuffle.
        np.random.RandomState(random_seed).shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]

    # Both loaders share the dataset; the samplers keep the two splits disjoint.
    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=SubsetRandomSampler(train_idx))
    valid_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=SubsetRandomSampler(valid_idx))

    return (train_loader, valid_loader)
# Build the training and validation loaders used by the training loop below.
# NOTE(review): data_loader never reads `data_dir` (paths come from file_dict), and the
# batch size chosen here is independent of the `batch_size` variable set in the training cell.
train_loader, valid_loader = data_loader(data_dir='../data/',
                                     batch_size=64)

# test_loader = data_loader(data_dir='../data/',
#                           batch_size=64,
#                           test=True)

SyntaxError: unmatched ')' (3970677467.py, line 23)

In [47]:
class ResNet2(nn.Module):
    """ResNet-style classifier built from a configurable residual block.

    Layout: 7x7 stride-2 stem convolution -> 3x3 stride-2 max pool -> four
    residual stages (64, 128, 256, 512 channels) -> global average pool ->
    linear classifier.

    Args:
        block: Block class, called as ``block(in_channels, out_channels, stride, downsample)``
            for the first block of a stage and ``block(in_channels, out_channels)`` after that.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the output logit vector.
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet2, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride=2)
        # Adaptive pooling collapses whatever spatial size reaches this point to 1x1.
        # For 224x224 inputs (7x7 feature map) this equals AvgPool2d(7), but other
        # input resolutions no longer break the 512-wide classifier. Pooling has no
        # parameters, so existing state dicts still load.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks that output ``planes`` channels.

        Only the first block may change the resolution or channel count; in that
        case it receives a 1x1 conv + batch norm projection for the shortcut.
        Updates ``self.inplanes`` to ``planes`` for the next stage.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Map a batch of 3-channel images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

class ResidualBlock(nn.Module):
    """Basic two-convolution residual block.

    Computes ``relu(conv2(conv1(x)) + shortcut(x))`` where each conv is a 3x3
    convolution followed by batch norm (conv1 additionally applies ReLU).

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to form the shortcut;
            needed whenever the main path changes the shape of the feature map.
            ``None`` means the input itself is used.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU())
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of both paths."""
        out = self.conv2(self.conv1(x))
        # Compare against None explicitly: the truth value of a module is not a
        # reliable "is a shortcut configured" signal (containers define __len__).
        residual = x if self.downsample is None else self.downsample(x)
        out = out + residual
        return self.relu(out)



In [None]:

# Hyper-parameters.
# NOTE(review): the label CSVs are split on target == 0 / target == 1, which suggests a
# binary task; the classifier is still built with 10 outputs as before — confirm.
num_classes = 10
num_epochs = 20
# NOTE(review): not consumed in this cell; the loaders were built with their own batch size.
batch_size = 16
learning_rate = 0.01

model = ResNet2(ResidualBlock, [3, 4, 6, 3], num_classes=num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.001, momentum = 0.9)

# Train the model
import gc
total_step = len(train_loader)

for epoch in range(num_epochs):
    print(f"Epoch : {epoch} " )

    # BatchNorm must use batch statistics (and update its running averages) while training.
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean loss over the epoch rather than only the final batch.
    print ('Epoch [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, running_loss / max(total_step, 1)))

    # Release cached memory once per epoch; doing this on every step only slows training down.
    gc.collect()
    torch.cuda.empty_cache()

    # Validation: switch to eval mode so BatchNorm uses its running statistics and is
    # not updated by validation batches.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # The former per-batch `rocplot(labels, outputs)` call was removed: no function
            # of that name exists (the helper defined later is `roc_plot`), so it raised
            # NameError. ROC plotting belongs after the loop, on the collected results.

    accuracy = 100 * correct / total if total else float('nan')
    print('Accuracy of the network on the {} validation images: {} %'.format(total, accuracy))

In [10]:
# Useful code for saving and fetching state
# torch.save(model.state_dict(), "../models/rescarnation") # saves model 
# model = ResNet2(ResidualBlock, [3, 4, 6, 3]).to(device)
# model.load_state_dict(torch.load('../models/rescarnation', weights_only=True)) # reload model 
# model.eval()

# Model Performance

In [11]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import multilabel_confusion_matrix

def roc_plot(y_true,y_pred):
    """Plot the ROC curve of binary predictions and show its AUC in the legend.

    Args:
        y_true: Ground-truth binary labels, one per sample. May be an array-like
            or a torch tensor (on any device).
        y_pred: Score (or predicted label) per sample, same length as ``y_true``.
            May be an array-like or a torch tensor (on any device).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    def _to_host(values):
        # roc_curve works on host arrays; tensors (possibly on the GPU or attached
        # to the autograd graph) are detached and copied to NumPy first.
        if hasattr(values, "detach"):
            return values.detach().cpu().numpy()
        return values

    y_true = _to_host(y_true)
    y_pred = _to_host(y_pred)

    # Calculate ROC curve
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)

    # Plot the ROC curve
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    ax.plot([0, 1], [0, 1], 'k--', label='No Skill')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend()
    plt.show()

In [None]:
#Validation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Evaluate with BatchNorm in inference mode so validation batches neither use
# batch statistics nor modify the running averages.
model.eval()

correct = 0
total = 0
label_batches = []
pred_batches = []
with torch.no_grad():
    for images, labels in valid_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # Ground truth goes to the label list and model output to the prediction
        # list (these two were previously swapped).
        label_batches.append(labels)
        pred_batches.append(predicted)

# Concatenate once after the loop instead of re-allocating on every batch;
# an empty loader yields empty tensors rather than an error.
_empty = torch.empty(0, dtype=torch.long, device=device)
full_labels = torch.cat(label_batches) if label_batches else _empty
full_preds = torch.cat(pred_batches) if pred_batches else _empty.clone()

# NOTE(review): full_preds holds hard class predictions (argmax). An ROC curve is
# normally drawn from continuous scores; consider collecting class probabilities instead.
#roc_plot(full_labels, full_preds)