In [1]:
# Torch Imports
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
import os
# Debug aid: CUDA_LAUNCH_BLOCKING=1 asks the CUDA runtime to launch kernels
# synchronously, so a failing kernel is reported at the call that caused it.
# NOTE(review): this typically slows GPU training -- confirm it is still wanted
# outside of debugging sessions.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
import os
import torch
import pandas as pd
import torchvision
import torchvision.io
from torchvision.io.image import read_image
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import cv2 as io

class Ham10000(Dataset):
    """Map-style dataset backed by a metadata CSV and a folder of ``.jpg`` images.

    For the row at position ``idx`` of the CSV, the value in column position 1
    is used as the image file stem and the value in column position 7 is
    converted to ``int`` and returned as the label.
    """

    def __init__(self, csv_file, directory, transform, datasetname):
        """Load the metadata table and remember where the images live.

        Args:
            csv_file: Path of the metadata CSV (anything ``pandas.read_csv`` accepts).
            directory: Root directory that contains the image folder.
            transform: Callable applied to every loaded image; a falsy value
                disables it.
            datasetname: Name of the image sub-folder inside ``directory``.
        """
        self.annotations = pd.read_csv(csv_file)
        # Not read anywhere in this class; kept so external code that inspects
        # the attribute keeps working.
        self.img_root_dir = 'dataverse_files'
        self.transform = transform
        self.datasetname = datasetname
        self.csv_file = csv_file
        self.directory = directory

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Raises:
            FileNotFoundError: The image file for this row does not exist.
            OSError: The file exists but the image reader could not decode it.
        """
        img_path = os.path.join(
            self.directory,
            self.datasetname,
            self.annotations.iloc[idx, 1] + ".jpg",
        )

        # The image reader signals failure by returning None instead of
        # raising, which would otherwise surface later as an obscure error
        # inside the transform or the collate step. Fail early with the path.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")

        x_img = io.imread(img_path)
        if x_img is None:
            raise OSError(f"Could not decode image: {img_path}")

        # NOTE(review): `io` is OpenCV here, whose imread yields BGR channel
        # order; per-channel statistics used in transforms must match -- confirm.
        if self.transform:
            x_img = self.transform(x_img)

        y_label = torch.tensor(int(self.annotations.iloc[idx, 7]))

        return (x_img, y_label)




In [11]:
# KFold Function
from sklearn.model_selection import KFold
import torch.nn as nn
import copy

# Pass in a model *instance* (not a constructor): every fold trains its own deep copy.
def kfold(model, dataset, device, path_name, num_folds=5, num_epochs=10,
          loss_function=nn.CrossEntropyLoss(), batch_size=32, num_workers=4,
          random_state=None):
    """Train and evaluate ``model`` with shuffled K-fold cross validation.

    For every fold a deep copy of ``model`` is moved to ``device`` and trained
    with Adam (lr=0.001) for ``num_epochs`` epochs on the fold's training
    indices. After each epoch the copy is evaluated on the fold's held-out
    indices. Per-fold statistics and a final summary are printed.

    Args:
        model: Network instance used as the starting point of every fold; it is
            deep-copied and never trained directly.
        dataset: Indexable dataset whose items are ``(input, target)`` pairs.
        device: Device the fold's network and each batch are moved to.
        path_name: Destination passed to ``torch.save`` after each fold. The
            same path is used for all folds, so the file left on disk holds
            the network of the last fold.
        num_folds: Number of cross-validation splits.
        num_epochs: Training epochs per fold.
        loss_function: Callable ``(outputs, targets) -> loss``.
        batch_size: Batch size of both data loaders.
        num_workers: Worker processes of both data loaders.
        random_state: Seed forwarded to ``KFold`` so the split can be
            reproduced; ``None`` keeps the split unseeded.

    Returns:
        dict mapping fold index to the held-out accuracy (in percent) measured
        after the fold's last epoch. Empty when ``num_epochs`` is 0.
    """
    splitter = KFold(n_splits=num_folds, shuffle=True, random_state=random_state)
    results = {}

    print('--------------------------------')
    # K-fold Cross Validation model evaluation
    for fold, (train_ids, test_ids) in enumerate(splitter.split(dataset)):
        print(f'FOLD {fold}')
        print('--------------------------------')

        # Both loaders wrap the same dataset; the samplers restrict each one to
        # its own index subset and draw from it without replacement.
        trainloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            sampler=torch.utils.data.SubsetRandomSampler(train_ids),
            num_workers=num_workers)
        testloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            sampler=torch.utils.data.SubsetRandomSampler(test_ids),
            num_workers=num_workers)

        # Fresh copy per fold so that no weights leak from one fold to the next.
        network = copy.deepcopy(model)
        network.to(device)
        optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

        total_train_loss = []
        fold_train_acc = []
        fold_test_acc = []
        for epoch in range(num_epochs):
            print(f'Starting epoch {epoch+1}', '-', num_epochs)
            train_loss, train_acc = _train_one_epoch(
                network, trainloader, optimizer, loss_function, device)
            test_acc = _evaluate(network, testloader, device)

            fold_train_acc.append(train_acc)
            total_train_loss.append(train_loss)
            fold_test_acc.append(test_acc)
            # Overwritten every epoch: the fold's reported score is the one
            # measured after its final epoch.
            results[fold] = test_acc

        torch.save(network, path_name)

        print("fold_train_acc: ", fold_train_acc)
        print("fold_test_acc: ", fold_test_acc)
        print("total_train_loss: ", total_train_loss)

    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {num_folds} FOLDS')
    print('--------------------------------')
    accuracy_sum = 0.0
    for key, value in results.items():
        print(f'Fold {key}: {value} %')
        accuracy_sum += value
    # `results` is empty when no epoch ran; skip the average instead of
    # dividing by zero.
    if results:
        print(f'Average: {accuracy_sum/len(results)} %')
    return results


def _train_one_epoch(network, loader, optimizer, loss_function, device):
    """Run one training pass over ``loader``; return (summed batch loss, accuracy %)."""
    network.train()
    loss_sum = 0.0
    correct, total = 0, 0
    for batch in loader:
        inputs, targets = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
        outputs = network(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        # Accuracy is based on the outputs computed before this step's update.
        predicted = outputs.detach().argmax(dim=1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
        loss_sum += loss.item()
    return loss_sum, 100.0 * correct / total


def _evaluate(network, loader, device):
    """Return the accuracy (in percent) of ``network`` over ``loader`` in eval mode."""
    network.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch[0].to(device), batch[1].to(device)
            predicted = network(inputs).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return 100.0 * correct / total
  

In [None]:
# Run K-Folds: ResNet-18 baseline, images only converted to tensors (no augmentation)
import torchvision.transforms as transforms

# Dataset location
csv_file = "./ECS171/dataverse_files/HAM10000_metadata.csv"
directory = "./ECS171/dataverse_files"
datasetname = "HAM10000_images_off"

# NOTE: batch_size is defined here but is not passed to kfold() below, so it
# has no effect on the data loaders used for training.
batch_size = 128

transform = transforms.Compose([transforms.ToTensor()])
dataset = Ham10000(csv_file, directory, transform, datasetname)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Untrained ResNet-18 whose classifier is replaced by a 250-unit layer followed
# by a 7-way output layer (two Linear layers, no activation in between).
resnet18 = models.resnet18(pretrained=False)
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Sequential(nn.Linear(num_ftrs, 250), nn.Linear(250, 7))
resnet18 = resnet18.to(device)

path = './resnet18_10epoch_noaug.pth'
kfold(resnet18, dataset, device, path)

In [None]:
# Run K-Folds: ResNet-18 with random crop / rotation / colour jitter and normalisation
import torchvision.transforms as transforms

# Dataset location
csv_file = "./ECS171/dataverse_files/HAM10000_metadata.csv"
directory = "./ECS171/dataverse_files"
datasetname = "HAM10000_images_off"

# NOTE: batch_size is defined here but is not passed to kfold() below, so it
# has no effect on the data loaders used for training.
batch_size = 128

# The pipeline is attached to the dataset itself, so every sample drawn from
# `dataset` goes through the same random transforms.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomCrop(224),
    transforms.RandomRotation(degrees=13),
    transforms.ColorJitter(brightness=0.5, hue=0.3),
    transforms.Normalize((0.1411, 0.0923, 0.5270), (0.3407, 0.3058, 0.2824)),
])
dataset = Ham10000(csv_file, directory, transform, datasetname)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Untrained ResNet-18 whose classifier is replaced by a 250-unit layer followed
# by a 7-way output layer (two Linear layers, no activation in between).
resnet18 = models.resnet18(pretrained=False)
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Sequential(nn.Linear(num_ftrs, 250), nn.Linear(250, 7))
resnet18 = resnet18.to(device)

path = './resnet18_10epoch_aug.pth'
kfold(resnet18, dataset, device, path)