# ATML Project Report

### Group 2
Members: Léonard Barras & Nathan Gyger

GitHub: https://github.com/svapili/FS2019_ATML_Group2 

# Melanoma image classification
### Goal:
Implement a deep learning algorithm to classify mole pictures as benign or malignant using the [ISIC database](https://isic-archive.com/).

### Approach description:
TODO: write a description
- Class imbalance: addressed with data augmentation
- Image input: a convolutional neural network (CNN) is the logical model choice
- Transfer learning

# Results
TODO: add accuracy table for different parameters

## Learning curve
TODO: add graphic

# Example of use

In [None]:
import torch

print("Hello from cluster!")
# Only query the GPU name when CUDA is actually available:
# torch.cuda.get_device_name(0) raises on a machine without a usable GPU,
# while the rest of this notebook is written to fall back to the CPU.
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "none (CUDA not available)"
print("Available GPU: ", gpu_name)

### Imports and configuration

In [None]:
# Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import copy
import util
import csv
import glob

In [None]:
# Custom functions
import dataSplitter
import loader
import dataAugmenter
import SimpleNet
import train
import test_

In [None]:
# Paths definitions
# Root folder holding the ISIC images and its train/test/val sub-folders.
Path = '/var/tmp/'
dataDir = Path + 'ISIC-images'
trainDir = Path + 'ISIC-images/train/'
testDir = Path + 'ISIC-images/test/'
valDir = Path + 'ISIC-images/val/'


# Paths definitions for saving results and model state
# Results go to a "results" folder next to the current working directory;
# model weights go to a fixed folder under /var/tmp.
my_path = os.getcwd()
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# here because other cells may read it.
dir = os.path.dirname(my_path)
results_dir = os.path.join(dir, 'results')
modelstate_dir = '/var/tmp/modelstate'

# exist_ok=True creates the folders only when missing, without a separate
# existence check that could race with another process creating them.
os.makedirs(results_dir, exist_ok=True)
os.makedirs(modelstate_dir, exist_ok=True)

In [None]:
# Data pre-processing
# Switches read by the data pre-processing cells of this notebook.
newDataSplit = False # Set to True to split the data randomly again. The data must first be downloaded and extracted with data_extractor.py
dataPreprocessing = False # Set to True to resize and augment the data

In [None]:
# Select the compute device: the GPU when CUDA is available, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

### Data pre-processing

In [None]:
# Performs a new random split of the data into train, test and validation sets.
# The data must first be downloaded and extracted with data_extractor.py.
if newDataSplit:
    testRatio = .1
    valRatio = .1
    # This notebook imports the module as `import dataSplitter`, so the bare
    # name `split` is undefined here; call the function through the module.
    # NOTE(review): assumes dataSplitter exposes `split` with this signature — confirm.
    dataSplitter.split(trainDir, testDir, valDir, testRatio, valRatio)

In [None]:
# Preprocessing the data (resizing and augmenting)
if dataPreprocessing:
    # No bare `preprocessData` name is imported in this notebook, so the
    # function has to be reached through its module.
    # NOTE(review): assumes preprocessData lives in dataAugmenter — confirm.
    dataAugmenter.preprocessData([trainDir, testDir, valDir])

### Data loading

In [None]:
# Batch size handed to loader.melanomaDataLoader; it is also recorded in the
# names of the saved result files.
batch_size = 32

In [None]:
# Build the dataset objects and their dataloaders from the image folder
image_datasets, dataloaders = loader.melanomaDataLoader(
    dataDir,
    batch_size=batch_size,
)

# Number of samples in each of the three splits
dataset_sizes = {
    split_name: len(image_datasets[split_name])
    for split_name in ['train', 'test', 'val']
}
print("Size of the dataset objects: ", dataset_sizes)

# Class labels, taken from the training dataset
class_names = image_datasets['train'].classes
print("Images class names: ", class_names)

# Display a few sample images
print("Sample images:")
loader.showSample(dataloaders, dataset_sizes, class_names)

### Network configuration

In [None]:
# --- Hyper-parameters and run switches -----------------------------------
learning_rate = 0.001
n_epochs = 10
debug_training_status = False  # when True, the smoke-test cell below runs
saving = True                  # when True, weights and metrics are saved each epoch

# --- Model ---------------------------------------------------------------
# AlexNet with a two-class output; the ResNet line is kept as an alternative.
model = models.AlexNet(num_classes=2)
#model = models.resnet101()

# --- Optimizer -----------------------------------------------------------
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# --- Loss function -------------------------------------------------------
loss_fn = nn.CrossEntropyLoss()

# Move the model to the selected device
model = model.to(device)

### First Epoch Training Test

In [None]:
# Smoke test: run the train function once and the test function on the
# validation and test loaders, then print the resulting losses and accuracies.
if debug_training_status is True:
    train_loss, train_accuracy = train.train(
        model, dataloaders['train'], optimizer, loss_fn, device,
        status=debug_training_status,
    )
    # The four trailing return values of test_.test are not used in this cell.
    val_loss, val_accuracy, a, b, c, d = test_.test(
        model, dataloaders['val'], loss_fn, device)
    test_loss, test_accuracy, a, b, c, d = test_.test(
        model, dataloaders['test'], loss_fn, device)

    summary_template = 'Test training: train_loss: {:.4f}, train_accuracy: {:.4f}, val_loss: {:.4f}, val_accuracy: {:.4f}, test_loss: {:.4f}, test_accuracy: {:.4f}'
    print(summary_template.format(
        train_loss, train_accuracy,
        val_loss, val_accuracy,
        test_loss, test_accuracy))


### Train and Save Result

In [None]:
    import time

    # Per-epoch metric histories. Each list starts with a text label so that
    # it can be written as one labelled row of the results CSV file.
    train_losses, train_accuracies = ['train_losses'], ['train_accuracies']
    val_losses, val_accuracies = ['val_losses'], ['val_accuracies']
    time_epoch = ['execution time']

    # Histories of the four counts unpacked from test_.test as TP, TN, FP, FN.
    TPs = ['True Positives']
    TNs = ['True Negatives']
    FPs = ['False Positives']
    FNs = ['False Negatives']

    config = model._get_name() + " " + "_bs=" + str(batch_size)

    # File-name prefix identifying this run (model, batch size, loss, learning
    # rate, optimizer). It does not change between epochs, so build it once.
    base_name = (config + '_' + str(loss_fn) + '_lr=' + str(learning_rate)
                 + '_' + str(optimizer).split(' ')[0])

    ##############################
    # Training Epochs            #
    ##############################

    for epoch in range(n_epochs):
        start_time_epoch = time.time()

        train_loss, train_accuracy = train.train(model, dataloaders['train'], optimizer, loss_fn, device)
        val_loss, val_accuracy, TP, TN, FP, FN = test_.test(model, dataloaders['val'], loss_fn, device)

        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
        TPs.append(TP)
        TNs.append(TN)
        FPs.append(FP)
        FNs.append(FN)

        # Wall-clock time of this epoch (training pass plus validation pass)
        time_last_epoch = time.time() - start_time_epoch
        time_epoch.append(time_last_epoch)

        print('Epoch {}/{}: train_loss: {:.4f}, train_accuracy: {:.4f}, val_loss: {:.4f}, val_accuracy: {:.4f}, epoch execution time: {:.4f}'.format(
            epoch + 1, n_epochs,
            train_losses[-1],
            train_accuracies[-1],
            val_losses[-1],
            val_accuracies[-1],
            time_epoch[-1]))

        print('True Positive: {}, True Negative: {}, False Positives: {}, False Negative: {}'.format(
            TPs[-1],
            TNs[-1],
            FPs[-1],
            FNs[-1]))

        ##############################
        # Saving results             #
        ##############################

        if saving:
            print('...saving...')

            # Remove the files saved for earlier epochs of this same run.
            # The pattern is anchored on '_Epoch_' so that runs whose name
            # merely starts with the same prefix (for example an optimizer
            # name that extends this one) are left untouched, and the prefix
            # is escaped so it is matched literally.
            for directory in (results_dir, modelstate_dir):
                stale_pattern = glob.escape(directory + '/' + base_name) + '_Epoch_*'
                for filename in glob.glob(stale_pattern):
                    os.remove(filename)

            # The epoch index in the file name is zero-based.
            name = base_name + '_Epoch_' + str(epoch)

            # save model weights
            torch.save(model.state_dict(), modelstate_dir + '/' + name + '.pth')

            # save results per epoch: one labelled row per metric history.
            # newline='' is what the csv module requires of files it writes;
            # the with-statement closes the file, no explicit close() needed.
            path = results_dir + '/' + name + '.csv'
            with open(path, 'w', newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerows([
                    train_losses,
                    train_accuracies,
                    val_losses,
                    val_accuracies,
                    time_epoch,
                    TPs,
                    TNs,
                    FPs,
                    FNs,
                ])
