In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import torch
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import cv2 as cv
import os


import torchvision.models as models
# ResNet-18 loaded with pretrained weights. CancerNet.forward calls it as the
# feature extractor, and its output feeds CancerNet.fc1 (1000 input features).
resnet = models.resnet18(pretrained=True)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))


# Any results you write to the current directory are saved as output.

In [None]:
# Use the first CUDA GPU when one is present; otherwise fall back to the CPU
# so this cell does not crash on a machine without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
root_dir = "/kaggle/input/skin-cancer-mnist-ham10000/"
print(torch.cuda.is_available())
if torch.cuda.is_available():
    # get_device_name() raises when no CUDA device exists, so only query it
    # after the availability check.
    print(torch.cuda.get_device_name())
# Move the backbone to the selected device.
resnet = resnet.to(device)
torch.cuda.empty_cache()

In [None]:
# Load the HAM10000 metadata table and show it.
skin_cancer_cells = pd.read_csv(root_dir + "/HAM10000_metadata.csv")
print(skin_cancer_cells)


def is_valid(x):
    """Return True when an age value is neither NaN nor falsy (e.g. 0)."""
    return not np.isnan(x) and bool(x)


# Keep only the usable ages, then take their mean truncated to an int.
all_ages = [age for age in list(skin_cancer_cells.age) if is_valid(age)]
avg = int(sum(all_ages) / len(all_ages))


In [None]:
# Human-readable names for the diagnosis codes stored in the "dx" column.
dx_dictionary = {
    "bcc": "basal cell carcinoma",
    "akiec":"Actinic keratoses and intraepithelial carcinoma / Bowen's disease",
    "bkl": "benign keratosis-like lesions",
    "df":"dermatofibroma",
    "mel":"melanoma",
    "nv":"melanocytic nevi",
    "vasc":"vascular lesions"
}

# Bar chart of how many rows carry each diagnosis code.
skin_cancer_cells["dx"].value_counts().plot(kind='bar', title="Samples per diagnosis code")

# Per-class loss weights, one entry per class index in the order used by
# SkinCancerDataset.get_index: bcc, akiec, bkl, df, mel, nv, vasc.
# Built with torch.tensor on `device` instead of the deprecated
# torch.cuda.FloatTensor constructor.
# NOTE(review): the largest weight (7) is on "nv"; if "nv" is also the most
# frequent class this amplifies the imbalance rather than correcting it -
# confirm the values are intended.
weight = torch.tensor([0.5, 0.3, 1, 0.1, 1, 7, 0.1], dtype=torch.float32, device=device)

In [None]:
# Only the image identifier and its diagnosis code are needed from here on.
skin_cancer_cells = skin_cancer_cells.loc[:, ["image_id", "dx"]]
print(skin_cancer_cells.shape[0])

In [None]:
class SkinCancerDataset(Dataset):
    """Dataset of lesion images paired with integer class labels.

    Each item is ``(image, label)``: ``image`` is a float tensor built from
    the image resized to 176x176, converted to RGB, passed through a
    morphological-gradient filter and min-max scaled to [0, 1]; ``label`` is
    the position of the row's ``dx`` code in ``LABELS``.

    Args:
        dataframe: table with ``image_id`` and ``dx`` columns.
        root_directory: directory whose sub-directories contain
            ``<image_id>.jpg`` files.
    """

    # Class order defines the integer label returned by get_index.
    LABELS = ["bcc", "akiec", "bkl", "df", "mel", "nv", "vasc"]

    def __init__(self, dataframe, root_directory):
        self.df = dataframe
        self.dir = root_directory
        # Sub-directories of root_directory; filled on first image lookup so
        # the directory tree is not re-listed for every single image.
        self._image_dirs = None

    def get_index(self, label):
        """Return the integer class index of a ``dx`` code.

        Raises:
            ValueError: if ``label`` is not one of ``LABELS``.
        """
        return self.LABELS.index(label)

    def _find_image_path(self, filename):
        """Return the path of ``<filename>.jpg`` inside one of the sub-directories.

        Raises:
            FileNotFoundError: if no sub-directory contains the image.
        """
        if getattr(self, "_image_dirs", None) is None:
            # isdir() instead of a "no dot in the name" test: plain files
            # without an extension would otherwise be treated as directories.
            self._image_dirs = [
                entry for entry in os.listdir(self.dir)
                if os.path.isdir(os.path.join(self.dir, entry))
            ]
        # Search in reverse so that, when several sub-directories hold the
        # same file, the last listed one wins (the original behaviour).
        for entry in reversed(self._image_dirs):
            candidate = os.path.join(self.dir, entry, "{}.jpg".format(filename))
            if os.path.isfile(candidate):
                return candidate
        raise FileNotFoundError(
            "{}.jpg not found in any sub-directory of {}".format(filename, self.dir)
        )

    def get_image(self, filename):
        """Load ``<filename>.jpg`` and return it preprocessed as a float32 array.

        Raises:
            FileNotFoundError: if the image is in no sub-directory.
            OSError: if the file exists but OpenCV cannot decode it.
        """
        path = self._find_image_path(filename)
        raw = cv.imread(path)
        if raw is None:
            # cv.imread reports an unreadable file by returning None.
            raise OSError("could not read image: {}".format(path))
        img = cv.resize(cv.cvtColor(raw, cv.COLOR_BGR2RGB), (176, 176))
        return self.normalize(self.compute_gradients(img))

    def compute_gradients(self, img):
        """Apply a morphological gradient with a 5x5 all-ones kernel."""
        kernel = np.ones((5,5),np.uint8)
        img = cv.morphologyEx(img, cv.MORPH_GRADIENT, kernel)
        return img

    def normalize(self, img):
        """Min-max scale ``img`` to the range [0, 1] as 32-bit floats."""
        return cv.normalize(img, None, alpha=0, beta=1, norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image tensor on `device`, integer label)`` for row ``index``."""
        image = self.df.iloc[index]
        return torch.FloatTensor(self.get_image(image['image_id'])).to(device), self.get_index(image['dx'])





In [None]:
# Settings for batching and for the train/validation split.
batch_size = 16
validation_split = 0.2
shuffle_dataset = True
random_seed = 42

dataset = SkinCancerDataset(skin_cancer_cells, root_dir)

# Positions of all samples, optionally shuffled with a fixed seed.
dataset_size = len(dataset)
indices = [*range(dataset_size)]
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# The first `split` positions are held out for validation; the rest train.
val_indices = indices[:split]
train_indices = indices[split:]

# One sampler per subset; both loaders read from the same dataset object.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(dataset, sampler=train_sampler, batch_size=batch_size)
validation_loader = DataLoader(dataset, sampler=valid_sampler, batch_size=batch_size)

In [None]:
class CancerNet(torch.nn.Module):
    """Classifier head stacked on a feature-extracting backbone.

    The backbone's 1000-value output goes through three linear layers
    (1000 -> 400 -> 140 -> 7) with ReLU between them. The output is one raw
    score (logit) per class, suitable for ``torch.nn.CrossEntropyLoss``.

    Args:
        backbone: optional module producing 1000 features per sample. When
            omitted, the module-level ``resnet`` is used, as before; in that
            case it is not registered as a sub-module, so its parameters are
            not part of ``self.parameters()``.
    """

    def __init__(self, backbone=None):
        super(CancerNet, self).__init__()
        self.backbone = backbone
        self.fc1 = torch.nn.Linear(1000, 400)
        self.fc2 = torch.nn.Linear(400, 140)
        self.act = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(140, 7)

    def forward(self, x):
        """Return class logits of shape ``(batch, 7)`` for input batch ``x``."""
        backbone = self.backbone if self.backbone is not None else resnet
        # Follow the device the head lives on instead of hard-coding CUDA.
        x = backbone(x.to(self.fc1.weight.device))
        x = self.act(self.fc1(x))
        x = self.act(self.fc2(x))
        # No activation on the last layer: a ReLU here would clamp negative
        # logits to zero and block their gradients before the loss.
        return self.fc3(x)

In [None]:
# Report the GPU memory in use before the classifier is created, then build
# the classifier and move it to the target device.
print(torch.cuda.memory_allocated())
net = CancerNet()
net = net.to(device)

In [None]:
# Class-weighted cross-entropy loss, optimised with Adam over the
# classifier's own parameters.
criterion = torch.nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)
# Per-epoch history: summed training loss and validation accuracy.
losses, accuracies = [], []

In [None]:
# Train for 100 epochs; after each one, measure accuracy on the validation split.
for j in range(100):
    # The validation pass below switches to eval mode, so restore training
    # mode (the state both modules were in originally) at the start of each epoch.
    net.train()
    resnet.train()
    whole_loss = 0.0
    for inp, labels in train_loader:
        optimizer.zero_grad()
        # Move the last axis (channels, as produced by the dataset) to
        # position 1; same result as transpose(1, 3).transpose(2, 3).
        inp = inp.permute(0, 3, 1, 2)
        # Call the module rather than .forward() so registered hooks run.
        fws = net(inp)
        loss = criterion(fws, labels.to(device))
        loss.backward()
        optimizer.step()
        whole_loss += loss.item()
    losses.append(whole_loss)
    print("Loss: ", whole_loss)

    total = 0
    correct = 0
    # Evaluate in eval mode and without autograd: predictions should not
    # depend on batch composition, and no gradient graph needs to be kept.
    net.eval()
    resnet.eval()
    with torch.no_grad():
        for inp, labels in validation_loader:
            inp = inp.permute(0, 3, 1, 2)
            outputs = net(inp)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted.to(device) == labels.to(device)).sum().item()
    # Guard against an empty validation split.
    accuracy = correct / total if total else float("nan")
    print("Accuracy: ", accuracy)
    accuracies.append(accuracy)
    torch.cuda.empty_cache()
