In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
  #      print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Load data

In [None]:
# Locations of the competition files under the Kaggle input directory.
train_csv = "/kaggle/input/siim-isic-melanoma-classification/train.csv"
test_csv = "/kaggle/input/siim-isic-melanoma-classification/test.csv"
image_dir = "/kaggle/input/siim-isic-melanoma-classification/jpeg/train"

# Metadata table for the training images, read from train.csv.
train_meta = pd.read_csv(train_csv)

In [None]:
#print(train_meta)

In [None]:
import torch

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Dataset and Transforms

In [None]:
import cv2
import tensorflow as tf

from torch.utils.data import DataLoader, Dataset #Create an efficient dataloader set to feed images to the model
from torch.utils.data.sampler import SequentialSampler
from torchvision import transforms

import albumentations as A #Package of transformations
from albumentations.pytorch.transforms import ToTensorV2

def get_train_transform():
    """Return the albumentations pipeline for training images.

    The pipeline only converts the image to a tensor with ``ToTensorV2``.
    NOTE(review): there is no resize step here, unlike ``get_valid_transforms``;
    confirm the images share one size before batching with this pipeline.
    """
    pipeline = A.Compose([ToTensorV2(p=1.0)])
    return pipeline

def get_valid_transforms():
    """Return the pipeline that resizes an image to 512x512 and converts it to a tensor."""
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

class TrainData(Dataset):
    """Dataset yielding ``(image, target)`` pairs for the melanoma images.

    Args:
        dataframe: Metadata table with an ``image_name`` column (file stem of
            each JPEG). Image names are assumed to be unique so that position
            ``idx`` refers to the same row in the table, in ``labels`` and in
            ``image_ids`` -- TODO confirm for new data sources.
        image_dir: Directory that contains ``<image_name>.jpg`` files.
        labels: Sequence of integer class labels aligned row-by-row with
            ``dataframe``. If ``None``, the ``target`` column of ``dataframe``
            is used instead.
        transforms: Optional callable invoked as ``transforms(image=image)``
            and returning a mapping with an ``'image'`` entry (the
            albumentations calling convention). Falsy values skip the step.
    """

    def __init__(self, dataframe, image_dir, labels, transforms):
        super().__init__()

        self.df = dataframe
        self.image_ids = dataframe['image_name'].unique()
        self.image_dir = image_dir
        self.labels = labels
        self.transforms = transforms

    def _load_image(self, image_id):
        """Read one JPEG as an RGB float32 array scaled to the range [0, 1].

        Raises:
            FileNotFoundError: if OpenCV cannot read the file. ``cv2.imread``
                signals failure by returning ``None`` rather than raising.
        """
        path = f'{self.image_dir}/{image_id}.jpg'
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError(f'Could not read image file: {path}')
        # OpenCV loads channels as BGR; convert to RGB before scaling.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image.astype(np.float32) / 255

    def __getitem__(self, idx: int):
        """Return the (optionally transformed) image and its label tensor."""
        image_id = self.image_ids[idx]
        image = self._load_image(image_id)

        # Look the label up by meaning (the labels passed in, or the 'target'
        # column) instead of by a hard-coded column position.
        if self.labels is not None:
            label = self.labels[idx]
        else:
            label = self.df['target'].iloc[idx]
        target = torch.tensor(int(label))

        if self.transforms:
            sample = self.transforms(image=image)
            image = sample['image']

        return image, target

    def __len__(self) -> int:
        """Number of distinct image names in the metadata table."""
        return self.image_ids.shape[0]


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a validation set.
# A fixed seed makes the split reproducible across kernel restarts, and
# stratifying on the label keeps the class ratio equal in both subsets.
# NOTE: this rebinds `train_meta`, so re-running the cell without reloading the
# CSV splits the already-reduced training table again.
train_meta, valid_meta = train_test_split(
    train_meta,
    test_size=0.2,
    random_state=42,
    stratify=train_meta["target"],
)


In [None]:
# Show the (rows, columns) of the training and validation tables, one per line.
print(train_meta.shape, valid_meta.shape, sep="\n")

In [None]:
# Both datasets use the same resize + to-tensor pipeline.
train_dataset = TrainData(
    train_meta,
    image_dir,
    labels=train_meta["target"].values,
    transforms=get_valid_transforms(),
)
valid_dataset = TrainData(
    valid_meta,
    image_dir,
    labels=valid_meta["target"].values,
    transforms=get_valid_transforms(),
)

In [None]:
# Only the training batches are shuffled; validation keeps a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=0,
)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=0,
)

# Load Model

In [None]:
from torch import nn
from torch.nn import functional as F

!pip install efficientnet_pytorch
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b4', num_classes = 2) 
model

# Train Model

In [None]:
# Fine-tune the whole network: mark every parameter as trainable so gradients
# flow through all layers. (Set this to False to freeze the pretrained weights.)
for parameter in model.parameters():
    parameter.requires_grad = True

# Move the model to the selected device before building the optimizer.
model.to(device)

In [None]:
# Function for the validation pass
def validation(model, valid_loader, criterion, device):
    """Run one pass over ``valid_loader`` and accumulate loss and accuracy.

    Args:
        model: Callable mapping a batch of images to per-class scores; the
            predicted class is taken along dimension 1.
        valid_loader: Iterable yielding ``(images, labels)`` tensor pairs.
        criterion: Loss callable taking ``(output, labels)`` and returning a
            scalar tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(val_loss, accuracy)`` of Python floats. Both are sums of the
        per-batch values, so callers divide by ``len(valid_loader)`` to obtain
        the per-batch mean.
    """
    val_loss = 0.0
    accuracy = 0.0

    # Gradients are never needed during validation; disabling them here keeps
    # the function cheap even if the caller forgets to wrap the call.
    with torch.no_grad():
        for images, labels in valid_loader:
            # Honor the `device` argument so the function also works on CPU.
            images, labels = images.to(device), labels.to(device)

            output = model(images)
            val_loss += criterion(output, labels).item()

            # The arg-max of the raw scores is the predicted class; no
            # exponentiation is needed because exp() preserves the ordering.
            predictions = output.argmax(dim=1)
            accuracy += (predictions == labels).float().mean().item()

    return val_loss, accuracy

In [None]:
# Train the classifier
def train_classifier(model, optimizer, criterion, train_loader, valid_loader, epochs,
                     device=None, print_every=50, model_path="/kaggle/working/model.pth"):
    """Train ``model`` and periodically report validation metrics.

    Args:
        model: ``torch.nn.Module`` to optimize.
        optimizer: Optimizer already bound to the model's parameters.
        criterion: Loss callable taking ``(output, labels)``.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        valid_loader: Sized iterable of validation batches, passed to
            ``validation``.
        epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).
        print_every: Report training/validation metrics every this many
            optimizer steps (counted across epochs).
        model_path: File the trained model is written to after the last epoch.

    Returns:
        None. The trained model is saved to ``model_path``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    steps = 0

    for e in range(epochs):

        model.train()

        running_loss = 0.0
        # Batches accumulated into running_loss since the last report. The loss
        # is reset at every epoch start, so this can be smaller than
        # print_every and must be tracked separately for a correct average.
        batches_since_report = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            steps += 1

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batches_since_report += 1

            if steps % print_every == 0:

                model.eval()

                # Turn off gradients for validation, saves memory and computations
                with torch.no_grad():
                    validation_loss, accuracy = validation(model, valid_loader, criterion, device)

                # Guard against an empty validation loader.
                n_val_batches = max(len(valid_loader), 1)

                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/batches_since_report),
                      "Validation Loss: {:.3f}.. ".format(validation_loss/n_val_batches),
                      "Validation Accuracy: {:.3f}".format(accuracy/n_val_batches))

                running_loss = 0.0
                batches_since_report = 0
                model.train()

    # NOTE(review): this pickles the whole module object, so loading it later
    # requires the model class to be importable. Saving model.state_dict() is
    # the more portable alternative; kept as-is so existing loaders still work.
    torch.save(model, model_path)
                
    


In [None]:
from torch import optim

# Cross-entropy loss for the classification objective.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Train for a single epoch.
train_classifier(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_loader,
    valid_loader=valid_loader,
    epochs=1,
)

