In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
import torchvision
from torchvision import transforms,models
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn import metrics
from PIL import Image
from PIL import ImageFile
import albumentations

In [4]:
class ClassificationDataset(Dataset):
    def __init__(self ,image_paths, targets, resize=None, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, item):
        image = Image.open(self.image_paths[item])
        image = image.convert("RGB")
        targets = self.targets[item]

        if self.resize is not None:
            image = image.resize((self.resize[1], self.resize[0]),
                                 resample=Image.BILINEAR)

        image = np.array(image)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented['image']

        # torch expects CxHxW instead of HxWxC
        image = np.transpose(image, (2,0,1)).astype(np.float32)

        image = torch.tensor(image, dtype=torch.float)
        targets = torch.tensor(targets, dtype=torch.long)

        return (image, targets)


In [5]:
root_dir = "../input/jpeg-melanoma-256x256/"
train_csv = pd.read_csv(root_dir + 'train.csv')

def get_train_val_split(df):
    #Removing Duplicates
    df = df[df.tfrecord != -1].reset_index(drop=True)
    train_tf_records = list(range(len(df.tfrecord.unique())))[:12]
    split_cond = df.tfrecord.apply(lambda x: x in train_tf_records)
    train_df = df[split_cond].reset_index()
    valid_df = df[~split_cond].reset_index()
    return train_df,valid_df

train_files, test_files = get_train_val_split(train_csv)

In [25]:
train_image_ids = train_files.image_name.values.tolist()
train_images = [os.path.join(root_dir + 'train/' ,i +'.jpg') for i in train_image_ids]
train_targets = train_files.target.values

test_image_ids = test_files.image_name.values.tolist()
test_images = [os.path.join(root_dir + 'train/' ,i +'.jpg') for i in test_image_ids]
test_targets = test_files.target.values

In [28]:
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
# adding a simple augmentation
aug = albumentations.Compose([
    albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
])


In [34]:
train_dataset = ClassificationDataset(image_paths=train_images, targets=train_targets, resize=(224,224), augmentations=aug)
test_dataset = ClassificationDataset(image_paths=test_images, targets= test_targets, resize=(224,224), augmentations=aug)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

In [37]:
for images,targets in test_loader:
    print(images.shape)

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2ff812be60>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    w.join()
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2ff812be60>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    w.join()
  File "/opt/conda/lib/python3.7/multipr

torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2ff812be60>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    w.join()


torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])


  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process


torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2ff812be60>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    w.join()
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'


torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])


AssertionError: can only join a child process


torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size

IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-4-22cfe3d7d223>", line 14, in __getitem__
    targets = self.targets[item]
IndexError: index 6558 is out of bounds for axis 0 with size 6558


In [40]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [41]:
# defining model
model = models.resnext50_32x4d(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048,1000),
    nn.Dropout(p=0.5),
    nn.Linear(1000,1)
)
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1

In [42]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=5e-4)

In [43]:
def train(data_loader, model, optimizer, device):
    model.train()
    
    for images,targets in data_loader:
        
        images = images.to(device, dtype=torch.float)
        targets = targets.to(device, dtype=torch.float) 
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets.view(-1,1))
        loss.backward()
        optimizer.step()
    

In [44]:
def evaluate(data_loader, model, device):
    model.eval()
    
    final_targets =  []
    final_outputs = []
    
    with torch.no_grad():
        for images, targets in data_loader:
            images = images.to(device, dtype=torch.float)
            targets = targets.to(device, dtype=torch.float) 
            
            output = model(images)
            targets = targets.detach().cpu().numpy().tolist()
            output = output.detach().cpu().numpy().tolist()
            
            # extend the original list
            final_targets.extend(targets)
            final_outputs.extend(output)
            
    return final_outputs, final_targets

In [45]:
for epoch in range(5):
    train(train_loader, model, optimizer, device=device)
    predictions, valid_targets = evaluate(test_loader, model, device=device)
    roc_auc = metrics.roc_auc_score(valid_targets, predictions)
    # f1_score = metrics.f1_score(valid_targets,predictions)
    print(f"Epochs={epoch} Valid ROC AUC={roc_auc}")

IndexError: index 8854 is out of bounds for axis 0 with size 8734

In [None]:
 F1 Score={f1_score}