In [27]:
import torch
import random
import os
import numpy as np
import torch.nn as nn
import pandas as pd
import math
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
# import torch_xla.core.xla_model as xm
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sn
from torch.utils.data import DataLoader
from PIL import Image
import timm
import torchvision.transforms as transforms

In [3]:
!nvidia-smi

Fri Jun 25 10:56:37 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.04   Driver Version: 450.119.04   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(42)

### Data Preprocessing

In [5]:
IMG_SIZE = 224
BATCH_SIZE = 64

#### Load CSV

In [6]:
data_df = pd.read_csv('../input/medical-masks-part1/df.csv')

In [7]:
data_df.head()

Unnamed: 0,ID,TYPE,USER_ID,GENDER,AGE,name,size_mb
0,1,1,1,MALE,25,000001_1_000001_MALE_25.jpg,1.801298
1,1,2,1,MALE,25,000001_2_000001_MALE_25.jpg,1.738558
2,1,3,1,MALE,25,000001_3_000001_MALE_25.jpg,1.799667
3,1,4,1,MALE,25,000001_4_000001_MALE_25.jpg,1.740264
4,2,1,2,MALE,23,000002_1_000002_MALE_23.jpg,1.549695


In [8]:
# Number of labels
data_df['TYPE'] = data_df['TYPE'].values - 1

### remove data that unable to be open

In [9]:
error_file = list()

In [10]:
for idx, name in tqdm(enumerate(data_df['name'].values)):
    try:
        file_name = "../input/medical-masks-part1/images/" + name
        image = Image.open(file_name)
    except:
        error_file.append(idx)
        print(name)

0it [00:00, ?it/s]

000030_1_000030_NONE_29.jpg
000162_3_000162_NONE_18.jpg
007790_1_005591_NONE_27.jpg


In [11]:
data_df = data_df.drop(error_file)

In [12]:
len(data_df)

39997

#### Dataset Class

In [13]:
class MaskDataset():
    
    def __init__(self, image_paths, labels, transforms=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transforms = transforms
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        file_name = "../input/medical-masks-part1/images/" + self.image_paths[idx]
        image = Image.open(file_name)
        
        if self.transforms is not None:
            image = self.transforms(image)
        
        return image, self.labels[idx]

### Transforms

In [14]:
transforms_train = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomVerticalFlip(p=0.3),
        transforms.RandomResizedCrop(IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

transforms_test = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

#### Train Validation Test

In [15]:
train_val_image, test_image, train_val_label, test_label = train_test_split(data_df.name.values, data_df.TYPE.values, test_size=0.1, stratify=data_df.TYPE.values)

In [16]:
train_image, validation_image, train_label, validation_label = train_test_split(train_val_image, train_val_label, test_size=(1/9), stratify=train_val_label)

In [17]:
len(train_image), len(validation_image), len(test_image)

(31997, 4000, 4000)

In [18]:
train_dataset = MaskDataset(train_image, train_label, transforms_train)

In [19]:
validation_dataset = MaskDataset(validation_image, validation_label, transforms_test)

In [20]:
test_dataset = MaskDataset(test_image, test_label, transforms_test)

In [21]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Training Loop

In [22]:
def train(model, train_loader, optimizer, criterion, device, scheduler=None):
    total_correct = 0.0
    total_loss = 0.0
    
    model.train()
    for idx, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)
        # forward
        output = model(inputs)
        # calculate loss
        loss = criterion(output, labels)
        loss.backward()
        
        total_correct += (output.argmax(dim=1) == labels).sum()
        total_loss += loss.item()
        
        optimizer.step()
        if idx % 10 == 0:
            print(f"Batch Number {idx+1}: Loss {loss.item()} Accuracy {(output.argmax(dim=1) == labels).float().mean()}")
        if scheduler is not None:
            scheduler.step()
    return total_correct/len(train_loader.datasets), total_loss/len(train_loader)

### Evaluation Loop

In [23]:
def evaluate(model, test_loader, criterion, device):
    total_correct = 0.0
    total_loss = 0.0
    
    model.eval()
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # forward
        output = model(inputs)
        # calculate loss
        loss = criterion(output, labels)
        total_correct += (output.argmax(dim=1) == labels).sum()
        total_loss += loss.item()
        print(loss.item())
        
    return total_correct/len(test_loader.datasets), total_loss/len(test_loader)

### Training Preparation

In [28]:
model = torch.hub.load('facebookresearch/deit:main', 'deit_base_patch16_224', pretrained=True)
### number of unique classes == 4
model.head = nn.Linear(768, 4)

Using cache found in /root/.cache/torch/hub/facebookresearch_deit_main
Downloading: "https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth" to /root/.cache/torch/hub/checkpoints/deit_base_patch16_224-b5f2ef4d.pth


  0%|          | 0.00/330M [00:00<?, ?B/s]

In [29]:
LR = 2e-05
EPOCHS = 2

In [30]:
!nvidia-smi

Fri Jun 25 11:07:50 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.04   Driver Version: 450.119.04   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [31]:
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.995)
device = "cuda"

In [32]:
model.to(device)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn

In [33]:
evaluate(model, train_loader, criterion, device)

1.4000910520553589


RuntimeError: CUDA out of memory. Tried to allocate 148.00 MiB (GPU 0; 15.90 GiB total capacity; 14.73 GiB already allocated; 131.75 MiB free; 14.97 GiB reserved in total by PyTorch)

In [None]:
def run(model, train_loader, validation_loader, optimizer, criterion, device, scheduler=None):
    for epoch in tqdm(range(EPOCHS)):
        print("==================================================")
        print(f"EPOCH {epoch + 1}")
        train_accuracy, train_loss = train(model, train_loader, optimizer, criterion, device, scheduler=None)
        print(f"[TRAIN] EPOCH {epoch + 1} - LOSS: {train_loss}, ACCURACY: {train_accuracy}")
        validation_accuracy, validation_loss = evaluate(model, validation_loader, criterion, device)
        print(f"[VALIDATE] EPOCH {epoch + 1} - LOSS: {validation_loss}, ACCURACY: {validation_accuracy}")
        print("==================================================")