In [None]:
# Mount Google Drive at ./gdrive so the following cells can read the project
# archives stored under gdrive/MyDrive/.
from google.colab import drive
drive.mount('gdrive')

Mounted at gdrive


In [None]:
# Unpack the Vin_CXR_512 image archive into ./Vin_CXR_512/ and pull train.csv out of
# its own archive into the working directory.
# -u: only extract files that are missing or older than the archived copy; -q: quiet.
!unzip -uq gdrive/MyDrive/cs492i_project/Vin_CXR_512.zip -d Vin_CXR_512/
!unzip -uq gdrive/MyDrive/cs492i_project/train.csv.zip train.csv

In [None]:
# Install the extra packages this notebook needs on a fresh runtime.
# %pip (instead of !pip) makes sure the packages land in the environment of the
# running kernel; -q keeps the download progress bars out of the saved notebook.
%pip install -q tensorboardX tensorboard pyyaml webcolors
# NOTE(review): this tracks the albumentations development branch, so the installed
# API can change from one run to the next. Pin a release or commit once a known-good
# version has been identified.
%pip install -q -U git+https://github.com/albumentations-team/albumentations

Collecting tensorboardX
  Downloading tensorboardX-2.4.1-py2.py3-none-any.whl (124 kB)
[?25l[K     |██▋                             | 10 kB 30.6 MB/s eta 0:00:01[K     |█████▎                          | 20 kB 28.6 MB/s eta 0:00:01[K     |███████▉                        | 30 kB 20.2 MB/s eta 0:00:01[K     |██████████▌                     | 40 kB 16.1 MB/s eta 0:00:01[K     |█████████████▏                  | 51 kB 13.9 MB/s eta 0:00:01[K     |███████████████▊                | 61 kB 12.0 MB/s eta 0:00:01[K     |██████████████████▍             | 71 kB 11.9 MB/s eta 0:00:01[K     |█████████████████████           | 81 kB 12.9 MB/s eta 0:00:01[K     |███████████████████████▋        | 92 kB 11.5 MB/s eta 0:00:01[K     |██████████████████████████▎     | 102 kB 12.1 MB/s eta 0:00:01[K     |█████████████████████████████   | 112 kB 12.1 MB/s eta 0:00:01[K     |███████████████████████████████▌| 122 kB 12.1 MB/s eta 0:00:01[K     |████████████████████████████████| 124 kB 1

In [None]:
import pandas as pd
import numpy as np
import torch
from pathlib import Path 
import os
import matplotlib.pyplot as plt
import json 
import cv2 
import albumentations
from albumentations import *
from albumentations.pytorch import ToTensorV2

import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

In [None]:
# Load the annotation table. In the preview below every row is one annotation:
# image_id, class_name, class_id, rad_id and a bounding box (x_min, y_min, x_max, y_max).
# "No finding" rows carry class_id 14 and have no box coordinates.
traindata= pd.read_csv('train.csv')
traindata.head(10)

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max
0,50a418190bc3fb1ef1633bf9678929b3,No finding,14,R11,,,,
1,21a10246a5ec7af151081d0cd6d65dc9,No finding,14,R7,,,,
2,9a5094b2563a1ef3ff50dc5c7ff71345,Cardiomegaly,3,R10,691.0,1375.0,1653.0,1831.0
3,051132a778e61a86eb147c7c6f564dfe,Aortic enlargement,0,R10,1264.0,743.0,1611.0,1019.0
4,063319de25ce7edb9b1c6b8881290140,No finding,14,R10,,,,
5,1c32170b4af4ce1a3030eb8167753b06,Pleural thickening,11,R9,627.0,357.0,947.0,433.0
6,0c7a38f293d5f5e4846aa4ca6db4daf1,ILD,5,R17,1347.0,245.0,2188.0,2169.0
7,47ed17dcb2cbeec15182ed335a8b5a9e,Nodule/Mass,8,R9,557.0,2352.0,675.0,2484.0
8,d3637a1935a905b3c326af31389cb846,Aortic enlargement,0,R10,1329.0,743.0,1521.0,958.0
9,afb6230703512afc370f236e8fe98806,Pulmonary fibrosis,13,R9,1857.0,1607.0,2126.0,2036.0


In [None]:
# Names of the 14 finding classes, one per line. The order matters: in the train.csv
# preview, a row's class_id equals the position of its class in this list
# (e.g. 3 -> Cardiomegaly, 8 -> Nodule/Mass). None of the names contains whitespace,
# so splitting on whitespace yields exactly one entry per line.
obj_list = """
Aortic_enlargement
Atelectasis
Calcification
Cardiomegaly
Consolidation
ILD
Infiltration
Lung_Opacity
Nodule/Mass
Other_lesion
Pleural_effusion
Pleural_thickening
Pneumothorax
Pulmonary_fibrosis
""".split()
len(obj_list)

14

In [None]:
# Sanity check: build the multi-hot label vector for one training image and show
# which class ids it carries.
first_file = os.listdir('./Vin_CXR_512/Vin_CXR_512/train')[0]
pic_id = os.path.splitext(first_file)[0]   # file name without its extension
smalldata = traindata[traindata['image_id'] == pic_id]
lol = set(smalldata['class_id'])
# Drop "No finding" (class_id 14) BEFORE filling the vector: `result` only has slots
# 0..13, so indexing it with 14 raises IndexError for any image without findings.
lol.discard(14)
result = np.zeros(14)
for i in lol:
  result[i] = 1
lol

{0, 3, 7}

In [None]:
# Albumentations pipelines keyed by split name; the dataset class looks up the entry
# matching its `mode`.
#   train: resize, then random flip / crop / shift-scale-rotate / brightness-contrast
#          augmentation, then normalisation and conversion to a tensor.
#   val:   resize, normalisation and tensor conversion only (no randomness).
# NOTE(review): max_pixel_value=1 presumably reflects that the images are read as floats
# in [0, 1] rather than 0..255 integers - confirm against the image loader.
data_transform = dict(
    train=Compose(
        [
            Resize(512, 512),
            HorizontalFlip(),
            RandomResizedCrop(512, 512, p=0.5),
            ShiftScaleRotate(rotate_limit=10),
            RandomBrightnessContrast(),
            Normalize(max_pixel_value=1),
            ToTensorV2(),
        ]
    ),
    val=Compose(
        [
            Resize(512, 512),
            Normalize(max_pixel_value=1),
            ToTensorV2(),
        ]
    ),
)

In [None]:
class CXR_DataLoader(torch.utils.data.Dataset):
  """Multi-label chest X-ray dataset backed by one split directory.

  Despite its name this is a ``Dataset``; wrap it in ``torch.utils.data.DataLoader``
  to obtain batches.

  Each item is ``(X, (pic_id, result))``:
    * ``X`` - the image after ``transform[mode](image=...)['image']``.
    * ``pic_id`` - the image file name without its extension.
    * ``result`` - float32 vector of length 14 with a 1 at every ``class_id`` that the
      module-level ``traindata`` frame lists for ``pic_id``. class_id 14 ("No finding")
      sets no bit, so such images get an all-zero vector.

  Args:
    mode: split name; used as the sub-directory of ``root`` and as the key into
      ``transform``.
    transform: mapping from split name to a callable used as shown above.
    root: directory that holds one sub-directory per split.
  """

  NUM_CLASSES = 14     # length of the multi-hot target vector
  NO_FINDING_ID = 14   # class_id that marks "No finding"; has no slot in the target

  def __init__(self, mode, transform=data_transform, root='./Vin_CXR_512/Vin_CXR_512/'):
    super(CXR_DataLoader, self).__init__()
    self.mode = mode
    self.transform = transform[self.mode]
    self.image_dir = os.path.join(root, mode)
    # List the directory once instead of on every __len__/__getitem__ call, and sort it:
    # os.listdir returns entries in arbitrary order, so sorting keeps the
    # index -> file mapping stable across calls, workers and sessions.
    self.filenames = sorted(os.listdir(self.image_dir))

  def __len__(self):
    return len(self.filenames)

  def __getitem__(self, index):
    filename = self.filenames[index]
    pic_id = os.path.splitext(filename)[0]

    X = plt.imread(os.path.join(self.image_dir, filename))
    X = self.transform(image=X)['image']

    class_ids = traindata.loc[traindata['image_id'] == pic_id, 'class_id']
    # float32 keeps the targets in the same dtype as a float32 model output, so the
    # loss does not have to rely on float32/float64 type promotion.
    result = np.zeros(self.NUM_CLASSES, dtype=np.float32)
    for class_id in set(class_ids):
      if class_id != self.NO_FINDING_ID:
        result[class_id] = 1
    y = (pic_id, result)

    return X, y

In [None]:
# Batch both splits in groups of 8 using 2 worker processes each.
# Only the training split is shuffled. Note that `test_dataloader` wraps the 'val'
# split - it is the loader the evaluation code iterates over.
train_dataloader = torch.utils.data.DataLoader(
    dataset=CXR_DataLoader(mode='train'),
    batch_size=8,
    shuffle=True,
    num_workers=2,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=CXR_DataLoader(mode='val'),
    batch_size=8,
    shuffle=False,
    num_workers=2,
)

In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel.
# This cell must run before any cell that imports efficientnet_pytorch.
%pip install -q efficientnet_pytorch



In [None]:
# Build a pretrained EfficientNet-B0 whose classifier has 14 outputs, one per finding class.
# NOTE(review): the recorded run of this cell ended in ModuleNotFoundError, presumably
# because the efficientnet_pytorch install cell had not been executed in that session.
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=14)
# As the last expression this only displays the parameter generator object;
# presumably a leftover sanity check.
model.parameters()

ModuleNotFoundError: ignored

In [None]:
# Use the GPU when one is attached; fall back to CPU so the notebook still runs without one.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def _evaluate_multilabel(model, loader, loss_function, threshold=0.5):
  """Return (mean loss, element-wise accuracy, per-class F1 tensor) of `model` on `loader`."""
  model.eval()
  loss_sum = 0.0
  num_correct = 0
  num_elements = 0
  num_samples = 0
  tp = fp = fn = 0   # become per-class count tensors after the first batch

  with torch.no_grad():
    for X, (_, labels) in tqdm(loader):
      X = X.to(device=device)
      labels = labels.to(device=device, dtype=torch.float32)
      output = model(X)

      loss_sum += loss_function(output, labels).item() * X.shape[0]

      # The model emits raw logits (the loss is BCEWithLogitsLoss), so convert them to
      # probabilities before applying the decision threshold.
      predicted = torch.sigmoid(output) > threshold
      actual = labels == 1

      num_correct += (predicted == actual).sum().item()
      num_elements += actual.numel()

      # Per-class confusion counts, summed over the batch dimension.
      tp = tp + (predicted & actual).sum(dim=0)    # predicted 1, label 1
      fp = fp + (predicted & ~actual).sum(dim=0)   # predicted 1, label 0
      fn = fn + (~predicted & actual).sum(dim=0)   # predicted 0, label 1

      num_samples += X.shape[0]

  if num_samples == 0:
    raise ValueError('evaluation loader produced no samples')

  tp, fp, fn = tp.float(), fp.float(), fn.float()
  denominator = tp + 0.5 * (fp + fn)
  # A class with no positives and no predictions has an undefined F1; report 0 for it
  # instead of letting a NaN poison the mean.
  f1 = torch.where(denominator > 0, tp / denominator, torch.zeros_like(denominator))
  return loss_sum / num_samples, num_correct / num_elements, f1


def train_model(model, epoches, optimizer, train_loader=None, eval_loader=None):
  """Train a multi-label classifier with BCE-with-logits and report validation metrics.

  Every epoch runs one pass over `train_loader` followed by an evaluation pass over
  `eval_loader`. After the last epoch the element-wise accuracy and the mean per-class
  F1 of that final evaluation are printed (``None`` for both when `epoches` is 0).

  Args:
    model: module mapping an image batch to one logit per class; moved to `device`.
    epoches: number of training epochs.
    optimizer: optimizer already bound to `model`'s parameters.
    train_loader: iterable of ``(X, (pic_id, labels))`` batches; defaults to the
      module-level `train_dataloader`.
    eval_loader: iterable of the same form; defaults to the module-level `test_dataloader`.

  Returns:
    None.

  Raises:
    ValueError: if `eval_loader` yields no samples.
  """
  if train_loader is None:
    train_loader = train_dataloader
  if eval_loader is None:
    eval_loader = test_dataloader

  loss_function = nn.BCEWithLogitsLoss()
  model.to(device)
  test_accuracy = None
  mean_f1 = None

  for _ in range(epoches):
    # Train Models
    model.train()
    # Wrapping the loader itself (not enumerate(...)) lets tqdm show the batch total.
    for X, (_, labels) in tqdm(train_loader):
      X = X.to(device=device)
      # Collated numpy float64 targets arrive as double; cast to the logits' dtype.
      labels = labels.to(device=device, dtype=torch.float32)

      #Feed Forward
      output = model(X)

      #Loss Calculate
      loss = loss_function(output, labels)

      #Back propagate
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    # Evaluate models
    _, test_accuracy, f1 = _evaluate_multilabel(model, eval_loader, loss_function)
    mean_f1 = torch.mean(f1)

  print(test_accuracy)
  print(mean_f1)

In [None]:
import torchvision

# ResNet-50 backbone for the 14-label problem. The stock head has 1000 outputs, which
# cannot be paired with the 14-dimensional targets fed to BCEWithLogitsLoss in
# train_model, so replace it with a 14-way linear layer.
model = torchvision.models.resnet50(pretrained= True, progress= True)
model.fc = nn.Linear(model.fc.in_features, 14)

# NOTE(review): the ResNet-18 / CrossEntropy / SGD / StepLR setup below appears to be
# carried over from a two-class fine-tuning example and is not used by the training call
# in this notebook. It is kept, but with fully qualified names so the cell no longer
# stops on the undefined `models`, `optim` and `lr_scheduler`.
model_ft = torchvision.models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model_ft.fc = nn.Linear(num_ftrs, 2)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [None]:
# Train whatever `model` currently is for 100 epochs, using Adam with its default settings.
# NOTE(review): the recorded run stopped with a ValueError before the first batch finished
# (progress bar at 0it) - check that the model's output width matches the 14-dim targets.
optimizer = torch.optim.Adam(model.parameters())
train_model(model, 100, optimizer)

0it [00:00, ?it/s]


ValueError: ignored

In [None]:
# Persist only the weights (state_dict) to Drive so a later session can reload them.
# NOTE(review): the file is named "efficientnet" regardless of which architecture `model`
# holds when this cell runs.
PATH = './gdrive/MyDrive/cs492i_project/efficientnet'
torch.save(model.state_dict(), PATH)

In [None]:
from efficientnet_pytorch import EfficientNet
# map_location='cpu' lets a checkpoint written from GPU tensors load on any runtime;
# load_state_dict below copies the values into the model's own parameters.
# NOTE(review): torch.load unpickles the file - only point it at checkpoints you trust.
efficientnet = torch.load('./gdrive/MyDrive/cs492i_project/efficientnet', map_location='cpu')

# Rebuild the architecture with the same 14-output head, then overwrite its weights with
# the saved ones. The returned key-matching report is displayed as the cell output.
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=14)
model.load_state_dict(efficientnet)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth


  0%|          | 0.00/20.4M [00:00<?, ?B/s]

Loaded pretrained weights for efficientnet-b0


<All keys matched successfully>

In [None]:
# Evaluate `model` on the validation loader: element-wise accuracy, per-class F1 (`f1`)
# and their mean (`mean_f1`).
model = model.to(device)   # same device the batches are moved to below
model.eval()
loss_function = nn.BCEWithLogitsLoss()
with torch.no_grad():
  test_loss = 0
  test_accuracy = 0
  test_num_data = 0
  tp = torch.zeros(14).to(device)
  fp = torch.zeros(14).to(device)
  fn = torch.zeros(14).to(device)

  # Wrapping the loader itself (not enumerate(...)) lets tqdm show the batch total.
  for X, (pic_id, labels) in tqdm(test_dataloader):
    X = X.to(device=device)
    # Collated numpy float64 targets arrive as double; cast to the logits' dtype.
    labels = labels.to(device=device, dtype=torch.float32)
    output = model(X)

    loss = loss_function(output, labels)
    test_loss += loss.item()*X.shape[0]

    # The model emits raw logits (the loss is BCEWithLogitsLoss), so convert them to
    # probabilities before applying the 0.5 decision threshold.
    predicted = torch.sigmoid(output) > 0.5
    actual = labels == 1

    # Calculate Accuracy (fraction of the batch's label slots predicted correctly)
    accuracy = torch.sum(predicted == actual)/actual.numel()
    test_accuracy += accuracy.item()*X.shape[0]

    # Calculate Mean F1: per-class confusion counts, summed over the batch dimension
    tp += torch.sum(predicted & actual, dim=0)    # predicted 1, label 1
    fp += torch.sum(predicted & ~actual, dim=0)   # predicted 1, label 0
    fn += torch.sum(~predicted & actual, dim=0)   # predicted 0, label 1

    test_num_data +=X.shape[0]

  # Turn the counts into per-sample rates; F1 is a ratio, so this scaling cancels out.
  tp /= test_num_data
  fp /= test_num_data
  fn /= test_num_data
  test_loss /= test_num_data
  test_accuracy /= test_num_data
  denominator = tp + 0.5*(fn+fp)
  # A class with no positives and no predictions has an undefined F1; report 0 for it
  # instead of letting a NaN poison the mean.
  f1 = torch.where(denominator > 0, tp/denominator, torch.zeros_like(denominator))
  mean_f1 = torch.mean(f1)
print(test_accuracy)
print(mean_f1)

110it [00:19,  5.72it/s]

0.8552738927194554
tensor(0.5424, device='cuda:0')





In [None]:
# Per-class F1 scores from the evaluation cell; position i corresponds to class_id i.
f1

tensor([0.8828, 0.3768, 0.4054, 0.8571, 0.4762, 0.3577, 0.4419, 0.5953, 0.4759,
        0.3050, 0.7050, 0.5853, 0.4118, 0.7179], device='cuda:0')