In [1]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torchvision.transforms import Normalize, Resize, ToTensor
from torch.utils.tensorboard import SummaryWriter

In [2]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import os
import time
import datetime
import pandas as pd


In [3]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5"

In [4]:
checkpoint = 'checkpoint'

In [5]:
log = "xray-resnet4"

os.path.join(os.getcwd(), 'logs', log)

'/lusnlsas/ramkik_data/covid19/covid/logs/xray-resnet4'

In [6]:

tf_writer = SummaryWriter(os.path.join(os.getcwd(), 'logs', log))


In [7]:
def check_rootfolders():
    """Create log and model folder"""
    folders_util = [checkpoint]
    for folder in folders_util:
        if not os.path.exists(folder):
            print('creating folder ' + folder)
            os.mkdir(folder)

check_rootfolders()

In [8]:
def save_checkpoint(epoch , state):
    filename = '%s/ckpt-%s.pth.tar' % (checkpoint, epoch)
    torch.save(state, filename)

In [9]:
model = models.resnet50(pretrained=True)
#model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [10]:
model.fc = nn.Sequential(nn.Linear(2048, 512),
                                 nn.ReLU(),
                                 nn.Dropout(0.2),
                                 nn.Linear(512, 3),
                                 nn.Softmax(dim=1))

In [11]:
#model.to(device)
model = torch.nn.DataParallel(model).cuda()


criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [12]:
df = pd.read_csv(os.path.join(os.getcwd(), 'data', 'train.csv'))
df

Unnamed: 0.1,Unnamed: 0,filename,label
0,1038,0c2e9b99-9a8f-4b44-854e-acd181a0208c.jpg,0
1,2922,34fdff09-5bc2-4df5-b8cf-3c37662037c8.jpg,2
2,1174,0ebc8268-df3d-45d8-8ee7-b34880c62830.jpg,2
3,341,06f1d0a2-d8c5-4229-9944-59da85c96b81.jpg,0
4,280,06951c33-b247-4daf-a087-cc082f83238b.jpg,0
...,...,...,...
940,1200,0f8c91da-7e03-480e-8760-1604b1d53c97.jpg,0
941,1151,0dbb83c1-2214-4152-ac69-d1e7e25453cb.jpg,0
942,499,081e308c-0134-4ba3-b745-f632e37a83a1.jpg,0
943,663,095d6b7c-fa53-4f06-90b9-5c5f76038f04.jpg,2


In [13]:
df.iloc[0].filename

'0c2e9b99-9a8f-4b44-854e-acd181a0208c.jpg'

In [14]:
len(df.index)

945

In [15]:
df.shape[0]

945

In [16]:
class XrayDataset(Dataset):
    
    def __init__(self, base, folder,  csv_path, transform=None):
        
        self.base = base
        self.folder = folder
        self.df = pd.read_csv(os.path.join(os.getcwd(), base, csv_path))
        self.transform = transform
        
    
    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        
        filepath = self.df.iloc[idx].filename
        filepath = os.path.join(os.getcwd(), self.base, self.folder,  filepath)
        class_id = self.df.iloc[idx].label
        #print(filepath)
        image =  Image.open(filepath).convert('RGB')

        #image = cv2.resize(image, (224, 224))

        if self.transform:
            image = self.transform(image)
            #print(image.shape)
        #print(image.shape)
        return image, class_id

In [17]:
train_transforms = transforms.Compose([
                                     #transforms.Grayscale(1),
                                      # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
                                       transforms.RandomHorizontalFlip(p=0.3),
                                        transforms.Resize((224,224)),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.5], std=[0.5])
                                       ])
test_transforms = transforms.Compose([
#                                     transforms.Grayscale(1),
                                      transforms.Resize((224,224)),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.5], std=[0.5])
                                      ])

In [18]:
train_dataset = XrayDataset( 'data', 'train', 'train.csv', train_transforms )

test_dataset = XrayDataset('data', 'test', 'test.csv', test_transforms )

image, label = next(iter(train_dataset))

In [19]:
label

0

In [20]:
batch_size = 128

In [21]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size,  shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size,  shuffle=False, num_workers=4)

In [22]:
image, label = next(iter(train_dataloader))

In [23]:
label

tensor([0, 1, 1, 2, 1, 0, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 2, 0, 0, 2, 2, 2,
        2, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 1, 0, 0, 0, 2, 0, 2, 0,
        0, 0, 2, 0, 1, 2, 0, 2, 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0,
        2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 1, 2, 2, 2, 2, 0, 2, 2, 1, 0, 1, 0,
        2, 1, 2, 0, 0, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 1, 0, 0, 2, 2, 0, 0,
        1, 2, 2, 2, 0, 2, 2, 1])

In [24]:
def validate(val_loader, model, criterion, epoch):
    model.eval()

    start_val_time = time.time()

    correct = 0
    total = 0
    running_loss = 0.0
    count_loop = 0
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            count_loop +=1
            target = target.cuda()

            # compute output
            output = model(input.cuda())
            loss = criterion(output, target)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            running_loss += loss.item()

            # measure accuracy and record loss
            #prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
    accurecy = 100 * correct / total
    end_val_time = time.time()

    currentDT = datetime.datetime.now()        
    print(str(currentDT), "=== Validation Loss : ", running_loss, "  Accurecy : ", accurecy, "count loop : ", count_loop, "   Validation Time :  ", (end_val_time-start_val_time) )
    
    if tf_writer is not None:
        tf_writer.add_scalar('accurecy/test', accurecy, epoch)
        tf_writer.add_scalar('loss/test', running_loss, epoch)
    return running_loss

In [25]:
epochs = 300
steps = 0
print_every = 1
  
train_losses, test_losses = [], []

In [None]:
for epoch in range(epochs):
    
    running_loss = 0.0     
    correct = 0
    total = 0 
    
    model.train()

    for inputs, labels in train_dataloader:
        steps += 1
        inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        _, predicted = torch.max(logps.data, 1)
        #print(predicted)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        #print(correct)
    
    accurecy = 100 * correct / total
    tf_writer.add_scalar('loss/train', running_loss, epoch)
    tf_writer.add_scalar('accurecy/train', accurecy, epoch)
    print( " === epoch : ", epoch, "=== Training Loss : ", running_loss, "  Accurecy : ", accurecy )

    validate(test_dataloader, model, criterion, epoch)
    tf_writer.flush()
    
    
    save_checkpoint(epoch, {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                
            })

tf_writer.close()


 === epoch :  0 === Training Loss :  -3.9685350358486176   Accurecy :  51.74603174603175
2020-04-17 10:12:00.169423 === Validation Loss :  -0.4285714328289032   Accurecy :  42.857142857142854 count loop :  1    Validation Time :   2.384591579437256
 === epoch :  1 === Training Loss :  -3.6466249227523804   Accurecy :  45.82010582010582
2020-04-17 10:12:09.168661 === Validation Loss :  -0.4285714328289032   Accurecy :  42.857142857142854 count loop :  1    Validation Time :   2.370037078857422
 === epoch :  2 === Training Loss :  -4.046564936637878   Accurecy :  50.58201058201058
2020-04-17 10:12:17.958047 === Validation Loss :  -0.4476190507411957   Accurecy :  44.76190476190476 count loop :  1    Validation Time :   2.392124891281128
 === epoch :  3 === Training Loss :  -3.6444785594940186   Accurecy :  45.60846560846561
2020-04-17 10:12:26.714402 === Validation Loss :  -0.43810150027275085   Accurecy :  43.80952380952381 count loop :  1    Validation Time :   2.332263708114624
 === e