<a href="https://colab.research.google.com/github/rbbh/Chest-X-Ray/blob/main/Chest_X_Ray_Images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import os
from os import listdir
import glob
import tqdm.notebook as tq

In [24]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device  # bare expression so the notebook displays the selected device

device(type='cuda')

In [25]:
# Root folder of the chest X-ray dataset; the 'train' and 'test' sub-folders are joined onto it below.
# NOTE(review): absolute path that looks like a mounted Google Drive in Colab - adjust when running elsewhere.
data_path = '/content/drive/MyDrive/Colab Notebooks/General/PyTorch/chest_xray/chest_xray/'

In [26]:
# Split directories; label_data() reads one sub-folder per key of `labels` inside each of them.
train_path = os.path.join(data_path,'train')
test_path = os.path.join(data_path, 'test')

In [39]:
# Hyper-parameters shared by the model, the data loaders and the training loop.
in_channels = 1       # default input channels of CNN (images go through transforms.Grayscale())
num_classes = 2       # default output size of CNN (`labels` defines NORMAL and PNEUMONIA)
learning_rate = 1e-3  # step size handed to the SGD optimizer
batch_size = 32       # samples per batch in both DataLoaders
num_epochs = 20       # number of passes over the training loader in train_net()

In [28]:
# Preprocessing applied to every image array returned by mpimg.imread() in label_data().
# The 256x256 resize must stay in sync with CNN.fc1, which expects 64 * 32 * 32 features
# (256 halved by each of the three 2x2 max-poolings).
my_transforms = transforms.Compose([
                    transforms.ToPILImage(),  # array -> PIL image so the PIL-based transforms apply
                    transforms.Grayscale(),  # collapse to a single channel (matches in_channels = 1)
                    transforms.Resize((256,256)),  # fixed spatial size expected by the network
                    transforms.ToTensor(),  # PIL image -> float tensor (presumably scaled to [0, 1] - confirm for the input dtype)
                    transforms.Normalize(mean=[0.5], std=[0.5]),  # (x - 0.5) / 0.5 per channel
])

In [29]:
# Maps each class folder name to its one-hot target vector (float64 rows of a 2x2 identity):
# NORMAL -> [1, 0], PNEUMONIA -> [0, 1].
labels = dict(zip(('NORMAL', 'PNEUMONIA'), np.eye(2)))

In [30]:
def label_data(path, transforms, labels):
  """Load every image found under `path/<class name>` into memory with its label.

  Args:
    path: directory containing one sub-folder per key of `labels`.
    transforms: callable applied to each image array read from disk.
    labels: mapping from sub-folder name to the label value for that class.

  Returns:
    A list of `(transformed_image, label_tensor)` tuples, grouped by class in
    the iteration order of `labels`.
  """
  samples = []
  for class_name in tq.tqdm(labels.keys()):
    class_dir = os.path.join(path, class_name)
    for file_name in tq.tqdm(listdir(class_dir)):
      pixels = mpimg.imread(os.path.join(class_dir, file_name))
      # Transform first, then build the label tensor, and store both as one sample.
      samples.append((transforms(pixels), torch.tensor(labels[class_name])))

  return samples

In [31]:
class CNN(nn.Module):
    """Small convolutional classifier: three conv + ReLU + 2x2 max-pool stages, then two linear layers.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output units (one sigmoid probability per class).
        input_size: spatial size of the input images, either an int (square
            images) or a `(height, width)` pair. Defaults to (256, 256), which
            yields the original `64 * 32 * 32` input features for `fc1`.

    Raises:
        ValueError: if either spatial dimension is smaller than 8, because
            nothing would be left after the three poolings.
    """

    def __init__(self, in_channels=in_channels, num_classes=num_classes, input_size=(256, 256)):
        super(CNN, self).__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size
        if height < 8 or width < 8:
            raise ValueError(f'input_size must be at least 8x8, got {input_size}')

        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        # The 3x3 / stride 1 / padding 1 convolutions keep the spatial size; each of the
        # three 2x2 max-poolings floor-halves it, so the final feature map is (H // 8, W // 8).
        self.fc1 = nn.Linear(64 * (height // 8) * (width // 8), 10)
        self.fc2 = nn.Linear(10, num_classes)

    def forward(self, x):
        """Return per-class sigmoid probabilities of shape (batch, num_classes)."""
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        x = F.max_pool2d(F.relu(self.conv3(x)), kernel_size=2)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        # Sigmoid (not softmax): each output unit is an independent probability.
        x = torch.sigmoid(self.fc2(x))

        return x

In [32]:
# Instantiate the network with its default arguments and move its parameters to the selected device.
model = CNN().to(device)

In [33]:
def verify_network(network, random_data):
  '''
  Smoke-test a network by running a single forward pass on dummy input.

  Args:
  - network: callable model (e.g. an nn.Module) to exercise
  - random_data: input handed to the network unchanged

  Return:
  - True if there are no errors on the topology of the network
  - False if there are errors on the topology of the network
  '''
  try:
    # Only the forward pass matters here, so skip building the autograd graph.
    with torch.no_grad():
      network(random_data)
    return True
  except Exception as error:
    # Catch ordinary errors only: KeyboardInterrupt / SystemExit must still propagate.
    # Report the cause so a failed check can actually be diagnosed.
    print(f'Network verification failed: {error!r}')
    return False

In [34]:
# Binary cross-entropy on probabilities: CNN.forward already applies a sigmoid, so the
# loss receives sigmoid outputs rather than raw logits.
criterion = nn.BCELoss()
# Stochastic gradient descent over all parameters of `model`.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [35]:
# Train Network
def train_net(model, train_loader):
    """Train `model` on `train_loader` and print the mean batch loss after every epoch.

    Uses the notebook-level `num_epochs`, `device`, `criterion` and `optimizer`.

    Args:
        model: network to optimise; called once per batch.
        train_loader: iterable yielding `(inputs, targets)` batches.
    """
    for epoch in tq.tqdm(range(num_epochs)):
        losses = []

        for inputs, expected in train_loader:
            # Move the batch to the same device as the model.
            inputs, expected = inputs.to(device), expected.to(device)

            # Forward pass; the outputs are cast to float64 before the loss is computed
            # (in this notebook the one-hot targets come from np.eye and are float64).
            outputs = model(inputs)
            batch_loss = criterion(outputs.double(), expected)
            losses.append(batch_loss.item())

            # Backward pass followed by one optimizer update.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        print(f'Cost at epoch {epoch + 1} is {sum(losses) / len(losses)}')

In [36]:
def check_accuracy(loader, model):
    """Evaluate `model` on `loader`, print the result and return the accuracy in percent.

    A sample counts as class 0 when the first component of its target equals 1,
    and as class 1 otherwise; the prediction is the index of the largest score.

    Args:
        loader: iterable yielding `(inputs, targets)` batches, where each target
            row is a vector whose first component is 1 for class 0.
        model: network to evaluate; uses the notebook-level `device`.

    Returns:
        Accuracy as a float percentage (0.0 when the loader yields no samples).
    """
    num_correct = 0
    num_samples = 0
    # Remember the current mode so evaluation does not silently flip the model to training.
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x)
                _, predictions = scores.max(1)
                # Vectorised class indices: 0 where the first component is 1, else 1.
                targets = (y[:, 0] != 1).long()
                num_correct += (predictions == targets).sum().item()
                num_samples += predictions.size(0)

        # Guard against an empty loader instead of dividing by zero.
        accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
        print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')
    finally:
        # Restore the original mode even if evaluation raised.
        model.train(was_training)

    return accuracy

In [40]:
def main():
    """Run the whole experiment: sanity-check the model, load data, train, and report accuracy.

    Raises:
        RuntimeError: if the model cannot process a dummy batch of the expected shape.
    """
    # Check the topology first so a broken model fails before the slow dataset load.
    # The probe uses the configured batch size and channel count; 256x256 matches
    # the Resize step in `my_transforms`.
    probe = torch.randn(batch_size, in_channels, 256, 256).to(device)
    if not verify_network(model, probe):
        raise RuntimeError(
            f'Network verification failed for input shape {tuple(probe.shape)}; '
            'call the model on such a batch directly to see the underlying error'
        )

    train_set = label_data(train_path, my_transforms, labels)
    test_set = label_data(test_path, my_transforms, labels)

    train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
    # Accuracy does not depend on sample order, so the evaluation loader is not shuffled.
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

    print('Training the model')
    train_net(model, train_loader)

    print('Checking accuracy on Training Set')
    check_accuracy(train_loader, model)

    print('Checking accuracy on Test Set')
    check_accuracy(test_loader, model)

In [41]:
# Entry point: runs the full pipeline when this cell (or the exported script) is executed directly.
if __name__ == '__main__':
    main()

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1341.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3875.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=234.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=390.0), HTML(value='')))



Training the model


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))

Cost at epoch 1 is 0.5193869352987373
Cost at epoch 2 is 0.49188411643593055
Cost at epoch 3 is 0.44850722426452516
Cost at epoch 4 is 0.386019964874436
Cost at epoch 5 is 0.3184340502775356
Cost at epoch 6 is 0.2583195657094784
Cost at epoch 7 is 0.2162488887682299
Cost at epoch 8 is 0.18875974183493
Cost at epoch 9 is 0.17116100679760482
Cost at epoch 10 is 0.1598154501085317
Cost at epoch 11 is 0.1502104257534384
Cost at epoch 12 is 0.14454927384798769
Cost at epoch 13 is 0.13857570214598194
Cost at epoch 14 is 0.13368947754832167
Cost at epoch 15 is 0.1308006438836466
Cost at epoch 16 is 0.1265016557168894
Cost at epoch 17 is 0.12388746270771406
Cost at epoch 18 is 0.12162963759513382
Cost at epoch 19 is 0.11781437954991465
Cost at epoch 20 is 0.11626470419386807

Checking accuracy on Training Set
Got 4988 / 5216 with accuracy 95.63
Checking accuracy on Test Set
Got 481 / 624 with accuracy 77.08
