### Imports

In [1]:
import os
from time import time
from tqdm import tqdm
import numpy

import torch
import torchvision
import torch.nn as nn
from torch.nn import Linear, CrossEntropyLoss
from torch.optim import Adam
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.models import vgg16
from torchvision.transforms import transforms 
# import albumentations as A

In [2]:
num_epochs = 2
batch_size = 4


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

### Load Data : 
#### - Define Augumentations/Transforms
#### - Create Dataset using root_path + Transforms
#### - DataLoader

In [3]:
# from albumentations.pytorch import ToTensorV2
transformations = transforms.Compose([
    transforms.Resize((128,128)),
    transforms.RandomGrayscale(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    
])

In [4]:
train_root = r'F:\KAJAL\Projects\from_kaggle\chest_xray\dataset\train'
test_root = r'F:\KAJAL\Projects\from_kaggle\chest_xray\dataset\test'

In [5]:
train_dataset = torchvision.datasets.ImageFolder(train_root, transform=transformations)
test_dataset = torchvision.datasets.ImageFolder(test_root, transform=transformations)

In [6]:
train_dataset

Dataset ImageFolder
    Number of datapoints: 5232
    Root location: F:\KAJAL\Projects\from_kaggle\chest_xray\dataset\train
    StandardTransform
Transform: Compose(
               Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=None)
               RandomGrayscale(p=0.1)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [7]:
print(len(train_dataset), len(test_dataset))

5232 624


In [8]:
type(train_dataset)

torchvision.datasets.folder.ImageFolder

In [9]:
print(train_dataset.class_to_idx)

{'NORMAL': 0, 'PNEUMONIA': 1}


In [10]:
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)

In [11]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x93959026e0>

### Model

In [12]:
model = vgg16(weights='VGG16_Weights.IMAGENET1K_V1')    # or (pretrained=True)

model.fc = Linear(in_features=4096, out_features=2)
model = model.to(device)

In [13]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

### Criterion

In [14]:
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.003)

### Train and Eval

In [15]:
print("Your Model is Under Training")
for epoch in range(num_epochs):
    
    losses = []
    
    for batch_idx, (input, targets) in enumerate(train_loader):
        input = input.to(device=device)
        targets = targets.to(device=device)
        
        #Forward
        pred = model(input)
        loss = criterion(pred, targets)
        
        losses.append(loss.item())
        
        # Backward 
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        
    print(f'Cost at epoch {epoch} is {sum(losses) / len(losses)}')

print("Checking for Accuracy")

Your Model is Under Training
Cost at epoch 0 is 0.9195395516242693
Cost at epoch 1 is 0.5810899091172473
Checking for Accuracy


In [16]:
# Check Accuracy
def check_accuracy(loader, model):
#     if loader.dataset.train:
#         print("Checking accuracy on training data")
#     else:
#         print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)

            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(f'Got {num_correct}/{num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}')
    model.train()


In [17]:
check_accuracy(train_loader, model)

Got 3883/5232 with accuracy 74.22


In [18]:
check_accuracy(test_loader, model)

Got 390/624 with accuracy 62.50
