In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all directories under the input directory

import os
# Walk the Kaggle input tree and print every directory path found
# (individual file names are intentionally not printed).
for input_dir, _, _files in os.walk('/kaggle/input'):
    print(input_dir)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Directories of the three dataset splits, all rooted at the same dataset folder.
_dataset_root = '../input/chest-xray-pneumonia/chest_xray'
traindir = _dataset_root + '/train'
testdir = _dataset_root + '/test'
valdir = _dataset_root + '/val'

In [None]:
def _count_files(directory):
    """Return the number of files located directly inside ``directory``."""
    # The first tuple yielded by os.walk is (dirpath, dirnames, filenames)
    # for the top-level directory itself.
    return len(next(os.walk(directory))[2])


# Number of images per class in the training split.
train_count_p = _count_files(traindir + '/PNEUMONIA')
train_count_n = _count_files(traindir + '/NORMAL')
print("Pneumonia Train Number = ", train_count_p)
print("Normal Train Number = ", train_count_n)

# Number of images per class in the test split.
test_count_p = _count_files(testdir + '/PNEUMONIA')
test_count_n = _count_files(testdir + '/NORMAL')
print("Pneumonia Test Number = ", test_count_p)
print("Normal Test Number = ", test_count_n)

# Number of images per class in the validation split.
# (Previously the NORMAL count was never computed and the test count was
# printed twice under a "Normal Val" label.)
val_count_p = _count_files(valdir + '/PNEUMONIA')
val_count_n = _count_files(valdir + '/NORMAL')
print("Pneumonia Val Number = ", val_count_p)
print("Normal Val Number = ", val_count_n)

In [None]:
import matplotlib.pyplot as plt

# Show up to nine pneumonia X-rays from the test split in a 3x3 grid.
pneumonia_dir = testdir + '/PNEUMONIA'
# List the directory once instead of once per image. os.listdir returns names
# in arbitrary order, so sort them to get the same grid on every run, and cap
# the selection at 9 so a smaller folder cannot raise IndexError.
pneumonia_files = sorted(os.listdir(pneumonia_dir))[:9]

plt.figure(figsize=(15, 5))

for i, file_name in enumerate(pneumonia_files):
    plt.subplot(3, 3, i + 1)
    img = plt.imread(os.path.join(pneumonia_dir, file_name))
    plt.title('Pneumonia')
    plt.imshow(img, cmap='gray')
    plt.axis('off')

In [None]:
# Show up to nine normal X-rays from the test split in a 3x3 grid.
normal_dir = testdir + '/NORMAL'
# List the directory once instead of once per image. os.listdir returns names
# in arbitrary order, so sort them to get the same grid on every run, and cap
# the selection at 9 so a smaller folder cannot raise IndexError.
normal_files = sorted(os.listdir(normal_dir))[:9]

plt.figure(figsize=(15, 5))

for i, file_name in enumerate(normal_files):
    plt.subplot(3, 3, i + 1)
    img = plt.imread(os.path.join(normal_dir, file_name))
    plt.title('Normal')
    plt.imshow(img, cmap='gray')
    plt.axis('off')

In [None]:
import glob

# Paths of the training images per class. The '*jpeg' pattern matches any
# file name that ends with "jpeg".
pneumonia_pattern = traindir + '/PNEUMONIA/*jpeg'
normal_pattern = traindir + '/NORMAL/*jpeg'

train_p = glob.glob(pneumonia_pattern)
train_n = glob.glob(normal_pattern)

In [None]:
# One row per training image: 0 for every NORMAL file followed by 1 for every PNEUMONIA file.
class_labels = np.concatenate([[0] * len(train_n), [1] * len(train_p)])
data = pd.DataFrame({"class": class_labels})

In [None]:
import seaborn as sns

# Bar chart of the number of training images per class (0 = NORMAL, 1 = PNEUMONIA).
plt.figure(figsize=(15, 10))
# Select the column by keyword. Passing the Series positionally together with
# data= collides with countplot's first positional parameter (`data`) in
# seaborn >= 0.12 and raises TypeError.
sns.countplot(x='class', data=data, palette='rocket')
plt.title('PNEUMONIA vs NORMAL')
plt.show()

In [None]:
from torchvision import transforms

# Settings shared by both pipelines: target size and the per-channel
# mean / std handed to Normalize (these match the commonly used ImageNet
# statistics).
_image_size = (224, 224)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random rotation in [-20, 20] degrees, then resize,
# tensor conversion and normalization.
train_trnsf = transforms.Compose([
    transforms.RandomRotation((-20, 20)),
    transforms.Resize(_image_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Evaluation pipeline: same as training but without the random rotation.
test_trnsf = transforms.Compose([
    transforms.Resize(_image_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

In [None]:
from torchvision.datasets import ImageFolder

# One ImageFolder dataset per split. Only the training split uses the
# augmenting transform; test and validation share the evaluation transform.
train_ds = ImageFolder(root=traindir, transform=train_trnsf)
test_ds = ImageFolder(root=testdir, transform=test_trnsf)
val_ds = ImageFolder(root=valdir, transform=test_trnsf)

In [None]:
# Fetch the first (image, label) pair of the training dataset and display
# the transformed image's shape as the cell output.
# Note: this rebinds the module-level name `img` that the plotting cells also assign.
img, labels = train_ds[0]
img.shape

In [None]:
# Class names discovered by ImageFolder, and the mapping from each class
# name to the integer label used as the training target.
class_names = train_ds.classes
print(class_names)
print(train_ds.class_to_idx)

In [None]:
from torch.utils.data import DataLoader

batch_size = 64

# Training batches are reshuffled on every pass over the data.
trainloader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=2, pin_memory=True)

# Evaluation loaders iterate in a fixed order. Shuffling brings no benefit
# when only aggregate accuracy is computed, and a fixed order keeps per-batch
# results reproducible between runs.
testloader = DataLoader(test_ds, batch_size, shuffle=False, num_workers=2, pin_memory=True)
# Validation uses a batch twice as large as the training batch.
valloader = DataLoader(val_ds, batch_size*2, shuffle=False, num_workers=2, pin_memory=True)

In [None]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import optuna

In [None]:
class Net(nn.Module):
    """Convolutional two-class classifier for 3 x 224 x 224 images.

    The feature extractor (``self.conv2``) is one size-preserving 3x3
    convolution followed by five stride-2 4x4 convolutions. Each stride-2
    convolution halves the spatial size (224 -> 112 -> 56 -> 28 -> 14 -> 7);
    the 3x3 / stride-1 / padding-1 max-pools keep the size unchanged. The
    resulting 512 x 7 x 7 feature map is flattened and passed to the
    fully connected head (``self.fc``), which outputs two class scores.

    The head applies a ReLU between its Linear layers. Without activations
    the four stacked Linear layers are equivalent to a single affine map, so
    the extra layers added parameters but no expressive power. As a result
    the Linear layers sit at indices 0, 2, 4 and 6 of ``self.fc``.

    Args:
        params: optional hyper-parameter dict. It is accepted for call-site
            compatibility but is not read by the network.
    """

    def __init__(self, params=None):
        super().__init__()
        self.conv2 = nn.Sequential(
            # 3 x 224 x 224 -> 64 x 224 x 224
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # -> 128 x 112 x 112
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),

            # -> 256 x 56 x 56
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),

            # -> 512 x 28 x 28
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),

            # -> 512 x 14 x 14
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),

            # -> 512 x 7 x 7
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        )

        self.fc = nn.Sequential(
            nn.Linear(512*7*7, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            # Final layer emits raw class scores (logits); no activation here
            # because the training loss (CrossEntropyLoss) expects logits.
            nn.Linear(64, 2),
        )

    def forward(self, x):
        """Return class scores of shape (batch, 2) for a (batch, 3, 224, 224) input."""
        out = self.conv2(x)
        # Flatten everything except the batch dimension for the Linear head.
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

def train_and_evaluate(param, model):
    """Train ``model`` with ``run_epochs`` and return its validation accuracy.

    Args:
        param: hyper-parameter dict forwarded unchanged to ``run_epochs``.
        model: network to train; it is trained in place.

    Returns:
        Accuracy on ``valloader`` as a percentage (0-100).

    Raises:
        ZeroDivisionError: if ``valloader`` yields no samples.
    """
    run_epochs(param, model)

    # Put the network in inference mode before measuring accuracy. In training
    # mode the BatchNorm layers normalize with the statistics of each
    # validation batch and also update their running averages with them.
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the highest score per row is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total

    return accuracy

def objective(trial):
    """Optuna objective: train a fresh ``Net`` and return its validation accuracy.

    The trial samples a learning rate on a log scale from [1e-5, 1e-1] and
    one of the optimizer names "Adam", "RMSprop" or "SGD".

    Args:
        trial: the ``optuna`` trial that supplies the hyper-parameter values.

    Returns:
        The value returned by ``train_and_evaluate`` for the sampled parameters.
    """
    params = {
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True),
        'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]),
    }

    model = Net(params)

    return train_and_evaluate(params, model)

# NOTE(review): EPOCHS is not read by any code visible in this notebook;
# run_epochs uses its own epoch count.
EPOCHS = 30

# Hyper-parameter search: 30 trials with a TPE sampler, maximizing the value
# returned by `objective`.
# NOTE(review): `objective` ends up calling run_epochs, which is defined in a
# later cell, so on a fresh kernel that cell has to be executed before this one.
tpe_sampler = optuna.samplers.TPESampler()
study = optuna.create_study(direction="maximize", sampler=tpe_sampler)
study.optimize(objective, n_trials=30)

In [None]:
# Hyper-parameters for the final training run. They are hard-coded here
# rather than read back from the `study` object.
params = dict(optimizer="Adam", learning_rate=1.025e-05)

# Fresh, untrained network for the final run.
model = Net(params)

In [None]:
def check_model(model, trainloader):
    """Return the classification accuracy of ``model`` over a data loader.

    Args:
        model: network whose forward pass returns one score per class for
            every sample in the batch.
        trainloader: iterable yielding ``(images, labels)`` batches. Despite
            its name it can be any loader (the notebook passes the test and
            validation loaders).

    Returns:
        Accuracy as a percentage (0-100).

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    # Evaluate in inference mode so BatchNorm uses its running statistics
    # instead of per-batch statistics, then restore whatever mode the model
    # was in so a subsequent training call is unaffected.
    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in trainloader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                # Index of the highest score per row is the predicted class.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    accuracy = 100 * correct / total
    return accuracy

In [None]:
def run_epochs(param, model, num_epochs=8):
    """Train ``model`` on ``trainloader`` with cross-entropy loss.

    Args:
        param: dict with the keys ``'optimizer'`` (name of a class in
            ``torch.optim``, e.g. ``"Adam"``) and ``'learning_rate'``.
        model: network to train. It is moved to ``device`` and updated in
            place, so the caller's object holds the trained weights.
        num_epochs: number of passes over ``trainloader`` (default 8, the
            value that used to be hard-coded).

    Returns:
        A tuple ``(losses, accuracies, accuracy)``:
        ``losses`` - loss of every training batch, in order;
        ``accuracies`` - fraction of correct predictions of every training
        batch, in order;
        ``accuracy`` - the highest single-batch accuracy seen during training.

    Notes:
        The earlier version never updated its ``best_acc`` tracker, so the
        returned accuracy was simply that of the last batch with a non-zero
        score. It also kept ``best_model = model``, which is only a second
        name for the same object and never snapshotted any weights; that
        no-op has been removed. The model's final weights are, as before,
        those after the last optimizer step.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    # Look the optimizer class up by name, e.g. optim.Adam.
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr=param['learning_rate'])

    losses = []
    accuracies = []
    best_acc = 0.0

    torch.cuda.empty_cache()

    # Make sure BatchNorm is in training mode even if an earlier evaluation
    # left the model in eval mode.
    model.train()

    for _ in range(num_epochs):
        for images, labels in trainloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accuracy of this batch, measured on the outputs computed
            # before the weight update.
            _, predictions = outputs.max(1)
            num_correct = (predictions == labels).sum().item()
            batch_acc = num_correct / float(images.shape[0])
            accuracies.append(batch_acc)

            best_acc = max(best_acc, batch_acc)

    return losses, accuracies, best_acc

In [None]:
import time

# Train the final model and report how long it took in wall-clock minutes.
start_time = time.time()

losses, accuracies, accuracy = run_epochs(params, model)

print("Accuracy = ", accuracy)
elapsed_minutes = (time.time() - start_time) / 60
print("--- %s minutes ---" % elapsed_minutes)

In [None]:
def plot_losses(losses):
    """Plot a sequence of loss values against their position in the sequence.

    Args:
        losses: sequence of loss values. In this notebook ``run_epochs``
            records one value per training batch, so the x axis counts
            batches rather than epochs.
    """
    plt.plot(losses, '-bx')
    plt.xlabel('batch')
    plt.ylabel('loss')
    plt.title('Loss vs Number of Batches')

In [None]:
# Plot the losses returned by run_epochs.
plot_losses(losses)

In [None]:
def plot_accuracies(accuracy):
    """Plot a sequence of accuracy values against their position in the sequence.

    Args:
        accuracy: sequence of accuracy values to plot. The argument itself is
            plotted; the function no longer reads the global ``accuracies``.
            In this notebook ``run_epochs`` records one value per training
            batch, so the x axis counts batches rather than epochs.
    """
    plt.plot(accuracy, '-x')
    plt.xlabel('batch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs Number of Batches')

In [None]:
# Plot the accuracies returned by run_epochs.
plot_accuracies(accuracies)

In [None]:
# Persist only the model's state_dict (not the whole module object) to
# 'model.pth', a path relative to the current working directory.
torch.save(model.state_dict(), 'model.pth')

In [None]:
# Accuracy (percent) of the trained model on the test loader.
test_accuracy = check_model(model, testloader)
print('Test accuracy = %f' % test_accuracy)

In [None]:
# Accuracy (percent) of the trained model on the validation loader.
val_accuracy = check_model(model, valloader)
print('Validation accuracy = %f' % val_accuracy)

In [None]:
# Restore the weights written by torch.save(model.state_dict(), 'model.pth').
# The checkpoint holds a state_dict, not a module, so it must be loaded into
# an existing Net instance rather than assigned to `model`; the path has to be
# the saved file, not the working directory. map_location lets a checkpoint
# saved on GPU be loaded on a CPU-only session.
state_dict = torch.load('model.pth', map_location=device)
model.load_state_dict(state_dict)