# First test

In this file, we do not apply any transformations to the data: we only rescale the images and pass them through a pretrained CNN model.
The objective is to determine whether a pretrained model can already extract relevant information from these images when they are left untransformed and resized to 224x224.

In [None]:
import os
import re
import toml
import shutil
import zipfile
import torch
import math
import cv2
from time import time
from IPython.display import Markdown, display
import torchvision.transforms as transforms
from Dataset import Dataset
from data_loading import open_preprocess_photos
#@title ## Importing libraries and functions
# @markdown ### ➡ Setup
# @markdown Make sure all required files are placed in the root directory

In [None]:
#@title ## Importing dataset folder from drive or kaggle
choice = "kaggle"#@param ["kaggle", "drive"]
# @markdown # kaggle
# @markdown put kaggle.json
if (choice == "kaggle"):
  ! pip install -q kaggle
  ! mkdir ~/.kaggle
  kaggle = "kaggle.json"
  ! cp {kaggle} ~/.kaggle/
  ! chmod 600 ~/.kaggle/kaggle.json
  ! kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
  ! unzip chest-xray-pneumonia.zip -d data/

# @markdown # google drive (if choice is google drive)
if (choice == "drive") :
  from google.colab import drive
  drive.mount('/content/drive')
  name = 'chest-xray-pneumonia.zip' #@param {type:"string"}
  ! cp /content/drive/MyDrive/{name} .
  ! unzip chest-xray-pneumonia.zip -d data/

In [None]:
#@title ## Creating model
# @markdown ### ResNet34 model

from torchvision.models import resnet34, ResNet34_Weights
import torch

# ResNet34 initialised with the IMAGENET1K_V1 weights, used as the backbone
# of the classifier defined below.
pretrained = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)

# Turn the pooling and classification stages into pass-through modules so the
# backbone no longer applies its own pooling or ImageNet classifier.
pretrained.avgpool = torch.nn.Identity()
pretrained.fc = torch.nn.Identity()

class Model(torch.nn.Module):
    """Binary classifier: a backbone followed by a two-layer linear head.

    The forward pass feeds the input through ``pretrained``, then through
    ``linear1``, ``linear2`` and a sigmoid, producing one value in [0, 1]
    per sample.

    Args:
        pretrained: Backbone module. Its output must have ``in_features``
            values in its last dimension.
        in_features: Input size of the first linear layer. Defaults to
            25088, the value previously hard-coded here (presumably
            512 * 7 * 7, the ResNet34 feature map for 224x224 inputs once
            ``avgpool`` and ``fc`` are disabled -- confirm if the backbone
            or the image size changes).
        hidden_features: Width of the intermediate linear layer.
            Defaults to 10000.
    """

    def __init__(self, pretrained, in_features=25088, hidden_features=10000):
        super().__init__()
        self.pretrained = pretrained
        self.linear1 = torch.nn.Linear(in_features, hidden_features)
        self.linear2 = torch.nn.Linear(hidden_features, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid score computed from input batch ``x``."""
        x = self.pretrained(x)
        # NOTE(review): there is no activation between linear1 and linear2,
        # so the two layers compose into a single affine map. Left unchanged
        # so that the outputs of existing checkpoints stay the same.
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.sigmoid(x)
        return x

# Prefer the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Build the classifier around the backbone and place it on that device.
model = Model(pretrained).to(device)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:01<00:00, 50.1MB/s]


In [None]:
#@title ## Load Data train
# Folders holding the two training classes inside the extracted archive.
normal_dir: str = r'./data/chest_xray/train/NORMAL'
pneumo_dir: str = r'./data/chest_xray/train/PNEUMONIA'

# os.path.isdir is already False for a missing path, so it covers both the
# "not found" and the "not a directory" cases.
assert os.path.isdir(normal_dir), "Normal dir isn't found or isn't a directory"
assert os.path.isdir(pneumo_dir), "Pneumonia dir isn't found or isn't a directory"

# Per-channel mean/std normalisation applied after conversion to a tensor
# (presumably the ImageNet statistics expected by the pretrained backbone).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Load both classes with the same transform and the same (224, 224) size argument.
normal, pneumonia = (
    open_preprocess_photos(class_dir, transform, (224, 224))
    for class_dir in (normal_dir, pneumo_dir)
)

# NOTE(review): 0 / 1 look like the class labels and 128 like a batch size --
# confirm against the project's Dataset class.
dataset = Dataset(normal, pneumonia, 0, 1, 128)

In [None]:
#@title ## Load Data test
# Folders holding the two test classes inside the extracted archive.
normal_dir_test: str = r'./data/chest_xray/test/NORMAL'
pneumo_dir_test: str = r'./data/chest_xray/test/PNEUMONIA'

# os.path.isdir is already False for a missing path, so it covers both the
# "not found" and the "not a directory" cases.
assert os.path.isdir(normal_dir_test), "Normal dir isn't found or isn't a directory"
assert os.path.isdir(pneumo_dir_test), "Pneumonia dir isn't found or isn't a directory"

# Reuses the `transform` pipeline defined in the train-data cell.
normal_test, pneumonia_test = (
    open_preprocess_photos(class_dir, transform, (224, 224))
    for class_dir in (normal_dir_test, pneumo_dir_test)
)

# NOTE(review): 0 / 1 look like the class labels and 128 like a batch size --
# confirm against the project's Dataset class.
dataset2 = Dataset(normal_test, pneumonia_test, 0, 1, 128)

In [None]:
#@title ## Training preparation
def train_one_epoch(model, criterion, optimizer, dataloader):
  """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

  Args:
    model: Module to train. Batches are moved to the device of its first
      parameter; a model without parameters leaves them where they are.
    criterion: Loss, called as ``criterion(outputs, labels.unsqueeze(-1))``.
    optimizer: Optimizer, stepped once per batch.
    dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.

  Returns:
    The average of the per-batch loss values as a float, or 0.0 when
    ``dataloader`` yields no batch.
  """
  # Use the model's own device instead of relying on a notebook-level global.
  first_param = next(model.parameters(), None)
  target_device = None if first_param is None else first_param.device

  # Make sure layers with mode-dependent behaviour are in training mode,
  # even if an earlier evaluation left the model in eval mode.
  model.train()

  running_loss = 0.
  batch_count = 0
  for inputs, labels in dataloader:
    if target_device is not None:
      inputs = inputs.to(target_device)
      labels = labels.to(target_device)

    optimizer.zero_grad()

    outputs = model(inputs)
    # The model emits one value per sample in a trailing dimension, so the
    # labels get a matching trailing dimension.
    loss = criterion(outputs, labels.unsqueeze(-1))
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    batch_count += 1

    # Drop the batch references before the next batch is fetched.
    del inputs, labels

  if batch_count == 0:
    return 0.
  return running_loss / batch_count

# Hyper-parameters exposed as Colab form fields.
lr=0.02 #@param {type:"number"}
momentum=0.9 #@param {type:"number"}
# NOTE(review): batch_size is not referenced by any of the visible cells.
batch_size=1024 #@param {type:"number"}

# Binary cross-entropy on the model's sigmoid output, optimised with SGD + momentum.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [None]:
#@title ## Evaluate
def evaluate(model: torch.nn.Module, dataset, criterion=None):
    """Print the accuracy and the mean per-sample loss of ``model`` on ``dataset``.

    Args:
        model: Module producing one probability per sample in a trailing
            dimension. Batches are moved to the device of its first
            parameter; a model without parameters leaves them where they are.
        dataset: Iterable yielding ``(inputs, labels)`` tensor pairs.
        criterion: Loss returning the mean over the batch. Defaults to
            ``torch.nn.BCELoss()``, the loss this notebook trains with.

    Returns:
        None. The metrics are printed; nothing but a notice is printed when
        ``dataset`` yields no sample.
    """
    loss_fn = torch.nn.BCELoss() if criterion is None else criterion

    # Use the model's own device instead of relying on a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    loss_sum = 0.

    try:
        with torch.no_grad():
            for inputs, labels in dataset:
                if target_device is not None:
                    inputs = inputs.to(target_device)
                    labels = labels.to(target_device)
                targets = labels.unsqueeze(-1)

                outputs = model(inputs)
                predicted = torch.round(outputs)

                n_samples = labels.size(0)
                total += n_samples
                correct += (predicted == targets).sum().item()

                # The criterion yields a batch mean; weight it by the batch
                # size so that dividing by `total` gives a per-sample mean.
                loss_sum += loss_fn(outputs, targets).item() * n_samples
    finally:
        # Restore the mode the model was in before evaluation, even if a
        # batch raised part-way through.
        model.train(was_training)

    if total == 0:
        print("No samples to evaluate.")
        return

    accuracy = correct / total
    average_loss = loss_sum / total

    print("Test Accuracy: {:.2f}%".format(accuracy * 100))
    print("Average Loss: {:.4f}".format(average_loss))

# `dataset_train` is not defined in any visible cell; the training set built
# in the "Load Data train" cell is `dataset` (the test set is `dataset2`).
evaluate(model, dataset)

In [None]:
#@title ## Save model
model_name = "checkpoint.pth" #@param {type:"string"}
# Persist only the model's state_dict under the chosen file name.
torch.save(obj=model.state_dict(), f=model_name)