# First test

In this notebook, we do not apply any additional transformations to the data: images are only rescaled to 224x224 and normalized before being passed through a pretrained CNN model.
The objective is to determine whether a pretrained model is already capable of extracting relevant information from these images at that resolution, without transforming them further.

In [15]:
# Placeholders for the train and test datasets; both names are rebound by the
# "Load Data" cells further down.
dataset = dataset2 = None

In [16]:
import os
import re
import toml
import shutil
import zipfile
import torch
import math
import cv2
from time import time
from IPython.display import Markdown, display
import torchvision.transforms as transforms
from Dataset import Dataset
from data_loading import open_preprocess_photos
from typing import List
#@title ## Importing libraries
# @markdown ### ➡ Setup
# @markdown Be sure to add all files on root

In [11]:
#@title ## Importing dataset folder from drive or kaggle
choice = "drive"#@param ["kaggle", "drive"]
# @markdown # kaggle
# @markdown put kaggle.json
if (choice == "kaggle"):
  ! pip install -q kaggle
  ! mkdir ~/.kaggle
  kaggle = "kaggle.json"
  ! cp {kaggle} ~/.kaggle/
  ! chmod 600 ~/.kaggle/kaggle.json
  ! kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
  ! unzip chest-xray-pneumonia.zip -d data/

# @markdown # google drive (if choice is google drive)
if (choice == "drive") :
  from google.colab import drive
  drive.mount('/content/drive')
  name = 'chest-xray-pneumonia.zip' #@param {type:"string"}
  ! cp /content/drive/MyDrive/{name} .
  ! unzip chest-xray-pneumonia.zip -d data/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
^C
Archive:  chest-xray-pneumonia.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of chest-xray-pneumonia.zip or
        chest-xray-pneumonia.zip.zip, and cannot find chest-xray-pneumonia.zip.ZIP, period.


In [7]:
#@title ## Creating model
# @markdown ### ResNet34 model

from torchvision.models import resnet34, ResNet34_Weights
import torch

# Backbone: ResNet-34 initialised with the ImageNet-1k (V1) weights.
pretrained = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)

# Turn the pooling stage and the classification head into pass-through layers,
# so the backbone no longer pools or classifies; the custom head in `Model`
# consumes its features instead.
pretrained.avgpool = torch.nn.Identity()
pretrained.fc = torch.nn.Identity()

class Model(torch.nn.Module):
    """Binary classifier: a feature extractor followed by a two-layer linear head.

    The forward pass applies ``pretrained``, then ``linear1``, ``linear2`` and
    a sigmoid, so the output is one value in [0, 1] per sample.

    Args:
        pretrained: Module used as the feature extractor. Its output must be a
            tensor whose last dimension equals ``in_features``.
        in_features: Size of the feature vector produced by ``pretrained``.
            The default (25088) is the value the notebook was written with;
            it presumably corresponds to the 512 x 7 x 7 feature map of the
            un-pooled ResNet-34 on 224x224 inputs -- confirm if the backbone
            or the image size changes.
        hidden_features: Width of the intermediate linear layer.
    """

    def __init__(self, pretrained, in_features: int = 25088, hidden_features: int = 10000):
        super().__init__()
        self.pretrained = pretrained
        self.linear1 = torch.nn.Linear(in_features, hidden_features)
        self.linear2 = torch.nn.Linear(hidden_features, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated score for each sample in ``x``."""
        features = self.pretrained(x)
        # NOTE(review): there is no activation between linear1 and linear2, so
        # the two layers compose into a single affine map. Kept as-is to
        # preserve the existing behaviour; consider inserting a nonlinearity.
        hidden = self.linear1(features)
        logits = self.linear2(hidden)
        return self.sigmoid(logits)

# Use the first CUDA device when one is available, otherwise fall back to the
# CPU, then instantiate the classifier and move it to that device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model(pretrained).to(device)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 130MB/s]


In [None]:
#@title ## Load Data train
normal_dir: str = r'./data/chest_xray/train/NORMAL'
pneumo_dir: str = r'./data/chest_xray/train/PNEUMONIA'

# os.path.isdir is already False for a missing path, so a single call covers
# both the "exists" and the "is a directory" checks.
assert os.path.isdir(normal_dir), "Normal dir isn't found or isn't a directory"
assert os.path.isdir(pneumo_dir), "Pneumonia dir isn't found or isn't a directory"

# Convert images to tensors, then normalize each channel with the given
# mean/std. The same `transform` is reused by the test-data cell.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Load both classes with the shared transform and a (224, 224) size argument.
normal = open_preprocess_photos(normal_dir, transform, (224, 224))
pneumonia = open_preprocess_photos(pneumo_dir, transform, (224, 224))

# NOTE(review): 0 and 1 are presumably the class labels and 128 the batch
# size -- confirm against the project's Dataset class.
dataset = Dataset(normal, pneumonia, 0, 1, 128)

In [None]:
#@title ## Load Data test
normal_dir_test: str = r'./data/chest_xray/test/NORMAL'
pneumo_dir_test: str = r'./data/chest_xray/test/PNEUMONIA'

# os.path.isdir is already False for a missing path, so a single call covers
# both the "exists" and the "is a directory" checks.
assert os.path.isdir(normal_dir_test), "Normal dir isn't found or isn't a directory"
assert os.path.isdir(pneumo_dir_test), "Pneumonia dir isn't found or isn't a directory"

# Reuses `transform` from the train-data cell, so that cell must have been run.
normal_test = open_preprocess_photos(normal_dir_test, transform, (224, 224))
pneumonia_test = open_preprocess_photos(pneumo_dir_test, transform, (224, 224))

# NOTE(review): 0 and 1 are presumably the class labels and 128 the batch
# size -- confirm against the project's Dataset class.
dataset2 = Dataset(normal_test, pneumonia_test, 0, 1, 128)

In [13]:
#@title ## Training preparation

def calc_acc(y_true: torch.Tensor, y_pred: torch.Tensor, threshold: float) -> float:
    """Return the fraction of thresholded predictions that equal the labels.

    A prediction counts as class 0 when it is strictly below ``threshold`` and
    as class 1 otherwise; the result is the number of matches divided by the
    number of elements.

    Args:
        y_true: Ground-truth labels (0 or 1), same shape as ``y_pred``.
        y_pred: Predicted scores.
        threshold: Decision boundary between class 0 and class 1.

    Returns:
        Accuracy as a float in [0, 1].

    Raises:
        AssertionError: If the two tensors have different shapes.
        ZeroDivisionError: If the tensors are empty.
    """
    assert y_true.shape == y_pred.shape, "Accuracy calculation received two different sized tensors"

    # "not below the threshold" (rather than ">= threshold") keeps the mapping
    # of the former element-wise loop, including NaN scores landing in class 1.
    predicted = (~(y_pred < threshold)).to(y_true.dtype)

    # One vectorized comparison and a single .item() call instead of a Python
    # loop that compared the tensors one element at a time.
    nb_correct = int((predicted == y_true).sum().item())
    return nb_correct / y_pred.numel()

def train(model, optim, criterion, epochs, dataset, verbose: bool = True) -> List[list]:
    """Train ``model`` on ``dataset`` and return per-epoch loss and accuracy.

    Batches are moved to the module-level ``device`` before the forward pass.
    After each epoch the dataset is reshuffled via ``dataset.shuffle(epoch)``.

    Args:
        model: Module to optimise; called as ``model(inputs)``.
        optim: Optimizer that owns the model parameters. This argument is the
            one actually stepped (the function previously ignored it and used
            the global ``optimizer`` instead).
        criterion: Loss called as ``criterion(outputs, labels)``.
        epochs: Number of passes over ``dataset``.
        dataset: Iterable of ``(inputs, labels)`` batches that also supports
            ``len()`` and ``shuffle(seed)``.
        verbose: When True, print the loss and accuracy after every epoch.

    Returns:
        ``[losses, mean_accs]``: the summed batch loss and the mean batch
        accuracy, one entry per epoch.
    """
    # Indicator histories returned to the caller.
    losses = []
    mean_accs = []
    # Denominator of the per-epoch mean accuracy (one accuracy value is added
    # per iterated batch, so len(dataset) is presumably the batch count).
    len_dataset = len(dataset)

    for epoch in range(epochs):
        epoch_loss = 0
        sum_acc = 0

        for inputs, labels in dataset:
            # Work on copies moved to the target device; labels get a trailing
            # dimension so they line up with the model's (batch, 1) output.
            inputs = inputs.clone().to(device)
            labels = labels.clone().to(device).unsqueeze(-1)

            optim.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optim.step()

            epoch_loss += loss.item()
            sum_acc += calc_acc(labels, outputs, 0.5)

            # Drop the references to the device copies before the next batch.
            del inputs, labels

        # Reshuffle for the next epoch, passing the epoch index to shuffle().
        dataset.shuffle(epoch)

        epoch_mean_acc = sum_acc / len_dataset
        losses.append(epoch_loss)
        mean_accs.append(epoch_mean_acc)

        if verbose:
            print(f"Epoch nb°{epoch + 1}:")
            print("Loss (sum over epoch):\t\t%.4f" % epoch_loss)
            print("Accuracy (mean over epoch):\t%.4f" % (epoch_mean_acc * 100), end="%\n\n")
    return [losses, mean_accs]

lr=0.02 #@param {type:"number"}
momentum=0.9 #@param {type:"number"}
# NOTE(review): batch_size is not referenced by any other visible cell; the
# Dataset objects are built with a literal 128 instead.
batch_size=1024 #@param {type:"number"}

# Binary cross-entropy on the model's sigmoid output, optimised with SGD + momentum.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [14]:
#@title ## Training
epoch = 5 #@param {type:"integer"}
verbose = True #@param {type:"boolean"}

# Run the training loop; as the last expression of the cell, the returned
# [losses, mean_accs] pair is what the notebook displays.
train(model, optim=optimizer, criterion=criterion, epochs=epoch, dataset=dataset, verbose=verbose)


TypeError: object of type 'NoneType' has no len()

In [None]:
#@title ## Evaluate
def evaluate(model: Model, dataset):
    """Print the accuracy and the average per-sample loss of ``model`` on ``dataset``.

    The model is put in eval mode and run without gradient tracking, using the
    module-level ``device`` and ``criterion``. It is switched back to train
    mode afterwards, even if evaluation raises.

    Args:
        model: Classifier producing one score in [0, 1] per sample.
        dataset: Iterable of ``(inputs, labels)`` batches.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    loss = 0.0

    try:
        with torch.no_grad():
            for inputs, labels in dataset:
                inputs = inputs.to(device)
                # Trailing dimension so labels match the (batch, 1) output.
                labels = labels.to(device).unsqueeze(-1)
                n_samples = labels.size(0)

                outputs = model(inputs)
                # Same 0.5 decision rule as the accuracy computed during
                # training (torch.round would send exactly 0.5 to class 0).
                predicted = (outputs >= 0.5).to(labels.dtype)

                total += n_samples
                correct += (predicted == labels).sum().item()

                # The criterion defined in this notebook (BCELoss with default
                # reduction) returns the batch mean; weight it by the batch
                # size so dividing by `total` yields a per-sample average.
                loss += criterion(outputs, labels).item() * n_samples
    finally:
        # Always hand the model back in train mode, as callers expect.
        model.train()

    if total == 0:
        raise ValueError("Cannot evaluate on an empty dataset")

    accuracy = correct / total
    average_loss = loss / total

    print("Test Accuracy: {:.2f}%".format(accuracy * 100))
    print("Average Loss: {:.4f}".format(average_loss))

dataset_input = "train" #@param ["train", "test"]
# Any value other than "train" selects the test split.
evaluate_dataset = dataset if dataset_input == "train" else dataset2
evaluate(model, evaluate_dataset)

In [None]:
#@title ## Save model
model_name = "checkpoint.pth" #@param {type:"string"}
# Persist the model's state_dict (not the module object itself) to model_name.
torch.save(obj=model.state_dict(), f=model_name)