In [8]:
import cv2
import numpy as np
from matplotlib import pyplot as plt

from google.colab import drive
import os

import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder

from collections import namedtuple
from sklearn.metrics import classification_report

import torch
from torch import nn
from torch.utils.data import DataLoader

In [2]:

# drive.mount("/content/drive")
# !unzip -uq "/content/drive/My Drive/DATA_CHAMBER_2021.zip" -d "./"
# traindir = "DATA_CHAMBER_2021/train"
# testdir = "DATA_CHAMBER_2021/test"

In [3]:
!git clone https://github.com/NguyenThuan215/ML-echo-cardiography
traindir = "/content/ML-echo-cardiography/DATA_CHAMBER_2021/train"
testdir = "/content/ML-echo-cardiography/DATA_CHAMBER_2021/test"

Cloning into 'ML-echo-cardiography'...
remote: Enumerating objects: 8357, done.[K
remote: Counting objects: 100% (8357/8357), done.[K
remote: Compressing objects: 100% (8354/8354), done.[K
remote: Total 8357 (delta 19), reused 8326 (delta 1), pack-reused 0
Receiving objects: 100% (8357/8357), 488.05 MiB | 33.58 MiB/s, done.
Resolving deltas: 100% (19/19), done.
Checking out files: 100% (8328/8328), done.


In [4]:
TrainTest = namedtuple('TrainTest', ['train', 'test'])

def get_classes():
  classes = ['2C', '3C', '4C']
  return classes

def prepare_data():
  image_size = 224
  transform_train = transforms.Compose([
      transforms.Resize((image_size,image_size)),                                    
      transforms.ToTensor(),
  ])
  transform_test = transforms.Compose([
      transforms.Resize((image_size,image_size)),
      transforms.ToTensor(),
  ])
  trainset = torchvision.datasets.ImageFolder(root=traindir, transform=transform_train)
  testset = torchvision.datasets.ImageFolder(root=testdir, transform=transform_test)
  return TrainTest(train=trainset, test=testset)

def prepare_loader(datasets):
  batch = 32
  worker = 4
  trainloader = DataLoader(dataset=datasets.train, batch_size=batch, shuffle=True, num_workers=worker)
  testloader = DataLoader(dataset=datasets.test, batch_size=batch, shuffle=False, num_workers=worker)
  return TrainTest(train=trainloader, test=testloader)

In [5]:
def train_epoch(epoch, model, loader, loss_func, optimizer, device):
  model.train()
  running_loss = 0.0
  reporting_steps = 40
  step = 0
  for images, labels in loader:
    step += 1
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    loss = loss_func(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if step % reporting_steps == reporting_steps - 1:
      print(f"Epoch {epoch} step {step} ave_loss {running_loss/reporting_steps:.4f}")
      running_loss = 0.0

def test_epoch(epoch, model, loader, device):
  ytrue = []
  ypred = []
  with torch.no_grad():
    model.eval()
    for images, labels in loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, dim=1)
      ytrue += list(labels.cpu().numpy())
      ypred += list(predicted.cpu().numpy())

  return ypred, ytrue

In [6]:
def main(PATH='./model.pth', model_in=None):
  classes = get_classes()
  datasets = prepare_data()
  loaders = prepare_loader(datasets)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  print("Num Images in train set:", len(datasets.train))
  print("Num Images in test set:", len(datasets.test))
  print("Num batch in train set: ", len(loaders.train))
  print("class: ", datasets.train.class_to_idx)
  print("image size: ", datasets.train[0][0].shape)
  print("device:", device)
  print("model:", model_in)

  if model_in == 'vgg16':  
    model = torchvision.models.vgg16()
    model.classifier[6] = torch.nn.modules.linear.Linear(in_features=4096, out_features=3, bias=True)
  elif model_in == 'resnet50':
    model = torchvision.models.resnet50()
    model.fc = torch.nn.modules.linear.Linear(in_features=2048, out_features=3, bias=True) 
  elif model_in == 'resnet18':
    model = torchvision.models.resnet18()
    model.fc = torch.nn.modules.linear.Linear(in_features=512, out_features=3, bias=True) 
  else: 
    # model = torchvision.models.googlenet()
    # model.fc.out_features = 3
    pass


  model.to(device=device)
  loss_func = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

  accuracies = []
  for epoch in range(10):
    print("---------------------------------------------------------------")
    print(f"\nEpoch {epoch} report: ")
    train_epoch(epoch, model, loaders.train, loss_func, optimizer, device)
    ypred_test, ytrue_test = test_epoch(epoch, model, loaders.test, device)
    print("Test report: \n", classification_report(ytrue_test, ypred_test, target_names=classes))
    torch.save(model.state_dict(), PATH)

    # calculate accurency
    ypred_test = np.array(ypred_test)
    ytrue_test = np.array(ytrue_test)
    accuracy = (ytrue_test==ypred_test).sum() / len(ytrue_test)
    accuracies.append(accuracy)


  fig, ax1 = plt.subplots(1,1, figsize=(8,4))
  ax1.plot(accuracies, "bo--", label=model_in)
  ax1.set(title=model_in, xlabel="epoch", ylabel="accuracy"+ model_in, xlim=(-0.5,10), ylim=(0,1))
  plt.show()

  return model

In [7]:
model = main(PATH="./vgg16.pth", model_in='vgg16')

  cpuset_checked))


Num Images in train set: 6717
Num Images in test set: 1607
Num batch in train set:  210
class:  {'2C': 0, '3C': 1, '4C': 2}
image size:  torch.Size([3, 224, 224])
device: cpu
model: vgg16
---------------------------------------------------------------

Epoch 0 report: 


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 0 step 39 ave_loss 1.0539
Epoch 0 step 79 ave_loss 0.7813


KeyboardInterrupt: ignored