Structure:

- Get images
  - Per patient
  - Train
  - Test
- Run all images through feature detection
- For each patient, run PCA with k = ? (number of components still to be chosen)
- Run an MLP classifier / linear discriminator

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import datetime

from sklearn.decomposition import PCA, KernelPCA

# %matplotlib notebook
%matplotlib inline

In [2]:
def fake_data(dir = './data', ptns=12, pchs=10):
  """Write a synthetic patient/patch image tree to disk and return its labels.

  For each of ``ptns`` patients a sub-directory ``ptn-<NNN>-<A|B>`` is created
  under ``dir`` and filled with ``pchs`` random 3x224x224 images saved as
  ``<NNN>.png``. The label (``'A'`` or ``'B'``) is drawn at random per patient
  and is the last character of the directory name.

  Args:
    dir: Root directory of the generated tree. Missing parent directories
      are created as well.
    ptns: Number of patients (sub-directories) to generate.
    pchs: Number of image patches per patient.

  Returns:
    List with the ``ptns`` labels, in patient order.

  Raises:
    FileExistsError: If ``dir`` already exists. This is deliberate: labels are
      random, so writing into an old tree could leave two differently
      labelled folders for the same patient.
  """
  imgs = torch.rand((ptns, pchs, 3, 224, 224))
  # makedirs (rather than mkdir) so a nested root such as ./out/data works
  os.makedirs(dir)
  classes = []
  for p in range(ptns):
    case = 'A' if torch.randn(1) > 0 else 'B'
    classes.append(case)
    p_name = "ptn-{}-{}".format(str(p).zfill(3), case)
    pth = os.path.join(dir, p_name)
    os.mkdir(pth)
    for i in range(pchs):
      fname = os.path.join(pth, str(i).zfill(3) + '.png')
      torchvision.utils.save_image(imgs[p, i], fname)
  return classes

In [20]:
!rm -fr data*
!rm -fr aug_data*
!ls

sample_data


In [21]:
# Generate the synthetic data set under ./data; `classes` holds the list of
# per-patient labels returned by fake_data.
dir = './data'
classes = fake_data(dir)
!ls

data  sample_data


In [22]:
def get_transform(dir):
  """Build a transform that normalizes images with the data set's own statistics.

  Every ``.png`` under ``dir`` is loaded through a ``DatasetFolder`` using the
  base transform (center-crop to 224, then ``ToPILImage``/``ToTensor``). The
  per-channel mean and standard deviation over all loaded images are computed
  and printed, and a transform that applies the base transform followed by
  ``Normalize(mean, std)`` is returned.

  Args:
    dir: Root directory to scan with ``DatasetFolder``.

  Returns:
    A ``transforms.Compose`` of the base transform plus normalization.
  """
  dev = "cuda:0" if torch.cuda.is_available() else "cpu"
  print(dev)
  device = torch.device(dev)

  default_transform = transforms.Compose([transforms.CenterCrop(224), transforms.ToPILImage(), transforms.ToTensor()])
  dataset = torchvision.datasets.DatasetFolder(root=dir, loader=torchvision.io.read_image, extensions='.png', transform=default_transform)
  clc = [img for img, _ in dataset]
  print("clc: [{} , {}]".format(len(clc),clc[0].shape))

  # torch.stack replaces the uninitialised torch.Tensor(...) buffer + copy loop
  tn = torch.stack(clc)
  print(tn.shape)
  # Tensor.to is not in-place: keep the returned tensor, otherwise the
  # statistics are always computed on the CPU copy
  tn = tn.to(device)
  print(tn.shape)
  # reduce over batch, height and width -> one value per channel
  mean = tn.mean(dim=(0,2,3)).cpu()
  std = tn.std(dim=(0,2,3)).cpu()
  print("mean = {}".format(mean))
  print("std = {}".format(std))

  normal_transform = transforms.Compose([default_transform,
                                         transforms.Normalize(mean=mean, std=std)])
  return normal_transform

In [23]:
def batch_PCA_(imgs ,k=16):
  """Placeholder reduction: average a batch over its first dimension.

  Args:
    imgs: Tensor whose first dimension indexes the items of the batch.
    k: Accepted for signature compatibility; not used.

  Returns:
    The mean of ``imgs`` along dimension 0.
  """
  print("batch input size: {}".format(imgs.shape))
  reduced = torch.mean(imgs, dim=0)
  print("batch new size: {}".format(reduced.shape))
  return reduced

In [24]:
def get_class(ds, dir):
  """Return the label character of every class folder found under ``dir``.

  Args:
    ds: Object exposing ``find_classes(dir)`` whose first returned element is
      the sequence of class (folder) names.
    dir: Directory passed through to ``ds.find_classes``.

  Returns:
    List with the last character of each class name, in the order returned
    by ``find_classes``.
  """
  folder_names = ds.find_classes(dir)[0]
  return [name[-1] for name in folder_names]

In [30]:
def batch_PCA(imgs, fx, k=8):
  """Fit a PCA on the features of one batch and return its principal axes.

  Args:
    imgs: Batch tensor; its first dimension is the number of samples.
    fx: Callable applied to ``imgs``; its output is flattened to one feature
      row per sample.
    k: Number of principal components to keep.

  Returns:
    Float tensor holding ``PCA.components_`` (one row per component).
  """
  bts = imgs.shape[0]
  print("Batch PCA input size: {}".format(imgs.shape))
  # detach first: Tensor.numpy() raises if fx is a module whose output
  # requires grad
  ftrs = fx(imgs).detach().reshape(bts, -1).cpu().numpy()
  PCA_comp = PCA(k, svd_solver='full').fit(ftrs)
  eig_vec = torch.as_tensor(PCA_comp.components_, dtype=torch.float32)
  print("Eigen Vector size :{} , with max: {}".format(eig_vec.shape, eig_vec.max()))
  return eig_vec

In [34]:
def ds_img(imgs):
  """Return a random crop whose side is 1/8 of the input's last dimension.

  Note that this crops a random window (``transforms.RandomCrop``); it does
  not resize the image.

  Args:
    imgs: Image tensor (or batch of images) accepted by ``RandomCrop``.

  Returns:
    The cropped input.
  """
  crop_side = imgs.shape[-1] // 8
  return transforms.RandomCrop(crop_side)(imgs)


In [34]:
class AutoEncoder(nn.Module):
  """Auto-encoder assembled from two existing modules.

  ``head`` is used as the encoder and ``rest`` as the decoder.
  """

  def __init__(self, head, rest):
    """Store the encoder (``head``) and decoder (``rest``) sub-modules."""
    super(AutoEncoder, self).__init__()
    self.encode = head
    # was `reset`, an undefined name that raised NameError on construction
    self.decode = rest

  def forward(self, imgs):
    """Encode ``imgs`` and decode the result."""
    x = self.encode(imgs)
    return self.decode(x)

  def feature_ext(self, imgs):
    """Return only the encoder output for ``imgs``."""
    return self.encode(imgs)

In [35]:
def batch_aug(dataset, aug_dir, class_fnc, aug_fnc, fx_fnc):
  """Reduce every class folder of ``dataset`` to one image saved under ``aug_dir``.

  The samples of ``dataset`` are grouped by label index. Each group is stacked
  into one batch tensor, passed to ``aug_fnc(batch, fx_fnc)`` and the result is
  written to ``<aug_dir>/<class label>/<group index>.png``.

  Args:
    dataset: A ``DatasetFolder``-like object providing ``root``,
      ``find_classes`` and iteration over ``(image, label_index)`` pairs.
    aug_dir: Output directory (created if missing).
    class_fnc: Called as ``class_fnc(dataset, root)``; must return one output
      sub-directory name per class folder, in label-index order.
    aug_fnc: Called as ``aug_fnc(batch, fx_fnc)``; must return a tensor.
    fx_fnc: Passed through unchanged to ``aug_fnc``.
  """
  dev = "cuda:0" if torch.cuda.is_available() else "cpu"
  print(dev)
  device = torch.device(dev)

  # Use the data set's own root instead of a module-level `dir` global, so the
  # function also works for data sets created from another directory.
  root = dataset.root
  classes = class_fnc(dataset, root)

  data = [[] for _ in range(len(dataset.find_classes(root)[0]))]
  for img, lbl in dataset:
    data[lbl].append(img)
  print("data: [{} , {} , {}]".format(len(data),len(data[0]), data[0][0].shape))

  os.makedirs(aug_dir, exist_ok=True)

  for p, d in enumerate(data):
    # Tensor.to is not in-place: keep the returned tensor so the batch
    # actually lives on `device`
    tn = torch.stack(d).requires_grad_(False).to(device)

    rd = aug_fnc(tn, fx_fnc).cpu()

    curr_dir = os.path.join(aug_dir, classes[p])
    os.makedirs(curr_dir, exist_ok=True)
    fname = os.path.join(curr_dir , str(p).zfill(3) + '.png')
    torchvision.utils.save_image(rd, fname)

In [36]:
# Input tree of patient folders and output directory for the per-patient results.
dir = './data'
aug_dir= './aug_data'

# Transform that normalizes with the channel statistics of the images under `dir`.
transform = get_transform(dir)

dataset = torchvision.datasets.DatasetFolder(root=dir, loader=torchvision.io.read_image, extensions='.png', transform=transform)

# For every patient folder: random-crop the patches (ds_img), fit a PCA on
# them (batch_PCA) and save the resulting components under aug_dir.
batch_aug(dataset, aug_dir, get_class, batch_PCA, ds_img)







cpu
clc: [120 , torch.Size([3, 224, 224])]
torch.Size([120, 3, 224, 224])
torch.Size([120, 3, 224, 224])
mean = tensor([0.5001, 0.4999, 0.5001])
std = tensor([0.2887, 0.2888, 0.2887])
cpu
data: [12 , 10 , torch.Size([3, 224, 224])]
Batch PCA input size: torch.Size([10, 3, 224, 224])
Eigen Vector size :torch.Size([8, 2352]) , with max: 0.06858336180448532
Batch PCA input size: torch.Size([10, 3, 224, 224])
Eigen Vector size :torch.Size([8, 2352]) , with max: 0.06946983188390732
Batch PCA input size: torch.Size([10, 3, 224, 224])
Eigen Vector size :torch.Size([8, 2352]) , with max: 0.07643172889947891
Batch PCA input size: torch.Size([10, 3, 224, 224])
Eigen Vector size :torch.Size([8, 2352]) , with max: 0.07119285315275192
Batch PCA input size: torch.Size([10, 3, 224, 224])
Eigen Vector size :torch.Size([8, 2352]) , with max: 0.07155369222164154
Batch PCA input size: torch.Size([10, 3, 224, 224])
Eigen Vector size :torch.Size([8, 2352]) , with max: 0.06703756004571915
Batch PCA input si

In [12]:
class patches_DataSet(torchvision.datasets.DatasetFolder):
  """``DatasetFolder`` over ``.png`` files loaded with ``torchvision.io.read_image``."""

  def __init__(self, root='./data'):
    """Index the ``.png`` files found under ``root``."""
    # Fixes: missing `self`, the `seld` / `init__` typos, and super() anchored
    # on the parent class (which would have skipped DatasetFolder.__init__).
    super(patches_DataSet, self).__init__(root=root, loader=torchvision.io.read_image, extensions='.png')

  def find_classes(self, directory):
    """Return ``(classes, class_to_idx)`` for ``directory``."""
    # NOTE(review): the original stub returned None, which DatasetFolder
    # cannot unpack during construction. Until a custom class mapping is
    # decided, defer to the parent implementation - TODO confirm intent.
    return super(patches_DataSet, self).find_classes(directory)