# Check that we have everything here

In [1]:
import os
if not os.path.isdir("quantificationlib"):
    # `StopExecution` is not defined anywhere in this notebook, so the original
    # `raise StopExecution` failed with a confusing NameError. Raise a built-in
    # exception that carries the actual explanation instead.
    raise FileNotFoundError(
        "You should have the quantification library in this directory"
    )

# Load the data

In [2]:
import sys,os
import numpy as np
import pandas as pd

# Read the stored training outputs from the results/ folder
def _read_numeric_csv(path):
    """Parse a comma-separated file into a NumPy array."""
    return np.genfromtxt(path, delimiter=',')

trainpreds = _read_numeric_csv('results/trainpred.csv')
traintrue = _read_numeric_csv('results/traintrue.csv')
trainprobs = _read_numeric_csv('results/trainprobs.csv')
# Class names are text, so they are read as strings rather than numbers
classes = np.genfromtxt('results/classes.csv', dtype='str')

# Fit quantification models

In [3]:
# Put the local "quantificationlib" directory first on sys.path so that the
# imports below are looked up there before anything else.
sys.path.insert(0, os.path.abspath("quantificationlib"))
from quantificationlib import classify_and_count
from quantificationlib import distribution_matching

# One quantifier per method; their estimates are printed side by side later on.
quantifierCC = classify_and_count.CC(verbose=1)
quantifierAC = classify_and_count.AC(verbose=1)
# NOTE: the variable name (and the 'HDy' results column) says HDy, but the
# object constructed here is distribution_matching.DFy.
quantifierHDy = distribution_matching.DFy(verbose=1)
# The first argument (X) is None: precomputed training predictions are passed
# through predictions_train instead.
quantifierCC.fit(None,traintrue,predictions_train=trainpreds)
# NOTE(review): AC is fitted with trainprobs here, while the prediction cell
# feeds it y_pred (argmax labels) — confirm the library accepts both forms.
quantifierAC.fit(None,traintrue,predictions_train=trainprobs)
# Last expression of the cell, so its return value is what the cell displays.
quantifierHDy.fit(None,traintrue,predictions_train=trainprobs)

Class CC: Computing predictions for training distribution...done
Class AC: Computing predictions for training distribution...done
Class AC: Estimating confusion matrix for training distribution...done
Class DFy: Computing predictions for training distribution...done
Class DFy: Estimating training distribution...done


DFy(verbose=1)

# Convert a model trained with DataParallel into a normal one that uses only one GPU
I leave this code here because it can be useful. I think I created the model with another script that uses `DataParallel` and saved it directly. With this code we can convert it into a normal model so that it can be used on a single GPU.

In [20]:
#model = torchvision.models.resnet18(pretrained=True)
#model.fc = nn.Linear(model.fc.in_features, len(classes))
#model = nn.DataParallel(model,device_ids=[0,1])
#model.load_state_dict(torch.load('modelandres.pt'))
#torch.save(model.module.state_dict(), 'modelandres_corrected.pt')

# Receiving the sample to quantify
Here I need to create a custom dataset to load the images from a folder without structure (no labels in this case)

In [30]:
import torchvision
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from natsort import natsorted

class ProductionDataset(Dataset):
    """Unlabelled image dataset read from a single flat directory.

    Args:
        main_dir: directory that directly contains the image files.
        transform: callable applied to each RGB PIL image; its result is what
            ``__getitem__`` returns.

    Attributes:
        main_dir: the directory given at construction.
        transform: the callable given at construction.
        total_imgs: naturally sorted file names found in ``main_dir``.
    """

    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        # Keep only visible regular files. Sub-directories (for example
        # Jupyter's .ipynb_checkpoints) and hidden files (for example
        # .DS_Store) are not images and would make Image.open fail later,
        # in the middle of a DataLoader pass.
        image_names = [
            name
            for name in os.listdir(main_dir)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(main_dir, name))
        ]
        self.total_imgs = natsorted(image_names)

    def __len__(self):
        """Return the number of image files kept from ``main_dir``."""
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Load image ``idx``, convert it to RGB and return ``transform(image)``."""
        # Local import: the cell defining this class does not import PIL; it
        # is only imported in the cell that defines the helper functions.
        from PIL import Image

        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving it to the garbage collector.
        with Image.open(img_loc) as img:
            image = img.convert("RGB")
        return self.transform(image)

# Preprocessing applied to every production image before it reaches the network.
# Normalisation is kept disabled here:
#   T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
_prod_steps = [
  T.Resize(size=256),
  T.CenterCrop(size=224),
  T.ToTensor(),
]
prod_transform = T.Compose(_prod_steps)

# "production" should be the directory holding the new images to quantify
# (the validation images were used here for simplicity).
prod_dset = ProductionDataset(main_dir="production", transform=prod_transform)
prod_loader = DataLoader(dataset=prod_dset, num_workers=4, batch_size=256)
print("Loaded %d images " % len(prod_dset))

Loaded 1967 images 


# Adding all necessary functions

In this case the prediction function (`make_preds`) does not use y, because the labels are not known. Also, `load_network` always needs a saved model file (`modelandres_corrected.pt`), since we have already run the fine-tuning.

In [29]:
import torch.nn as nn
import torch.nn.functional as nnf
from PIL import Image

def load_network(num_classes=51, weights_path="modelandres_corrected.pt"):
  """
  Build a ResNet-18 classifier and load the fine-tuned weights into it.

  Args:
    num_classes: size of the final fully connected layer (51 by default).
    weights_path: path of the state dict saved after fine-tuning.

  Returns:
    A ``(model, loss_fn)`` pair: the network moved to the module-level
    ``device`` and a ``CrossEntropyLoss`` instance.

  Note:
    Reads the module-level ``device``, so it must be called after that
    variable has been defined.
  """
  # load_state_dict below is strict, so it replaces every parameter and buffer;
  # downloading the ImageNet weights first would be wasted work.
  model = torchvision.models.resnet18(pretrained=False)
  print("Adjusting the CNN for %s classes" % num_classes)
  model.fc = nn.Linear(model.fc.in_features, num_classes)
  #Define loss function
  loss_fn = nn.CrossEntropyLoss()
  # map_location remaps tensors saved from a GPU onto the selected device, so
  # the checkpoint also loads when the notebook falls back to the CPU.
  model.load_state_dict(torch.load(weights_path, map_location=device))
  model = model.to(device) #Send model to the selected device
  return model,loss_fn

def make_preds(model, loader, device):
  """
  Run inference over an unlabelled loader.

  The model is switched to eval mode and, with gradient tracking disabled,
  every batch yielded by ``loader`` is moved to ``device`` and forwarded.

  Returns:
    Two lists with one entry per sample: the argmax class index of the model
    output, and the softmax probabilities over classes as a NumPy array.
  """
  model.eval()
  labels, probabilities = [], []
  with torch.no_grad():
    for batch in loader:
      logits = model(batch.to(device))
      # Iterating a NumPy array yields one row (or scalar) per sample
      probabilities += list(nnf.softmax(logits, dim=1).cpu().numpy())
      labels += list(logits.argmax(1).cpu().numpy())
  return labels, probabilities

# Classify the sample and then quantify it

In [31]:
import torch

# Print floats with five decimals in the results table
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# Use the first GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using %s"%device)

# The loss function returned by load_network is not needed for inference
model,_ =  load_network()
y_pred,y_probs = make_preds(model, prod_loader, device)
# make_preds returns per-sample lists; stack them into 2-D arrays
y_pred=np.vstack( y_pred )
y_probs=np.vstack( y_probs )

# X is passed as None because precomputed predictions are supplied instead
resultsCC=quantifierCC.predict(None,predictions_test=y_pred)
# NOTE(review): AC was fitted with probabilities (trainprobs) but receives
# argmax labels here — confirm the library handles both forms consistently.
resultsAC=quantifierAC.predict(None,predictions_test=y_pred)
# NOTE: quantifierHDy is a DFy instance despite the 'HDy' label used below
resultsHDy=quantifierHDy.predict(None,predictions_test=y_probs)

# One row per class name, one column per quantification method
print(pd.DataFrame({'CC':resultsCC,'AC':resultsAC,'HDy':resultsHDy},index=classes))


Using cuda:0
Adjusting the CNN for 51 classes
Class CC: Computing predictions for testing distribution...done
Class CC: Computing prevalences for testing distribution...done
Class AC: Computing predictions for testing distribution...done
Class AC: Computing prevalences for testing distribution...done
Class DFy: Computing predictions for testing distribution...done
Class DFy: Estimating testing distribution...Class DFy: Computing prevalences...done
                         CC      AC     HDy
Asterionellopsis    0.00203 0.00339 0.01338
Cerataulina         0.00000 0.00000 0.00000
Ceratium            0.00000 0.00000 0.00000
Chaetoceros         0.00407 0.00352 0.00621
Corethron           0.00153 0.00177 0.00620
Coscinodiscus       0.00254 0.00311 0.01199
Cylindrotheca       0.00661 0.00680 0.01263
DactFragCerataul    0.00051 0.00000 0.00000
Dactyliosolen       0.00000 0.00000 0.00000
Dictyocha           0.00051 0.00158 0.00000
Dinobryon           0.00000 0.00000 0.00000
Dinophysis          