# Assignment 1: Chinese character "detection"
LT2326, Autumn 2021

Name: Max Boholm (gusbohom)

# Introduction

This notebook cotains the code for *Assignment 1: Chinese character "detection"* for the course *Machine learning for statistical NLP: Advanced* (course code LT2326), Autumn 2021. The notebook is organized into the folowing parts:

*    Libraries
*    Meta variables (the term *hyperparameter* is here reserved for decisions on the models), which define ... the loacation (path) of the data, ... 
*    Data preparation
*    Definition and training of two models
*    Testing and evaluation
*    ...




# Libraries

In [1]:
import glob
import json
import random
import operator
import time
from math import sqrt

from PIL import Image
import numpy as np
#import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.path as mplpath
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Meta variables

In [2]:
path = "../../../../../scratch/lt2326-h21/a1/"
#path = "../develop_util/"
images_dir = path + "images/"      # the directory of images to be data
meta_ctw   = path + "info.json"   # the (path to) the general meta file of CTW 
meta_train = path + "train.jsonl" # the (path to) the file containing the annotations of CTW training data

#shortcut_to_prepared_data = "prepared_data.pkl"

train_proportion = 0.7 # the proportion of training data; proportion of test data will be the complement of this number

device = torch.device("cuda:2")
#device = torch.device('cpu')

path_to_save_models = "../models/"
path_to_save_evaluations = "../evals/"

# For development purposes
restriction = 30 # set to an integer, if subsample of the data is to be used in e.g. the development phase
rescale_input_to = None 
rescale_output_to = None

## Part 1: data preparation

*Decision:* batching is kept as python `list` until feeding it to the model. *This means that the training procedure, e.g. the models, must convert the batces to tensors.* For models with "linear input", the function `flat_batch` is an example which helps us to do this.

### Definitions of functions

In [3]:
def only_train(file_dir = images_dir, meta_file = meta_ctw):
    """Tests whether image files in a directory is part of the training set of the CTW dataset, 
       as defined by the json metafile for CTW. Returns a list of filenames such that they are both
       in the specified directory and the CTW training set.
    """
    meta = json.load(open(meta_file,"r"))
    train_files_CTW = [entry["file_name"] for entry in meta["train"]]
    files_to_keep = []
    potential_files=[file.split("/")[-1] for file in glob.glob(file_dir+"*.jpg")]
    print("Potential files: ", len(potential_files))
    
    for file in potential_files:
        if file in train_files_CTW:
            files_to_keep.append(file)
    
    print("Kept files: ", len(files_to_keep))
    
    return files_to_keep

def shorty(files, restriction = restriction):
    """ Returns a shorter list of files picked by random.
    """
    
    if restriction != None:
        print("Warning: Now you have made a restriction to ", restriction, " files.")
        random.shuffle(files)
        r_files = files[:restriction]
    else:
        print("No restriction was made, since it was not specified.")
        r_files = files
    
    return r_files
        
def CTW_mapper(files, meta = meta_train):
    """ Identifies annotations for files from the training set of the CTW dataset. 
        Returns pyhoton dictionary that maps filenames (keys) with annotations (values), 
        which like in the original format is a list of lists of json elements / python dictinaries. 
    """
    mapping = {}
    with open(meta, "r") as f:
        annotations_data = [json.loads(line) for line in f.readlines()]
        for file in files:
            for annotation in annotations_data:
                if annotation["image_id"] == file[:-4]:
                    mapping[file] = annotation["annotations"]
                    break
    
    return mapping
                    
def img2array(file, directory = images_dir, rescale = rescale_input_to):
    """ Takes a filename of an image in a directory and returns an numpy array 
        corresponding to the image.
    """
    img = Image.open(directory+file)
    
    if rescale != None:
        img = img.resize((rescale, rescale))
    
    img_np = np.array(img)
    
    return img_np

def grid(height = 2048, width = 2048, rescale = rescale_output_to):
    """ Creates a grid (an array of coordinates) to be used in polygon2array 
        to speed things up.
    """
    
    if rescale != None:
        height = rescale
        width = rescale
        
    grid = np.array([[[h,w] for h in list(range(height))] for w in list(range(width))]).reshape(height*width, 2)
    
    return grid

def polygon2array(file, mapping, grid, height = 2048, width = 2048, rescale = rescale_output_to):
    """ Builds a matrix of 0s and 1s representing the character polygons as 
        defined by the coordinates of in the CTW annotations. 
    """
    
    # This takes long time :(
    
    polygons = []
    for block in mapping[file]: # mapping maps files with their annotations
        for character in block:
            if character["is_chinese"] == True:
                polygons.append(character["polygon"])
    #print("found all polygons")
    
    if rescale != None:
        rescale_by = rescale / height # must come first
        height = rescale
        width = rescale
        polygons = [[[point * rescale_by for point in points] for points in set_of_points] for set_of_points in polygons]
    
    #print("start creation grid")
    
    every_point = grid.copy()
    
    #print("created every point")
    zeros_to_update = np.zeros(height * width)
    
    #print("start iteration over polygons")
    for polygon in polygons:
        path = mplpath.Path(np.array(polygon)) # can this be "sent" to Path in one go
        #print("created path")
        hits = np.asarray(path.contains_points(every_point), int)
        #print("identified hits")
        zeros_to_update += hits
        #print("updated zero_matrix")
        #print("one polgon down")
    
    matrix = zeros_to_update.reshape(height, width)
    
    #print(np.sum(matrix))
    
    return matrix

def data_builder(files, directory, mapping):
    """ Compiles the dataset for use. Returns a list of dictionaties, such that each
        element of the list contains:
        -  the filename; key: "file"
        -  a vectorized instance of the training data; key: "img_vector"
        -  a vectorized instance of the labels, or targets (a vector of 0s and 1s indicating 
           boxes of characters in images); key: "label"
        
        Note: (1) the format of instances (training input and targets) are numpy arrays; and
        (2) the instances have "matrix shape". For these reasons, the output of the data_builder()
        requires further processing for it to be ready for pytorch processing. The functions 
        standardizer() and numpy2torch() is does required further down the pipline of data
        preprocessing.
    """
    t1 = time.perf_counter()
    my_grid = grid()
    data = []
    i=1 # for printing out progress
    for file in files: 
        #print("NEW FILE")
        instance = {}
        instance["file"] = file
        instance["img_vector"] = img2array(file)
        #print("img2array done")
        #instance["label"] = bbox2array(file, mapping)
        instance["label"] = polygon2array(file, mapping, grid = my_grid)
        #print("polygon2array done")
        data.append(instance)
        print("{}% done.".format(round((i/len(files))*100, 1)), end="\r")
        i+=1
        
        #break
    
    t2 = time.perf_counter()
    passed_time = t2 - t1
    print("Done! ({} m., {} s.)".format(int(passed_time/60), int(passed_time%60)))
    return data

def standardizer(dataset, scaler = StandardScaler()):
    """ Standardizes the image vectors of a dataset to z-scores using StandardScaler() 
        from the library sklearn.preprocessing. 
    """
    
    std_data = []
    #N = len(dataset)
    example = dataset[0]["img_vector"]
    x, y, z = example.shape
    n_features = example.size # ... or x * y * z
    
    for instance in dataset:
        std_data.append(instance["img_vector"].reshape(n_features))
    
    scaled_data = scaler.fit_transform(std_data)
    
    for i, scaled_ins in enumerate(scaled_data):
        dataset[i]["img_vector"] = scaled_ins.reshape(x, y, z)

def numpy2torch(dataset, device = device, permute = True):
    """ For a dataset, transforms its numpy arrays to torch tensors. If permute = True,
        image vectors are permuted such that ... 
    """
    
    #print("Shape of vector before: ", dataset[0]["img_vector"].shape)
    
    for instance in dataset:
        if permute == True: # ... hmmm 
            instance["img_vector"] = torch.tensor(instance["img_vector"], dtype=torch.float, device = device).permute(2,0,1)
        else:
            instance["img_vector"] = torch.tensor(instance["img_vector"], dtype=torch.float, device = device)
        instance["label"] = torch.tensor(instance["label"], dtype=torch.float, device = device)
    
    #print("Shape of vector after: ", dataset[0]["img_vector"].shape)


### Calling functions: creating the overall dataset

In [4]:
files = only_train()

Potential files:  1000
Kept files:  845


In [5]:
len(files)

845

In [6]:
files = shorty(files)



In [7]:
len(files)

30

In [8]:
mapping = CTW_mapper(files)

In [9]:
my_data = data_builder(files, images_dir, mapping)

Done! (2 m., 26 s.)


**Save & Load** (problems)

In [None]:
my_data.to_pickle(shortcut_to_prepared_data) #this creates monsters!!!!

In [None]:
my_data = pd.read_pickle(shortcut_to_prepared_data)

In [None]:
# back to dict?
my_data = my_data.to_dict("records")

-------

In [10]:
standardizer(my_data)

In [11]:
numpy2torch(my_data)

### Splitting the dataset into train and test sets

In [12]:
def split(data       = my_data, 
          train_prop = train_proportion, 
          val_prop   = None):
    """ Splits a dataset into training data, testing data and, if selected,
        validation data. Note that the proportions of training data, test data
        and validation data (optional) must not exceed 100%. 
    """
    
    if val_prop != None:
        train_to_idx = int(len(data) * train_prop)
        val_to_idx   = int(len(data) * val_prop) + train_to_idx
        train = data[:train_to_idx]
        val   = data[train_to_idx:val_to_idx]
        test  = data[val_to_idx:]
        return train, val, test
    else:
        train_to_idx = int(len(data) * train_prop)
        train = data[:train_to_idx]
        test  = data[train_to_idx:]
        return train, test

In [13]:
train_set, test_set = split() # there is no validation set used below... 

### Defining a dataloader
In training, the function `dataloader` will be called with the `train_set` as argument in every iteration (epoch) yielding  randomized and batched traing inputs.

In [14]:
def dataloader(data, batch_size):
    """ Takes a (proportion of) a dataset and returns a randomized iterator 
        of the data organized into batches as defined by batch_size.
        
        Note: the dataloader preserves the "matrix shape" of trainingdata and 
        targets. Since pytorch neural networks require "flat" shapes of data
        the function flat_batch() is used to let data flow trough training
        in the desired format. 
    """
    
    random.shuffle(data)
    
    for group in [data[i : i+batch_size] for i in range(0, len(data), batch_size)]:
        files = []
        img_vecs = []
        labels = []

        for instance in group:
            files.append(instance["file"])
            img_vecs.append(instance["img_vector"])
            labels.append(instance["label"])

        batch = {"file":files, 
                 "img_vector":img_vecs, 
                 "label":labels}
    yield batch

### Checking

In [58]:
my_data[0]["label"].float()

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:2')

## Part 2: the models

### General traing procedure

In [59]:
def flat_batch(batch):
    """Takes a python list of length B of more-than-one dimensional tensors (N, M, ...) and 
    returns a tensor of shape: (B, M*N*...)..."""
    
    return torch.stack([torch.flatten(instance) for instance in batch])

In [88]:
def trainer(model, # Must be an instance of a model!
            name_of_model,
            learning_rate,
            epochs,
            batch_size,
            train_data = train_set,
            val_data = None,
            save_model = False,
            directory = path_to_save_models,
            my_loss_function = nn.MSELoss,
            #my_loss_function = nn.BCELoss(),
            my_optimizer = optim.Adam
           ):
    """ Specifices a general training procedure for a model. 
        Note: trainer() requires an instantiated model as model argument. 
    """
    
    optimizer = my_optimizer(model.parameters(), lr=learning_rate)    
    
    #model = my_model
    model.to(device)
    model.train()
    
    loss_function = my_loss_function()
    
    #total_loss = 0
    
    for epoch in range(epochs):
        epoch_loss = 0
        
        iterator = dataloader(train_set, batch_size)
        for i, batch in enumerate(iterator):
            optimizer.zero_grad # reset gradients
            
            output = model(batch["img_vector"])
            targets = flat_batch(batch["label"])
            
            #print("output", output)
            #print("targets", targets)
            
            loss = loss_function(output, targets)
            
            #total_loss += loss.item()
            epoch_loss += loss.item()
            #print("Epoch: ", epoch+1, "Batch: ", i, "Total loss: ", total_loss/(i+1), end='\r')
            loss.backward() # compute gradients
            optimizer.step() # update parameters
            
            #break
            
        #print()
        print(f"Epoch: {epoch+1} (out of {epochs}); total loss: {epoch_loss}.")
            
        if val_data != None:
            model.eval()
            # HERE WE COULD DO SOMETHING
            model.train()
            
    if save_model == True:
        torch.save(model, directory+name_of_model+".pt")

### Model 1: Convolutional Model with Upsampling (CUP)

In [89]:
class CUP(nn.Module):
    def __init__(self, inp_height, inp_width, ch, outp):   
        super(CUP, self).__init__()
        
        self.height = inp_height
        self.width = inp_width
        self.channels = ch
        
        self.output = outp

        #self.compression = nn.MaxPool2d(16, 16)

        self.compression = nn.Sequential(      # convolution0, a major size reduction
            nn.Conv2d(in_channels = 3, 
                      out_channels = 3, 
                      kernel_size = 4, 
                      stride = 4, 
                      padding = 0),
            nn.BatchNorm2d(3),
            nn.ReLU(),
            nn.MaxPool2d(4, 4)
        )
        
        self.convolution1 = nn.Sequential(
            nn.Conv2d(in_channels = 3,     # 3 channels in (RGB
                      out_channels = self.channels[0], 
                      kernel_size = 4,     # window of 4x4
                      stride = 1, 
                      padding = "same"), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # 2x2 window with stride of 2
        )
        
        self.convolution2 = nn.Sequential(
            nn.Conv2d(in_channels = self.channels[0],     
                      out_channels = self.channels[1], 
                      kernel_size = 2,     
                      stride = 1, 
                      padding = "same"), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # 2x2 window with stride of 2
        )
        
        # [batch_n, 3, 2048, 2048] --> [batch_n, 3, 128, 128] (compression by convolution and max pooling)
        # [batch_n, 3, 128,  128]  --> [batch_n, 32, 64,  64]  (convolution1)
        # [batch_n, 32, 64,   64]  --> [batch_n, 64, 32,  32]  (convolution2)
        # 64*32*32 = 65536
        # 4194304 (i.e. target_size) / 65536 = 64 (i.e. the number that our tensor need to be upsampled by)
        # sqrt(64) = 8 (the upsampling will be applied to both heigt and weigth)
        
        self.up_factor = sqrt(self.output/(self.channels[1]*((self.height / (16*2*2))**2)))
        
        self.exit = nn.Sequential(
            nn.Dropout(0.40), #why 0.05? higher?
            nn.Upsample(scale_factor=self.up_factor, mode='nearest'), #decide on mode
            nn.Flatten(), # by default start_dim = 1
            nn.Sigmoid()
        )
        
    def forward(self, batch):
        if isinstance(batch, list):    # this is admittedly a bit ad hoc, but it handles the format of the batched training data and the non-batched evaluation data; and it works ;)
            batch = torch.stack(batch) 
        else:
            batch = torch.stack([batch])
        
        #print(batch.shape)
        features0 = self.compression(batch)
        print("1", features0)
        
        features1 = self.convolution1(features0)
        print("2", features1)
        #print("1", features1.shape)
        
        features2 = self.convolution2(features1)
        print("3", features2)
        
        #print("2", features2.shape)
        #print("factor", self.up_factor)
        
        output = self.exit(features2)
        print("4",  output)
        
        return output

#### Training and saving model

In [90]:
shape_of_input = my_data[0]["img_vector"].shape
height = shape_of_input[1]
width = shape_of_input[2]
shape_of_output = my_data[0]["label"].shape
output_size = shape_of_output[0] * shape_of_output[1]

# print(shape_of_input)
# print(shape_of_output)
print(output_size)

my_cup_model = CUP(inp_height = height, 
                   inp_width = width, 
                   ch = (32,64), 
                   outp = output_size).to(device)


4194304


In [91]:
my_cup_model.parameters

<bound method Module.parameters of CUP(
  (compression): Sequential(
    (0): Conv2d(3, 3, kernel_size=(4, 4), stride=(4, 4))
    (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (convolution1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(4, 4), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (convolution2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (exit): Sequential(
    (0): Dropout(p=0.4, inplace=False)
    (1): Upsample(scale_factor=8.0, mode=nearest)
    (2): Flatten(start_dim=1, end_dim=-1)
    (3): Sigmoid()
  )
)>

In [92]:
nepochs = 1
batchsz = 1
cup_name = "CUP_{}e{}b".format(nepochs, batchsz)

trainer(my_cup_model, # Must be an instance of a model!
        cup_name,
        learning_rate=0.001,
        epochs=nepochs,
        batch_size=batchsz,
        save_model = True)

1 tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.5973, 0.0079, 0.0688],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.1350],
          [0.0000, 0.0000, 0.0000,  ..., 0.2225, 0.3108, 0.1605],
          ...,
          [0.1036, 0.0422, 0.0000,  ..., 0.0684, 0.1780, 0.3717],
          [0.0000, 0.2635, 0.0911,  ..., 0.4317, 0.2063, 0.1420],
          [0.7558, 0.0000, 0.6394,  ..., 0.0000, 0.2237, 0.1454]],

         [[1.3971, 1.4872, 1.3609,  ..., 0.8240, 0.3259, 0.3149],
          [1.3441, 1.2867, 1.1912,  ..., 0.4188, 0.5639, 0.3672],
          [1.3448, 1.3178, 1.2349,  ..., 0.1534, 0.1290, 0.2156],
          ...,
          [1.3589, 0.9749, 1.0739,  ..., 0.6085, 0.3406, 0.2901],
          [0.8107, 1.2770, 0.9804,  ..., 0.6201, 0.2643, 0.2418],
          [0.9778, 0.9181, 0.9444,  ..., 0.7272, 0.7275, 0.8081]],

         [[2.4745, 2.3867, 2.1520,  ..., 1.7464, 0.9419, 0.4464],
          [2.4512, 2.1562, 1.9072,  ..., 0.9688, 1.0211, 0.6393],
          [2.0859, 2.2063, 1.9642,  ...,

## Part 3: testing and evaluation

### Evaluation Metrics

Two basic types of evaluation metrics are considered:

1. The "continious" ("analog") metric of *mean squared error*.
2. Threshold-based ("dialog", frequency-based) metrics, assuming a treshold *t* for a classfier *C*, such that for every pixel *x*, if the probaility predicted for *x* (i.e. *p(x)*) is greater than *t*, then *C(x)* = 1, if not, *C(x)* = 0. Represented by a threhold-classification, true positives (TP), false positives (FP), true negatives (TN) and false neagtives (FN) can be calculated and therfore also standard measures of *accuracy*, *recall*, *precision* and *F1*. 

Both types of metrics (analog and digital) can be measured for the model's performance on *individual* images. However, general measures of the model's performance on the *complete* test set must be considered. For this, two approaches are used:

*    A pooled approach: the evaluation metrics are calculated for the concatenation of predictions for every image of the test set in relation to the concatenation of every true label (pixel map of polygon boxes). 

`Metric([PredictionImage-1 + ... + PredictionImage-n], [TruthImage-1 + ... + TruthImage-n])` (where `+` here stands for concatenation, not addition). 
*    An averaging approach: taking the mean and standard deviation of a particular metric calculated for individual images 

`Mean([Metric(image-1), ..., Metric(image-n)])` 



    

In [70]:
def thld_metrics(tp, fp, tn, fn):
    """ Calculates Accuracy, Recall, Precision, and F1 from frequencies of 
        true postives (tp), false postives (fp), true negatives (tn), and
        false neagtives (fn).
    """
    accuracy = (tp + tn) / (tp + fp + tn + fn)
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    f1 = (2 * recall * precision) / (recall + precision)
    return accuracy, recall, precision, f1

In [71]:
def mean(array):
    """ Calculates the mean and standard deviation of an aray of numbers.
    """
#     print(array)
    mean = np.mean(array)
    std  = np.std(array)
    return mean, std

In [72]:
class Evaluation:
    """ For storing and handling information from the evaluation of models.
    """
    
    def __init__(self, name):
        self.name = name
        self.pooled_mse = "Not yet defined"
        self.pooled_acc = "Not yet defined"
        self.pooled_rec = "Not yet defined"
        self.pooled_prc = "Not yet defined"
        self.pooled_f1  = "Not yet defined"
        self.mean_mse = ("Not yet defined", "Not yet defined")
        self.mean_acc = ("Not yet defined", "Not yet defined")
        self.mean_rec = ("Not yet defined", "Not yet defined")
        self.mean_prc = ("Not yet defined", "Not yet defined")
        self.mean_f1  = ("Not yet defined", "Not yet defined")
        self.metrics_dict = {"mse": ["Not yet defined", "Not yet defined"], "accuracy": ["Not yet defined", "Not yet defined"], "recall": ["Not yet defined", "Not yet defined"], "precision": ["Not yet defined", "Not yet defined"], "f1": ["Not yet defined", "Not yet defined"]}

    def best_case(self, metric):
        """ Returns the file which has the best performance score with respect 
            to a metric.
        """
        m_list = self.metrics_dict[metric]
        m_list.sort(key=operator.itemgetter(1), reverse=True)
        return m_list[0][0]
    
    def best_cases(self, metric, n):
        """ Returns a list of the N files which has the best performance score 
            with respect to a metric.
        """
        m_list = self.metrics_dict[metric]
        m_list.sort(key=operator.itemgetter(1), reverse=True)
        files, values = zip(*m_list)
        return list(files[:n])
    
    def worst_case(self, metric):
        """ Returns the file which has the best performance score with respect 
            to a metric.
        """
        m_list = self.metrics_dict[metric]
        m_list.sort(key=operator.itemgetter(1), reverse=False)
        return m_list[0][0]

    def worst_cases(self, metric, n):
        """ Returns a list of the N files which has the best performance score 
            with respect to a metric.
        """
        m_list = self.metrics_dict[metric]
        m_list.sort(key=operator.itemgetter(1), reverse=False)
        files, values = zip(*m_list)
        return list(files[:n])
    
    def compare(self, other_model):
        """ Compares the evaluation of one model with another.
        """
        pass
    
    def summary(self):
        """ Summarises an evaluation. Returns string."""
        summary  = "\n".join([f"Model {self.name} performs as follows:", 
                      f"Pooled MSE: {self.pooled_mse}",
                      f"Pooled Accuracy: {self.pooled_acc}",
                      f"Pooled Recall: {self.pooled_rec}",
                      f"Pooled Precision: {self.pooled_prc}",
                      f"Pooled F1: {self.pooled_f1}",
                      f"Mean MSE: {self.mean_mse[0]} (std = {self.mean_mse[1]})",
                      f"Mean Accuracy: {self.mean_acc[0]} (std = {self.mean_acc[1]})",
                      f"Mean Recall: {self.mean_rec[0]} (std = {self.mean_rec[1]})",
                      f"Mean Precision: {self.mean_prc[0]} (std = {self.mean_prc[1]})",
                      f"Mean F1: {self.mean_f1[0]} (std = {self.mean_f1[1]})"]) 
        return summary
    
    def save(self, directory=path_to_save_evaluations):
        """ Writes the summary of an evaluation to a text file (at some diectory)."""
        
        summary = self.summary()
        with open(directory+self.name, "w") as e:
            e.write(summary)
    
    def print_summary(self):
        """ Prints out the summary of an evaluation.
        """
        summary = self.summary()
        print(summary)
    

### Testing: setting hyperparameters

In [None]:
batch?
stride?
iterations?
window?

### Evaluation: performance of best models

In [79]:
def evaluator(model, name, test_data = test_set, threshold = 0.5):
    """ Defines a general pipeline for evaluation by evaluation metrics Mean Squared Error (MSE),
        Accuracy, Recall, Precison and F1. The evaluator() function implements the Evaluation class
        to store and handle informaion from the evaluation. For each metric, there is a value 
        calculated for the test data as a whole ("pooled") and an average value calculated over the 
        set of values calculated for each image individually. Besides an instance of the Evaluation 
        class, evaluator() returns a mapping between files and the predicted outcome for that file.
    """
    t1 = time.perf_counter()
    
    model.eval()
    
    evaluation = Evaluation(name)
    
    prediction_pooled = []
    truth_pooled = [] 
    thld_frequencies_pooled = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}
    thld_metrics_calc = {"mse": [], "accuracy": [], "recall": [], "precision": [], "f1": []}

    i=1 # in order to print out progress
    for instance in test_data:
        
        t=time.perf_counter()
        print(f"starting with first evaluation ({int(t-t1)} s.)")
        
        prediction = torch.flatten(model(instance["img_vector"])) # due to design of model
        print("prediction", prediction)
        print("sum of this prediction", torch.sum(prediction))
        #print("pred shape", prediction.shape)
        truth = torch.flatten(instance["label"]).int()
        file = instance["file"]

        t=time.perf_counter()
        print(f"having output from model ({int(t-t1)} s.)")
        
        prediction_pooled.append( (file, prediction) )
        truth_pooled.append(truth)        
        
        mse = F.mse_loss(prediction, truth)
        
        t=time.perf_counter()
        print(f"starting threshold stuff ({int(t-t1)} s.)")        
        
        roundof = (prediction >= threshold).int()
        print("sum of roundof", torch.sum(roundof))
        tp = sum(roundof * truth)
        fp = sum(roundof * (~truth.bool()))
        tn = sum((~roundof.bool()) * (~truth.bool()))
        fn = sum((~roundof.bool()) * truth)
        
        t=time.perf_counter()
        print(f"tp, fp, tn, fn calculated ({int(t-t1)} s.)")           
        
        accuracy, recall, precision, f1 = thld_metrics(tp, fp, tn, fn)
        
        for key, value in zip(["tp", "fp", "tn", "fn"], [tp, fp, tn, fn]):
            thld_frequencies_pooled[key]+=value
        
        for key, value in zip(["mse", "accuracy", "recall", "precision", "f1"], 
                              [mse.item(), accuracy.item(), recall.item(), precision.item(), f1.item()]):
            thld_metrics_calc[key].append( (file, value) )
            
        t=time.perf_counter()
        print(f"lists are updated ({int(t-t1)} s.)")  
        
        print("({}%)".format(round((i/len(test_data)*100), 1)), end="\r")
        i+=1
        
        #print("TP", tp, "FP", fp, "TN", tn, "FN", fn)
        break
    
    pooled_accuracy, pooled_recall, pooled_precision, pooled_f1 = thld_metrics(
        thld_frequencies_pooled["tp"], 
        thld_frequencies_pooled["fp"], 
        thld_frequencies_pooled["tn"], 
        thld_frequencies_pooled["fn"])
    
    file, predictions = zip(*prediction_pooled)
    
    evaluation.pooled_mse = F.mse_loss(torch.flatten(torch.stack(list(predictions))), 
                                       torch.flatten(torch.stack(truth_pooled))).item()
    evaluation.pooled_acc = pooled_accuracy
    evaluation.pooled_rec = pooled_recall
    evaluation.pooled_prc = pooled_precision
    evaluation.pooled_f1  = pooled_f1
    
    # The code below is a bit nested. What it does in plain English is:
    # go to the dictionary where we keep all the performance scores with respect to 
    # each file. Every key (i.e. metric) of that dict maps to a list of tupples of 
    # file and value of the metric. Here, we "unzip" that list of tupples and calculate the 
    # mean (and standard deviation) for the values and use that mean (and std) to define 
    # the respective attributes of the Evaluation class instance.
    
#     print("MSE", thld_metrics_calc["mse"])
#     print("ACC", thld_metrics_calc["accuracy"])
#     print("REC", thld_metrics_calc["recall"])
#     print("PRE", thld_metrics_calc["precision"])
#     print("F1", thld_metrics_calc["f1"])
    
    evaluation.mean_mse = mean(list(zip(*thld_metrics_calc["mse"]))[1]) 
    evaluation.mean_acc = mean(list(zip(*thld_metrics_calc["accuracy"]))[1]) 
    evaluation.mean_rec = mean(list(zip(*thld_metrics_calc["recall"]))[1]) 
    evaluation.mean_prc = mean(list(zip(*thld_metrics_calc["precision"]))[1]) 
    evaluation.mean_f1  = mean(list(zip(*thld_metrics_calc["f1"]))[1]) 
    
    evaluation.metrics_dict = thld_metrics_calc
    
    t2 = time.perf_counter()
    passed_time = t2 - t1
    print("Done! ({} m., {} s.)".format(int(passed_time/60), int(passed_time%60)))
    
    return evaluation, dict(prediction_pooled)


#### CUP model

In [80]:
evaluation_cup, file2pred_map = evaluator(my_cup_model, cup_name)

starting with first evaluation (0 s.)
prediction tensor([0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000], device='cuda:2',
       grad_fn=<ViewBackward>)
sum of this prediction tensor(2097152., device='cuda:2', grad_fn=<SumBackward0>)
having output from model (0 s.)
starting threshold stuff (0 s.)
sum of roundof tensor(4194304, device='cuda:2')


KeyboardInterrupt: 

In [None]:
evaluation_cup.print_summary()

In [None]:
my_best_file = evaluation_cup.best_case("f1")

In [None]:
my_worst_file = evaluation_cup.worst_case("f1")

In [None]:
evaluation_cup.save()

### Visualizations

In [None]:
def visualize(file, 
              file2prediction, 
              file2annotation, # the mapping of files with their annotations was defined above
              height = 2048, 
              width = 2048, 
              rescale = rescale_output_to,
              img_dir = images_dir, 
              colorscheme = "Reds", 
              alpha_value = 0.3):
    """ Visualizes the performance of a model on a particular image.
    """
    
    # FIRST, collect elements of figure
    img = Image.open(img_dir+file)
    if rescale != None:
        img = img.resize((rescale, rescale))
    
    if rescale != None:
        rescale_by = rescale / height # must come first
        height = rescale
        width = rescale

    polygons = []
    for block in file2annotation[file]:  
        for character in block:
            if character["is_chinese"] == True:
                polygons.append(character["polygon"])
    
    if rescale != None:
        polygons = [[[point * rescale_by for point in points] for points in set_of_points] for set_of_points in polygons]
    
    cpu_tensor = file2prediction[file].cpu().detach().numpy()
    heat = cpu_tensor.reshape(height, width)
    #heat = file2prediction[file].reshape(height, width).detach().numpy()
    
    # SECOND, arrange and plot elements in 2 x 2 subplots
    
    %matplotlib inline
    
    figure = plt.figure(figsize=(16, 16))
    
    #Up-Left
    axUL = figure.add_subplot(221)
    axUL.set_title("True polygon box on image (Target)")
    axUL.imshow(img)
    for polygon in polygons:
        polly = Polygon(polygon, fill = False, color = (0, 1, 0)) # Recall that Polygon is a method of matplotlib.patches
        axUL.add_patch(polly)

    #Up-Right
    axUR = figure.add_subplot(222)
    axUR.set_title("Predicted heatmap on image (Result)")
    axUR.imshow(img)
    axUR.imshow(heat, cmap = colorscheme, alpha = alpha_value)

    #Down-Left
    axDL = figure.add_subplot(223)
    axDL.set_title("Image, Target and Result")
    axDL.imshow(img)
    axDL.imshow(heat, cmap = colorscheme, alpha = alpha_value)
    for polygon in polygons:
        polly = Polygon(polygon, fill = False, color = (0, 1, 0))
        axDL.add_patch(polly)

    #Down-Right
    axDR = figure.add_subplot(224)
    axDR.set_title("True box on predicted heatmap (Abstraction)")
    axDR.imshow(heat, cmap = colorscheme, alpha = alpha_value)
    for polygon in polygons:
        polly = Polygon(polygon, fill = False, color = (0, 1, 0))
        axDR.add_patch(polly)

    plt.show()


In [None]:
visualize(my_best_file, file2pred_map, mapping)

In [None]:
visualize(my_worst_file, file2pred_map, mapping)