# Chata

WARNING: `maskrcnn_benchmark.config` loads global variables. If you hit `RuntimeError: Error(s) in loading state_dict for GeneralizedRCNN: size mismatch (...)`, try restarting the kernel.

In [1]:
# Which detector to evaluate: "charts" or "tables".
model_type = "tables"
# Identifier used to name the result and log directories of this run.
model_index = f"2019_06_06_{model_type}"
# Directory with model checkpoints.
model_output_dir = f"./output/chata/2019_05_15_{model_type}"

# Per-model settings: (config-file suffix, label id treated as the correct class).
_MODEL_SETTINGS = {
    "charts": ("chata", 2),
    "tables": ("tables", 1),
}
if model_type not in _MODEL_SETTINGS:
    # Fail here instead of leaving config_file_type / correct_label undefined
    # (or stale from a previous run) for the cells below.
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected one of {sorted(_MODEL_SETTINGS)}"
    )
config_file_type, correct_label = _MODEL_SETTINGS[model_type]

In [2]:
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

import importlib
import requests
from io import BytesIO
from PIL import Image
import numpy as np
from demo.predictor_chata import ChataDemo
from demo.predictor import COCODemo

import maskrcnn_benchmark.config as config
import cv2 as cv

In [3]:
# Enlarge the default matplotlib figure size (width, height).
pylab.rcParams["figure.figsize"] = (20, 12)

In [4]:
def imshow(img):
    """Show an image with matplotlib after swapping its first and third channels.

    The swap turns an OpenCV-style BGR array into the RGB order that
    matplotlib displays; the axes are hidden.
    """
    channel_swapped = img[:, :, [2, 1, 0]]
    plt.imshow(channel_swapped)
    plt.axis("off")

In [5]:
def load(url, timeout=30):
    """
    Download the image at ``url`` and return it as a BGR numpy array.

    Args:
        url: address of the image to fetch.
        timeout: seconds to wait for the server before giving up
            (forwarded to ``requests.get``).

    Returns:
        ``numpy.ndarray`` of shape (height, width, 3) with the channels in
        BGR order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures directly instead of a confusing image-decoding error.
    response.raise_for_status()
    pil_image = Image.open(BytesIO(response.content)).convert("RGB")
    # PIL yields RGB; reorder the channel axis to get BGR.
    image = np.array(pil_image)[:, :, [2, 1, 0]]
    return image

# NOTE: identical to the imshow defined in an earlier cell; this definition
# simply rebinds the same name.
def imshow(img):
    """Show an image with matplotlib after swapping its first and third channels (axes hidden)."""
    plt.imshow(img[:, :, [2, 1, 0]])
    plt.axis("off")

In [6]:
# this makes our figures bigger
# (same figure-size setting as the earlier cell; repeated here)
pylab.rcParams['figure.figsize'] = 20, 12

# Chata

In [7]:
from maskrcnn_benchmark.data.datasets.chata import ChataDataset, TablesDataset
import imageio as imio
import os

In [8]:
# "train", "test", "val"
# Pick the validation split of the dataset that matches the model under test.
if model_type=="charts":
    dataset = ChataDataset("./datasets/chata/chata", "val")
elif model_type=="tables":
    dataset = TablesDataset("./datasets/chata/tables", "val")
else:
    # Without this branch an unknown model_type would leave `dataset`
    # undefined (or stale from a previous run).
    raise ValueError(f"Unknown model_type {model_type!r}; expected 'charts' or 'tables'")

In [9]:
def apply_filter(img):
    """Turn a BGR image into a 3-channel distance-transform image.

    Steps: grayscale -> Otsu binarisation -> L1 distance transform ->
    back to three channels -> cast to uint8.
    """
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # cv.threshold returns the chosen threshold first; only the binarised image is needed.
    _, binary = cv.threshold(gray, 0, 255, cv.THRESH_OTSU)
    distances = cv.distanceTransform(binary, cv.DIST_L1, 0)
    # NOTE(review): distances above 255 are not clipped before the uint8 cast — confirm this is intended.
    return cv.cvtColor(distances, cv.COLOR_GRAY2BGR).astype(np.uint8)

In [10]:
def predict(model_index, chata_demo, dataset, dataset_type="val", img_range=(0,5)):
    """Run the detector on a slice of the dataset and collect the results.

    Args:
        model_index: run identifier; names the
            ``chata_results/{model_index}_samples`` directory that is created.
        chata_demo: object exposing ``run_on_opencv_image(image)`` which
            returns ``(rendered_image, top_predictions)``.
        dataset: indexable dataset whose items start with ``(image, target)``;
            ``target`` must provide ``bbox`` and ``get_field("labels")``.
        dataset_type: split name; currently unused.
        img_range: arguments forwarded to ``range`` to select dataset indices.

    Returns:
        Tuple ``(predictions, groundtruth, labels)`` of equal-length lists:
        the detector's top predictions, the first ground-truth box, and the
        ground-truth ``labels`` field of every processed sample.
    """
    os.makedirs(f"chata_results/{model_index}_samples", exist_ok=True)
    predictions = []
    groundtruth = []
    labels = []
    for i in range(*img_range):
        # Fetch the sample once and reuse it for the image, box and labels.
        sample = dataset[i]
        image, target = sample[0], sample[1]
        _, top_predictions = chata_demo.run_on_opencv_image(np.array(image))
        predictions.append(top_predictions)
        groundtruth.append(np.array(target.bbox)[0])
        # Labels come from the same sample as the image and the box.
        labels.append(target.get_field("labels"))

    return predictions, groundtruth, labels

In [11]:
# Build the detector configuration: the model YAML first, then the notebook overrides.
config_file = f"./configs/chata/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc_{config_file_type}.yaml"
config.cfg.merge_from_file(config_file)
config.cfg.merge_from_list([
    "MODEL.DEVICE", "cpu",
    "OUTPUT_DIR", model_output_dir,
])

# Detector wrapper used by all prediction cells below.
chata_demo = ChataDemo(config.cfg, model_type, min_image_size=800, confidence_threshold=0.5)

Loading checkpoint from ./output/chata/2019_05_15_tables/model_final.pth
[Checkpointer._load_file] Model loading: ./output/chata/2019_05_15_tables/model_final.pth


In [12]:
# How many ids the selected dataset holds.
len(dataset.ids)

158

In [13]:
# Run the detector on dataset samples 0..19.
predictions, groundtruth, labels = predict(model_index, chata_demo, dataset, "val", (0,20))

In [14]:
#(0,0) = top_left_corner
#xyxy - top_left, bottom_right 
def get_box_area(box):
    """Area of an xyxy box (top-left, bottom-right corners).

    Returns 0 when the box has non-positive width or height.
    """
    width = box[2] - box[0]
    height = box[3] - box[1]
    is_degenerate = height <= 0 or width <= 0
    return 0 if is_degenerate else width * height

In [15]:
# Sanity checks: both sample boxes are 2x2, so each has area 4.
for sample_box, expected_area in [([2, 1, 4, 3], 4), ([1, 2, 3, 4], 4)]:
    assert get_box_area(sample_box) == expected_area

In [16]:
def get_boxes_common_area(box1, box2):
    """Area of the intersection of two xyxy boxes (0 when they do not overlap)."""
    left = np.maximum(box1[0], box2[0])
    top = np.maximum(box1[1], box2[1])
    right = np.minimum(box1[2], box2[2])
    bottom = np.minimum(box1[3], box2[3])
    # A non-overlapping pair yields an inverted or empty rectangle, which
    # get_box_area reports as 0.
    overlap = [left, top, right, bottom]
    return get_box_area(overlap)

In [17]:
# Sanity checks: partial overlap, full containment, touching corner, disjoint boxes.
_common_area_cases = [
    ([2, 1, 4, 3], [1, 2, 3, 4], 1),
    ([2, 1, 4, 3], [0, 0, 4, 4], 4),
    ([0, 0, 1, 1], [1, 1, 2, 2], 0),
    ([0, 0, 1, 1], [6, 9, 9, 15], 0),
]
for first_box, second_box, expected_common in _common_area_cases:
    assert get_boxes_common_area(first_box, second_box) == expected_common

In [18]:
#(0,0) = top_left_corner
#xyxy - top_left, bottom_right 
def IoU(box, box_pred):
    """Intersection over union of two xyxy boxes.

    Args:
        box: reference box as (x1, y1, x2, y2).
        box_pred: predicted box in the same format.

    Returns:
        Intersection area divided by union area; 0.0 when the union is
        empty (both boxes degenerate), so no division by zero occurs.
    """
    common_area = get_boxes_common_area(box, box_pred)
    union_area = get_box_area(box) + get_box_area(box_pred) - common_area
    if union_area <= 0:
        return 0.0
    return common_area / union_area

In [19]:
# Per-image IoU of the first predicted box against the first ground-truth box.
# Sentinel values stored in place of a score:
NO_PREDICTION = -1  # the detector returned no boxes
WRONG_LABEL = -2    # the first predicted box carries a different label

iou = []
for image_predictions, box, gt_label in zip(predictions, groundtruth, labels):
    # Only evaluate images whose ground truth is the class of interest.
    if gt_label != correct_label:
        continue
    if len(image_predictions.bbox) == 0:
        iou.append(NO_PREDICTION)
        continue
    predicted_labels = image_predictions.get_field("labels")
    if predicted_labels[0] == correct_label:
        iou.append(IoU(box, np.array(image_predictions.bbox[0])))
    else:
        iou.append(WRONG_LABEL)

In [20]:
# Convert to an ndarray so calculate_metrics can compare all scores at once.
iou = np.array(iou)

In [21]:
# iou array
# >=0 - iou score
# (-1) - no prediction
# (-2) - predicted other object

In [22]:
# Inspect the per-image scores (including the -1 / -2 sentinels).
iou

array([ 0.79812461,  0.46922365,  0.33291793, -1.        , -1.        ,
        0.89150119,  0.70695841,  0.82276833,  0.57117981, -1.        ,
        0.77590674,  0.34555846,  0.13867702, -1.        , -1.        ,
        0.76271766,  0.90269619,  0.78698009,  0.88211757,  0.35490149])

### Precision + recall

In [23]:
def calculate_metrics(results, thresholds=(0.6, 0.7, 0.8, 0.9)):
    """Print precision, recall and F1 score for several IoU thresholds.

    Args:
        results: per-image scores; values >= 0 are IoU scores, negative
            values mark images without a usable prediction.
        thresholds: IoU thresholds to report.

    For every threshold:
        precision = (# scores >= threshold) / (# scores >= 0)
        recall    = (# scores >= threshold) / (# all entries)
    A ratio whose denominator is zero is reported as 0.0 instead of nan.
    """
    results = np.asarray(results)
    n_total = len(results)
    n_predicted = np.sum(results >= 0)
    for threshold in thresholds:
        n_hits = np.sum(results >= threshold)
        precision = n_hits / n_predicted if n_predicted else 0.0
        recall = n_hits / n_total if n_total else 0.0
        denominator = precision + recall
        f1_score = 2 * precision * recall / denominator if denominator else 0.0
        print(f"Threshold={threshold:.2f}; Precision={precision:.3f}; Recall={recall:.3f}; F1 score: {f1_score:.3f}")

In [24]:
# Sanity check on synthetic scores 0.0, 0.1, ..., 1.0 (every entry counts as a prediction).
calculate_metrics(np.array([i/10 for i in range(0,11)]))

Threshold=0.60; Precision=0.455; Recall=0.455; F1 score: 0.455
Threshold=0.70; Precision=0.364; Recall=0.364; F1 score: 0.364
Threshold=0.80; Precision=0.273; Recall=0.273; F1 score: 0.273
Threshold=0.90; Precision=0.182; Recall=0.182; F1 score: 0.182


In [25]:
# Same synthetic scores plus five negative sentinel entries.
calculate_metrics(np.array([i/10 for i in range(0,11)] + [-1,-2,-1,-2,-1]))

Threshold=0.60; Precision=0.455; Recall=0.312; F1 score: 0.370
Threshold=0.70; Precision=0.364; Recall=0.250; F1 score: 0.296
Threshold=0.80; Precision=0.273; Recall=0.188; F1 score: 0.222
Threshold=0.90; Precision=0.182; Recall=0.125; F1 score: 0.148


In [26]:
# Metrics for the IoU scores collected from the model's predictions above.
calculate_metrics(iou)

Threshold=0.60; Precision=0.600; Recall=0.450; F1 score: 0.514
Threshold=0.70; Precision=0.600; Recall=0.450; F1 score: 0.514
Threshold=0.80; Precision=0.267; Recall=0.200; F1 score: 0.229
Threshold=0.90; Precision=0.067; Recall=0.050; F1 score: 0.057


### Filter example

In [None]:
# Example
img = cv.imread('cTDaR_t10260.png')
img = apply_filter(img)
imshow(img)

In [None]:
# Run the detector on the sample image and show the rendered result.
img = cv.imread('cTDaR_t10260.png')
# run_on_opencv_image returns (rendered_image, top_predictions), as unpacked
# in predict(); only the rendered image can be displayed.
prediction, example_top_predictions = chata_demo.run_on_opencv_image(img)
imshow(prediction)

# Training loss

In [14]:
import re

In [26]:
def print_loss(date, dataset_type="val", log_path=None):
    """Plot every loss curve found in a training log and save the figures.

    Args:
        date: name of the sub-directory of ``./chata_results`` that receives
            the plots.
        dataset_type: prefix used in the saved file names.
        log_path: training log to parse; defaults to
            ``./output/chata/{model_index}/log.txt`` (``model_index`` is a
            notebook-level variable).

    For each loss type two curves are drawn against the parsed iteration
    numbers: the value logged before the parenthesis and the value logged
    inside it. One PNG per loss type is written to
    ``./chata_results/{date}/{dataset_type}_{loss_type}.png``.
    """
    os.makedirs(f"./output/chata/{model_index}", exist_ok=True)
    # Create the directory savefig actually writes to (keyed by ``date``).
    os.makedirs(f"./chata_results/{date}", exist_ok=True)
    if log_path is None:
        log_path = f"./output/chata/{model_index}/log.txt"
    with open(log_path) as file:
        logs = file.read()

    # Iteration numbers are shared by all loss types, so parse them once.
    iterations = [int(value) for value in re.findall(r"iter: ([0-9]+)", logs)]

    # Raw strings keep "\(" and "\." as regex escapes rather than string escapes.
    number = r"[0-9]*\.[0-9]*"
    for loss_type in ["loss", "loss_classifier", "loss_box_reg", "loss_objectness", "loss_rpn_box_reg"]:
        matches = re.findall(rf"{loss_type}: ({number}) \(({number})", logs)
        outer_values = [float(outer) for outer, _ in matches]
        bracketed_values = [float(bracketed) for _, bracketed in matches]

        fig, ax = plt.subplots()
        ax.plot(iterations, outer_values)
        ax.plot(iterations, bracketed_values)
        ax.set_title(loss_type.upper())
        ax.set_xlabel('Iteration', fontsize=14)
        ax.set_ylabel('Loss value', fontsize=14)
        fig.savefig(f"./chata_results/{date}/{dataset_type}_{loss_type}.png")
        # Close the figure so no empty canvas is left for the notebook to render.
        plt.close(fig)

In [36]:
# Plot the losses parsed from ./log.txt; figures are saved under ./chata_results/{model_index}/.
print_loss(model_index, "val", "./log.txt")

<Figure size 1440x864 with 0 Axes>

# Test on real dataset

In [13]:
import cv2

In [14]:
# Every sub-directory of chata_results/images is treated as one catalog of images.
images_root = "chata_results/images"
# Sorted so the processing order is the same on every run.
catalog_names = sorted(os.listdir(images_root))
# Keep directories only: the next cell calls os.listdir on each catalog.
catalogs = [
    f"{images_root}/{name}"
    for name in catalog_names
    if os.path.isdir(f"{images_root}/{name}")
]

In [15]:
# Run the detector on every PNG of every catalog and save the rendered result
# into a per-model sub-directory of that catalog.
for catalog in catalogs:
    output_dir = f'{catalog}/{model_index}'
    os.makedirs(output_dir, exist_ok=True)
    for img_name in os.listdir(catalog):
        # Match on the file extension; a bare substring test would also accept
        # names that merely contain "png".
        if not img_name.lower().endswith(".png"):
            continue
        test_img = cv2.imread(f"{catalog}/{img_name}")
        if test_img is None:
            # cv2.imread returns None for files it cannot read.
            print(f"Skipping unreadable image: {catalog}/{img_name}")
            continue
        # NOTE(review): the image is converted to RGB before inference, while
        # predict() feeds dataset images unchanged — confirm the channel order
        # the model expects.
        test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
        # run_on_opencv_image returns (rendered_image, top_predictions), as
        # unpacked in predict(); only the rendered image is written out.
        prediction, top_predictions = chata_demo.run_on_opencv_image(test_img)
        imio.imwrite(f'{output_dir}/{img_name}', prediction)

Labels: {}
Labels: {'table': 0.5912390351295471}
Labels: {}
Labels: {'table': 0.7189908027648926}
Labels: {}
Labels: {'table': 0.9999996423721313}
Labels: {'table': 0.99920254945755}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {'table': 0.68449467420578}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {'table': 0.9499477744102478}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {'table': 0.9999972581863403}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {'table': 0.9999815225601196}
Labels: {}
Labels: {'table': 0.7795243859291077}
Labels: {}
Labels: {}
Labels: {}
Labels: {}
Labels: {'table': 0.9999988079071045}
Labels: {}
Labels: {'table': 0.5679640173912048}
Lab