In [1]:
# Automatically reload imported modules when their source files change on disk
%load_ext autoreload
%autoreload 2

import sys
# Put the parent directory first on the module search path; presumably this is
# what lets the `src.*` imports in the next cell resolve — verify against the repo layout.
sys.path.insert(0,'..')


In [2]:
from copy import deepcopy as dc
from pathlib import Path

import numpy as np
import pandas as pd
import streamlit as st

import src.utils as utils
from src.caption import Caption
from src.label_checker_automata import LabelCheckerAutomata

2022-03-14 23:07:37.735 INFO    numexpr.utils: NumExpr defaulting to 8 threads.


## Load data and all labels

In [3]:
data_dir = Path("../data/cropped_coas/out")

# Build one label per image file. Only non-hidden ".jpg" files are considered, and
# the label is everything in the file stem after the first underscore.
# Path.iterdir() yields entries in arbitrary order, so the directory listing is
# sorted to keep the row order (and every list derived from `labels`) reproducible.
labels = [
    image_fn.stem.partition("_")[2]
    for image_fn in sorted(data_dir.iterdir())
    if image_fn.suffix == ".jpg" and not image_fn.name.startswith(".")
]

df = pd.DataFrame({"label": labels})
df

Unnamed: 0,label
0,G A per chevron
1,S O 3 crosses formy
2,A S crampon per fess
3,G A cross
4,A G fleur-de-lis
...,...
3060,A G qtly
3061,O GA lion _ label
3062,G plain
3063,A G saltire engrailed


## Duplicate labels: how often each label occurs

In [17]:
# Distinct label strings (np.unique returns them sorted) together with the number
# of images carrying each one. numpy is imported in the notebook's import cell.
elems, count = np.unique(labels, return_counts=True)
elems, count

(array([' 10 lozenges_ chief bendy_ =_ =  {GA, OX-AG}',
        ' 2 barbels addorsed, crusily_ 3 fleurs-de-lis _ border_ =_ = _E_ bend ch. 3 eagles   {BOO, BOG, OGA}',
        ' 2 bars_ 3 bends_ =_ =  {GO, BA}', ..., 'Z GO bendy _ label',
        'Z GO chief ch. lion isst', 'Z GO lion naissant'], dtype='<U147'),
 array([1, 1, 1, ..., 1, 2, 1]))

In [20]:
# Print every label that is attached to more than one image.
# Checking the image folder shows that images sharing a label are often different
# from each other, i.e. the ground truth contains wrongly labeled data.
for label in elems[count > 1]:
    print(label)

 3 fleurs-de-lis_ 3 lions passt guard_ =_ =  {BO, GO}
 3 fleurs-de-lis_ 3 lions passt guard_ =_ = _ book  {BO, GO_ A}
 3 fleurs-de-lis_ lion   {BO, GO}
 blank shield
 castle_ lion_ =_ =  {GO, AP}
 ps[4 pales_ eagle_ =_ =]  {OG, AS}
A B 3 bars
A B 3 bends
A B barruly
A B barry
A B bend
A B bendy
A B checky
A B chief
A B fess
A B fess undy
A B lion cr.
A B lion rampant
A B lozengy
A B pallety
A B paly
A B qtly
A B vairy
A BG barry _ border
A G 2 bars
A G 2 lions rampant addorsed
A G 3 axes
A G 3 bars
A G 3 chevrons
A G 3 eagles
A G 3 escutcheons
A G 3 flanchis
A G 3 fleurs-de-lis p.n.
A G 3 lions
A G 3 roses
A G 3 roses in bend
A G 3 roundels
A G 3 waterlily leaves inv
A G barruly
A G barry
A G bend
A G bend of lozenges
A G bendy
A G bull_s attire
A G castle
A G checky
A G checky of 9
A G chevron
A G chief
A G chief indented
A G cinquefoil
A G crescent
A G cross
A G cross moline
A G eagle
A G eagle doubleheaded
A G fess
A G fess dancetty
A G fess of lozenges
A G fleur-de-lis
A G fretty
A

## Check whether the labels are accepted by our automaton

In [32]:
# Keep each label whose Caption (built with plural support) reports itself as valid.
captions_val = [
    label
    for label in labels
    if Caption(label, support_plural=True).is_valid
]

print('Total number of labels:', len(labels))
print('Total number of valid labels in our automata:', len(captions_val))


Total number of labels: 3065
Total number of valid labels in our automata: 900


In [35]:
# Validate the whole label list in one call through the automaton (plural support on)
# and report how many labels it returns versus how many were given.
automata = LabelCheckerAutomata(support_plural=True)
validated_labels = automata.get_valid_labels(labels)

total, valid = len(labels), len(validated_labels)
print('Total number of labels', total)
print('Valid labels', valid)
print('Invalid labels', total - valid)


Total number of labels 3065
Valid labels 900
Invalid labels 2165


In [36]:
# Note: 362 of the 900 valid labels are duplicates; collecting them in a dict
# instead of a list made that clear.
# NOTE(review): 538 is presumably the number of distinct valid labels (the dict
# size) — confirm, and consider computing both numbers instead of hard-coding them.
900 - 538

362

## Create the simple automaton (no plural support)

In [37]:
# One Caption per label, this time without plural support.
captions = [Caption(label, support_plural=False) for label in labels]
print(len(captions))

# Count the valid captions directly. The previous version stored a list of ones in
# `valid`, rebinding a name that the neighbouring cells use for an integer count.
sum(1 for caption in captions if caption.is_valid)

3065


661

In [39]:
# Automaton without plural support, run over the same label list; report how many
# labels it returns versus how many were given.
simple_automata = LabelCheckerAutomata(support_plural=False)
validated_simple_labels = simple_automata.get_valid_labels(labels)

total, valid = len(labels), len(validated_simple_labels)
print('Total number of labels', total)
print('Valid labels', valid)
print('Invalid labels', total - valid)


Total number of labels 3065
Valid labels 661
Invalid labels 2404


## Simple valid labels containing a lion

In [40]:
# Ask the simple automaton for the valid labels associated with 'lion',
# print how many there are, then display the list itself.
lion_labels = simple_automata.get_valid_labels_of(labels, 'lion')
print(len(lion_labels))
lion_labels

157


['O S lion rampant',
 'O B lion rampant',
 'G O lion passt guard',
 'A G lion rampant',
 'B A lion rampant',
 'O G lion rampant',
 'A B lion rampant',
 'B A lion cr.',
 'O B lion rampant',
 'A B lion rampant',
 'B A lion cr.',
 'S OA lion acc. bend',
 'B O lion rampant',
 'B O lion cr.',
 'G O lion rampant',
 'A G lion cr.',
 'B A lion rampant',
 'G X AS lion checky cr.',
 'A S lion rampant',
 'G A lion rampant',
 'A B lion cr.',
 'O SA chief ch. lion isst',
 'O X GB lion hooded',
 'G AS lion acc. bend',
 'A G lion rampant',
 'O GO lion guard ch. cross',
 'O B lion cr.',
 'G Z lion rampant',
 'A G lion rampant',
 'B A lion rampant',
 'O B lion cr.',
 'O BG fess acc. lion isst',
 'A S lion rampant',
 'B O lion rampant',
 'B O lion rampant',
 'G A lion rampant',
 'O SG lion acc. bend',
 'G O lion rampant',
 'O S lion rampant',
 'A GO chief ch. lion passt guard',
 'B A lion rampant',
 'B A lion cr.',
 'G A lion rampant',
 'A G lion rampant',
 'G E lion rampant',
 'O GB lion acc. bend',
 '

In [41]:
# Filled in by manually checking the Armoria API.
# Keys are lion phrases as they appear in the labels; values are presumably the
# matching Armoria charge identifiers — confirm against the API before extending.
LION_MODIFIERS_MAP = {
    "lion passt": "lionPassant",
    "lion passt guard": "lionPassantGuardant",
    "lion rampant": "lionRampant",
    "lion's head": "lionHeadCaboshed",
}



## Simple valid labels containing a cross

In [42]:
# Ask the simple automaton for the valid labels associated with 'cross',
# print how many there are, then display the list itself.
cross_labels = simple_automata.get_valid_labels_of(labels, 'cross')
print(len(cross_labels))
cross_labels

92


['G A cross',
 'A G cross patonce',
 'B O cross engrailed',
 'O B cross engrailed',
 'O G cross engrailed',
 'O G cross',
 'B O cross',
 'A G cross',
 'O G cross',
 'E G cross moline',
 'B O cross potenty',
 'O G cross',
 'G E cross',
 'G AA escutcheon acc. orle of cross crosslets',
 'G Z cross moline',
 'A G cross moline',
 'O G cross moline',
 'S A cross patonce',
 'A S cross',
 'A G cross moline',
 'V A cross',
 'A S cross',
 'O GO lion guard ch. cross',
 'O G cross',
 'B A cross',
 'V O cross',
 'B O cross moline',
 'B O cross',
 'S O cross engrailed',
 'G E cross',
 'A B cross',
 'O SA eagle ch. crescent with cross',
 'O GS cross acc. martlet in chf dx',
 'O X AG cross checky',
 'A B cross moline',
 'O V cross',
 'G A cross potenty',
 'B O cross moline',
 'O G cross',
 'O S cross',
 'G O cross moline',
 'O SS cross acc. orle of martlets',
 'O S cross',
 'O GS cross acc. mullet in chf dx',
 'G O cross',
 'A G cross',
 'B A cross',
 'B O cross',
 'O S cross moline',
 'O G cross',
 '

In [43]:
# Filled in by manually checking the Armoria API.
# Keys are cross phrases as they appear in the labels; values are presumably the
# matching Armoria charge identifiers — confirm against the API before extending.
CROSS_MODIFIERS_MAP = {
    "cross": "crossHummetty",
    "cross moline": "crossMoline",
    "cross patonce": "crossPatonce",
}



## Simple valid labels containing an eagle

In [44]:
# Ask the simple automaton for the valid labels associated with 'eagle',
# print how many there are, then display the list itself.
eagle_labels = simple_automata.get_valid_labels_of(labels, 'eagle')
print(len(eagle_labels))
eagle_labels

74


['S O eagle',
 'S A eagle',
 'G O eagle',
 'G A eagle',
 'B X AG eagle checky cr.',
 'A G eagle cr.',
 'O S eagle',
 'G A eagle cr.',
 'B A eagle',
 'A G eagle',
 'G A eagle doubleheaded',
 'B O eagle',
 'A G eagle doubleheaded',
 'A G dragon with eagle_s wings',
 'B O eagle',
 'S A eagle doubleheaded',
 'O S eagle doubleheaded',
 'O S eagle',
 'A G eagle',
 'A S eagle',
 'B O eagle cr.',
 'A B eagle doubleheaded',
 'G O eagle doubleheaded',
 'G A eagle',
 'A GO eagle ch. crescent',
 'V O eagle',
 'G O eagle',
 'A S eagle',
 'O G eagle',
 'O SA eagle ch. crescent',
 'O SA eagle ch. crescent',
 'O SA eagle ch. crescent with cross',
 'E G eagle',
 'G A eagle',
 'O S eagle doubleheaded',
 'G AS chief ch. eagle isst',
 'O S eagle',
 'B O eagle',
 'A B eagle',
 'O SX AG eagle acc. bend checky',
 'B G eagle',
 'B O eagle',
 'O S eagle',
 'G O eagle',
 'O SA eagle ch. crescent',
 'A S eagle',
 'S A eagle',
 'A G eagle doubleheaded',
 'A G eagle',
 'G AO eagle ch. crescent',
 'G A eagle double

In [45]:
# Filled in by manually checking the Armoria API.
# Keys are eagle phrases as they appear in the labels; values are presumably the
# matching Armoria charge identifiers — confirm against the API before extending.
# NOTE(review): "eagleTwoHeards" looks like a misspelling of "eagleTwoHeads"; it is
# kept unchanged here — check Armoria's own charge name before "fixing" it.
EAGLE_MODIFIERS_MAP = {
    "eagle": "eagle",
    "eagle doubleheaded": "eagleTwoHeards",
}



In [46]:
# Valid simple-automaton labels associated with 'per pale'.
# The result gets its own name: assigning it back to `labels` would replace the
# full label list that every cell above reads, so re-running any of them after
# this cell would silently operate on the filtered (here: empty) list.
per_pale_labels = simple_automata.get_valid_labels_of(labels, 'per pale')
print(len(per_pale_labels))
per_pale_labels

0


[]