In [14]:
# Reload edited modules automatically before each cell runs, so changes made to
# the project sources outside this notebook are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

import sys
# Put the parent directory first on the import path so the `src.*` imports used
# by this notebook resolve — assumes the notebook sits one level below the
# project root (TODO confirm).
sys.path.insert(0,'..')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
import numpy as np
import streamlit as st
from copy import deepcopy as dc
import pandas as pd
from pathlib import Path
from src.label_checker_automata import LabelCheckerAutomata
from src.caption import Caption
import src.utils as utils

## Load data and all labels

In [66]:
# Build the list of ground-truth labels from the image file names in `data_dir`.
data_dir = Path("../data/cropped_coas/out")

# Each file stem is "<first field>_<label>": the label is everything after the
# first underscore (the first field is presumably an image id — TODO confirm).
# Hidden files (leading ".") and anything that is not a .jpg are skipped.
# Path.iterdir() yields entries in arbitrary order, so the entries are sorted to
# make the label order (and therefore the DataFrame index) reproducible.
labels = [
    image_fn.stem.partition("_")[2]
    for image_fn in sorted(data_dir.iterdir())
    if image_fn.suffix == ".jpg" and not image_fn.name.startswith(".")
]

# One row per image, single "label" column; displayed as the cell output.
df = pd.DataFrame({"label": labels})
df

Unnamed: 0,label
0,G A per chevron
1,S O 3 crosses formy
2,A S crampon per fess
3,G A cross
4,A G fleur-de-lis
...,...
3060,A G qtly
3061,O GA lion _ label
3062,G plain
3063,A G saltire engrailed


# Duplicates in labels: Histogram of labels

In [67]:
# Distinct label strings and how many times each one occurs in `labels`.
elems, count = np.unique(labels, return_counts=True)
output = []  # not used in this cell; kept so the notebook namespace is unchanged

# Report every label that occurs more than once, in np.unique's sorted order.
is_repeated = count > 1
for label_text, occurrences in zip(elems[is_repeated], count[is_repeated]):
    print(f'label: "{label_text}" is repeated "{occurrences}" times')

# Number of distinct labels that occur more than once.
counter = int(is_repeated.sum())


label: " 3 fleurs-de-lis_ 3 lions passt guard_ =_ =  {BO, GO}" is repeated "2" times
label: " 3 fleurs-de-lis_ 3 lions passt guard_ =_ = _ book  {BO, GO_ A}" is repeated "2" times
label: " 3 fleurs-de-lis_ lion   {BO, GO}" is repeated "2" times
label: " blank shield" is repeated "2" times
label: " castle_ lion_ =_ =  {GO, AP}" is repeated "2" times
label: " ps[4 pales_ eagle_ =_ =]  {OG, AS}" is repeated "2" times
label: "A B 3 bars" is repeated "2" times
label: "A B 3 bends" is repeated "2" times
label: "A B barruly" is repeated "4" times
label: "A B barry" is repeated "4" times
label: "A B bend" is repeated "2" times
label: "A B bendy" is repeated "3" times
label: "A B checky" is repeated "3" times
label: "A B chief" is repeated "4" times
label: "A B fess" is repeated "3" times
label: "A B fess undy" is repeated "2" times
label: "A B lion cr." is repeated "3" times
label: "A B lion rampant" is repeated "5" times
label: "A B lozengy" is repeated "3" times
label: "A B pallety" is repea

In [68]:
# `elems` comes from np.unique(labels), so len(elems) is the number of DISTINCT
# labels, not the number of labelled images; the message now says so.
print(f'There are {counter} repeated labels out of {len(elems)} distinct labels')

There are 352 repeated labels out of 2385 total labels


## Check the folder: many images share the same label even though they are visibly different.
## There is wrongly labeled data in the ground truth! :(
This is visible via the Streamlit tool.

## Check if the labels match our automata 

In [69]:
# Keep the labels whose Caption (parsed with plural support) reports itself valid.
# Reading `is_valid` is what triggers the "cannot be parsed" messages shown below.
captions_val = [
    text
    for text in labels
    if Caption(text, support_plural=True).is_valid
]

print(f'Total number of valid labels in our automata {len(captions_val)} out of {len(labels)}')
# Result recorded from the last run: 1273 valid labels out of 3065


label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label "G A 3 bars gemel" cannot be parsed. The chunk "gemel" cannot be fit into any category.
label "O G barruly" cannot be parsed. The chunk "barruly" cannot be fit into any category.
label "S A cauldron" cannot be parsed. The chunk "cauldron" cannot be fit into any category.
label "S A star of 8 pt" cannot be parsed. The chunk "star" cannot be fit into any category.
label "S A star of 8 pt" cannot be parsed. The chunk "pt" cannot be fit into any category.
label "B AO lion passt, roundely" cannot be parsed. The chunk "passt," cannot be fit into any category.
label "G AO bend _ label" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G AO bend _ label" cannot be parsed. The chunk "label" cannot be fit into any category.
label "S A qtly" cannot be parsed. The chunk "qtly" cannot be fit into any category.
label " chief ch. 3 pales_ cross_ =_ =   {OGA, GA}" cannot be parsed. The chunk "pales_" cannot be fit into any category.
label " chief ch. 3 pales_ cross_ =_ =   

label " per fess crenely masoned_ fess_ =_ =   {OB, OG}" cannot be parsed. The chunk "fess_" cannot be fit into any category.
label " per fess crenely masoned_ fess_ =_ =   {OB, OG}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label " per fess crenely masoned_ fess_ =_ =   {OB, OG}" cannot be parsed. The chunk "=" cannot be fit into any category.
label " per fess crenely masoned_ fess_ =_ =   {OB, OG}" cannot be parsed. The chunk "{OB," cannot be fit into any category.
label " per fess crenely masoned_ fess_ =_ =   {OB, OG}" cannot be parsed. The chunk "OG}" cannot be fit into any category.
label "O GG fess _ border" cannot be parsed. The chunk "_" cannot be fit into any category.
label " fess_ checky _ 2 scimitars_ =_ =   {BO, XG-AS}" cannot be parsed. The chunk "fess_" cannot be fit into any category.
label " fess_ checky _ 2 scimitars_ =_ =   {BO, XG-AS}" cannot be parsed. The chunk "_" cannot be fit into any category.
label " fess_ checky _ 2 scimitars_ =_ =  

## Double check again via automata directly

In [70]:
# Cross-check the Caption-based count by going through the automata class
# directly, with the same plural support enabled.
automata = LabelCheckerAutomata(support_plural=True)
# presumably returns the subset of `labels` the automata accepts — confirm
# against src/label_checker_automata.py
validated_labels = automata.get_valid_labels(labels)

label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label "G OO 2 barbels addorsed, trefly" cannot be parsed. The chunk "trefly" cannot be fit into any category.
label "B OO lion, trefly" cannot be parsed. The chunk "lion," cannot be fit into any category.
label "B OO lion, trefly" cannot be parsed. The chunk "trefly" cannot be fit into any category.
label "G AB chief ch. panter isst" cannot be parsed. The chunk "panter" cannot be fit into any category.
label "O B fretty" cannot be parsed. The chunk "fretty" cannot be fit into any category.
label "S AG chief ch. fess of lozenges on partition" cannot be parsed. The chunk "partition" cannot be fit into any category.
label "O G castle" cannot be parsed. The chunk "castle" cannot be fit into any category.
label "A G castle" cannot be parsed. The chunk "castle" cannot be fit into any category.
label " 3 lozenges per bend_ checky_ =_ =   {OG, AG}" cannot be parsed. The chunk "per" cannot be fit into any category.
label " 3 lozenges per bend_ checky_ =_ =   {OG, AG}" cannot be parsed. The chun

In [71]:
# Summarise how many labels the plural-aware automata accepted and rejected.
total = len(labels)
valid = len(validated_labels)

summary = (
    ('Total number of labels', total),
    ('Valid labels', valid),
    ('Invalid labels', total - valid),
)
for heading, amount in summary:
    print(heading, amount)

Total number of labels 3065
Valid labels 1273
Invalid labels 1792


In [72]:
# note that we have duplicate 362 labels!! using a dict vs list made that clear
# NOTE(review): 900 and 538 are hard-coded and nothing visible in this notebook
# produces them (the surrounding cells report 1273 and 907 valid labels) —
# confirm where these numbers came from or derive them from the data.
900 - 538

362

In [75]:
# Wrap every label in a Caption parsed WITHOUT plural support; the printed
# length is a sanity check that one Caption exists per label.
captions = [Caption(text, support_plural=False) for text in labels]
print(len(captions))


3065


In [74]:
# Number of captions the plural-free grammar accepts (shown as the cell output).
# Summing a generator avoids building a throwaway list of ones, and the cell no
# longer rebinds `valid`, which the statistics cells use as an integer count.
sum(1 for caption in captions if caption.is_valid)

label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label " tower, bust of man_ =_ =   {AG, AG}" cannot be parsed. The chunk "=" cannot be fit into any category.
label " tower, bust of man_ =_ =   {AG, AG}" cannot be parsed. The chunk "{AG," cannot be fit into any category.
label " tower, bust of man_ =_ =   {AG, AG}" cannot be parsed. The chunk "AG}" cannot be fit into any category.
label "O S per pale _ 2 antlers per pale cch" cannot be parsed. The chunk "per" cannot be fit into any category.
label "O S per pale _ 2 antlers per pale cch" cannot be parsed. The chunk "_" cannot be fit into any category.
label "O S per pale _ 2 antlers per pale cch" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B AOG book betw 3 fleurs-de-lis and over all a bend" cannot be parsed. The chunk "book" cannot be fit into any category.
label "B AOG book betw 3 fleurs-de-lis and over all a bend" cannot be parsed. The chunk "over" cannot be fit into any category.
label "B AOG book betw 3 fleurs-de-lis and over all a bend" cannot be pa

label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "=]_" cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "=" cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "{GA_" cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. The chunk "GA," cannot be fit into any category.
label " wheel_ pq[lion guard_ lion_ =_ =]_ =_ =   {GA_ GA, AG}" cannot be parsed. Th

907

# get simple valid labels with lion

In [76]:
# `simple_automata` was previously created only in the "Create simple automata"
# cell further down, so on a fresh kernel (Restart & Run All) this cell failed
# with NameError. Build the plural-free automata here, where it is first used.
simple_automata = LabelCheckerAutomata(support_plural=False)

# Labels returned by the plural-free automata for the keyword 'lion'.
lion_labels = simple_automata.get_valid_labels_of(labels, 'lion')


label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label "O B greyhound salient" cannot be parsed. The chunk "greyhound" cannot be fit into any category.
label "A G 3 bars gemel" cannot be parsed. The chunk "gemel" cannot be fit into any category.
label "O SB 2 bars embattled-counterembattled _ label" cannot be parsed. The chunk "embattled-counterembattled" cannot be fit into any category.
label "O SB 2 bars embattled-counterembattled _ label" cannot be parsed. The chunk "_" cannot be fit into any category.
label "O SB 2 bars embattled-counterembattled _ label" cannot be parsed. The chunk "label" cannot be fit into any category.
label "A B barruly" cannot be parsed. The chunk "barruly" cannot be fit into any category.
label "X G AB barruly _ bend" cannot be parsed. The chunk "barruly" cannot be fit into any category.
label "X G AB barruly _ bend" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G A wing terminating in trefoil" cannot be parsed. The chunk "terminating" cannot be fit into any category.
label "G A w

In [77]:
# Number of labels returned for 'lion', followed by the full list as cell output.
# NOTE(review): the list shown includes entries such as 'O SG lion _ label',
# although the parser messages in this notebook report the chunks "_" and
# "label" as unparseable — confirm what get_valid_labels_of treats as valid.
print(len(lion_labels))
lion_labels

244


['O S lion rampant',
 'O B lion rampant',
 'G O lion passt guard',
 'O SG lion _ label',
 'A SG lion holding trunk',
 'A G lion rampant',
 'B A lion rampant',
 'O GS lion cr. _ border engrailed',
 'O G lion q.f.',
 'B AG chief ch. lion q.f.',
 'O G lion rampant',
 'A B lion rampant',
 'B X AG lion barruly cr.',
 'B A lion cr.',
 'A GB lion naissant cr. _ bend',
 'O B lion rampant',
 'A B lion rampant',
 'O GV lion naissant above mount',
 'A GV lion stat guard on mount',
 'B A lion cr.',
 'S OA lion acc. bend',
 'G A lion q.f.',
 'B O lion rampant',
 'B AOG fleur-de-lis _ chief ch. lion isst',
 'G OAB lion cr. _ border compony',
 'O SAG lion passt cr. _ border compony',
 'A SG lion _ label',
 'G O lion guard q.f.',
 'X G AB barruly _ lion cr.',
 'B O lion cr.',
 'A GV lion and eagle_s claw winged sn',
 'A G lion naissant cr.',
 'G O lion rampant',
 'A G lion cr.',
 'B A lion rampant',
 'G O lion q.f.',
 'G X AS lion checky cr.',
 'B AG lion naissant above mount',
 'A S lion rampant',
 '

# get simple valid labels with cross

In [78]:
# Labels returned by the plural-free automata for the keyword 'cross'.
# `simple_automata` must already exist in the kernel when this cell runs; it is
# not created here.
cross_labels = simple_automata.get_valid_labels_of(labels, 'cross')


label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label "X G AS paly _ chevron" cannot be parsed. The chunk "_" cannot be fit into any category.
label "A SG bend _ chief" cannot be parsed. The chunk "_" cannot be fit into any category.
label "A B barry" cannot be parsed. The chunk "barry" cannot be fit into any category.
label "G O semy of fleurs-de-lis" cannot be parsed. The chunk "semy" cannot be fit into any category.
label "X OV AG vairy _ chief ch. tree" cannot be parsed. The chunk "_" cannot be fit into any category.
label "X OV AG vairy _ chief ch. tree" cannot be parsed. The chunk "tree" cannot be fit into any category.
label "A GO eagle boned trefly" cannot be parsed. The chunk "boned" cannot be fit into any category.
label "A GO eagle boned trefly" cannot be parsed. The chunk "trefly" cannot be fit into any category.
label " per fess _ 2 swords in saltire_ barry acc. crancelin per bend_ lion, semy of hearts_ 3 waterlily leaves inv _E_ eagle    {XG-SA, OSV, OSG, AG_ BO}" cannot be parsed. The chunk "per" cannot be fit into an

label "G AA 3 bars acc. bird in chf" cannot be parsed. The chunk "bird" cannot be fit into any category.
label "G OV 3 reed maces on mount" cannot be parsed. The chunk "reed" cannot be fit into any category.
label "A G 2 piles each tipped with a bluebell" cannot be parsed. The chunk "each" cannot be fit into any category.
label "A G 2 piles each tipped with a bluebell" cannot be parsed. The chunk "tipped" cannot be fit into any category.
label "A G 2 piles each tipped with a bluebell" cannot be parsed. The chunk "bluebell" cannot be fit into any category.
label "G OA qtly acc. mullet in chf dx" cannot be parsed. The chunk "qtly" cannot be fit into any category.
label "A G lion_s head(1)" cannot be parsed. The chunk "head(1)" cannot be fit into any category.
label "B A cushion" cannot be parsed. The chunk "cushion" cannot be fit into any category.
label "O GGB 2 bars undy acc. orle of martlets _ border(1)" cannot be parsed. The chunk "_" cannot be fit into any category.
label "O GGB 2 b

In [79]:
# Number of labels returned for 'cross', followed by the full list as cell output.
print(len(cross_labels))
cross_labels

127


['G A cross',
 'A G cross patonce',
 'O B cross patriarchal',
 'X G BO per fess _ cross botonny',
 'B O cross engrailed',
 'O B cross engrailed',
 'O G cross engrailed',
 'O G cross',
 'G A+ castle _ chief {pp[Ar cross Gu_ Ar eagle Sa]}',
 'B O cross',
 'A G cross',
 'O G cross',
 'E G cross moline',
 'O SA cross fretty',
 'B O cross potenty',
 'O G cross',
 'G E cross',
 'A SO cross fretty',
 'O A cross botonny',
 'G AA escutcheon acc. orle of cross crosslets',
 'O G cross fleuretty',
 'G Z cross moline',
 'B AO cross and crozier sn',
 'A G cross moline',
 'O G cross moline',
 'A X SO cross lozengy',
 'S A cross patonce',
 'A S cross',
 'G O cross formy',
 'A G cross moline',
 'V A cross',
 'A S cross',
 'O GO lion guard ch. cross',
 'O G cross',
 'B A cross',
 'O GV cross acc. double tressure flory',
 'V O cross',
 'B O cross moline',
 ' cross_ eagle rising holding the hind-part of a lion passt_ =_ =   {GO, BAO}',
 'B O cross',
 'S O cross engrailed',
 'G E cross',
 'A B cross',
 'O 

# get simple valid labels with eagle

In [80]:
# Labels returned by the plural-free automata for the keyword 'eagle'.
# `simple_automata` must already exist in the kernel when this cell runs; it is
# not created here.
eagle_labels = simple_automata.get_valid_labels_of(labels, 'eagle')


label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label " pp[barruly_ 2 barbels addorsed, crusily]_ lion q.f._ =_ = _E_ checky of 9  {OG, GO, GA, AG}" cannot be parsed. The chunk "pp[barruly_" cannot be fit into any category.
label " pp[barruly_ 2 barbels addorsed, crusily]_ lion q.f._ =_ = _E_ checky of 9  {OG, GO, GA, AG}" cannot be parsed. The chunk "addorsed," cannot be fit into any category.
label " pp[barruly_ 2 barbels addorsed, crusily]_ lion q.f._ =_ = _E_ checky of 9  {OG, GO, GA, AG}" cannot be parsed. The chunk "crusily]_" cannot be fit into any category.
label " pp[barruly_ 2 barbels addorsed, crusily]_ lion q.f._ =_ = _E_ checky of 9  {OG, GO, GA, AG}" cannot be parsed. The chunk "q.f._" cannot be fit into any category.
label " pp[barruly_ 2 barbels addorsed, crusily]_ lion q.f._ =_ = _E_ checky of 9  {OG, GO, GA, AG}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label " pp[barruly_ 2 barbels addorsed, crusily]_ lion q.f._ =_ = _E_ checky of 9  {OG, GO, GA, AG}" cannot be parsed. The chunk "=" cannot

In [81]:
# Number of labels returned for 'eagle', followed by the full list as cell output.
print(len(eagle_labels))
eagle_labels

90


['S O eagle',
 'S A eagle',
 'G O eagle',
 'O X GS eagle per pale',
 'G A eagle',
 'B X AG eagle checky cr.',
 'G A+ castle _ chief {pp[Ar cross Gu_ Ar eagle Sa]}',
 'A G eagle cr.',
 'O S eagle',
 'G A eagle cr.',
 'B A eagle',
 'A G eagle',
 'A GV lion and eagle_s claw winged sn',
 'G A eagle doubleheaded',
 'B O eagle',
 'A G eagle doubleheaded',
 'A G dragon with eagle_s wings',
 'B O eagle',
 'S A eagle doubleheaded',
 'O S eagle doubleheaded',
 'O S eagle',
 'A G eagle',
 'A S eagle',
 'B O eagle cr.',
 'A B eagle doubleheaded',
 'G O eagle doubleheaded',
 'G A eagle',
 'O S eagle per fess',
 'A GO eagle ch. crescent',
 'A X BG eagle per pale',
 'V O eagle',
 'G O eagle',
 'A S eagle',
 'O G eagle',
 ' cross_ eagle rising holding the hind-part of a lion passt_ =_ =   {GO, BAO}',
 'O SA eagle ch. crescent',
 'O SA eagle ch. crescent',
 'O SA eagle ch. crescent with cross',
 'A SO eagle biting crescent inv.',
 'E G eagle',
 'G A eagle',
 'O S eagle doubleheaded',
 'G AS chief ch. e

In [82]:
# Labels returned by the plural-free automata for the two-word phrase 'per pale'.
# `simple_automata` must already exist in the kernel when this cell runs; it is
# not created here. The result name `elabels` is read by the following cell.
elabels = simple_automata.get_valid_labels_of(labels, 'per pale')


label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label "B XE OG 2 cauldrons full of serpents in pale acc. bend" cannot be parsed. The chunk "full" cannot be fit into any category.
label "A GB per pale lion _ plain" cannot be parsed. The chunk "per" cannot be fit into any category.
label "A GB per pale lion _ plain" cannot be parsed. The chunk "_" cannot be fit into any category.
label "A GB per pale lion _ plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " castle_ lion_ =_ =  {GO, BA}" cannot be parsed. The chunk "castle_" cannot be fit into any category.
label " castle_ lion_ =_ =  {GO, BA}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " castle_ lion_ =_ =  {GO, BA}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label " castle_ lion_ =_ =  {GO, BA}" cannot be parsed. The chunk "=" cannot be fit into any category.
label " castle_ lion_ =_ =  {GO, BA}" cannot be parsed. The chunk "{GO," cannot be fit into any category.
label " castle_ lion_ =_ =  {GO, B

In [83]:
# Number of labels returned for 'per pale', followed by the full list as cell output.
print(len(elabels))
elabels

7


['O X GS eagle per pale',
 'S O bugle-horn stringed per pale',
 'O X AB chief per pale',
 'G X AS chevron per pale inv',
 'A X BG eagle per pale',
 'B X AG chief per pale',
 'B X AG chief per pale']

# Create simple automata

In [84]:
# Plural-free ("simple") automata: validate every label with it, then report
# how many labels were accepted and rejected.
simple_automata = LabelCheckerAutomata(support_plural=False)
validated_simple_labels = simple_automata.get_valid_labels(labels)

total = len(labels)
valid = len(validated_simple_labels)
for heading, amount in (
    ('Total number of labels', total),
    ('Valid labels', valid),
    ('Invalid labels', total - valid),
):
    print(heading, amount)


label "G A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S O 3 crosses formy" cannot be parsed. The chunk "formy" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "crampon" cannot be fit into any category.
label "A S crampon per fess" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "per" cannot be fit into any category.
label "B OX AG per bend lion _ 2 bars" cannot be parsed. The chunk "_" cannot be fit into any category.
label "G plain" cannot be parsed. The chunk "plain" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "lion_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "bend_" cannot be fit into any category.
label " lion_ bend_ =_ =   {BO, GO}" cannot be parsed. The chunk "=_" cannot be fit into any category.
label 

label "O S+ eagle ch. escutch {Or lion Sa}" cannot be parsed. The chunk "S+" cannot be fit into any category.
label "O S+ eagle ch. escutch {Or lion Sa}" cannot be parsed. The chunk "{Or" cannot be fit into any category.
label "O S+ eagle ch. escutch {Or lion Sa}" cannot be parsed. The chunk "Sa}" cannot be fit into any category.
label "Z GO bendy _ label" cannot be parsed. The chunk "_" cannot be fit into any category.
label "Z GO bendy _ label" cannot be parsed. The chunk "label" cannot be fit into any category.
label "G A per fess _ 3 roses cch" cannot be parsed. The chunk "per" cannot be fit into any category.
label "G A per fess _ 3 roses cch" cannot be parsed. The chunk "_" cannot be fit into any category.
label "A V per pale" cannot be parsed. The chunk "per" cannot be fit into any category.
label "S A per chevron" cannot be parsed. The chunk "per" cannot be fit into any category.
label "O G chevronny" cannot be parsed. The chunk "chevronny" cannot be fit into any category.
labe