In [1]:
# Reload imported modules automatically when their source files change on disk,
# so edits to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the pyinstrument IPython extension.
# NOTE(review): no pyinstrument magic is used in the cells visible here — confirm it is still needed.
%load_ext pyinstrument

import sys
# Put the parent directory first on the import path so the `src.*` imports in the
# next cell resolve. The path is relative to the working directory —
# assumes the notebook is run from its own folder (TODO confirm).
sys.path.insert(0,'..')


In [None]:
from src.dataset import Dataset
from src.label_checker_automata import LabelCheckerAutomata
import src.utils as utils
from src.caption import Caption

## Load data and all labels

In [None]:
# Load the labeling results from the JSON export (path is relative to the working directory).
dataset = Dataset('../dataset/2021-03-19_classification-labeling-results.json')
# Every answer exposed by the dataset is used as a candidate label in the cells that follow.
all_labels = dataset.all_answers
# Last expression of the cell: displays how many labels were loaded.
len(all_labels)

## Check if the labels match our automata 

In [72]:
# Label checker built with support_plural=True, run over every label.
pl_automata = LabelCheckerAutomata(support_plural=True)
validated_pl_labels = pl_automata.get_valid_labels(all_labels)

# Summary: total label count, the sum of the validation results, and the remainder.
total = len(all_labels)
valid_plural = sum(validated_pl_labels.values())
print(f'Total number of labels {total}')
print(f'Valid labels with plural {valid_plural}')
print(f'Invalid labels {total - valid_plural}')

Total number of labels 3001
Valid labels with plural 356
Invalid labels 2645


# We now have 356 valid labels.
# For how many of these valid labels can the charges be found in the Armoria API?
# Answer: 64

In [75]:
def get_number_of_valid_labels_in_armoria(labels, caption_cls=None):
    """Collect the labels whose caption is flagged as valid in Armoria.

    Each label is wrapped in a caption object and kept when that object's
    ``is_valid_in_armoria`` attribute is truthy.

    Args:
        labels: Iterable of labels. A mapping may be passed as well; it is
            then iterated by key.
        caption_cls: Optional callable building a caption object from a
            label. Defaults to the ``Caption`` class imported by the
            notebook; the lookup is deferred until the function is called.

    Returns:
        tuple: ``(count, valid_labels)`` where ``valid_labels`` lists the
        accepted labels in iteration order and ``count`` is its length.
    """
    if caption_cls is None:
        caption_cls = Caption

    # Truthiness check instead of `== True`; the count is derived from the
    # list so the two returned values can never drift apart.
    valid_labels = [
        label for label in labels
        if caption_cls(label).is_valid_in_armoria
    ]
    return len(valid_labels), valid_labels


In [76]:
# Displays the (count, labels) tuple for the labels flagged by `is_valid_in_armoria`.
# NOTE(review): validated_pl_labels is used as a mapping elsewhere in this notebook
# (`sum(validated_pl_labels.values())`), so iterating it here walks its keys —
# confirm that every key is a label that actually passed validation.
get_number_of_valid_labels_in_armoria(validated_pl_labels)


Caption Class - exception in label A S bendy, ['bendy']
Caption Class - exception in label G OS bend ch. mullet, ['ch.']
Caption Class - exception in label O BG bendy & border engrailed, ['&']
Caption Class - exception in label O S 3 antlers in pale, ['in']
Caption Class - exception in label G OO bend betw 4 lions, ['betw']
Caption Class - exception in label A S bend dancetty, ['bend', 'dancetty']
Caption Class - exception in label O B 3 antlers in pale, ['in']
Caption Class - exception in label G OS pale ch. 3 chevrons, ['ch.']
Caption Class - exception in label A G bendy of 4, ['bendy']
Caption Class - exception in label A BA bend ch. 3 fleurs-de-lis, ['ch.']
Caption Class - exception in label S AA pale & chief, ['&']
Caption Class - exception in label G AO bend betw 6 crowns, ['betw']
Caption Class - exception in label X O AG bendy sn & pale, ['sn', '&']
Caption Class - exception in label A G bendy of 8, ['bendy']
Caption Class - exception in label G A 2 scytheblades addorsed, ['scy

(64,
 ['O S lion rampant',
  'A S 3 lions',
  'O G eagle',
  'O B 3 lions passt guard',
  'B A eagle',
  'O G lion rampant',
  'A G lion rampant',
  'B O 5 eagles',
  'O G 2 lions passt guard',
  "A G lion's head",
  'S O cross moline',
  'G A 3 eagles',
  'B O eagle',
  'G A eagle',
  'S A eagle',
  'G O cross patonce',
  'B A cross',
  'A G cross moline',
  'O S 3 lions passt guard',
  'A G 2 lions passt guard cr.',
  'O S eagle',
  'G A cross',
  'A G cross patonce',
  'S A cross moline',
  'B O lion rampant',
  'B O cross',
  'G A lion rampant',
  'A G 3 eagles',
  'O S eagle doubleheaded',
  'A S lion rampant',
  'O V cross moline',
  'O V cross',
  'O B lion rampant',
  'B O 3 lions',
  'A G cross',
  'G O 2 lions passt guard',
  'G O eagle',
  'G O 3 lions',
  'G O cross moline',
  'A G eagle doubleheaded',
  'O G 3 lions',
  'A B lion rampant',
  'G O lion rampant',
  'S O 2 lions passt guard',
  'A S lion passt',
  'G O lion passt guard',
  'O G 2 lions passt',
  'A B cross',


# Create simple automata (No plural)

In [62]:
# Label checker built with support_plural=False, run over the same labels.
simple_automata = LabelCheckerAutomata(support_plural=False)
validated_simple_labels = simple_automata.get_valid_labels(all_labels)

# Summary: total label count, the sum of the validation results, and the remainder.
total = len(all_labels)
valid_simple = sum(validated_simple_labels.values())
print(f'Total number of labels {total}')
print(f'Valid labels without plural {valid_simple}')
print(f'Invalid labels {total - valid_simple}')


Total number of labels 3001
Valid labels without plural 229
Invalid labels 2772


# How big is the dataset with and without plural support?

In [63]:
# Side-by-side counts from the two automata; the last line is their difference.
print(f'Valid labels with plural automata {valid_plural}')
print(f'Valid labels with simple automata {valid_simple}')
print(f'Valid plural labels only {valid_plural - valid_simple}')


Valid labels with plural automata 356
Valid labels with simple automata 229
Valid plural labels only 127


In [64]:
# Labels returned by the plural automaton's get_valid_plural_labels; the recorded
# count (127) matches `valid_plural - valid_simple` printed in the previous cell.
pl_labels = pl_automata.get_valid_plural_labels(all_labels)
print(len(pl_labels))
# Last expression: displays the whole list as the cell output.
# NOTE(review): consider showing only a slice (e.g. the first few entries) to keep the output short.
pl_labels

127


['G A 3 hares salient',
 'B O 3 fleurs-de-lis',
 "G A 3 lion's heads cr.",
 'B OA 3 fleurs-de-lis & border',
 'B OG 3 fleurs-de-lis & bend',
 'A S 3 lions',
 'A GOO chief ch. 2 roses & border',
 'O B 3 lions passt guard',
 'O GX AG 3 bars & border checky',
 'O GB 3 bars & border',
 'G A branch with 3 oak leaves erect',
 'B O 4 chevrons',
 'B O 5 eagles',
 'A GB fess betw 3 eagles',
 'A S 3 bars',
 'A G 3 roses',
 'G S 3 bugle-horns in pale',
 'G AO 3 dices acc. mount',
 'O G 2 lions passt guard',
 'S O 3 estoiles',
 'G A 3 eagles',
 'O B 3 escutcheons',
 'G A 2 wings',
 'B A 2 wings',
 'G A 2 bars dancetty',
 'B A 2 crescents addorsed',
 'O X SG 2 wings',
 'O X AG 2 bars checky',
 'G A 2 bars',
 'S AA 6 fleurs-de-lis & chief',
 'A S 3 cocks',
 'O S 2 wings',
 'B AO fess betw 3 mullets',
 'B A 3 cocks',
 'O S 3 lions passt guard',
 'B AA 2 bars & chief',
 'A G 3 chevrons',
 'A G 2 lions passt guard cr.',
 'S A 3 fleurs-de-lis',
 'S A 2 crescents addorsed',
 'A SA fess ch. 3 fish',
 'A G

# How many valid labels contain a border? (plural automata)

In [66]:
# Valid labels from the plural automaton, selected with the keyword 'border'.
b_labels = pl_automata.get_valid_labels_of(all_labels, 'border')
print(len(b_labels))
# Last expression: displays the whole list as the cell output.
b_labels

21


['B OA 3 fleurs-de-lis & border',
 'B OO cross & border',
 'A GOO chief ch. 2 roses & border',
 'O GX AG 3 bars & border checky',
 'O GB 3 bars & border',
 'O GZ eagle & border',
 'O SX AG saltire & border compony',
 'S AO lion chained cr. & border',
 'G AO rose & border',
 'G AB fess & border',
 'G AO eagle & border',
 'A VG fess & border',
 'B OG 3 fleurs-de-lis & border',
 'A GSO lion & border roundely',
 'O SBO cross & border flory',
 'A SB saltire & border',
 'A SS 2 fish in saltire & border engrailed',
 'G OB 3 lions & border engrailed',
 'A GO 3 chevrons & border',
 'G OBO 3 lions passt guard & border flory',
 'A GX OB saltire & border compony undy']

# How many valid labels contain a border? (simple automata)

In [67]:
# Valid labels from the simple automaton, selected with the keyword 'border'.
# NOTE(review): this rebinds `b_labels`, overwriting the plural-automaton result
# computed in the earlier cell; a distinct name would keep both available.
b_labels = simple_automata.get_valid_labels_of(all_labels, 'border')
print(len(b_labels))
# Last expression: displays the whole list as the cell output.
b_labels

12


['B OO cross & border',
 'O GZ eagle & border',
 'O SX AG saltire & border compony',
 'S AO lion chained cr. & border',
 'G AO rose & border',
 'G AB fess & border',
 'G AO eagle & border',
 'A VG fess & border',
 'A GSO lion & border roundely',
 'O SBO cross & border flory',
 'A SB saltire & border',
 'A GX OB saltire & border compony undy']

# get simple valid labels with lion

In [68]:
# Valid labels from the simple automaton, selected with the keyword 'lion'.
# NOTE(review): the recorded output also contains entries such as "A G lion's head"
# and 'G BA chief ch. lion isst', so the selection is evidently not limited to
# labels that start with a plain lion charge.
lion_labels = simple_automata.get_valid_labels_of(all_labels, 'lion')
print(len(lion_labels))
# Last expression: displays the whole list as the cell output.
lion_labels

45


['O GO lion guard ch. cross',
 'O S lion rampant',
 'O G lion rampant',
 'B A lion cr.',
 'A G lion rampant',
 'S AO lion chained cr. & border',
 'G BA chief ch. lion isst',
 'A GO lion roundely',
 "A G lion's head",
 'A GB lion acc. fess',
 'A GO chief indented ch. lion passt guard',
 'O G lion guard',
 'A SG lion acc. fess',
 'S X AO lion hooded',
 'O X GB lion hooded',
 'G AB chief ch. lion isst',
 'O GZ lion & chief',
 'B O lion rampant',
 'G A lion rampant',
 'B GO lion & chief',
 'O Z lion cr.',
 'A S lion rampant',
 'O B lion rampant',
 'A GS fess acc. lion isst',
 'G E lion cr.',
 'A B lion rampant',
 'G O lion rampant',
 'S A lion cr.',
 'A S lion passt',
 'O S lion cr.',
 'G O lion passt guard',
 'A GSO lion & border roundely',
 'O V lion passt',
 'A G lion cr.',
 'G A lion cr.',
 'B A lion rampant',
 'O SX EG lion acc. bend compony',
 'S AG lion acc. bend engrailed',
 'O G lion cr.',
 'O GA chief ch. lion passt',
 'S O lion rampant',
 'V O lion rampant',
 'B G lion passt',
 

In [22]:
# Filled in by checking the Armoria API manually: lion phrases as they appear in
# the labels, mapped to the name used on the Armoria side
# (presumably the charge identifier — confirm against the API).
LION_MODIFIERS_MAP = {
    "lion passt": "lionPassant",
    "lion passt guard": "lionPassantGuardant",
    "lion rampant": "lionRampant",
    "lion's head": "lionHeadCaboshed",
}



# get simple valid labels with cross

In [23]:
# Valid labels from the simple automaton, selected with the keyword 'cross'.
# NOTE(review): the recorded output also contains 'B AO escutcheon acc. orle of cross crosslets',
# so labels where the cross is not the leading charge are returned as well.
cross_labels = simple_automata.get_valid_labels_of(all_labels, 'cross')
print(len(cross_labels))
# Last expression: displays the whole list as the cell output.
cross_labels

26


['O GO lion guard ch. cross',
 'B OO cross & border',
 'S O cross moline',
 'G O cross patonce',
 'B A cross',
 'A G cross moline',
 'G A cross',
 'O G cross engrailed',
 'A G cross patonce',
 'S A cross moline',
 'A X AG cross moline vairy',
 'B O cross',
 'S O cross engrailed',
 'O V cross moline',
 'O V cross',
 'A G cross',
 'B AO escutcheon acc. orle of cross crosslets',
 'G O cross moline',
 'G Z cross moline',
 'A B cross',
 'O SBO cross & border flory',
 'G E cross',
 'A G cross engrailed',
 'O S cross engrailed',
 'S A cross engrailed',
 'O G cross']

In [31]:
# Filled in by checking the Armoria API manually: cross phrases as they appear in
# the labels, mapped to the name used on the Armoria side
# (presumably the charge identifier — confirm against the API).
CROSS_MODIFIERS_MAP = {
    "cross": "crossHummetty",
    "cross moline": "crossMoline",
    "cross patonce": "crossPatonce",
}



# get simple valid labels with eagle

In [32]:
# Valid labels from the simple automaton, selected with the keyword 'eagle'.
# NOTE(review): the recorded output also contains "O S eagle's head" and
# "A G dragon with eagle's wings", so the match is not restricted to labels
# whose charge is a whole eagle.
eagle_labels = simple_automata.get_valid_labels_of(all_labels, 'eagle')
print(len(eagle_labels))
# Last expression: displays the whole list as the cell output.
eagle_labels

26


['O G eagle',
 'B A eagle',
 'O GZ eagle & border',
 'O BG eagle ch. crescent',
 'O BX AG eagle ch. crescent checky',
 "O S eagle's head",
 'O SA eagle ch. crescent',
 'G AO eagle ch. crescent',
 "A G dragon with eagle's wings",
 'B O eagle',
 'G A eagle',
 'B O eagle cr.',
 'A GO eagle ch. crescent',
 'S A eagle',
 'G AO eagle & border',
 'O S eagle',
 'O GZ eagle doubleheaded cr. & chief',
 'O S eagle doubleheaded',
 'G O eagle',
 'A G eagle doubleheaded',
 'A SG eagle doubleheaded & bend',
 'O BG eagle & bend',
 'G A eagle cr.',
 'S AO eagle ch. crescent',
 'A S eagle',
 'A B eagle doubleheaded']

In [33]:
# Filled in by checking the Armoria API manually: eagle phrases as they appear in
# the labels, mapped to the name used on the Armoria side
# (presumably the charge identifier — confirm against the API).
# NOTE(review): "eagleTwoHeards" is reproduced verbatim; verify the spelling
# against the Armoria charge list before changing it.
EAGLE_MODIFIERS_MAP = {
    "eagle": "eagle",
    "eagle doubleheaded": "eagleTwoHeards",
}



In [34]:
# Valid labels from the simple automaton, selected with the keyword 'per pale'.
# NOTE(review): the recorded output is 0 / [] — confirm whether a two-word keyword
# is supported by get_valid_labels_of or whether no such label passes validation.
# NOTE(review): `labels` is a very generic name for a notebook-wide variable.
labels = simple_automata.get_valid_labels_of(all_labels, 'per pale')
print(len(labels))
# Last expression: displays the (empty) list as the cell output.
labels

0


[]