In [27]:
import json
import time
import os
import sklearn_crfsuite
import re
import nereval
import pandas as pd

from estnltk import Text
from estnltk.taggers import NerTagger
from estnltk.taggers import WordLevelNerTagger
from estnltk.converters import text_to_json
from estnltk.converters import json_to_text
from estnltk.layer_operations import flatten
from sklearn.metrics import classification_report
from estnltk.taggers import Retagger
from estnltk.taggers import CompoundTokenTagger
from sklearn_crfsuite import metrics

from nervaluate import Evaluator

from estnltk.taggers.estner.ner_trainer import NerTrainer
from estnltk.taggers.estner.model_storage_util import ModelStorageUtil
from estnltk.core import DEFAULT_PY3_NER_MODEL_DIR

In [28]:
def find(name, path):
    """Locate a file by name somewhere below a directory.

    Walks the tree rooted at ``path`` and returns the full path of the first
    file called ``name`` that is encountered. Returns ``None`` when no such
    file exists under ``path``.
    """
    for directory, _subdirectories, filenames in os.walk(path):
        if name not in filenames:
            continue
        return os.path.join(directory, name)
    return None

In [29]:
class TokenSplitter( Retagger ):
    """Retagger that cuts tokens in two at positions determined by regular expressions.

    Every pattern has to contain a named group (``break_group_name``, 'end' by
    default). When a pattern is found inside a token, the token is split at the
    position where that group ends.
    """
    conf_param = ['patterns', 'break_group_name']

    def __init__(self, patterns, break_group_name:str='end'):
        """Validate the configuration and store it on the tagger.

        Parameters
        ----------
        patterns : list
            Compiled regular expressions; each must define a symbolic group
            named ``break_group_name``.
        break_group_name : str
            Name of the group whose end offset marks the split position.

        Raises
        ------
        TypeError
            If ``break_group_name`` is not a non-empty string, if ``patterns``
            is not a list, or if a pattern lacks the expected regex interface
            or the named group.
        """
        # The tagger reads the 'tokens' layer and rewrites that same layer
        self.input_layers = ['tokens']
        self.output_layer = 'tokens'
        self.output_attributes = ()
        # Name of the group that marks the split position
        if not isinstance(break_group_name, str) or len(break_group_name) == 0:
            raise TypeError('(!) break_group_name should be a non-empty string.')
        self.break_group_name = break_group_name
        # The patterns must come as a list of compiled regular expressions
        if not isinstance(patterns, list):
            raise TypeError('(!) patterns should be a list of compiled regular expressions.')
        # TODO: compiled patterns are recognised by duck typing, as there seemed
        #       to be no common way of checking the type in both py35 and py36
        for regex in patterns:
            requirements = (
                ('method match()',    callable(getattr(regex, "match", None))),
                ('method search()',   callable(getattr(regex, "search", None))),
                ('attribute pattern', getattr(regex, "pattern", None) is not None),
            )
            for description, satisfied in requirements:
                if not satisfied:
                    raise TypeError('(!) Unexpected regex pattern: {!r} is missing {}.'.format(regex, description))
            # The split position is taken from this group, so it must be defined
            if self.break_group_name not in regex.groupindex:
                raise TypeError('(!) Pattern {!r} is missing symbolic group named {!r}.'.format(regex, self.break_group_name))
        self.patterns = patterns

    def _change_layer(self, text, layers, status):
        """Split every token of the output layer that matches one of the patterns.

        For each token the patterns are tried in order; the first one that yields
        a split position strictly inside the token wins, and the token is replaced
        by two spans meeting at that position. The layer is only modified after
        all tokens have been examined.
        """
        tokens_layer = layers[self.output_layer]
        obsolete_spans    = []
        replacement_spans = []
        for token in tokens_layer:
            token_text = text.text[token.start:token.end]
            for regex in self.patterns:
                found = regex.search(token_text)
                if not found:
                    continue
                cut = found.end( self.break_group_name )
                # A usable cut lies strictly inside the token; this also rejects
                # -1, which end() reports for a group that did not take part
                if cut <= 0 or cut >= len(token_text):
                    continue
                obsolete_spans.append( token )
                replacement_spans.append( (token.start, token.start + cut) )
                replacement_spans.append( (token.start + cut, token.end) )
                # A token is split at most once; continue with the next token
                break
        if replacement_spans:
            assert len(obsolete_spans) > 0
            # Drop all the old tokens first, then add the halves
            for stale_span in obsolete_spans:
                tokens_layer.remove_span( stale_span )
            for fresh_span in replacement_spans:
                tokens_layer.add_annotation( fresh_span )

# Splitter for words that are written together in the source texts. In every
# pattern the named group 'end' covers the first word, so a matching token is
# cut right after it. Patterns are tried in the order given: the first one is
# generic (two capitalised words glued together), the rest target specific
# tokens (presumably ones observed in the corpus).
token_splitter = TokenSplitter(patterns=[
    re.compile(pattern_source) for pattern_source in (
        r'(?P<end>[A-ZÕÄÖÜ]{1}\w+)[A-ZÕÄÖÜ]{1}\w+',
        r'(?P<end>Piebenomme)metsawaht',
        r'(?P<end>maa)peal',
        r'(?P<end>reppi)käest',
        r'(?P<end>Kiidjerwelt)J',
        r'(?P<end>Ameljanow)Persitski',
        r'(?P<end>mõistmas)Mihkel',
        r'(?P<end>tema)Käkk',
        r'(?P<end>Ahjawalla)liikmed',
        r'(?P<end>kohtumees)A',
        r'(?P<end>Pechmann)x',
        r'(?P<end>pölli)Anni',
        r'(?P<end>külla)Rauba',
        r'(?P<end>kohtowannem)Jaak',
        r'(?P<end>rannast)Leno',
        r'(?P<end>wallast)Kiiwita',
        r'(?P<end>wallas)Kristjan',
        r'(?P<end>Pedoson)rahul',
        r'(?P<end>pere)Jaan',
        r'(?P<end>kohtu)poolest',
        r'(?P<end>Kurrista)kaudo',
        r'(?P<end>mölder)Gottlieb',
        r'(?P<end>wöörmündri)Jaan',
        r'(?P<end>Oinas)ja',
        r'(?P<end>ette)Leenu',
        r'(?P<end>Tommingas)peab',
        r'(?P<end>wäljaja)Kotlep',
        r'(?P<end>pea)A',
        r'(?P<end>talumees)Nikolai',
    )
])

These files are excluded because their protocols are written in a different language, which the gold standard did not recognise; as a result, they have no gold-standard tags.

In [30]:
# Corpus files that are skipped when building the training set and when
# evaluating, because they have no gold-standard tags.
files_not_working = [
    'J2rva_Tyri_V22tsa_id22177_1911a.json',
    'J2rva_Tyri_V22tsa_id18538_1894a.json',
    'J2rva_Tyri_V22tsa_id22155_1911a.json',
    'Saare_Kihelkonna_Kotlandi_id18845_1865a.json',
    'P2rnu_Halliste_Abja_id257_1844a.json',
    'Saare_Kaarma_Loona_id7575_1899a.json',
    'J2rva_Tyri_V22tsa_id22266_1913a.json',
    'J2rva_Tyri_V22tsa_id22178_1912a.json',
]

In [31]:
# Map each corpus file name to the label of the subdistribution it belongs to.
# Every non-blank line of divided_corpus.txt is expected to have the form
# "<file name>:<subdistribution>"; the label is stored as a string.
files = {}

with open('divided_corpus.txt', 'r', encoding = 'UTF-8') as f:
    txt = f.readlines()

for line_number, line in enumerate(txt, start=1):
    entry = line.strip()
    if not entry:
        # Tolerate blank lines, e.g. an empty line at the end of the file.
        continue
    # Split on the last colon so that a colon inside a file name cannot break
    # the parsing.
    file, separator, subdistribution = entry.rpartition(":")
    if not separator or not file or not subdistribution:
        raise ValueError(
            'divided_corpus.txt, line {}: expected "<file name>:<subdistribution>", got {!r}'.format(line_number, entry)
        )
    files[file] = subdistribution

In [42]:
def _layer_to_entities(layer):
    """Convert the spans of an NER layer into the dict format given to the Evaluator.

    Returns one ``{"label": ..., "start": ..., "end": ...}`` dict per span,
    using the first value of the span's ``nertag`` attribute as the label.
    """
    return [{"label": span.nertag[0], "start": int(span.start), "end": int(span.end)}
            for span in layer]


# Evaluation results of every round, keyed by the subdistribution used for testing.
all_results = {}

# The tagger is configured identically for every file, so it is built only once.
compound_token_tagger = CompoundTokenTagger(tag_initials = False, tag_abbreviations = False, tag_hyphenations = False)

for subdistribution in [1, 2, 3, 4, 5]:
    # One subdistribution is held out for testing, the other four are trained on.
    subdistribution_for_testing = subdistribution
    training_subdistributions = [y for y in [1, 2, 3, 4, 5] if y != subdistribution]

    # Getting the filenames to be trained on from the files dictionary.
    filenames = {key: value for key, value in files.items() if int(value) in training_subdistributions}
    # Names of the files held out for testing in this round.
    testing_filenames = [key for key, value in files.items() if int(value) == subdistribution_for_testing]

    # Creating training_texts from the aforementioned filenames.
    print("Defineerin treenimistekstid.")
    started_at = time.time()
    training_texts = []
    for training_filename in filenames:
        # Check the exclusion list before touching the file system.
        if training_filename in files_not_working:
            continue
        with open('./vallakohtufailid_json/' + str(training_filename), 'r', encoding='UTF-8') as training_file:
            training_texts.append(json_to_text(training_file.read()).tag_layer(['sentences', 'morph_analysis']))
    print(f"Treenimistekstid defineeritud {time.time() - started_at} sekundiga.")

    # Setting up the trainer and training.
    print("\n\nAlustan nertaggeri treenimist.")
    started_at = time.time()
    model_dir=DEFAULT_PY3_NER_MODEL_DIR
    modelUtil = ModelStorageUtil(model_dir)
    nersettings = modelUtil.load_settings()
    trainer = NerTrainer(nersettings)
    trainer.train( training_texts, layer='gold_wordner', model_dir='test' )
    print(f"NerTagger treenitud {time.time() - started_at} sekundiga.")

    # Setting up the newly trained nertagger and defining layers to be removed later on.
    nertagger = NerTagger(model_dir = 'test')
    removed_layers = ['sentences', 'morph_analysis', 'compound_tokens', 'ner', 'words', 'tokens']

    # Tagging the held-out files using the new nertagger.
    print("\n\nAlustan failide taggimist.")
    started_at = time.time()
    for test_filename in testing_filenames:
        txt_filename = test_filename.replace(".json", ".txt")
        txt_path = find(txt_filename, "./vallakohtufailid/")
        if txt_path is None:
            # find() returns None for a missing file; fail with a clear message
            # instead of the TypeError that open(None) would raise.
            raise FileNotFoundError(f"{txt_filename} was not found under ./vallakohtufailid/")
        with open(txt_path, 'r', encoding='UTF-8') as f:
            raw_text = f.read()

        # The original check compared the file object with the file name, so it
        # could never be true; compare the name and patch the raw string instead.
        # NOTE(review): the replacement inserts characters, which shifts the
        # character offsets of this one file. Confirm that its gold annotation
        # was produced from text preprocessed the same way.
        if txt_filename == "Tartu_V6nnu_Ahja_id3502_1882a.txt":
            raw_text = raw_text.replace('..', '. .')

        text = Text(raw_text)
        text.tag_layer(['tokens'])
        token_splitter.retag(text)
        compound_token_tagger.tag(text)
        text.tag_layer('morph_analysis')

        nertagger.tag(text)
        text.add_layer(flatten(text['ner'], 'flat_ner'))

        # Only the flattened NER layer is kept in the stored result.
        for layer_name in removed_layers:
            text.pop_layer(layer_name)
        text_to_json(text, file=os.getcwd() + "/vallakohtufailid_nertagger/" + test_filename)
        print(f'Täägitud fail {test_filename}')
    print(f"Failid taggitud {time.time() - started_at} sekundiga.")

    # Changing the tags into a format readable by the evaluator.
    print("\n\nAlustan tulemuste ammutamist.")

    # One list of entity dicts per evaluated file, aligned by position.
    # (A word-level variant would read nertag[0] from the 'flat_gold_wordner'
    # and 'flat_wordner' layers instead.)
    gold_ner = []
    test_ner = []

    for test_filename in testing_filenames:
        if test_filename in files_not_working:
            # Excluded files are left out of the evaluation entirely.
            continue

        appendable_gold_ner = []
        appendable_test_ner = []
        # Names without the .json suffix contribute a pair of empty lists, as before.
        if test_filename.endswith(".json"):
            with open("./vallakohtufailid_nertagger/" + str(test_filename), 'r', encoding='UTF-8') as f_test, \
                 open("./vallakohtufailid_json_flat/" + str(test_filename), 'r', encoding='UTF-8') as f_gold:
                test_import = json_to_text(f_test.read())
                gold_import = json_to_text(f_gold.read())

            appendable_gold_ner = _layer_to_entities(gold_import['gold_ner'])
            appendable_test_ner = _layer_to_entities(test_import['flat_ner'])

        gold_ner.append(appendable_gold_ner)
        test_ner.append(appendable_test_ner)

    evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC', 'LOC_ORG'])
    results, results_per_tag = evaluator.evaluate()
    all_results[subdistribution_for_testing] = (results, results_per_tag)

Defineerin treenimistekstid.
Treenimistekstid defineeritud 245.80129384994507 sekundiga.


Alustan nertaggeri treenimist.
Feature generation
type: CRF1d
feature.minfreq: 0.000000
feature.possible_states: 0
feature.possible_transitions: 0
0....1....2....3....4....5....6....7....8....9....10
Number of features: 347967
Seconds required: 3.729

Stochastic Gradient Descent (SGD)
c2: 0.001000
max_iterations: 1000
period: 10
delta: 0.000001

Calibrating the learning rate (eta)
calibration.eta: 0.100000
calibration.rate: 2.000000
calibration.samples: 1000
calibration.candidates: 10
calibration.max_trials: 20
Initial loss: 34179.599218
Trial #1 (eta = 0.100000): 4280.545413
Trial #2 (eta = 0.200000): 8370.865299
Trial #3 (eta = 0.400000): 15645.778272
Trial #4 (eta = 0.800000): 35056.555895 (worse)
Trial #5 (eta = 0.050000): 3227.967920
Trial #6 (eta = 0.025000): 3023.813791
Trial #7 (eta = 0.012500): 3292.880865
Trial #8 (eta = 0.006250): 3864.657485
Trial #9 (eta = 0.003125): 4696.880808
Tria

***** Epoch #37 *****
Loss: 876.179032
Improvement ratio: 0.370136
Feature L2-norm: 86.758568
Learning rate (eta): 0.024954
Total number of feature updates: 600991
Seconds required for this iteration: 0.661

***** Epoch #38 *****
Loss: 846.719808
Improvement ratio: 0.369671
Feature L2-norm: 87.419388
Learning rate (eta): 0.024953
Total number of feature updates: 617234
Seconds required for this iteration: 0.668

***** Epoch #39 *****
Loss: 825.552916
Improvement ratio: 0.358062
Feature L2-norm: 88.062906
Learning rate (eta): 0.024951
Total number of feature updates: 633477
Seconds required for this iteration: 0.676

***** Epoch #40 *****
Loss: 805.805082
Improvement ratio: 0.342989
Feature L2-norm: 88.690588
Learning rate (eta): 0.024950
Total number of feature updates: 649720
Seconds required for this iteration: 0.676

***** Epoch #41 *****
Loss: 781.238285
Improvement ratio: 0.342561
Feature L2-norm: 89.302030
Learning rate (eta): 0.024949
Total number of feature updates: 665963
Seco

***** Epoch #78 *****
Loss: 423.951483
Improvement ratio: 0.141272
Feature L2-norm: 105.232807
Learning rate (eta): 0.024903
Total number of feature updates: 1266954
Seconds required for this iteration: 0.727

***** Epoch #79 *****
Loss: 421.382717
Improvement ratio: 0.128318
Feature L2-norm: 105.547205
Learning rate (eta): 0.024902
Total number of feature updates: 1283197
Seconds required for this iteration: 0.740

***** Epoch #80 *****
Loss: 421.337735
Improvement ratio: 0.128563
Feature L2-norm: 105.860378
Learning rate (eta): 0.024900
Total number of feature updates: 1299440
Seconds required for this iteration: 0.747

***** Epoch #81 *****
Loss: 417.056841
Improvement ratio: 0.119701
Feature L2-norm: 106.167455
Learning rate (eta): 0.024899
Total number of feature updates: 1315683
Seconds required for this iteration: 0.794

***** Epoch #82 *****
Loss: 412.592742
Improvement ratio: 0.116776
Feature L2-norm: 106.469560
Learning rate (eta): 0.024898
Total number of feature updates: 13

***** Epoch #117 *****
Loss: 301.332665
Improvement ratio: 0.091290
Feature L2-norm: 115.229092
Learning rate (eta): 0.024855
Total number of feature updates: 1900431
Seconds required for this iteration: 0.977

***** Epoch #118 *****
Loss: 310.350854
Improvement ratio: 0.035555
Feature L2-norm: 115.439314
Learning rate (eta): 0.024853
Total number of feature updates: 1916674
Seconds required for this iteration: 1.021

***** Epoch #119 *****
Loss: 304.385162
Improvement ratio: 0.062398
Feature L2-norm: 115.646478
Learning rate (eta): 0.024852
Total number of feature updates: 1932917
Seconds required for this iteration: 0.826

***** Epoch #120 *****
Loss: 300.774846
Improvement ratio: 0.053226
Feature L2-norm: 115.851673
Learning rate (eta): 0.024851
Total number of feature updates: 1949160
Seconds required for this iteration: 0.658

***** Epoch #121 *****
Loss: 298.265486
Improvement ratio: 0.093241
Feature L2-norm: 116.056481
Learning rate (eta): 0.024850
Total number of feature update

***** Epoch #159 *****
Loss: 237.074141
Improvement ratio: 0.077173
Feature L2-norm: 122.767113
Learning rate (eta): 0.024803
Total number of feature updates: 2582637
Seconds required for this iteration: 0.727

***** Epoch #160 *****
Loss: 234.482514
Improvement ratio: 0.081053
Feature L2-norm: 122.921691
Learning rate (eta): 0.024802
Total number of feature updates: 2598880
Seconds required for this iteration: 0.723

***** Epoch #161 *****
Loss: 240.911518
Improvement ratio: 0.029212
Feature L2-norm: 123.074062
Learning rate (eta): 0.024800
Total number of feature updates: 2615123
Seconds required for this iteration: 0.740

***** Epoch #162 *****
Loss: 241.055904
Improvement ratio: 0.013023
Feature L2-norm: 123.225352
Learning rate (eta): 0.024799
Total number of feature updates: 2631366
Seconds required for this iteration: 0.727

***** Epoch #163 *****
Loss: 241.120490
Improvement ratio: 0.041397
Feature L2-norm: 123.376020
Learning rate (eta): 0.024798
Total number of feature update

***** Epoch #199 *****
Loss: 207.727922
Improvement ratio: 0.043585
Feature L2-norm: 128.255940
Learning rate (eta): 0.024754
Total number of feature updates: 3232357
Seconds required for this iteration: 0.653

***** Epoch #200 *****
Loss: 209.684474
Improvement ratio: 0.036599
Feature L2-norm: 128.377746
Learning rate (eta): 0.024752
Total number of feature updates: 3248600
Seconds required for this iteration: 0.845

***** Epoch #201 *****
Loss: 204.273593
Improvement ratio: 0.050010
Feature L2-norm: 128.499067
Learning rate (eta): 0.024751
Total number of feature updates: 3264843
Seconds required for this iteration: 0.622

***** Epoch #202 *****
Loss: 210.806780
Improvement ratio: -0.001748
Feature L2-norm: 128.621753
Learning rate (eta): 0.024750
Total number of feature updates: 3281086
Seconds required for this iteration: 0.695

SGD terminated with the stopping criteria
Loss: 204.273593
Total seconds required for training: 149.352

Storing the model
Number of active features: 34796

Täägitud fail V6ru_Vastseliina_Misso_id7468_1885a.json
Täägitud fail Saare_K2rla_K2rla_id5736_1827a.json
Täägitud fail Tartu_Sangaste_Kuigatsi_id16414_1872a.json
Täägitud fail Harju_Kose_Kose-Uuem6isa_id2174_1867a.json
Täägitud fail Viljandi_P6ltsamaa_Pajusi_id2717_1871a.json
Täägitud fail Viljandi_P6ltsamaa_Vana-P6ltsamaa_id8104_1888a.json
Täägitud fail Viljandi_K6pu_Suure-K6pu_id7189_1884a.json
Täägitud fail Tartu_V6nnu_Kiidj2rve_id25125_1870a.json
Täägitud fail L22ne_Reigi_K6rgessaare_id22613_1892a.json
Täägitud fail J2rva_Peetri_V2ike-Kareda_id19114_1867a.json
Täägitud fail Harju_Hageri_Kohila_id22158_1890a.json
Täägitud fail P2rnu_P2rnu-Elisabethi_Sauga_id17814_1868a.json
Täägitud fail Harju_Kose_Palvere_id561_1867a.json
Täägitud fail V6ru_R2pina_R2pina_id21267_1867a.json
Täägitud fail J2rva_Tyri_V22tsa_id20541_1902a.json
Täägitud fail Tartu_V6nnu_Ahja_id21768_1867a.json
Täägitud fail Tartu_V6nnu_Ahja_id20314_1888a.json
Täägitud fail Tartu_R6ngu_Aakre_id4282_1888a.json
Täägitud fa

***** Epoch #5 *****
Loss: 3766.530408
Feature L2-norm: 59.706413
Learning rate (eta): 0.049975
Total number of feature updates: 81005
Seconds required for this iteration: 0.661

***** Epoch #6 *****
Loss: 3166.418693
Feature L2-norm: 63.612588
Learning rate (eta): 0.049970
Total number of feature updates: 97206
Seconds required for this iteration: 0.657

***** Epoch #7 *****
Loss: 2646.776785
Feature L2-norm: 66.946955
Learning rate (eta): 0.049965
Total number of feature updates: 113407
Seconds required for this iteration: 0.671

***** Epoch #8 *****
Loss: 2285.580615
Feature L2-norm: 69.844235
Learning rate (eta): 0.049960
Total number of feature updates: 129608
Seconds required for this iteration: 0.658

***** Epoch #9 *****
Loss: 2025.071437
Feature L2-norm: 72.522188
Learning rate (eta): 0.049955
Total number of feature updates: 145809
Seconds required for this iteration: 0.656

***** Epoch #10 *****
Loss: 1792.473416
Feature L2-norm: 74.935699
Learning rate (eta): 0.049950
Total

***** Epoch #48 *****
Loss: 375.720255
Improvement ratio: 0.261059
Feature L2-norm: 112.639018
Learning rate (eta): 0.049761
Total number of feature updates: 777648
Seconds required for this iteration: 0.688

***** Epoch #49 *****
Loss: 360.693753
Improvement ratio: 0.273884
Feature L2-norm: 113.143856
Learning rate (eta): 0.049756
Total number of feature updates: 793849
Seconds required for this iteration: 0.657

***** Epoch #50 *****
Loss: 375.903743
Improvement ratio: 0.197398
Feature L2-norm: 113.631267
Learning rate (eta): 0.049751
Total number of feature updates: 810050
Seconds required for this iteration: 0.670

***** Epoch #51 *****
Loss: 370.618645
Improvement ratio: 0.161148
Feature L2-norm: 114.110611
Learning rate (eta): 0.049746
Total number of feature updates: 826251
Seconds required for this iteration: 0.655

***** Epoch #52 *****
Loss: 364.718900
Improvement ratio: 0.182649
Feature L2-norm: 114.578103
Learning rate (eta): 0.049741
Total number of feature updates: 842452

***** Epoch #88 *****
Loss: 237.837849
Improvement ratio: 0.143643
Feature L2-norm: 127.347971
Learning rate (eta): 0.049564
Total number of feature updates: 1425688
Seconds required for this iteration: 0.668

***** Epoch #89 *****
Loss: 251.915158
Improvement ratio: 0.012582
Feature L2-norm: 127.619943
Learning rate (eta): 0.049559
Total number of feature updates: 1441889
Seconds required for this iteration: 0.659

***** Epoch #90 *****
Loss: 251.465885
Improvement ratio: 0.054406
Feature L2-norm: 127.885845
Learning rate (eta): 0.049554
Total number of feature updates: 1458090
Seconds required for this iteration: 0.665

***** Epoch #91 *****
Loss: 249.921154
Improvement ratio: 0.084411
Feature L2-norm: 128.152216
Learning rate (eta): 0.049549
Total number of feature updates: 1474291
Seconds required for this iteration: 0.671

***** Epoch #92 *****
Loss: 247.096731
Improvement ratio: 0.115131
Feature L2-norm: 128.415702
Learning rate (eta): 0.049544
Total number of feature updates: 14

Täägitud fail Tartu_Kodavere_Pala_id22108_1871a.json
Täägitud fail Tartu_Kambja_Haaslava_id8704_1867a.json
Täägitud fail Tartu_N6o_Aru_id306_1859a.json
Täägitud fail Tartu_Kodavere_Ranna_id15165_1864a.json
Täägitud fail Tartu_V6nnu_Ahja_id17984_1885a.json
Täägitud fail Tartu_Kodavere_Ranna_id14138_1855a.json
Täägitud fail L22ne_Kullamaa_Sooniste_id3541_1880a.json
Täägitud fail J2rva_Tyri_Tyri-Alliku_id2315_1897a.json
Täägitud fail J2rva_Tyri_S2revere_id11683_1874a.json
Täägitud fail Saare_Kaarma_Loona_id7575_1899a.json
Täägitud fail V6ru_P6lva_K2hri_id21590_1851a.json
Täägitud fail Tartu_V6nnu_Ahja_id16351_1884a.json
Täägitud fail Tartu_V6nnu_Ahja_id11361_1872a.json
Täägitud fail Tartu_V6nnu_Ahja_id16121_1883a.json
Täägitud fail Tartu_V6nnu_Ahja_id21444_1866a.json
Täägitud fail J2rva_Tyri_S2revere_id14702_1887a.json
Täägitud fail L22ne_Martna_Martna_id12705_1885a.json
Täägitud fail Tartu_Torma_Avinurme_id6291_1861a.json
Täägitud fail Harju_Kose_Palvere_id16297_1881a.json
Täägitud fail 

Täägitud fail Tartu_Torma_Avinurme_id20455_1871a.json
Täägitud fail L22ne_Martna_Martna_id12611_1884a.json
Täägitud fail Tartu_Maarja-Magdaleena_J6e_id15191_1864a.json
Täägitud fail P2rnu_Halliste_Pornuse_id4791_1869a.json
Täägitud fail Tartu_R6ngu_Aakre_id8042_1827a.json
Täägitud fail Tartu_V6nnu_Ahja_id13144_1876a.json
Täägitud fail Tartu_V6nnu_Ahja_id17542_1885a.json
Täägitud fail Tartu_Kodavere_Pala_id17298_1857a.json
Täägitud fail Tartu_Otep22_Pyhaj2rve_id1642_1884a.json
Täägitud fail Tartu_Kodavere_Alatskivi_id14538_1866a.json
Täägitud fail Tartu_Torma_Avinurme_id24645_1823a.json
Täägitud fail Harju_Keila_Keila_id11680_1886a.json
Täägitud fail Tartu_V6nnu_Ahja_id17059_1884a.json
Täägitud fail Harju_Hageri_Kohila_id10480_1870a.json
Täägitud fail V6ru_Kanepi_Krootuse_id24518_1885a.json
Täägitud fail Harju_Kose_Triigi_id11470_1871a.json
Täägitud fail Tartu_V6nnu_Ahja_id15395_1883a.json
Täägitud fail Tartu_V6nnu_Ahja_id15584_1883a.json
Täägitud fail Harju_J6el2htme_J6el2htme_id9507_1

***** Epoch #24 *****
Loss: 687.369882
Improvement ratio: 0.780349
Feature L2-norm: 95.854510
Learning rate (eta): 0.049880
Total number of feature updates: 388344
Seconds required for this iteration: 0.663

***** Epoch #25 *****
Loss: 661.526812
Improvement ratio: 0.729304
Feature L2-norm: 96.842262
Learning rate (eta): 0.049875
Total number of feature updates: 404525
Seconds required for this iteration: 0.670

***** Epoch #26 *****
Loss: 638.203230
Improvement ratio: 0.662172
Feature L2-norm: 97.790982
Learning rate (eta): 0.049870
Total number of feature updates: 420706
Seconds required for this iteration: 0.668

***** Epoch #27 *****
Loss: 615.435388
Improvement ratio: 0.623929
Feature L2-norm: 98.708761
Learning rate (eta): 0.049865
Total number of feature updates: 436887
Seconds required for this iteration: 0.661

***** Epoch #28 *****
Loss: 604.807922
Improvement ratio: 0.530931
Feature L2-norm: 99.575281
Learning rate (eta): 0.049860
Total number of feature updates: 453068
Seco

***** Epoch #64 *****
Loss: 306.921658
Improvement ratio: 0.095682
Feature L2-norm: 119.546036
Learning rate (eta): 0.049682
Total number of feature updates: 1035584
Seconds required for this iteration: 1.035

***** Epoch #65 *****
Loss: 303.565749
Improvement ratio: 0.057362
Feature L2-norm: 119.914306
Learning rate (eta): 0.049677
Total number of feature updates: 1051765
Seconds required for this iteration: 0.705

***** Epoch #66 *****
Loss: 298.880662
Improvement ratio: 0.176864
Feature L2-norm: 120.282042
Learning rate (eta): 0.049672
Total number of feature updates: 1067946
Seconds required for this iteration: 0.721

***** Epoch #67 *****
Loss: 294.713755
Improvement ratio: 0.079957
Feature L2-norm: 120.649901
Learning rate (eta): 0.049667
Total number of feature updates: 1084127
Seconds required for this iteration: 0.694

***** Epoch #68 *****
Loss: 292.742072
Improvement ratio: 0.082737
Feature L2-norm: 121.000745
Learning rate (eta): 0.049662
Total number of feature updates: 11

Täägitud fail V6ru_Kanepi_Krootuse_id25466_1885a.json
Täägitud fail Tartu_V6nnu_Kiidj2rve_id24772_1867a.json
Täägitud fail J2rva_Tyri_S2revere_id8223_1885a.json
Täägitud fail Tartu_Laiuse_Kivij2rve_id5885_1864a.json
Täägitud fail Harju_Kose_Palvere_id25266_1873a.json
Täägitud fail Tartu_Torma_Avinurme_id6491_1889a.json
Täägitud fail Harju_Juuru_Kaiu_id9068_1881a.json
Täägitud fail L22ne_Vormsi_Vormsi_id14916_1888a.json
Täägitud fail J2rva_Tyri_Kirna_id23407_1872a.json
Täägitud fail J2rva_Peetri_V2ike-Kareda_id20027_1872a.json
Täägitud fail Tartu_Kodavere_Pala_id18165_1862a.json
Täägitud fail Tartu_V6nnu_Ahja_id14178_1882a.json
Täägitud fail Tartu_V6nnu_Ahja_id13959_1882a.json
Täägitud fail Viru_V2ike-Maarja_Porkuni_id11941_1878a.json
Täägitud fail Tartu_Kodavere_Pala_id17750_1861a.json
Täägitud fail V6ru_R2pina_R2pina_id12544_1866a.json
Täägitud fail Tartu_Kodavere_Alatskivi_id9807_1879a.json
Täägitud fail Tartu_V6nnu_Ahja_id19012_1887a.json
Täägitud fail Viljandi_K6pu_Suure-K6pu_id131

Täägitud fail Tartu_V6nnu_Ahja_id21776_1868a.json
Täägitud fail Tartu_V6nnu_Ahja_id14675_1882a.json
Täägitud fail Tartu_V6nnu_Ahja_id21777_1868a.json
Täägitud fail Harju_Rapla_Rapla_id17272_1868a.json
Täägitud fail Harju_Kuusalu_Kolga_id11722_1887a.json
Täägitud fail Tartu_V6nnu_Ahja_id17574_1885a.json
Täägitud fail J2rva_Tyri_Kirna_id23791_1874a.json
Täägitud fail Harju_Juuru_Kaiu_id16276_1873a.json
Täägitud fail V6ru_P6lva_Kiuma_id7579_1880a.json
Täägitud fail V6ru_R2pina_Kahkva_id8809_1888a.json
Täägitud fail Viljandi_K6pu_Suure-K6pu_id12428_1884a.json
Täägitud fail V6ru_R6uge_Saaluse_id11377_1879a.json
Täägitud fail Viljandi_K6pu_Suure-K6pu_id7202_1884a.json
Täägitud fail Viljandi_P6ltsamaa_Uue-P6ltsamaa_id11984_1856a.json
Täägitud fail Tartu_V6nnu_Ahja_id14978_1883a.json
Täägitud fail J2rva_Tyri_V22tsa_id22177_1911a.json
Täägitud fail L22ne_Kullamaa_Piirsalu_id12912_1895a.json
Täägitud fail J2rva_Tyri_V22tsa_id19055_1898a.json
Täägitud fail Harju_Kose_Palvere_id13987_1880a.json
Tä

***** Epoch #14 *****
Loss: 1144.464608
Improvement ratio: 2.915520
Feature L2-norm: 80.985580
Learning rate (eta): 0.049930
Total number of feature updates: 214830
Seconds required for this iteration: 0.652

***** Epoch #15 *****
Loss: 1058.793567
Improvement ratio: 2.338032
Feature L2-norm: 82.546938
Learning rate (eta): 0.049925
Total number of feature updates: 230175
Seconds required for this iteration: 0.651

***** Epoch #16 *****
Loss: 996.151519
Improvement ratio: 1.908475
Feature L2-norm: 84.037795
Learning rate (eta): 0.049920
Total number of feature updates: 245520
Seconds required for this iteration: 0.666

***** Epoch #17 *****
Loss: 927.061781
Improvement ratio: 1.628360
Feature L2-norm: 85.429911
Learning rate (eta): 0.049915
Total number of feature updates: 260865
Seconds required for this iteration: 0.653

***** Epoch #18 *****
Loss: 875.236527
Improvement ratio: 1.384470
Feature L2-norm: 86.745769
Learning rate (eta): 0.049910
Total number of feature updates: 276210
Se

***** Epoch #56 *****
Loss: 306.044011
Improvement ratio: 0.175193
Feature L2-norm: 113.130273
Learning rate (eta): 0.049722
Total number of feature updates: 859320
Seconds required for this iteration: 0.673

***** Epoch #57 *****
Loss: 303.735660
Improvement ratio: 0.171732
Feature L2-norm: 113.545524
Learning rate (eta): 0.049717
Total number of feature updates: 874665
Seconds required for this iteration: 0.678

***** Epoch #58 *****
Loss: 299.936566
Improvement ratio: 0.155120
Feature L2-norm: 113.952007
Learning rate (eta): 0.049712
Total number of feature updates: 890010
Seconds required for this iteration: 0.649

***** Epoch #59 *****
Loss: 295.492708
Improvement ratio: 0.147650
Feature L2-norm: 114.349813
Learning rate (eta): 0.049707
Total number of feature updates: 905355
Seconds required for this iteration: 0.659

***** Epoch #60 *****
Loss: 289.500988
Improvement ratio: 0.163860
Feature L2-norm: 114.743433
Learning rate (eta): 0.049702
Total number of feature updates: 920700

***** Epoch #96 *****
Loss: 210.598952
Improvement ratio: 0.076923
Feature L2-norm: 125.728953
Learning rate (eta): 0.049525
Total number of feature updates: 1473120
Seconds required for this iteration: 0.659

***** Epoch #97 *****
Loss: 211.101982
Improvement ratio: 0.056441
Feature L2-norm: 125.970079
Learning rate (eta): 0.049520
Total number of feature updates: 1488465
Seconds required for this iteration: 0.649

***** Epoch #98 *****
Loss: 209.819893
Improvement ratio: 0.069955
Feature L2-norm: 126.207001
Learning rate (eta): 0.049515
Total number of feature updates: 1503810
Seconds required for this iteration: 0.663

***** Epoch #99 *****
Loss: 203.144573
Improvement ratio: 0.092982
Feature L2-norm: 126.444502
Learning rate (eta): 0.049510
Total number of feature updates: 1519155
Seconds required for this iteration: 0.650

***** Epoch #100 *****
Loss: 203.837971
Improvement ratio: 0.072914
Feature L2-norm: 126.677624
Learning rate (eta): 0.049505
Total number of feature updates: 1

***** Epoch #136 *****
Loss: 171.961312
Improvement ratio: 0.035631
Feature L2-norm: 133.844554
Learning rate (eta): 0.049329
Total number of feature updates: 2086920
Seconds required for this iteration: 0.649

***** Epoch #137 *****
Loss: 170.448075
Improvement ratio: 0.039338
Feature L2-norm: 134.014485
Learning rate (eta): 0.049324
Total number of feature updates: 2102265
Seconds required for this iteration: 0.655

***** Epoch #138 *****
Loss: 168.849091
Improvement ratio: 0.044594
Feature L2-norm: 134.185346
Learning rate (eta): 0.049319
Total number of feature updates: 2117610
Seconds required for this iteration: 0.656

***** Epoch #139 *****
Loss: 169.511166
Improvement ratio: 0.036755
Feature L2-norm: 134.353552
Learning rate (eta): 0.049315
Total number of feature updates: 2132955
Seconds required for this iteration: 0.682

***** Epoch #140 *****
Loss: 168.931318
Improvement ratio: 0.049025
Feature L2-norm: 134.519381
Learning rate (eta): 0.049310
Total number of feature update

Täägitud fail Tartu_Otep22_Pyhaj2rve_id1280_1884a.json
Täägitud fail Saare_Kihelkonna_Lymanda_id8401_1834a.json
Täägitud fail V6ru_R2pina_Kahkva_id10870_1888a.json
Täägitud fail L22ne_Kullamaa_Piirsalu_id14393_1908a.json
Täägitud fail Tartu_V6nnu_Ahja_id9602_1871a.json
Täägitud fail Tartu_V6nnu_Ahja_id13567_1881a.json
Täägitud fail Tartu_Kodavere_Alatskivi_id22106_1881a.json
Täägitud fail L22ne_Martna_Martna_id12249_1881a.json
Täägitud fail Harju_Hageri_Kohila_id1346_1888a.json
Täägitud fail Viljandi_P6ltsamaa_Uue-P6ltsamaa_id12394_1856a.json
Täägitud fail Harju_Hageri_Kohila_id5466_1889a.json
Täägitud fail Harju_Kose_Kose-Uuem6isa_id1725_1867a.json
Täägitud fail Tartu_Torma_Avinurme_id23583_1872a.json
Täägitud fail J2rva_Tyri_V22tsa_id17884_1888a.json
Täägitud fail J2rva_Tyri_S2revere_id16082_1889a.json
Täägitud fail V6ru_Vastseliina_Misso_id17348_1882a.json
Täägitud fail P2rnu_Audru_V6lla_id6632_1878a.json
Täägitud fail P2rnu_Audru_V6lla_id5372_1878a.json
Täägitud fail Tartu_V6nnu_Ah

Täägitud fail V6ru_Vastseliina_Misso_id13301_1880a.json
Täägitud fail L22ne_Emmaste_Emmaste_id15690_1896a.json
Täägitud fail Tartu_V6nnu_Ahja_id14973_1883a.json
Täägitud fail Tartu_V6nnu_Ahja_id21238_1889a.json
Täägitud fail Tartu_V6nnu_Ahja_id9656_1871a.json
Täägitud fail Saare_Kaarma_Loona_id1233_1912a.json
Täägitud fail Tartu_Torma_Avinurme_id21307_1872a.json
Täägitud fail V6ru_Vastseliina_Misso_id20674_1882a.json
Täägitud fail Harju_Jyri_Rae_id3658_1888a.json
Täägitud fail J2rva_Tyri_S2revere_id16041_1889a.json
Täägitud fail Tartu_V6nnu_Ahja_id21350_1889a.json
Täägitud fail Harju_Kose_Palvere_id22755_1886a.json
Täägitud fail Harju_J6el2htme_J6el2htme_id7359_1868a.json
Täägitud fail J2rva_Tyri_Vahastu_id16427_1883a.json
Täägitud fail J2rva_Tyri_S2revere_id5632_1881a.json
Täägitud fail V6ru_Vastseliina_Misso_id11636_1886a.json
Täägitud fail Tartu_V6nnu_Ahja_id19717_1888a.json
Täägitud fail Harju_Hageri_Kohila_id7391_1868a.json
Täägitud fail L22ne_Kullamaa_Sooniste_id3686_1883a.json
T

***** Epoch #15 *****
Loss: 2176.142842
Improvement ratio: 1.552380
Feature L2-norm: 65.022381
Learning rate (eta): 0.024981
Total number of feature updates: 234210
Seconds required for this iteration: 0.774

***** Epoch #16 *****
Loss: 2043.948692
Improvement ratio: 1.357639
Feature L2-norm: 66.458869
Learning rate (eta): 0.024980
Total number of feature updates: 249824
Seconds required for this iteration: 0.641

***** Epoch #17 *****
Loss: 1923.793349
Improvement ratio: 1.225887
Feature L2-norm: 67.831521
Learning rate (eta): 0.024979
Total number of feature updates: 265438
Seconds required for this iteration: 0.646

***** Epoch #18 *****
Loss: 1838.758480
Improvement ratio: 1.086247
Feature L2-norm: 69.150296
Learning rate (eta): 0.024978
Total number of feature updates: 281052
Seconds required for this iteration: 0.753

***** Epoch #19 *****
Loss: 1732.619449
Improvement ratio: 0.996669
Feature L2-norm: 70.400323
Learning rate (eta): 0.024976
Total number of feature updates: 296666

***** Epoch #57 *****
Loss: 578.808992
Improvement ratio: 0.184390
Feature L2-norm: 97.100706
Learning rate (eta): 0.024929
Total number of feature updates: 889998
Seconds required for this iteration: 0.704

***** Epoch #58 *****
Loss: 569.850625
Improvement ratio: 0.186897
Feature L2-norm: 97.530595
Learning rate (eta): 0.024928
Total number of feature updates: 905612
Seconds required for this iteration: 0.728

***** Epoch #59 *****
Loss: 552.008646
Improvement ratio: 0.204395
Feature L2-norm: 97.950311
Learning rate (eta): 0.024926
Total number of feature updates: 921226
Seconds required for this iteration: 0.799

***** Epoch #60 *****
Loss: 545.405551
Improvement ratio: 0.196173
Feature L2-norm: 98.367105
Learning rate (eta): 0.024925
Total number of feature updates: 936840
Seconds required for this iteration: 0.780

***** Epoch #61 *****
Loss: 548.577535
Improvement ratio: 0.175557
Feature L2-norm: 98.777225
Learning rate (eta): 0.024924
Total number of feature updates: 952454
Seco

***** Epoch #99 *****
Loss: 357.123972
Improvement ratio: 0.087272
Feature L2-norm: 110.741665
Learning rate (eta): 0.024877
Total number of feature updates: 1545786
Seconds required for this iteration: 0.707

***** Epoch #100 *****
Loss: 352.440404
Improvement ratio: 0.078156
Feature L2-norm: 110.989852
Learning rate (eta): 0.024876
Total number of feature updates: 1561400
Seconds required for this iteration: 0.864

***** Epoch #101 *****
Loss: 349.877969
Improvement ratio: 0.100137
Feature L2-norm: 111.234204
Learning rate (eta): 0.024874
Total number of feature updates: 1577014
Seconds required for this iteration: 0.799

***** Epoch #102 *****
Loss: 348.656517
Improvement ratio: 0.085407
Feature L2-norm: 111.476315
Learning rate (eta): 0.024873
Total number of feature updates: 1592628
Seconds required for this iteration: 0.815

***** Epoch #103 *****
Loss: 345.549925
Improvement ratio: 0.083238
Feature L2-norm: 111.715764
Learning rate (eta): 0.024872
Total number of feature updates

***** Epoch #140 *****
Loss: 272.114136
Improvement ratio: 0.040894
Feature L2-norm: 119.260687
Learning rate (eta): 0.024826
Total number of feature updates: 2185960
Seconds required for this iteration: 0.784

***** Epoch #141 *****
Loss: 269.695221
Improvement ratio: 0.082004
Feature L2-norm: 119.435616
Learning rate (eta): 0.024825
Total number of feature updates: 2201574
Seconds required for this iteration: 0.997

***** Epoch #142 *****
Loss: 270.391651
Improvement ratio: 0.029834
Feature L2-norm: 119.609194
Learning rate (eta): 0.024824
Total number of feature updates: 2217188
Seconds required for this iteration: 0.685

***** Epoch #143 *****
Loss: 259.533449
Improvement ratio: 0.064267
Feature L2-norm: 119.781303
Learning rate (eta): 0.024823
Total number of feature updates: 2232802
Seconds required for this iteration: 0.665

***** Epoch #144 *****
Loss: 260.640783
Improvement ratio: 0.080302
Feature L2-norm: 119.952811
Learning rate (eta): 0.024821
Total number of feature update

Täägitud fail L22ne_Ridala_Sinalepa_id25437_1889a.json
Täägitud fail J2rva_Tyri_Tyri-Alliku_id3082_1900a.json
Täägitud fail V6ru_R6uge_Saaluse_id10962_1879a.json
Täägitud fail V6ru_Vastseliina_Misso_id22085_1883a.json
Täägitud fail Tartu_V6nnu_Ahja_id13957_1882a.json
Täägitud fail Harju_Kose_Palvere_id16729_1881a.json
Täägitud fail Viljandi_K6pu_Suure-K6pu_id4665_1883a.json
Täägitud fail P2rnu_Tori_Tori_id25337_1890a.json
Täägitud fail Tartu_V6nnu_Ahja_id22380_1869a.json
Täägitud fail Harju_Kose_Triigi_id9421_1867a.json
Täägitud fail Tartu_Kodavere_Ranna_id15035_1862a.json
Täägitud fail Harju_Kose_Triigi_id9938_1870a.json
Täägitud fail Viru_Rakvere_S6meru_id5673_1882a.json
Täägitud fail Tartu_Otep22_Pyhaj2rve_id1540_1884a.json
Täägitud fail J2rva_Tyri_V22tsa_id22178_1912a.json
Täägitud fail Harju_Juuru_Kaiu_id9874_1882a.json
Täägitud fail Tartu_Otep22_Pyhaj2rve_id7840_1885a.json
Täägitud fail V6ru_R2pina_Kahkva_id8869_1888a.json
Täägitud fail Tartu_R6ngu_Aakre_id9106_1827a.json
Täägitu

Täägitud fail Harju_Rapla_Rapla_id18671_1869a.json
Täägitud fail Tartu_V6nnu_Ahja_id18214_1886a.json
Täägitud fail Tartu_Laiuse_Kivij2rve_id1436_1856a.json
Täägitud fail Tartu_V6nnu_Ahja_id22561_1878a.json
Täägitud fail Tartu_Kodavere_Ranna_id14286_1858a.json
Täägitud fail P2rnu_Halliste_Penuja_id758_1885a.json
Täägitud fail Saare_Kihelkonna_Atla_id6893_1872a.json
Täägitud fail Harju_Hageri_Kohila_id10769_1873a.json
Täägitud fail Tartu_V6nnu_Ahja_id16318_1883a.json
Täägitud fail J2rva_Anna_Eivere_id6239_1879a.json
Täägitud fail Tartu_Kodavere_Alatskivi_id12871_1876a.json
Täägitud fail J2rva_Tyri_V22tsa_id16931_1886a.json
Täägitud fail Harju_Harju-Madise_Padise-Kloostri_id2778_1867a.json
Täägitud fail Viljandi_K6pu_Suure-K6pu_id4977_1883a.json
Täägitud fail J2rva_Tyri_S2revere_id8673_1886a.json
Täägitud fail V6ru_R2pina_Kahkva_id14118_1889a.json
Täägitud fail L22ne_Pyhalepa_K2rdla_id22309_1867a.json
Täägitud fail L22ne_Pyhalepa_K2rdla_id25096_1877a.json
Täägitud fail Tartu_Kodavere_Alat