In [1]:
import json
import time
import os
import re

from modules.preprocessing_protocols import preprocess_text
from modules.extract_results import extract_results_to_txt_file, display_results_by_subdistribution,\
                                    display_results_by_named_entity, display_confusion_matrix
from contemporary_ner_training.conll_ner_importer import conll_to_ner_labelling  
from estnltk import Text
from estnltk.taggers import NerTagger
from estnltk.taggers import WordLevelNerTagger
from estnltk.converters import text_to_json
from estnltk.converters import json_to_text
from estnltk.layer_operations import flatten

from estnltk.taggers.estner.ner_trainer import NerTrainer
from estnltk.taggers.estner.model_storage_util import ModelStorageUtil

from estnltk.taggers import VabamorfCorpusTagger
# Shared tagger instance; create_training_texts and tag_files call its .tag()
# on a list of texts when the use_vabamorfcorpustagger flag is enabled.
vm_corpus_tagger = VabamorfCorpusTagger()

In [2]:
def find(name, path):
    """Locate a file called ``name`` somewhere below the directory ``path``.

    The tree is traversed with ``os.walk``; the first directory whose file
    list contains ``name`` determines the result.

    Returns:
        The joined path of the first match, or ``None`` when no directory
        under ``path`` contains a file with that name.
    """
    for current_dir, _subdirs, filenames in os.walk(path):
        if name not in filenames:
            continue
        return os.path.join(current_dir, name)
    return None

CONLL NER test:

In [3]:
# Disabled experiment: the snippet below is wrapped in a string literal so it
# is not executed (the cell merely echoes the string). If re-enabled, it would
# build `texts` by calling conll_to_ner_labelling on every file in
# 'contemporary_ner_training' whose name starts with 'estner_split_1'.
'''
texts = []
for file in os.listdir('contemporary_ner_training'):
    if file.startswith('estner_split_1'):
        texts.append(conll_to_ner_labelling(os.path.join('contemporary_ner_training', file), 'gold_wordner'))               
'''

"\ntexts = []\nfor file in os.listdir('contemporary_ner_training'):\n    if file.startswith('estner_split_1'):\n        texts.append(conll_to_ner_labelling(os.path.join('contemporary_ner_training', file), 'gold_wordner'))               \n"

Flags:

In [4]:
# When True, the 'morph_analysis' layer produced by preprocess_text is popped
# and the text is passed through vm_corpus_tagger instead, for both the
# training texts and the test files.
use_vabamorfcorpustagger = False

---

In [5]:
# Maps each corpus file name to the subdistribution it belongs to (kept as a
# string; consumers convert it with int()). Every non-blank line of
# divided_corpus.txt is expected to look like "<file name>:<subdistribution>".
files = {}

with open(os.path.join('..', 'data', 'divided_corpus.txt'), 'r', encoding = 'UTF-8') as f:
    txt = f.readlines()

for corpus_line in txt:
    # Blank lines (e.g. a trailing empty line) carry no mapping and would
    # otherwise break the two-way unpacking below.
    if not corpus_line.strip():
        continue
    file_name, subdistribution = corpus_line.split(":")
    files[file_name] = subdistribution.rstrip("\n")

In [6]:
def create_training_texts(filenames):
    """Load and preprocess the texts that the NER model is trained on.

    Each name in ``filenames`` is read from
    ``../data/vallakohtufailid-json-flattened``, converted with
    ``json_to_text`` and run through ``preprocess_text``. When the global
    ``use_vabamorfcorpustagger`` flag is set, the 'morph_analysis' layer is
    popped and the text is passed to ``vm_corpus_tagger`` instead.

    Files on the internal exclusion list are skipped without being opened.

    Args:
        filenames: iterable of JSON file names (not paths).

    Returns:
        list of preprocessed texts, in the order of ``filenames``.
    """
    print("Valmistan ette treenimistekste")

    # These files don't work because the protocols are written in a different language,
    # which the goldstandard didn't recognise, hence have no goldstandard tags.
    files_not_working = {
        'J2rva_Tyri_V22tsa_id22177_1911a.json',
        'J2rva_Tyri_V22tsa_id18538_1894a.json',
        'J2rva_Tyri_V22tsa_id22155_1911a.json',
        'Saare_Kihelkonna_Kotlandi_id18845_1865a.json',
        'P2rnu_Halliste_Abja_id257_1844a.json',
        'Saare_Kaarma_Loona_id7575_1899a.json',
        'J2rva_Tyri_V22tsa_id22266_1913a.json',
        'J2rva_Tyri_V22tsa_id22178_1912a.json',
    }

    start = time.time()
    training_texts = []
    for filename in filenames:
        # Decide before touching the file system: excluded files are never opened.
        if filename in files_not_working:
            continue

        # Read the raw JSON and release the file handle before the slow preprocessing.
        with open(os.path.join('..', 'data', 'vallakohtufailid-json-flattened', filename), 'r', encoding='UTF-8') as file:
            raw_json = file.read()

        tagged_text = preprocess_text(json_to_text(raw_json))
        if use_vabamorfcorpustagger:
            tagged_text.pop_layer('morph_analysis')
            vm_corpus_tagger.tag([tagged_text])
        training_texts.append(tagged_text)
    print(f"Treenimistekstid ette valmistatud {time.time() - start} sekundiga.")
    return training_texts

In [7]:
def train_nertagger(training_texts, new_model_dir):
    """Train a NER model from ``training_texts`` using ``new_model_dir``.

    The trainer settings are loaded from ``new_model_dir`` through
    ``ModelStorageUtil``; the 'gold_wordner' layer of the texts is used as the
    training layer and ``new_model_dir`` is passed on as the model directory.
    Start and finish messages (with the elapsed time) are printed.
    """
    print("Alustan NerTaggeri treenimist.")
    began_at = time.time()

    settings = ModelStorageUtil(new_model_dir).load_settings()
    ner_trainer = NerTrainer(settings)
    ner_trainer.train(training_texts, layer='gold_wordner', model_dir=new_model_dir)

    print(f"NerTagger treenitud {time.time() - began_at} sekundiga.")

In [8]:
def get_testing_and_training_subdistribution(subdistribution):
    """Split the subdistributions 1..5 into one test fold and the training folds.

    Args:
        subdistribution: the subdistribution (1-5) to hold out for testing.

    Returns:
        A ``(testing, training)`` tuple: ``testing`` is the held-out
        subdistribution and ``training`` is the list of the remaining four,
        in ascending order.

    Raises:
        ValueError: if ``subdistribution`` is not one of 1, 2, 3, 4, 5.
    """
    all_subdistributions = [1, 2, 3, 4, 5]

    # Pick the matching element from the list so the returned value is always
    # one of the canonical ints.
    testing = next((y for y in all_subdistributions if y == subdistribution), None)
    if testing is None:
        raise ValueError(
            f"subdistribution must be one of {all_subdistributions}, got {subdistribution!r}"
        )

    training = [y for y in all_subdistributions if y != testing]
    return testing, training

In [9]:
def tag_files(model_dir, testing_files, nertagger, use_vabamorfcorpustagger):
    """Tag the raw test files with ``nertagger`` and save the results as JSON.

    For every name in ``testing_files`` the matching ``.txt`` file is looked
    up (with ``find``) under ``../data/vallakohtufailid``, preprocessed, tagged
    and flattened into a 'flat_ner' layer. The helper layers listed in
    ``removed_layers`` are popped and the text is written to
    ``<model_dir>/vallakohtufailid-trained-nertagger/<test_file>``.

    Args:
        model_dir: directory under which the output folder is created.
        testing_files: iterable of ``.json`` file names to tag.
        nertagger: tagger whose ``tag(text)`` call provides the 'ner' layer.
        use_vabamorfcorpustagger: when true, the 'morph_analysis' layer is
            popped and the text is passed to ``vm_corpus_tagger`` before
            NER tagging.

    Raises:
        FileNotFoundError: if the source ``.txt`` of a test file cannot be found.
    """
    removed_layers = ['sentences', 'morph_analysis', 'compound_tokens', 'ner', 'words', 'tokens']
    source_root = os.path.join('..', 'data', 'vallakohtufailid')

    print("\n\nAlustan failide märgendamist.")
    start = time.time()

    # The output location does not depend on the file being tagged, so create
    # it once; exist_ok avoids the check-then-create race and parents are
    # created as needed.
    output_dir = os.path.join(model_dir, 'vallakohtufailid-trained-nertagger')
    os.makedirs(output_dir, exist_ok=True)

    for test_file in testing_files:
        source_name = test_file.replace(".json", ".txt")
        source_path = find(source_name, source_root)
        if source_path is None:
            # find() returns None when nothing matches; fail with a clear message
            # instead of letting open(None) raise an opaque TypeError.
            raise FileNotFoundError(f"{source_name} not found under {source_root}")

        with open(source_path, 'r', encoding='UTF-8') as f:
            text = f.read()

        # Special case for this one file: put a space between consecutive dots.
        # NOTE(review): presumably a tokenisation workaround - confirm.
        if test_file == "Tartu_V6nnu_Ahja_id3502_1882a.json":
            text = text.replace('..', '. .')
        text = preprocess_text(Text(text))

        if use_vabamorfcorpustagger:
            text.pop_layer('morph_analysis')
            # The corpus tagger works on a list of texts.
            corpus = [text]
            vm_corpus_tagger.tag(corpus)
            text = corpus[0]
        nertagger.tag(text)
        text.add_layer(flatten(text['ner'], 'flat_ner'))

        for layer_name in removed_layers:
            text.pop_layer(layer_name)

        text_to_json(text, file=os.path.join(output_dir, test_file))

        print(f'Märgendatud fail {test_file}')
    print(f"Failid märgendatud {time.time() - start} sekundiga.")

In [10]:
def train_model(model_directory):
    """Train and evaluate a model over the five subdistributions.

    For each subdistribution 1..5 the files of the other four are used to
    train a model in ``models/<model_directory>``; a ``NerTagger`` loaded from
    that directory then tags the files of the held-out subdistribution.
    After the last round the results are extracted with
    ``extract_results_to_txt_file``.

    Relies on the module-level ``files`` mapping (file name -> subdistribution)
    and the ``use_vabamorfcorpustagger`` flag.
    """
    # The model location is the same for every round.
    new_model_dir = os.path.join('models', model_directory)

    # Parse the subdistribution ids once instead of on every comparison.
    fold_by_file = {name: int(fold) for name, fold in files.items()}

    for subdistribution in (1, 2, 3, 4, 5):
        testing, training = get_testing_and_training_subdistribution(subdistribution)

        # Train on every file that belongs to one of the training subdistributions.
        filenames = [name for name, fold in fold_by_file.items() if fold in training]
        train_nertagger(create_training_texts(filenames), new_model_dir)

        # Load a tagger from the freshly trained model.
        tagger = NerTagger(model_dir = new_model_dir)

        # Tag the held-out files with it.
        testing_files = [name for name, fold in fold_by_file.items() if fold == testing]
        tag_files(new_model_dir, testing_files, tagger, use_vabamorfcorpustagger)

    # Get results of model
    extract_results_to_txt_file(model_directory, files)

    print(f"Mudel {model_directory} treenitud.")

To train the model, the directory `models/<model_directory>` must already exist and contain a `settings.py` file.

In [12]:
# train_model joins this relative path onto 'models', so the model directory is
# models/model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features/model_gazetteer_both_new
train_model(os.path.join('model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features','model_gazetteer_both_new'))

Valmistan ette treenimistekste
Treenimistekstid ette valmistatud 103.38949155807495 sekundiga.
Alustan NerTaggeri treenimist.
Feature generation
type: CRF1d
feature.minfreq: 0.000000
feature.possible_states: 0
feature.possible_transitions: 0
0....1....2....3....4....5....6....7....8....9....10
Number of features: 347398
Seconds required: 1.274

Stochastic Gradient Descent (SGD)
c2: 0.001000
max_iterations: 1000
period: 10
delta: 0.000001

Calibrating the learning rate (eta)
calibration.eta: 0.100000
calibration.rate: 2.000000
calibration.samples: 1000
calibration.candidates: 10
calibration.max_trials: 20
Initial loss: 30999.990087
Trial #1 (eta = 0.100000): 4716.521098
Trial #2 (eta = 0.200000): 7963.921876
Trial #3 (eta = 0.400000): 14993.010023
Trial #4 (eta = 0.800000): 28508.454329
Trial #5 (eta = 1.600000): 45844.605022 (worse)
Trial #6 (eta = 0.050000): 3520.310148
Trial #7 (eta = 0.025000): 3252.350917
Trial #8 (eta = 0.012500): 3430.074467
Trial #9 (eta = 0.006250): 3925.276942

***** Epoch #39 *****
Loss: 859.714826
Improvement ratio: 0.353073
Feature L2-norm: 90.092984
Learning rate (eta): 0.024951
Total number of feature updates: 633477
Seconds required for this iteration: 0.459

***** Epoch #40 *****
Loss: 833.842091
Improvement ratio: 0.346102
Feature L2-norm: 90.728224
Learning rate (eta): 0.024950
Total number of feature updates: 649720
Seconds required for this iteration: 0.457

***** Epoch #41 *****
Loss: 821.161730
Improvement ratio: 0.319791
Feature L2-norm: 91.349373
Learning rate (eta): 0.024949
Total number of feature updates: 665963
Seconds required for this iteration: 0.459

***** Epoch #42 *****
Loss: 801.995280
Improvement ratio: 0.307888
Feature L2-norm: 91.953071
Learning rate (eta): 0.024948
Total number of feature updates: 682206
Seconds required for this iteration: 0.466

***** Epoch #43 *****
Loss: 783.237582
Improvement ratio: 0.292873
Feature L2-norm: 92.545692
Learning rate (eta): 0.024946
Total number of feature updates: 698449
Seco

***** Epoch #79 *****
Loss: 439.614803
Improvement ratio: 0.132970
Feature L2-norm: 107.867610
Learning rate (eta): 0.024902
Total number of feature updates: 1283197
Seconds required for this iteration: 0.465

***** Epoch #80 *****
Loss: 435.967560
Improvement ratio: 0.124870
Feature L2-norm: 108.186565
Learning rate (eta): 0.024900
Total number of feature updates: 1299440
Seconds required for this iteration: 0.460

***** Epoch #81 *****
Loss: 429.547547
Improvement ratio: 0.136596
Feature L2-norm: 108.500771
Learning rate (eta): 0.024899
Total number of feature updates: 1315683
Seconds required for this iteration: 0.486

***** Epoch #82 *****
Loss: 430.701944
Improvement ratio: 0.105934
Feature L2-norm: 108.811192
Learning rate (eta): 0.024898
Total number of feature updates: 1331926
Seconds required for this iteration: 0.604

***** Epoch #83 *****
Loss: 419.802564
Improvement ratio: 0.144872
Feature L2-norm: 109.115654
Learning rate (eta): 0.024897
Total number of feature updates: 13

***** Epoch #118 *****
Loss: 317.807807
Improvement ratio: 0.062578
Feature L2-norm: 117.984569
Learning rate (eta): 0.024853
Total number of feature updates: 1916674
Seconds required for this iteration: 0.466

***** Epoch #119 *****
Loss: 319.607057
Improvement ratio: 0.048914
Feature L2-norm: 118.197710
Learning rate (eta): 0.024852
Total number of feature updates: 1932917
Seconds required for this iteration: 0.457

***** Epoch #120 *****
Loss: 311.108665
Improvement ratio: 0.077410
Feature L2-norm: 118.408446
Learning rate (eta): 0.024851
Total number of feature updates: 1949160
Seconds required for this iteration: 0.459

***** Epoch #121 *****
Loss: 310.941548
Improvement ratio: 0.058787
Feature L2-norm: 118.615924
Learning rate (eta): 0.024850
Total number of feature updates: 1965403
Seconds required for this iteration: 0.456

***** Epoch #122 *****
Loss: 309.072144
Improvement ratio: 0.076467
Feature L2-norm: 118.821965
Learning rate (eta): 0.024848
Total number of feature update

***** Epoch #161 *****
Loss: 254.716761
Improvement ratio: 0.014883
Feature L2-norm: 125.788219
Learning rate (eta): 0.024800
Total number of feature updates: 2615123
Seconds required for this iteration: 0.462

***** Epoch #162 *****
Loss: 247.278871
Improvement ratio: 0.057667
Feature L2-norm: 125.943913
Learning rate (eta): 0.024799
Total number of feature updates: 2631366
Seconds required for this iteration: 0.458

***** Epoch #163 *****
Loss: 246.697721
Improvement ratio: 0.050148
Feature L2-norm: 126.099089
Learning rate (eta): 0.024798
Total number of feature updates: 2647609
Seconds required for this iteration: 0.466

***** Epoch #164 *****
Loss: 248.791159
Improvement ratio: 0.039745
Feature L2-norm: 126.252147
Learning rate (eta): 0.024797
Total number of feature updates: 2663852
Seconds required for this iteration: 0.457

***** Epoch #165 *****
Loss: 245.908198
Improvement ratio: 0.046175
Feature L2-norm: 126.405010
Learning rate (eta): 0.024795
Total number of feature update

***** Epoch #203 *****
Loss: 219.380222
Improvement ratio: -0.001774
Feature L2-norm: 131.589555
Learning rate (eta): 0.024749
Total number of feature updates: 3297329
Seconds required for this iteration: 0.457

SGD terminated with the stopping criteria
Loss: 207.374654
Total seconds required for training: 95.304

Storing the model
Number of active features: 347398 (347398)
Number of active attributes: 314124 (314124)
Number of active labels: 11 (11)
Writing labels
Writing attributes
Writing feature references for transitions
Writing feature references for attributes
Seconds required: 0.577

NerTagger treenitud 712.3346600532532 sekundiga.


Alustan failide märgendamist.
Märgendatud fail V6ru_R2pina_Kahkva_id24674_1868a.json
Märgendatud fail L22ne_Martna_Martna_id14205_1869a.json
Märgendatud fail Harju_Juuru_Juuru_id19451_1886a.json
Märgendatud fail Tartu_Kodavere_Ranna_id11316_1845a.json
Märgendatud fail J2rva_Peetri_V2ike-Kareda_id22448_1881a.json
Märgendatud fail L22ne_Vormsi_Vormsi

Märgendatud fail Viljandi_P6ltsamaa_Pajusi_id2717_1871a.json
Märgendatud fail Viljandi_P6ltsamaa_Vana-P6ltsamaa_id8104_1888a.json
Märgendatud fail Viljandi_K6pu_Suure-K6pu_id7189_1884a.json
Märgendatud fail Tartu_V6nnu_Kiidj2rve_id25125_1870a.json
Märgendatud fail L22ne_Reigi_K6rgessaare_id22613_1892a.json
Märgendatud fail J2rva_Peetri_V2ike-Kareda_id19114_1867a.json
Märgendatud fail Harju_Hageri_Kohila_id22158_1890a.json
Märgendatud fail P2rnu_P2rnu-Elisabethi_Sauga_id17814_1868a.json
Märgendatud fail Harju_Kose_Palvere_id561_1867a.json
Märgendatud fail V6ru_R2pina_R2pina_id21267_1867a.json
Märgendatud fail J2rva_Tyri_V22tsa_id20541_1902a.json
Märgendatud fail Tartu_V6nnu_Ahja_id21768_1867a.json
Märgendatud fail Tartu_V6nnu_Ahja_id20314_1888a.json
Märgendatud fail Tartu_R6ngu_Aakre_id4282_1888a.json
Märgendatud fail J2rva_Tyri_S2revere_id6796_1883a.json
Märgendatud fail V6ru_R2pina_R2pina_id10711_1868a.json
Märgendatud fail Harju_Kose_Palvere_id23525_1887a.json
Märgendatud fail Tartu_

***** Epoch #5 *****
Loss: 5855.424391
Feature L2-norm: 44.927401
Learning rate (eta): 0.024994
Total number of feature updates: 81005
Seconds required for this iteration: 0.485

***** Epoch #6 *****
Loss: 5066.262597
Feature L2-norm: 48.113667
Learning rate (eta): 0.024993
Total number of feature updates: 97206
Seconds required for this iteration: 0.486

***** Epoch #7 *****
Loss: 4461.547792
Feature L2-norm: 50.939099
Learning rate (eta): 0.024991
Total number of feature updates: 113407
Seconds required for this iteration: 0.486

***** Epoch #8 *****
Loss: 4019.315350
Feature L2-norm: 53.512169
Learning rate (eta): 0.024990
Total number of feature updates: 129608
Seconds required for this iteration: 0.495

***** Epoch #9 *****
Loss: 3659.294004
Feature L2-norm: 55.891244
Learning rate (eta): 0.024989
Total number of feature updates: 145809
Seconds required for this iteration: 0.492

***** Epoch #10 *****
Loss: 3304.464842
Feature L2-norm: 58.043755
Learning rate (eta): 0.024988
Total

***** Epoch #46 *****
Loss: 747.753006
Improvement ratio: 0.270250
Feature L2-norm: 94.285063
Learning rate (eta): 0.024943
Total number of feature updates: 745246
Seconds required for this iteration: 0.485

***** Epoch #47 *****
Loss: 732.345662
Improvement ratio: 0.261603
Feature L2-norm: 94.831905
Learning rate (eta): 0.024941
Total number of feature updates: 761447
Seconds required for this iteration: 0.484

***** Epoch #48 *****
Loss: 717.301685
Improvement ratio: 0.264255
Feature L2-norm: 95.367198
Learning rate (eta): 0.024940
Total number of feature updates: 777648
Seconds required for this iteration: 0.496

***** Epoch #49 *****
Loss: 705.247684
Improvement ratio: 0.240427
Feature L2-norm: 95.890135
Learning rate (eta): 0.024939
Total number of feature updates: 793849
Seconds required for this iteration: 0.485

***** Epoch #50 *****
Loss: 688.862756
Improvement ratio: 0.249746
Feature L2-norm: 96.401814
Learning rate (eta): 0.024938
Total number of feature updates: 810050
Seco

***** Epoch #88 *****
Loss: 416.255059
Improvement ratio: 0.116479
Feature L2-norm: 110.747991
Learning rate (eta): 0.024890
Total number of feature updates: 1425688
Seconds required for this iteration: 0.487

***** Epoch #89 *****
Loss: 411.898150
Improvement ratio: 0.097775
Feature L2-norm: 111.034820
Learning rate (eta): 0.024889
Total number of feature updates: 1441889
Seconds required for this iteration: 0.485

***** Epoch #90 *****
Loss: 409.516647
Improvement ratio: 0.098540
Feature L2-norm: 111.318786
Learning rate (eta): 0.024888
Total number of feature updates: 1458090
Seconds required for this iteration: 0.489

***** Epoch #91 *****
Loss: 401.196478
Improvement ratio: 0.114974
Feature L2-norm: 111.597876
Learning rate (eta): 0.024887
Total number of feature updates: 1474291
Seconds required for this iteration: 0.484

***** Epoch #92 *****
Loss: 403.697801
Improvement ratio: 0.092513
Feature L2-norm: 111.877291
Learning rate (eta): 0.024886
Total number of feature updates: 14

***** Epoch #127 *****
Loss: 309.233173
Improvement ratio: 0.066236
Feature L2-norm: 120.047023
Learning rate (eta): 0.024842
Total number of feature updates: 2057527
Seconds required for this iteration: 0.485

***** Epoch #128 *****
Loss: 307.595047
Improvement ratio: 0.068528
Feature L2-norm: 120.245356
Learning rate (eta): 0.024841
Total number of feature updates: 2073728
Seconds required for this iteration: 0.484

***** Epoch #129 *****
Loss: 308.349740
Improvement ratio: 0.060821
Feature L2-norm: 120.441276
Learning rate (eta): 0.024840
Total number of feature updates: 2089929
Seconds required for this iteration: 0.489

***** Epoch #130 *****
Loss: 304.725174
Improvement ratio: 0.064206
Feature L2-norm: 120.637458
Learning rate (eta): 0.024839
Total number of feature updates: 2106130
Seconds required for this iteration: 0.488

***** Epoch #131 *****
Loss: 302.471122
Improvement ratio: 0.043801
Feature L2-norm: 120.832050
Learning rate (eta): 0.024837
Total number of feature update

***** Epoch #168 *****
Loss: 253.928703
Improvement ratio: 0.037229
Feature L2-norm: 127.118301
Learning rate (eta): 0.024792
Total number of feature updates: 2721768
Seconds required for this iteration: 0.485

***** Epoch #169 *****
Loss: 253.376964
Improvement ratio: 0.018742
Feature L2-norm: 127.268000
Learning rate (eta): 0.024791
Total number of feature updates: 2737969
Seconds required for this iteration: 0.485

***** Epoch #170 *****
Loss: 250.254723
Improvement ratio: 0.048471
Feature L2-norm: 127.417551
Learning rate (eta): 0.024789
Total number of feature updates: 2754170
Seconds required for this iteration: 0.484

***** Epoch #171 *****
Loss: 251.272379
Improvement ratio: 0.042911
Feature L2-norm: 127.565640
Learning rate (eta): 0.024788
Total number of feature updates: 2770371
Seconds required for this iteration: 0.503

***** Epoch #172 *****
Loss: 250.472340
Improvement ratio: 0.037660
Feature L2-norm: 127.712082
Learning rate (eta): 0.024787
Total number of feature update

***** Epoch #211 *****
Loss: 218.618056
Improvement ratio: 0.030471
Feature L2-norm: 132.855652
Learning rate (eta): 0.024739
Total number of feature updates: 3418411
Seconds required for this iteration: 0.485

***** Epoch #212 *****
Loss: 218.353151
Improvement ratio: 0.030593
Feature L2-norm: 132.974175
Learning rate (eta): 0.024738
Total number of feature updates: 3434612
Seconds required for this iteration: 0.486

***** Epoch #213 *****
Loss: 217.874235
Improvement ratio: 0.032028
Feature L2-norm: 133.091838
Learning rate (eta): 0.024737
Total number of feature updates: 3450813
Seconds required for this iteration: 0.483

***** Epoch #214 *****
Loss: 216.601785
Improvement ratio: 0.025232
Feature L2-norm: 133.209837
Learning rate (eta): 0.024735
Total number of feature updates: 3467014
Seconds required for this iteration: 0.484

***** Epoch #215 *****
Loss: 217.337691
Improvement ratio: 0.018610
Feature L2-norm: 133.326384
Learning rate (eta): 0.024734
Total number of feature update

Märgendatud fail Harju_Kose_Palvere_id14358_1880a.json
Märgendatud fail L22ne_Vormsi_Vormsi_id24517_1888a.json
Märgendatud fail Harju_Kose_Palvere_id18727_1883a.json
Märgendatud fail Saare_P8ide_Laimjala_id6593_1917a.json
Märgendatud fail V6ru_R6uge_Leevi_id24854_1875a.json
Märgendatud fail Harju_Keila_Keila_id13472_1890a.json
Märgendatud fail Tartu_R6ngu_Aakre_id2817_1888a.json
Märgendatud fail Tartu_V6nnu_Ahja_id13953_1882a.json
Märgendatud fail Tartu_Kodavere_Alatskivi_id12235_1856a.json
Märgendatud fail J2rva_Peetri_Silmsi_id23715_1869a.json
Märgendatud fail P2rnu_Tori_Tori_id25326_1890a.json
Märgendatud fail Tartu_Kodavere_Alatskivi_id1266_1865a.json
Märgendatud fail Tartu_Kodavere_Pala_id17804_1861a.json
Märgendatud fail L22ne_Pyhalepa_Kassari_id20356_1889a.json
Märgendatud fail Tartu_Otep22_Pyhaj2rve_id4865_1885a.json
Märgendatud fail Tartu_Kodavere_Alatskivi_id21764_1881a.json
Märgendatud fail Tartu_V6nnu_Ahja_id23497_1894a.json
Märgendatud fail Tartu_Torma_Avinurme_id4091_1858

Märgendatud fail Harju_Kose_Triigi_id11470_1871a.json
Märgendatud fail Tartu_V6nnu_Ahja_id15395_1883a.json
Märgendatud fail Tartu_V6nnu_Ahja_id15584_1883a.json
Märgendatud fail Harju_J6el2htme_J6el2htme_id9507_1889a.json
Märgendatud fail Tartu_V6nnu_Ahja_id12372_1874a.json
Märgendatud fail Harju_Kose_Triigi_id11552_1871a.json
Märgendatud fail L22ne_Kullamaa_Kuij6e_id15386_1874a.json
Märgendatud fail Harju_Hageri_Kohila_id4177_1883a.json
Märgendatud fail V6ru_R6uge_Saaluse_id9629_1878a.json
Märgendatud fail J2rva_Tyri_S2revere_id13094_1880a.json
Märgendatud fail L22ne_Kullamaa_Piirsalu_id7491_1884a.json
Märgendatud fail L22ne_Pyhalepa_K2rdla_id10158_1884a.json
Märgendatud fail V6ru_Vastseliina_Misso_id13577_1881a.json
Märgendatud fail J2rva_Tyri_V22tsa_id22488_1913a.json
Märgendatud fail Harju_Hageri_Kohila_id5465_1889a.json
Märgendatud fail Tartu_V6nnu_Ahja_id21646_1867a.json
Märgendatud fail J2rva_J2rva-Jaani_Einmanni_id6497_1868a.json
Märgendatud fail P2rnu_T6stamaa_Kihnu_id25042_184

***** Epoch #25 *****
Loss: 690.855925
Improvement ratio: 0.700751
Feature L2-norm: 99.149332
Learning rate (eta): 0.049875
Total number of feature updates: 404525
Seconds required for this iteration: 0.498

***** Epoch #26 *****
Loss: 679.232911
Improvement ratio: 0.624233
Feature L2-norm: 100.097207
Learning rate (eta): 0.049870
Total number of feature updates: 420706
Seconds required for this iteration: 0.493

***** Epoch #27 *****
Loss: 654.475237
Improvement ratio: 0.584026
Feature L2-norm: 101.018857
Learning rate (eta): 0.049865
Total number of feature updates: 436887
Seconds required for this iteration: 0.495

***** Epoch #28 *****
Loss: 633.161470
Improvement ratio: 0.535502
Feature L2-norm: 101.916417
Learning rate (eta): 0.049860
Total number of feature updates: 453068
Seconds required for this iteration: 0.494

***** Epoch #29 *****
Loss: 613.998159
Improvement ratio: 0.503903
Feature L2-norm: 102.772100
Learning rate (eta): 0.049855
Total number of feature updates: 469249


***** Epoch #67 *****
Loss: 315.120158
Improvement ratio: 0.077538
Feature L2-norm: 123.411937
Learning rate (eta): 0.049667
Total number of feature updates: 1084127
Seconds required for this iteration: 0.495

***** Epoch #68 *****
Loss: 300.657058
Improvement ratio: 0.112629
Feature L2-norm: 123.775682
Learning rate (eta): 0.049662
Total number of feature updates: 1100308
Seconds required for this iteration: 0.494

***** Epoch #69 *****
Loss: 292.855368
Improvement ratio: 0.141874
Feature L2-norm: 124.136640
Learning rate (eta): 0.049657
Total number of feature updates: 1116489
Seconds required for this iteration: 0.493

***** Epoch #70 *****
Loss: 296.351065
Improvement ratio: 0.057001
Feature L2-norm: 124.485348
Learning rate (eta): 0.049652
Total number of feature updates: 1132670
Seconds required for this iteration: 0.497

***** Epoch #71 *****
Loss: 277.890461
Improvement ratio: 0.187646
Feature L2-norm: 124.839018
Learning rate (eta): 0.049648
Total number of feature updates: 11

Märgendatud fail Tartu_V6nnu_Ahja_id19102_1887a.json
Märgendatud fail Viljandi_K6pu_Suure-K6pu_id3746_1883a.json
Märgendatud fail Tartu_N6o_Aru_id4068_1890a.json
Märgendatud fail P2rnu_Tori_Sindi_id20034_1836a.json
Märgendatud fail Tartu_Kodavere_Pala_id20260_1866a.json
Märgendatud fail L22ne_Vormsi_Vormsi_id24532_1888a.json
Märgendatud fail Tartu_Kodavere_Pala_id25066_1873a.json
Märgendatud fail Tartu_Kodavere_Ranna_id19679_1865a.json
Märgendatud fail J2rva_Peetri_V2ike-Kareda_id19197_1869a.json
Märgendatud fail Harju_J6el2htme_J6el2htme_id8180_1888a.json
Märgendatud fail Harju_Kose_Triigi_id11473_1871a.json
Märgendatud fail V6ru_R2pina_R2pina_id11101_1863a.json
Märgendatud fail Saare_Kihelkonna_Kotlandi_id21849_1869a.json
Märgendatud fail Tartu_V6nnu_Ahja_id20420_1888a.json
Märgendatud fail Tartu_Torma_Avinurme_id22230_1872a.json
Märgendatud fail Tartu_Kodavere_Pala_id18366_1863a.json
Märgendatud fail P2rnu_P2rnu-Elisabethi_Sauga_id17127_1874a.json
Märgendatud fail J2rva_J2rva-Jaani_

Märgendatud fail J2rva_Tyri_V22tsa_id22177_1911a.json
Märgendatud fail L22ne_Kullamaa_Piirsalu_id12912_1895a.json
Märgendatud fail J2rva_Tyri_V22tsa_id19055_1898a.json
Märgendatud fail Harju_Kose_Palvere_id13987_1880a.json
Märgendatud fail V6ru_Vastseliina_Misso_id24810_1886a.json
Märgendatud fail Tartu_Torma_Avinurme_id22547_1872a.json
Märgendatud fail Tartu_N6o_Pangodi_id2808_1889a.json
Märgendatud fail V6ru_P6lva_Kiuma_id7167_1880a.json
Märgendatud fail Tartu_V6nnu_Ahja_id14727_1882a.json
Märgendatud fail Tartu_Kodavere_Alatskivi_id23068_1881a.json
Märgendatud fail Tartu_Torma_Avinurme_id3646_1868a.json
Märgendatud fail V6ru_Vastseliina_Misso_id11543_1886a.json
Märgendatud fail Tartu_V6nnu_Ahja_id22666_1881a.json
Märgendatud fail Harju_J6el2htme_J6el2htme_id6475_1868a.json
Märgendatud fail V6ru_R6uge_Saaluse_id11773_1880a.json
Märgendatud fail L22ne_Vormsi_Vormsi_id25013_1888a.json
Märgendatud fail V6ru_Urvaste_Vaabina_id785_1876a.json
Märgendatud fail V6ru_R2pina_R2pina_id12011_186

***** Epoch #15 *****
Loss: 1107.359004
Improvement ratio: 2.306222
Feature L2-norm: 84.523263
Learning rate (eta): 0.049925
Total number of feature updates: 230175
Seconds required for this iteration: 0.485

***** Epoch #16 *****
Loss: 1029.442870
Improvement ratio: 1.925829
Feature L2-norm: 86.013620
Learning rate (eta): 0.049920
Total number of feature updates: 245520
Seconds required for this iteration: 0.485

***** Epoch #17 *****
Loss: 963.610500
Improvement ratio: 1.682301
Feature L2-norm: 87.428828
Learning rate (eta): 0.049915
Total number of feature updates: 260865
Seconds required for this iteration: 0.490

***** Epoch #18 *****
Loss: 921.729870
Improvement ratio: 1.394506
Feature L2-norm: 88.766334
Learning rate (eta): 0.049910
Total number of feature updates: 276210
Seconds required for this iteration: 0.486

***** Epoch #19 *****
Loss: 865.071994
Improvement ratio: 1.212097
Feature L2-norm: 90.049899
Learning rate (eta): 0.049905
Total number of feature updates: 291555
Se

***** Epoch #57 *****
Loss: 317.822419
Improvement ratio: 0.163813
Feature L2-norm: 116.150845
Learning rate (eta): 0.049717
Total number of feature updates: 874665
Seconds required for this iteration: 0.484

***** Epoch #58 *****
Loss: 311.501572
Improvement ratio: 0.165789
Feature L2-norm: 116.568880
Learning rate (eta): 0.049712
Total number of feature updates: 890010
Seconds required for this iteration: 0.486

***** Epoch #59 *****
Loss: 306.917447
Improvement ratio: 0.168598
Feature L2-norm: 116.977161
Learning rate (eta): 0.049707
Total number of feature updates: 905355
Seconds required for this iteration: 0.491

***** Epoch #60 *****
Loss: 304.992076
Improvement ratio: 0.154031
Feature L2-norm: 117.380168
Learning rate (eta): 0.049702
Total number of feature updates: 920700
Seconds required for this iteration: 0.485

***** Epoch #61 *****
Loss: 297.566540
Improvement ratio: 0.158685
Feature L2-norm: 117.777062
Learning rate (eta): 0.049697
Total number of feature updates: 936045

***** Epoch #98 *****
Loss: 215.450865
Improvement ratio: 0.084623
Feature L2-norm: 129.123169
Learning rate (eta): 0.049515
Total number of feature updates: 1503810
Seconds required for this iteration: 0.484

***** Epoch #99 *****
Loss: 212.054988
Improvement ratio: 0.092924
Feature L2-norm: 129.367468
Learning rate (eta): 0.049510
Total number of feature updates: 1519155
Seconds required for this iteration: 0.486

***** Epoch #100 *****
Loss: 215.277623
Improvement ratio: 0.062376
Feature L2-norm: 129.606990
Learning rate (eta): 0.049505
Total number of feature updates: 1534500
Seconds required for this iteration: 0.492

***** Epoch #101 *****
Loss: 212.183671
Improvement ratio: 0.060981
Feature L2-norm: 129.844753
Learning rate (eta): 0.049500
Total number of feature updates: 1549845
Seconds required for this iteration: 0.486

***** Epoch #102 *****
Loss: 210.532335
Improvement ratio: 0.062038
Feature L2-norm: 130.079921
Learning rate (eta): 0.049495
Total number of feature updates:

***** Epoch #139 *****
Loss: 177.261140
Improvement ratio: 0.029630
Feature L2-norm: 137.468380
Learning rate (eta): 0.049315
Total number of feature updates: 2132955
Seconds required for this iteration: 0.485

***** Epoch #140 *****
Loss: 173.435465
Improvement ratio: 0.035225
Feature L2-norm: 137.638193
Learning rate (eta): 0.049310
Total number of feature updates: 2148300
Seconds required for this iteration: 0.485

***** Epoch #141 *****
Loss: 172.785152
Improvement ratio: 0.042589
Feature L2-norm: 137.809300
Learning rate (eta): 0.049305
Total number of feature updates: 2163645
Seconds required for this iteration: 0.492

***** Epoch #142 *****
Loss: 174.802995
Improvement ratio: 0.037761
Feature L2-norm: 137.976666
Learning rate (eta): 0.049300
Total number of feature updates: 2178990
Seconds required for this iteration: 0.485

***** Epoch #143 *****
Loss: 171.803310
Improvement ratio: 0.052860
Feature L2-norm: 138.143986
Learning rate (eta): 0.049295
Total number of feature update

Märgendatud fail V6ru_R2pina_Kahkva_id5748_1887a.json
Märgendatud fail Harju_Hageri_Kohila_id10684_1872a.json
Märgendatud fail Tartu_N6o_Pangodi_id3130_1889a.json
Märgendatud fail V6ru_R6uge_Saaluse_id9066_1878a.json
Märgendatud fail Tartu_N6o_Aru_id4639_1890a.json
Märgendatud fail Tartu_R6ngu_Aakre_id12559_1827a.json
Märgendatud fail J2rva_Tyri_Kirna_id24586_1880a.json
Märgendatud fail Tartu_V6nnu_Ahja_id23443_1893a.json
Märgendatud fail Tartu_V6nnu_Ahja_id21672_1867a.json
Märgendatud fail L22ne_Kullamaa_Piirsalu_id6018_1875a.json
Märgendatud fail Tartu_R6ngu_Aakre_id6652_1826a.json
Märgendatud fail J2rva_Tyri_S2revere_id13469_1882a.json
Märgendatud fail Tartu_Rannu_Valguta_id15519_1882a.json
Märgendatud fail Harju_Juuru_Juuru_id18980_1885a.json
Märgendatud fail V6ru_P6lva_Kiuma_id6861_1880a.json
Märgendatud fail V6ru_R2pina_R2pina_id9282_1869a.json
Märgendatud fail Tartu_V6nnu_Ahja_id12119_1873a.json
Märgendatud fail Tartu_Maarja-Magdaleena_J6e_id13745_1862a.json
Märgendatud fail Har

Märgendatud fail Tartu_Kodavere_Pala_id23275_1872a.json
Märgendatud fail L22ne_Reigi_K6rgessaare_id23087_1894a.json
Märgendatud fail Tartu_N6o_Aru_id5374_1890a.json
Märgendatud fail Harju_Juuru_Juuru_id20228_1868a.json
Märgendatud fail Tartu_V6nnu_Rasina_id13313_1865a.json
Märgendatud fail Tartu_V6nnu_Ahja_id15819_1883a.json
Märgendatud fail Tartu_V6nnu_Ahja_id13002_1875a.json
Märgendatud fail Tartu_Kodavere_Pala_id21493_1869a.json
Märgendatud fail V6ru_Vastseliina_Misso_id19537_1882a.json
Märgendatud fail Saare_Mustjala_Mustjala_id7076_1821a.json
Märgendatud fail L22ne_Vormsi_Vormsi_id24033_1888a.json
Märgendatud fail Viru_Rakvere_S6meru_id10293_1883a.json
Märgendatud fail Tartu_Torma_Avinurme_id24697_1824a.json
Märgendatud fail Harju_Kose_Palvere_id13988_1880a.json
Märgendatud fail Tartu_V6nnu_Ahja_id12425_1875a.json
Märgendatud fail Viljandi_K6pu_Suure-K6pu_id4649_1883a.json
Märgendatud fail Tartu_V6nnu_Ahja_id20421_1888a.json
Märgendatud fail Harju_Kose_Palvere_id19467_1884a.json
M

***** Epoch #28 *****
Loss: 608.027159
Improvement ratio: 0.573759
Feature L2-norm: 101.436786
Learning rate (eta): 0.049860
Total number of feature updates: 437192
Seconds required for this iteration: 0.480

***** Epoch #29 *****
Loss: 592.960535
Improvement ratio: 0.531213
Feature L2-norm: 102.294547
Learning rate (eta): 0.049855
Total number of feature updates: 452806
Seconds required for this iteration: 0.484

***** Epoch #30 *****
Loss: 569.000076
Improvement ratio: 0.485823
Feature L2-norm: 103.121077
Learning rate (eta): 0.049850
Total number of feature updates: 468420
Seconds required for this iteration: 0.485

***** Epoch #31 *****
Loss: 568.356369
Improvement ratio: 0.443892
Feature L2-norm: 103.917878
Learning rate (eta): 0.049845
Total number of feature updates: 484034
Seconds required for this iteration: 0.480

***** Epoch #32 *****
Loss: 541.693469
Improvement ratio: 0.442090
Feature L2-norm: 104.692505
Learning rate (eta): 0.049841
Total number of feature updates: 499648

***** Epoch #70 *****
Loss: 295.522039
Improvement ratio: 0.070194
Feature L2-norm: 123.823570
Learning rate (eta): 0.049652
Total number of feature updates: 1092980
Seconds required for this iteration: 0.481

***** Epoch #71 *****
Loss: 291.536791
Improvement ratio: 0.109977
Feature L2-norm: 124.166470
Learning rate (eta): 0.049648
Total number of feature updates: 1108594
Seconds required for this iteration: 0.479

***** Epoch #72 *****
Loss: 276.849031
Improvement ratio: 0.148090
Feature L2-norm: 124.500672
Learning rate (eta): 0.049643
Total number of feature updates: 1124208
Seconds required for this iteration: 0.486

***** Epoch #73 *****
Loss: 275.031588
Improvement ratio: 0.147148
Feature L2-norm: 124.835812
Learning rate (eta): 0.049638
Total number of feature updates: 1139822
Seconds required for this iteration: 0.484

***** Epoch #74 *****
Loss: 279.923684
Improvement ratio: 0.064256
Feature L2-norm: 125.172536
Learning rate (eta): 0.049633
Total number of feature updates: 11

Märgendatud fail V6ru_Vastseliina_Misso_id8101_1885a.json
Märgendatud fail V6ru_Vastseliina_Misso_id13681_1886a.json
Märgendatud fail Tartu_N6o_Pangodi_id5262_1889a.json
Märgendatud fail Harju_Keila_Saue_id13436_1885a.json
Märgendatud fail Harju_Kose_Triigi_id9536_1868a.json
Märgendatud fail Tartu_Torma_Avinurme_id21275_1871a.json
Märgendatud fail Tartu_Otep22_Pyhaj2rve_id12075_1885a.json
Märgendatud fail L22ne_Martna_Martna_id23117_1873a.json
Märgendatud fail L22ne_Kullamaa_Piirsalu_id6881_1877a.json
Märgendatud fail Viru_Haljala_Vihula_id4974_1876a.json
Märgendatud fail P2rnu_Tori_Tori_id25322_1889a.json
Märgendatud fail Tartu_Kodavere_Ranna_id19673_1865a.json
Märgendatud fail J2rva_Tyri_S2revere_id9244_1885a.json
Märgendatud fail P2rnu_P2rnu-Elisabethi_Sauga_id22818_1882a.json
Märgendatud fail Tartu_V6nnu_Ahja_id16349_1884a.json
Märgendatud fail Harju_Kuusalu_Kolga_id11902_1888a.json
Märgendatud fail J2rva_Peetri_V2ike-Kareda_id22150_1879a.json
Märgendatud fail Tartu_Kodavere_Pala_i

Märgendatud fail J2rva_Tyri_V22tsa_id16931_1886a.json
Märgendatud fail Harju_Harju-Madise_Padise-Kloostri_id2778_1867a.json
Märgendatud fail Viljandi_K6pu_Suure-K6pu_id4977_1883a.json
Märgendatud fail J2rva_Tyri_S2revere_id8673_1886a.json
Märgendatud fail V6ru_R2pina_Kahkva_id14118_1889a.json
Märgendatud fail L22ne_Pyhalepa_K2rdla_id22309_1867a.json
Märgendatud fail L22ne_Pyhalepa_K2rdla_id25096_1877a.json
Märgendatud fail Tartu_Kodavere_Alatskivi_id7206_1876a.json
Märgendatud fail V6ru_R2pina_Kahkva_id6348_1887a.json
Märgendatud fail Tartu_N6o_Meeri_id4980_1895a.json
Märgendatud fail Tartu_V6nnu_Ahja_id14731_1882a.json
Märgendatud fail Tartu_Kodavere_Alatskivi_id23690_1866a.json
Märgendatud fail Harju_Kose_Palvere_id17074_1882a.json
Märgendatud fail Viljandi_Paistu_Holstre_id515_1843a.json
Märgendatud fail Tartu_Otep22_Pyhaj2rve_id11749_1885a.json
Märgendatud fail Harju_Harju-Madise_Padise-Kloostri_id3283_1885a.json
Märgendatud fail Tartu_V6nnu_Ahja_id15058_1883a.json
Märgendatud fail