In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
import torch
import torch.nn as nn
import transformers
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule, XLMRobertaTokenizer, XLMRobertaModel, XLMRobertaConfig
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import re
from collections import defaultdict
from collections import Counter
from sklearn.model_selection import KFold

In [3]:
MAX_LEN = 500
TRAIN_BATCH_SIZE = 12
VALID_BATCH_SIZE = 8
EPOCHS = 100
BERT_MODEL = 'xlm-roberta-base'
TOKENIZER = transformers.XLMRobertaTokenizer.from_pretrained(BERT_MODEL)
#TRAIN_FILE = "./data/train_data_annotated_BIOES_v3.txt"
#VALID_FILE = "./data/e"
TRAIN_FILE = "./data/xa"
VALID_FILE = "./data/c"
#TRAIN_FILE = "/home/ktlim/code/TTtagger/corpus/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-train.conllu"
#VALID_FILE = "/home/ktlim/code/TTtagger/corpus/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-test.conllu"

In [4]:
DEVICE=0
ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10)

In [5]:
def normalize(word):
    return re.sub(r"\d", "0", word).lower()


def strong_normalize(word):
    w = ftfy.fix_text(word.lower())
    w = re.sub(r".+@.+", "*EMAIL*", w)
    w = re.sub(r"@\w+", "*AT*", w)
    w = re.sub(r"(https?://|www\.).*", "*url*", w)
    w = re.sub(r"([^\d])\1{2,}", r"\1\1", w)
    w = re.sub(r"([^\d][^\d])\1{2,}", r"\1\1", w)
    w = re.sub(r"``", '"', w)
    w = re.sub(r"''", '"', w)
    w = re.sub(r"\d", "0", w)
    return w


def buildVocab(graphs, cutoff=1):
    wordsCount = Counter()
    charsCount = Counter()
    uposCount = Counter()
    xposCount = Counter()
    relCount = Counter()
    featCount = Counter()
    langCount = Counter()

    for graph in graphs:
        wordsCount.update([node.norm for node in graph.nodes[1:]])
        for node in graph.nodes[1:]:
            charsCount.update(list(node.word))
            featCount.update(node.feats_set)
            #  charsCount.update(list(node.norm))
        uposCount.update([node.upos for node in graph.nodes[1:]])
        xposCount.update([node.xupos for node in graph.nodes[1:]])
        relCount.update([rel for rel in graph.rels[1:]])
        langCount.update([node.lang for node in graph.nodes[1:]])
        

    wordsCount = Counter({w: i for w, i in wordsCount.items() if i >= cutoff})
    print("Vocab containing {} words".format(len(wordsCount)))
    print("Charset containing {} chars".format(len(charsCount)))
    print("UPOS containing {} tags".format(len(uposCount)), uposCount)
    #print("XPOS containing {} tags".format(len(xposCount)), xposCount)
    print("Rels containing {} tags".format(len(relCount)), relCount)
    print("Feats containing {} tags".format(len(featCount)), featCount)
    print("lang containing {} tags".format(len(langCount)), langCount)

    ret = {
        "vocab": list(wordsCount.keys()),
        "wordfreq": wordsCount,
        "charset": list(charsCount.keys()),
        "charfreq": charsCount,
        "upos": list(uposCount.keys()),
        "xpos": list(xposCount.keys()),
        "rels": list(relCount.keys()),
        "feats": list(featCount.keys()),
        "lang": list(langCount.keys()),
    }

    return ret

def shuffled_stream(data):
    len_data = len(data)
    while True:
        for d in random.sample(data, len_data):
            yield d

def shuffled_balanced_stream(data):
    for ds in zip(*[shuffled_stream(s) for s in data]):
        ds = list(ds)
        random.shuffle(ds)
        for d in ds:
            yield d
            
            
def parse_dict(features):
    if features is None or features == "_":
        return {}

    ret = {}
    lst = features.split("|")
    for l in lst:
        k, v = l.split("=")
        ret[k] = v
    return ret


def parse_features(features):
    if features is None or features == "_":
        return set()

    return features.lower().split("|")


class Word:

    def __init__(self, word, upos, lemma=None, xpos=None, feats=None, misc=None, lang=None):
        self.word = word
        self.norm = normalize(word) #strong_normalize(word)
        self.lemma = lemma if lemma else "_"
        self.upos = upos
        self.xpos = xpos if xpos else "_"
        self.xupos = self.upos + "|" + self.xpos
        self.feats = feats if feats else "_"
        self.feats_set = parse_features(self.feats)
        self.misc = misc if misc else "_"
        self.lang = lang if lang else "_"

    def cleaned(self):
        return Word(self.word, "_")

    def clone(self):
        return Word(self.word, self.upos, self.lemma, self.xpos, self.feats, self.misc)

    def __repr__(self):
        return "{}_{}".format(self.word, self.upos)


class DependencyGraph(object):

    def __init__(self, words, tokens=None):
        #  Token is a tuple (start, end, form)
        if tokens is None:
            tokens = []
        self.nodes = np.array([Word("*root*", "*root*")] + list(words))
        self.tokens = tokens
        self.heads = np.array([-1] * len(self.nodes))
        self.rels = np.array(["_"] * len(self.nodes), dtype=object)

    def __copy__(self):
        cls = self.__class__
        result = cls.__new__(cls)
        result.nodes = self.nodes
        result.tokens = self.tokens
        result.heads = self.heads.copy()
        result.rels = self.rels.copy()
        return result

    def cleaned(self, node_level=True):
        if node_level:
            return DependencyGraph([node.cleaned() for node in self.nodes[1:]], self.tokens)
        else:
            return DependencyGraph([node.clone() for node in self.nodes[1:]], self.tokens)

    def attach(self, head, tail, rel):
        self.heads[tail] = head
        self.rels[tail] = rel

    def __repr__(self):
        return "\n".join(["{} ->({})  {} ({})".format(str(self.nodes[i]), self.rels[i], self.heads[i], self.nodes[self.heads[i]]) for i in range(len(self.nodes))])


def read_conll(filename, lang_code=None):
    
    print("read_conll with", lang_code)
    def get_word(columns):
        return Word(columns[FORM], columns[UPOS], lemma=columns[LEMMA], xpos=columns[XPOS], feats=columns[FEATS], misc=columns[MISC], lang=lang_code)

    def get_graph(graphs, words, tokens, edges):
        graph = DependencyGraph(words, tokens)
        for (h, d, r) in edges:
            graph.attach(h, d, r)
        graphs.append(graph)

    file = open(filename, "r", encoding="UTF-8")

    graphs = []
    words = []
    tokens = []
    edges = []

    num_sent = 0
    sentence_start = False
    while True:
        line = file.readline()
        if not line:
            if len(words) > 0:
                get_graph(graphs, words, tokens, edges)
                words, tokens, edges = [], [], []
            break
        line = line.rstrip("\r\n")

        # Handle sentence start boundaries
        if not sentence_start:
            # Skip comments
            if line.startswith("#"):
                continue
            # Start a new sentence
            sentence_start = True
        if not line:
            sentence_start = False
            if len(words) > 0:
                if (len(words) < 250):
                    get_graph(graphs, words, tokens, edges)
                words, tokens, edges = [], [], []
                num_sent += 1
            continue

        # Read next token/word
        columns = line.split("\t")

        # Skip empty nodes
        if "." in columns[ID]:
            continue

        # Handle multi-word tokens to save word(s)
        if "-" in columns[ID]:
            start, end = map(int, columns[ID].split("-"))
            tokens.append((start, end + 1, columns[FORM]))

            for _ in range(start, end + 1):
                word_line = file.readline().rstrip("\r\n")
                word_columns = word_line.split("\t")
                words.append(get_word(word_columns))
                if word_columns[HEAD].isdigit():
                    head = int(word_columns[HEAD])
                else:
                    head = -1
                edges.append((head, int(word_columns[ID]), word_columns[DEPREL].split(":")[0]))
        # Basic tokens/words
        else:
            words.append(get_word(columns))
            if columns[HEAD].isdigit():
                head = int(columns[HEAD])
            else:
                head = -1
            edges.append((head, int(columns[ID]), columns[DEPREL].split(":")[0]))

    file.close()

    return graphs

In [6]:
# 2. Data Loader
class CoNLLDataset:
    def __init__(self, graphs, tokenizer, max_len, fullvocab=None):
        self.conll_graphs = graphs
        self.tokenizer = tokenizer
        self.max_len = max_len
        
        self._fullvocab = fullvocab if fullvocab else buildVocab(self.conll_graphs, cutoff=1)
            
        self._upos = {p: i for i, p in enumerate(self._fullvocab["upos"])}
        self._iupos = self._fullvocab["upos"]
        self._xpos = {p: i for i, p in enumerate(self._fullvocab["xpos"])}
        self._ixpos = self._fullvocab["xpos"]
        self._vocab = {w: i+3 for i, w in enumerate(self._fullvocab["vocab"])}
        self._wordfreq = self._fullvocab["wordfreq"]
        self._charset = {c: i+3 for i, c in enumerate(self._fullvocab["charset"])}
        self._charfreq = self._fullvocab["charfreq"]
        self._rels = {r: i for i, r in enumerate(self._fullvocab["rels"])}
        self._irels = self._fullvocab["rels"]
        self._feats = {f: i for i, f in enumerate(self._fullvocab["feats"])}
        self._langs = {r: i+2 for i, r in enumerate(self._fullvocab["lang"])}
        self._ilangs = self._fullvocab["lang"]
        
        #self._posRels = {r: i for i, r in enumerate(self._fullvocab["posRel"])}
        #self._iposRels = self._fullvocab["posRel"]
        
        self._vocab['*pad*'] = 0
        self._charset['*pad*'] = 0
        self._langs['*pad*'] = 0
        
        self._vocab['*root*'] = 1
        self._charset['*whitespace*'] = 1
        
        self._vocab['*unknown*'] = 2
        self._charset['*unknown*'] = 2
        
        
    
    def __len__(self):
        return len(self.conll_graphs)
        
        
    def __getitem__(self, item):
        
        graph = self.conll_graphs[item]
        word_list = [node.word for node in graph.nodes]
        upos_list = [node.upos for node in graph.nodes]
        feat_list = [node.feats for node in graph.nodes]
        
        encoded = self.tokenizer.encode_plus(' '.join(word_list[1:]),
                                             None,
                                             add_special_tokens=True,
                                             max_length = self.max_len,
                                             truncation=True,
                                             pad_to_max_length = True)
        
        ids, mask = encoded['input_ids'], encoded['attention_mask']
        
        bpe_head_mask = [0]; upos_ids = [-1]; feat_ids = [-1] # --> CLS token
        
        for word, upos, feat in zip(word_list[1:], upos_list[1:], feat_list[1:]):
            bpe_len = len(self.tokenizer.tokenize(word))
            head_mask = [1] + [0]*(bpe_len-1)
            bpe_head_mask.extend(head_mask)
            upos_mask = [self._upos.get(upos)] + [-1]*(bpe_len-1)
            upos_ids.extend(upos_mask)
            feat_mask = [self._feats.get(feat.lower(), 2)] + [-1]*(bpe_len-1)
            feat_ids.extend(feat_mask)
            
            #print("head_mask", head_mask)
        
        bpe_head_mask.append(0); upos_ids.append(-1); feat_ids.append(-1) # --> END token
        bpe_head_mask.extend([0] * (self.max_len - len(bpe_head_mask))) ## --> padding by max_len
        upos_ids.extend([-1] * (self.max_len - len(upos_ids))) ## --> padding by max_len
        feat_ids.extend([-1] * (self.max_len - len(feat_ids))) ## --> padding by max_len
        
        return {
                'ids': torch.tensor(ids, dtype=torch.long),
                'mask': torch.tensor(mask, dtype=torch.long),
                'bpe_head_mask': torch.tensor(bpe_head_mask, dtype=torch.long),
                'upos_ids': torch.tensor(upos_ids, dtype=torch.long),
                'feat_ids': torch.tensor(feat_ids, dtype=torch.long)
               }
    
    

  

In [7]:
def f1_score(total_pred, total_targ, noNER_idx):
    
    p = 0 # (retrived SB and real SB) / retrived SB  # The percentage of (the number of correct predictions) / (the number of predction that system predicts as B-SENT)
    r = 0
    f1= 0
    
    np_total_pred = np.array(total_pred)
    np_total_tag = np.array(total_targ)
    
    #Get noPad
    incidence_nopad = np.where(np_total_tag != -1) ## eliminate paddings
    np_total_pred_nopad = np_total_pred[incidence_nopad]
    np_total_tag_nopad = np_total_tag[incidence_nopad]
    
    
    #precision
    incidence_nopad_sb = np.where(np_total_pred_nopad != noNER_idx)
    np_total_pred_nopad_sb = np_total_pred_nopad[incidence_nopad_sb]
    np_total_tag_nopad_sb = np_total_tag_nopad[incidence_nopad_sb]
    
    count_active_tokens_p = len(np_total_pred_nopad_sb)
    count_correct_p = np.count_nonzero((np_total_pred_nopad_sb==np_total_tag_nopad_sb) == True)
    
    '''
    np_total_pred_incid = np_total_pred[incidence_p]
    print("np_total_pred_incid", np_total_pred_incid)
    ids_sb_pred_p = np.where(np_total_pred_incid==1)
    np_total_pred_p = np_total_pred_incid[ids_sb_pred_p]
    np_total_tag_p = np_total_tag[ids_sb_pred_p]
    
    print("ids_sb_pred_p", ids_sb_pred_p)
    print("np_total_pred_p", np_total_pred_p)
    print("np_total_tag_p", np_total_tag_p)
    
    count_active_tokens_p = len(np_total_pred_p)
    count_correct_p = np.count_nonzero((np_total_pred_p==np_total_tag_p) == True)
    '''
    
    print("count_correct_p", count_correct_p)
    print("count_active_tokens_p", count_active_tokens_p)
    
    p = count_correct_p/count_active_tokens_p
    print("precision:", p)

    
    #recall
    ids_sb_pred_r = np.where(np_total_tag_nopad != noNER_idx)
    np_total_pred_r = np_total_pred_nopad[ids_sb_pred_r]
    np_total_tag_r = np_total_tag_nopad[ids_sb_pred_r]
    
    #print("ids_sb_pred_r", ids_sb_pred_r)
    #print("np_total_pred_r", np_total_pred_r)
    #print("np_total_tag_r", np_total_tag_r)
    
    count_active_tokens_r = len(np_total_pred_r)
    count_correct_r = np.count_nonzero((np_total_pred_r==np_total_tag_r) == True)
    
    print("count_active_tokens_r", count_active_tokens_r)
    print("count_correct_r", count_correct_r)
    
    r = count_correct_r/count_active_tokens_r
    print("recall:", r)
    
    
    #F1
    #f1 = 2*(p*r) / (p+r)
    print("F1:", f1)
    
    #count_active_tokens_recall = np.count_nonzero(np.array(total_targ) > -1)
    #print("count_active_tokens_recall", count_active_tokens_recall)
    #count_active_tokens_precision = np.count_nonzero(np.array(total_targ) > -1)
    
    #count_correct = np.count_nonzero((np.array(total_pred)==np.array(total_targ)) == True)
    #print("count_correct",count_correct)
    #print("ACCURACY:", count_correct/count_active_tokens)
    

In [8]:
class XLMRobertaEncoder(nn.Module):
    def __init__(self, num_upos, num_feat):
        super(XLMRobertaEncoder, self).__init__()
        self.xlm_roberta = transformers.XLMRobertaModel.from_pretrained('xlm-roberta-base')
        self.dropout = nn.Dropout(0.33)
        self.linear = nn.Linear(768, num_upos)
        
        self.f_dropout = nn.Dropout(0.33)
        self.f_linear = nn.Linear(768, num_feat)
            
    def forward(self, ids, mask):
        o1, o2 = self.xlm_roberta(ids, mask)
        
        #apool = torch.mean(o1, 1)
        #mpool, _ = torch.max(o1, 1)
        #cat = torch.cat((apool, mpool), 1)
        #bo = self.dropout(cat)
        p_logits = self.linear(o1)        
        f_logits = self.f_linear(o1)   
        
        return p_logits, f_logits
        


In [9]:
#train_graphs = read_conll(TRAIN_FILE, 'ko')
#cv = KFold(n_splits=5, random_state=1, shuffle=False)
#for t,v in cv.split(train_graphs):
#    train_graph=train_graphs[t]         # Train Set
#    valid_graph=train_graphs[v]         # Validation Set

In [10]:
train_graphs = read_conll(TRAIN_FILE, 'ko')
train_dataset = CoNLLDataset(graphs=train_graphs, tokenizer=TOKENIZER, max_len=MAX_LEN)
train_loader = torch.utils.data.DataLoader(train_dataset, num_workers=4, batch_size=TRAIN_BATCH_SIZE, shuffle=False)
valid_graphs = read_conll(VALID_FILE, 'ko')
valid_dataset = CoNLLDataset(graphs=valid_graphs, tokenizer=TOKENIZER, max_len=MAX_LEN, fullvocab=train_dataset._fullvocab)
valid_loader = torch.utils.data.DataLoader(valid_dataset, num_workers=4, batch_size=VALID_BATCH_SIZE, shuffle=False)

read_conll with ko
Vocab containing 4561 words
Charset containing 1027 chars
UPOS containing 15 tags Counter({'NOUN': 5257, 'PRT': 3801, 'ADP': 2810, 'VERB': 1691, 'PUNCT': 1550, 'PROPN': 645, 'NUM': 557, 'X': 281, 'DET': 220, 'ADJ': 206, 'PRON': 117, 'SYM': 105, 'ADV': 71, 'INTJ': 8, 'CCONJ': 2})
Rels containing 2 tags Counter({'rel': 16630, 'root': 691})
Feats containing 49 tags Counter({'o': 13571, 'cvl_s': 385, 'num_b': 330, 'num_e': 330, 'per_s': 239, 'org_s': 172, 'num_i': 161, 'org_i': 151, 'dat_b': 130, 'dat_e': 129, 'loc_s': 121, 'org_e': 116, 'org_b': 114, 'dat_i': 106, 'trm_i': 104, 'cvl_b': 88, 'cvl_e': 88, 'trm_s': 80, 'trm_e': 79, 'trm_b': 78, 'per_b': 78, 'per_e': 77, 'num_s': 62, 'per_i': 61, 'cvl_i': 52, 'dat_s': 50, 'anm_s': 43, 'evt_b': 40, 'evt_e': 40, 'evt_s': 27, 'evt_i': 26, 'loc_b': 20, 'afw_s': 20, 'loc_e': 19, 'tim_b': 19, 'tim_e': 19, 'tim_i': 15, 'afw_i': 14, 'loc_i': 12, 'fld_s': 11, 'afw_b': 9, 'afw_e': 9, 'tim_s': 7, 'plt_s': 6, 'fld_b': 5, 'fld_e': 5, 'm

In [11]:
num_upos = len(train_dataset._upos)
num_feat = len(train_dataset._feats)
model = XLMRobertaEncoder(num_upos, num_feat)
model = nn.DataParallel(model)
model = model.cuda()

In [12]:
loss_fn = nn.CrossEntropyLoss(ignore_index=-1)
lr = 0.000005
optimizer = AdamW(model.parameters(), lr=lr)

In [13]:
def train_loop_fn(train_loader, model, optimizer, DEVICE, scheduler=None):
    model.train()
    
    p_total_pred = []
    p_total_targ = []
    p_total_loss = []
    
    f_total_pred = []
    f_total_targ = []
    f_total_loss = []
    
    for idx, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
        optimizer.zero_grad()
        
        p_logits, f_logits = model(batch['ids'].cuda(), batch['mask'].cuda())
        
        #UPOS
        b,s,l = p_logits.size()
        #print(p_logits.view(b*s,l), p_logits.view(b*s,l).size())
        #print(batch['upos_ids'].cuda().view(b*s), batch['upos_ids'].cuda().view(b*s).size())
        p_loss = loss_fn(p_logits.view(b*s,l), batch['upos_ids'].cuda().view(b*s))
        p_total_loss.append(p_loss.item())
        p_total_pred.extend(torch.argmax(p_logits.view(b*s,l), 1).cpu().tolist())
        p_total_targ.extend(batch['upos_ids'].cuda().view(b*s).cpu().tolist())
        
        #FEAT
        b,s,l = f_logits.size()
        f_loss = loss_fn(f_logits.view(b*s,l), batch['feat_ids'].cuda().view(b*s))
        f_total_loss.append(f_loss.item())
        f_total_pred.extend(torch.argmax(f_logits.view(b*s,l), 1).cpu().tolist())
        f_total_targ.extend(batch['feat_ids'].cuda().view(b*s).cpu().tolist())
        
        loss = p_loss+f_loss
        loss.backward()
        optimizer.step()
        
    count_active_tokens = np.count_nonzero(np.array(p_total_targ) > -1)
    count_correct = np.count_nonzero((np.array(p_total_pred)==np.array(p_total_targ)) == True)
    print("TRAINING POS ACCURACY:", count_correct/count_active_tokens)
    
    count_active_tokens = np.count_nonzero(np.array(f_total_targ) > -1)
    count_correct = np.count_nonzero((np.array(f_total_pred)==np.array(f_total_targ)) == True)
    f1_score(f_total_pred, f_total_targ, train_dataset._feats.get('o', 2))
    print("TRAINING FEAT ACCURACY:", count_correct/count_active_tokens)


In [14]:
def valid_loop_fn(dev_loader, model, DEVICE):
    model.eval()
    
    p_total_pred = []
    p_total_targ = []
    p_total_loss = []
    
    f_total_pred = []
    f_total_targ = []
    f_total_loss = []
    with torch.no_grad():
        for idx, batch in tqdm(enumerate(dev_loader), total=len(dev_loader)):

            p_logits, f_logits = model(batch['ids'].cuda(), batch['mask'].cuda())

            #UPOS
            b,s,l = p_logits.size()
            p_loss = loss_fn(p_logits.view(b*s,l), batch['upos_ids'].cuda().view(b*s))
            p_total_loss.append(p_loss.item())
            p_total_pred.extend(torch.argmax(p_logits.view(b*s,l), 1).cpu().tolist())
            p_total_targ.extend(batch['upos_ids'].cuda().view(b*s).cpu().tolist())

            #FEAT
            b,s,l = f_logits.size()
            f_loss = loss_fn(f_logits.view(b*s,l), batch['feat_ids'].cuda().view(b*s))
            f_total_loss.append(f_loss.item())
            f_total_pred.extend(torch.argmax(f_logits.view(b*s,l), 1).cpu().tolist())
            f_total_targ.extend(batch['feat_ids'].cuda().view(b*s).cpu().tolist())

            loss = p_loss+f_loss
        
    count_active_tokens = np.count_nonzero(np.array(p_total_targ) > -1)
    count_correct = np.count_nonzero((np.array(p_total_pred)==np.array(p_total_targ)) == True)
    print("VALIDATION POS ACCURACY:", count_correct/count_active_tokens)
    
    count_active_tokens = np.count_nonzero(np.array(f_total_targ) > -1)
    count_correct = np.count_nonzero((np.array(f_total_pred)==np.array(f_total_targ)) == True)
    f1_score(f_total_pred, f_total_targ, train_dataset._feats.get('o', 2))
    print("VALIDATION FEAT ACCURACY:", count_correct/count_active_tokens)


In [15]:
print(train_dataset._feats.get('o'))

0


In [16]:
for idx in range(EPOCHS):
    train_loop_fn(train_loader, model, optimizer, DEVICE)
    #valid_loop_fn(valid_loader, model, DEVICE)

100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.3078921540326771
count_correct_p 6
count_active_tokens_p 6104
precision: 0.000982961992136304
count_active_tokens_r 3750
count_correct_r 6
recall: 0.0016
F1: 0
TRAINING FEAT ACCURACY: 0.5213324865769875


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.5305120951446222
count_correct_p 0
count_active_tokens_p 8
precision: 0.0
count_active_tokens_r 3750
count_correct_r 0
recall: 0.0
F1: 0
TRAINING FEAT ACCURACY: 0.7832688643842735


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.6906645112868772
count_correct_p 0
count_active_tokens_p 9
precision: 0.0
count_active_tokens_r 3750
count_correct_r 0
recall: 0.0
F1: 0
TRAINING FEAT ACCURACY: 0.7834997979331447


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.8128860920270192
count_correct_p 204
count_active_tokens_p 410
precision: 0.4975609756097561
count_active_tokens_r 3750
count_correct_r 204
recall: 0.0544
F1: 0
TRAINING FEAT ACCURACY: 0.7950464753767104


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.853588130015588
count_correct_p 388
count_active_tokens_p 843
precision: 0.46026097271648875
count_active_tokens_r 3750
count_correct_r 388
recall: 0.10346666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.804918884590959


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.8780670861959471
count_correct_p 543
count_active_tokens_p 1348
precision: 0.40281899109792285
count_active_tokens_r 3750
count_correct_r 543
recall: 0.1448
F1: 0
TRAINING FEAT ACCURACY: 0.8130015588014549


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.8957335026846025
count_correct_p 690
count_active_tokens_p 1821
precision: 0.37891268533772654
count_active_tokens_r 3750
count_correct_r 690
recall: 0.184
F1: 0
TRAINING FEAT ACCURACY: 0.8194099647826338


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9093585820680099
count_correct_p 792
count_active_tokens_p 2127
precision: 0.3723554301833568
count_active_tokens_r 3750
count_correct_r 792
recall: 0.2112
F1: 0
TRAINING FEAT ACCURACY: 0.8244327694705849


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9186536574100802
count_correct_p 890
count_active_tokens_p 2290
precision: 0.388646288209607
count_active_tokens_r 3750
count_correct_r 890
recall: 0.23733333333333334
F1: 0
TRAINING FEAT ACCURACY: 0.8288782402863576


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9248311298423878
count_correct_p 967
count_active_tokens_p 2496
precision: 0.3874198717948718
count_active_tokens_r 3750
count_correct_r 967
recall: 0.2578666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.8333814444893481


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9307776687258241
count_correct_p 1050
count_active_tokens_p 2584
precision: 0.40634674922600617
count_active_tokens_r 3750
count_correct_r 1050
recall: 0.28
F1: 0
TRAINING FEAT ACCURACY: 0.8372495814329427


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.937878875353617
count_correct_p 1174
count_active_tokens_p 2679
precision: 0.43822321761851435
count_active_tokens_r 3750
count_correct_r 1174
recall: 0.31306666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.8451013220945673


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9420934126205185
count_correct_p 1226
count_active_tokens_p 2718
precision: 0.45106696100073584
count_active_tokens_r 3750
count_correct_r 1226
recall: 0.32693333333333335
F1: 0
TRAINING FEAT ACCURACY: 0.8476415911321518


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9461924831129842
count_correct_p 1315
count_active_tokens_p 2797
precision: 0.470146585627458
count_active_tokens_r 3750
count_correct_r 1315
recall: 0.3506666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.852260262109578


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.949945153282143
count_correct_p 1391
count_active_tokens_p 2856
precision: 0.48704481792717086
count_active_tokens_r 3750
count_correct_r 1391
recall: 0.37093333333333334
F1: 0
TRAINING FEAT ACCURACY: 0.8571098666358755


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9506956873159749
count_correct_p 1423
count_active_tokens_p 2922
precision: 0.4869952087611225
count_active_tokens_r 3750
count_correct_r 1423
recall: 0.3794666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.8579181340569251


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9540442237746088
count_correct_p 1516
count_active_tokens_p 3002
precision: 0.5049966688874084
count_active_tokens_r 3750
count_correct_r 1516
recall: 0.40426666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.8632296056809653


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9568731597482825
count_correct_p 1535
count_active_tokens_p 3026
precision: 0.507270323859881
count_active_tokens_r 3750
count_correct_r 1535
recall: 0.4093333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.8644997401997575


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9581432942670747
count_correct_p 1638
count_active_tokens_p 3115
precision: 0.5258426966292135
count_active_tokens_r 3750
count_correct_r 1638
recall: 0.4368
F1: 0
TRAINING FEAT ACCURACY: 0.8696380116621443


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9597598291091738
count_correct_p 1669
count_active_tokens_p 3130
precision: 0.5332268370607028
count_active_tokens_r 3750
count_correct_r 1669
recall: 0.44506666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.8726401477974712


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9645517002482535
count_correct_p 1743
count_active_tokens_p 3196
precision: 0.545369211514393
count_active_tokens_r 3750
count_correct_r 1743
recall: 0.4648
F1: 0
TRAINING FEAT ACCURACY: 0.8759886842561053


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9675538363835806
count_correct_p 1793
count_active_tokens_p 3244
precision: 0.5527127003699137
count_active_tokens_r 3750
count_correct_r 1793
recall: 0.47813333333333335
F1: 0
TRAINING FEAT ACCURACY: 0.8796258876508285


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9688239709023728
count_correct_p 1838
count_active_tokens_p 3256
precision: 0.5644963144963145
count_active_tokens_r 3750
count_correct_r 1838
recall: 0.4901333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.8814156226545812


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9700363720339472
count_correct_p 1883
count_active_tokens_p 3289
precision: 0.5725144420796595
count_active_tokens_r 3750
count_correct_r 1883
recall: 0.5021333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.8852837595981756


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9715951734888286
count_correct_p 1930
count_active_tokens_p 3309
precision: 0.5832577818071925
count_active_tokens_r 3750
count_correct_r 1930
recall: 0.5146666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.8880549621846314


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9725766410715316
count_correct_p 2019
count_active_tokens_p 3351
precision: 0.6025067144136079
count_active_tokens_r 3750
count_correct_r 2019
recall: 0.5384
F1: 0
TRAINING FEAT ACCURACY: 0.8928468333237111


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.971479706714393
count_correct_p 2044
count_active_tokens_p 3360
precision: 0.6083333333333333
count_active_tokens_r 3750
count_correct_r 2044
recall: 0.5450666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.8945211015530281


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9732117083309277
count_correct_p 2090
count_active_tokens_p 3388
precision: 0.6168831168831169
count_active_tokens_r 3750
count_correct_r 2090
recall: 0.5573333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.8977541712372265


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.973789042203106
count_correct_p 2159
count_active_tokens_p 3411
precision: 0.6329522134271475
count_active_tokens_r 3750
count_correct_r 2159
recall: 0.5757333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9019109751169101


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.973789042203106
count_correct_p 2203
count_active_tokens_p 3430
precision: 0.6422740524781341
count_active_tokens_r 3750
count_correct_r 2203
recall: 0.5874666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.903931643669534


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9747127763985913
count_correct_p 2261
count_active_tokens_p 3442
precision: 0.6568855316676351
count_active_tokens_r 3750
count_correct_r 2261
recall: 0.6029333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9077420472259108


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.974828243173027
count_correct_p 2340
count_active_tokens_p 3474
precision: 0.6735751295336787
count_active_tokens_r 3750
count_correct_r 2340
recall: 0.624
F1: 0
TRAINING FEAT ACCURACY: 0.912707118526644


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9781190462444431
count_correct_p 2375
count_active_tokens_p 3489
precision: 0.6807108053883635
count_active_tokens_r 3750
count_correct_r 2375
recall: 0.6333333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9143813867559609


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9773107788233936
count_correct_p 2430
count_active_tokens_p 3509
precision: 0.6925049871758335
count_active_tokens_r 3750
count_correct_r 2430
recall: 0.648
F1: 0
TRAINING FEAT ACCURACY: 0.9180185901506841


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9783499797933145
count_correct_p 2489
count_active_tokens_p 3514
precision: 0.7083096186681844
count_active_tokens_r 3750
count_correct_r 2489
recall: 0.6637333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9216557935454073


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9780035794700075
count_correct_p 2549
count_active_tokens_p 3515
precision: 0.7251778093883358
count_active_tokens_r 3750
count_correct_r 2549
recall: 0.6797333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9262167311356158


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9795623809248889
count_correct_p 2610
count_active_tokens_p 3551
precision: 0.7350042241622078
count_active_tokens_r 3750
count_correct_r 2610
recall: 0.696
F1: 0
TRAINING FEAT ACCURACY: 0.9292188672709428


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9784654465677501
count_correct_p 2676
count_active_tokens_p 3567
precision: 0.7502102607232969
count_active_tokens_r 3750
count_correct_r 2676
recall: 0.7136
F1: 0
TRAINING FEAT ACCURACY: 0.9327983372784481


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9812943825414238
count_correct_p 2686
count_active_tokens_p 3556
precision: 0.7553430821147357
count_active_tokens_r 3750
count_correct_r 2686
recall: 0.7162666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9337798048611512


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9809479822181167
count_correct_p 2770
count_active_tokens_p 3593
precision: 0.7709435012524353
count_active_tokens_r 3750
count_correct_r 2770
recall: 0.7386666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9383984758385775


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9808902488308989
count_correct_p 2808
count_active_tokens_p 3589
precision: 0.7823906380607412
count_active_tokens_r 3750
count_correct_r 2808
recall: 0.7488
F1: 0
TRAINING FEAT ACCURACY: 0.9412851451994688


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.980197448184285
count_correct_p 2868
count_active_tokens_p 3598
precision: 0.7971095052807116
count_active_tokens_r 3750
count_correct_r 2868
recall: 0.7648
F1: 0
TRAINING FEAT ACCURACY: 0.9445182148836673


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9815830494775128
count_correct_p 2894
count_active_tokens_p 3635
precision: 0.7961485557083906
count_active_tokens_r 3750
count_correct_r 2894
recall: 0.7717333333333334
F1: 0
TRAINING FEAT ACCURACY: 0.9451532821430634


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9827954506090872
count_correct_p 2887
count_active_tokens_p 3592
precision: 0.8037305122494433
count_active_tokens_r 3750
count_correct_r 2887
recall: 0.7698666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9459615495641129


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9815830494775128
count_correct_p 2960
count_active_tokens_p 3608
precision: 0.8203991130820399
count_active_tokens_r 3750
count_correct_r 2960
recall: 0.7893333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9503492869926679


100%|██████████| 58/58 [00:23<00:00,  2.47it/s]


TRAINING POS ACCURACY: 0.9823913168985624
count_correct_p 2988
count_active_tokens_p 3642
precision: 0.8204283360790774
count_active_tokens_r 3750
count_correct_r 2988
recall: 0.7968
F1: 0
TRAINING FEAT ACCURACY: 0.9521390219964205


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9816985162519485
count_correct_p 3040
count_active_tokens_p 3636
precision: 0.8360836083608361
count_active_tokens_r 3750
count_correct_r 3040
recall: 0.8106666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9556607586167081


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9829686507707407
count_correct_p 3064
count_active_tokens_p 3650
precision: 0.8394520547945206
count_active_tokens_r 3750
count_correct_r 3064
recall: 0.8170666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9568154263610646


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9838346515790082
count_correct_p 3089
count_active_tokens_p 3660
precision: 0.8439890710382514
count_active_tokens_r 3750
count_correct_r 3089
recall: 0.8237333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.958027827492639


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9840078517406616
count_correct_p 3099
count_active_tokens_p 3668
precision: 0.8448745910577972
count_active_tokens_r 3750
count_correct_r 3099
recall: 0.8264
F1: 0
TRAINING FEAT ACCURACY: 0.9589515616881242


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.984873852548929
count_correct_p 3141
count_active_tokens_p 3660
precision: 0.8581967213114754
count_active_tokens_r 3750
count_correct_r 3141
recall: 0.8376
F1: 0
TRAINING FEAT ACCURACY: 0.9617804976617979


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9848161191617112
count_correct_p 3163
count_active_tokens_p 3687
precision: 0.8578790344453485
count_active_tokens_r 3750
count_correct_r 3163
recall: 0.8434666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.962704231857283


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9854511864211073
count_correct_p 3186
count_active_tokens_p 3685
precision: 0.8645861601085482
count_active_tokens_r 3750
count_correct_r 3186
recall: 0.8496
F1: 0
TRAINING FEAT ACCURACY: 0.9642052999249466


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9857398533571965
count_correct_p 3223
count_active_tokens_p 3674
precision: 0.8772455089820359
count_active_tokens_r 3750
count_correct_r 3223
recall: 0.8594666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.967091969285838


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.987587321748167
count_correct_p 3238
count_active_tokens_p 3679
precision: 0.8801304702364773
count_active_tokens_r 3750
count_correct_r 3238
recall: 0.8634666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9679579700941054


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9877027885226026
count_correct_p 3237
count_active_tokens_p 3692
precision: 0.8767605633802817
count_active_tokens_r 3750
count_correct_r 3237
recall: 0.8632
F1: 0
TRAINING FEAT ACCURACY: 0.9678425033196698


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9883378557819987
count_correct_p 3271
count_active_tokens_p 3695
precision: 0.8852503382949932
count_active_tokens_r 3750
count_correct_r 3271
recall: 0.8722666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9695167715489867


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.986259453842157
count_correct_p 3286
count_active_tokens_p 3697
precision: 0.8888287800919664
count_active_tokens_r 3750
count_correct_r 3286
recall: 0.8762666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9706137059061255


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9892038565902661
count_correct_p 3320
count_active_tokens_p 3703
precision: 0.8965703483661895
count_active_tokens_r 3750
count_correct_r 3320
recall: 0.8853333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9731539749437099


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9882223890075631
count_correct_p 3322
count_active_tokens_p 3709
precision: 0.8956592073335131
count_active_tokens_r 3750
count_correct_r 3322
recall: 0.8858666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9734426418797991


100%|██████████| 58/58 [00:23<00:00,  2.47it/s]


TRAINING POS ACCURACY: 0.9879337220714739
count_correct_p 3321
count_active_tokens_p 3698
precision: 0.8980530016224987
count_active_tokens_r 3750
count_correct_r 3321
recall: 0.8856
F1: 0
TRAINING FEAT ACCURACY: 0.9732117083309277


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9885110559436522
count_correct_p 3334
count_active_tokens_p 3695
precision: 0.9023004059539919
count_active_tokens_r 3750
count_correct_r 3334
recall: 0.8890666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9738467755903238


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9886265227180878
count_correct_p 3370
count_active_tokens_p 3698
precision: 0.9113034072471606
count_active_tokens_r 3750
count_correct_r 3370
recall: 0.8986666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9759829109173835


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.989954390624098
count_correct_p 3377
count_active_tokens_p 3706
precision: 0.9112250404749056
count_active_tokens_r 3750
count_correct_r 3377
recall: 0.9005333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9766757115639975


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9908781248195832
count_correct_p 3410
count_active_tokens_p 3712
precision: 0.9186422413793104
count_active_tokens_r 3750
count_correct_r 3410
recall: 0.9093333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9784654465677501


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9912245251428902
count_correct_p 3403
count_active_tokens_p 3697
precision: 0.9204760616716257
count_active_tokens_r 3750
count_correct_r 3403
recall: 0.9074666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9784077131805323


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.990935858206801
count_correct_p 3427
count_active_tokens_p 3724
precision: 0.9202470461868958
count_active_tokens_r 3750
count_correct_r 3427
recall: 0.9138666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9793314473760175


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9907626580451475
count_correct_p 3452
count_active_tokens_p 3717
precision: 0.9287059456550982
count_active_tokens_r 3750
count_correct_r 3452
recall: 0.9205333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9816407828647307


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9907049246579297
count_correct_p 3453
count_active_tokens_p 3713
precision: 0.9299757608402909
count_active_tokens_r 3750
count_correct_r 3453
recall: 0.9208
F1: 0
TRAINING FEAT ACCURACY: 0.9816985162519485


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9910513249812366
count_correct_p 3465
count_active_tokens_p 3715
precision: 0.9327052489905787
count_active_tokens_r 3750
count_correct_r 3465
recall: 0.924
F1: 0
TRAINING FEAT ACCURACY: 0.9819871831880377


100%|██████████| 58/58 [00:23<00:00,  2.49it/s]


TRAINING POS ACCURACY: 0.9919173257895041
count_correct_p 3478
count_active_tokens_p 3710
precision: 0.937466307277628
count_active_tokens_r 3750
count_correct_r 3478
recall: 0.9274666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9830841175451763


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.991282258530108
count_correct_p 3470
count_active_tokens_p 3701
precision: 0.9375844366387462
count_active_tokens_r 3750
count_correct_r 3470
recall: 0.9253333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9827377172218694


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9923214595000288
count_correct_p 3490
count_active_tokens_p 3721
precision: 0.9379199140016125
count_active_tokens_r 3750
count_correct_r 3490
recall: 0.9306666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9839501183534438


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9923214595000288
count_correct_p 3499
count_active_tokens_p 3722
precision: 0.9400859752821064
count_active_tokens_r 3750
count_correct_r 3499
recall: 0.9330666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9842965186767507


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9920905259511575
count_correct_p 3533
count_active_tokens_p 3727
precision: 0.9479474107861551
count_active_tokens_r 3750
count_correct_r 3533
recall: 0.9421333333333334
F1: 0
TRAINING FEAT ACCURACY: 0.9864326540038104


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9939957277293459
count_correct_p 3532
count_active_tokens_p 3729
precision: 0.947170823277018
count_active_tokens_r 3750
count_correct_r 3532
recall: 0.9418666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.986548120778246


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9935338606316032
count_correct_p 3528
count_active_tokens_p 3726
precision: 0.9468599033816425
count_active_tokens_r 3750
count_correct_r 3528
recall: 0.9408
F1: 0
TRAINING FEAT ACCURACY: 0.9860862536805034


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9934761272443854
count_correct_p 3548
count_active_tokens_p 3728
precision: 0.9517167381974249
count_active_tokens_r 3750
count_correct_r 3548
recall: 0.9461333333333334
F1: 0
TRAINING FEAT ACCURACY: 0.9873563881992956


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9941111945037815
count_correct_p 3552
count_active_tokens_p 3730
precision: 0.9522788203753351
count_active_tokens_r 3750
count_correct_r 3552
recall: 0.9472
F1: 0
TRAINING FEAT ACCURACY: 0.9877605219098204


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9937070607932568
count_correct_p 3559
count_active_tokens_p 3732
precision: 0.9536441586280815
count_active_tokens_r 3750
count_correct_r 3559
recall: 0.9490666666666666
F1: 0
TRAINING FEAT ACCURACY: 0.9879337220714739


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9939957277293459
count_correct_p 3561
count_active_tokens_p 3721
precision: 0.9570008062348832
count_active_tokens_r 3750
count_correct_r 3561
recall: 0.9496
F1: 0
TRAINING FEAT ACCURACY: 0.9884533225564344


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9947462617631776
count_correct_p 3567
count_active_tokens_p 3729
precision: 0.9565567176186646
count_active_tokens_r 3750
count_correct_r 3567
recall: 0.9512
F1: 0
TRAINING FEAT ACCURACY: 0.9885110559436522


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9947462617631776
count_correct_p 3565
count_active_tokens_p 3728
precision: 0.9562768240343348
count_active_tokens_r 3750
count_correct_r 3565
recall: 0.9506666666666667
F1: 0
TRAINING FEAT ACCURACY: 0.9885110559436522


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9943998614398707
count_correct_p 3587
count_active_tokens_p 3736
precision: 0.9601177730192719
count_active_tokens_r 3750
count_correct_r 3587
recall: 0.9565333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9898389238496622


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9956122625714451
count_correct_p 3581
count_active_tokens_p 3732
precision: 0.9595391211146839
count_active_tokens_r 3750
count_correct_r 3581
recall: 0.9549333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9892615899774839


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9956699959586629
count_correct_p 3588
count_active_tokens_p 3731
precision: 0.9616724738675958
count_active_tokens_r 3750
count_correct_r 3588
recall: 0.9568
F1: 0
TRAINING FEAT ACCURACY: 0.9898966572368801


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9956122625714451
count_correct_p 3593
count_active_tokens_p 3729
precision: 0.9635290962724591
count_active_tokens_r 3750
count_correct_r 3593
recall: 0.9581333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.990243057560187


100%|██████████| 58/58 [00:23<00:00,  2.48it/s]


TRAINING POS ACCURACY: 0.9955545291842273
count_correct_p 3608
count_active_tokens_p 3732
precision: 0.9667738478027867
count_active_tokens_r 3750
count_correct_r 3608
recall: 0.9621333333333333
F1: 0
TRAINING FEAT ACCURACY: 0.9913399919173258


 10%|█         | 6/58 [00:02<00:24,  2.12it/s]


KeyboardInterrupt: 

In [17]:
valid_loop_fn(valid_loader, model, DEVICE)

100%|██████████| 2256/2256 [03:25<00:00, 10.95it/s]


VALIDATION POS ACCURACY: 0.9617551272870771
count_correct_p 69699
count_active_tokens_p 106159
precision: 0.6565529064893226
count_active_tokens_r 107280
count_correct_r 69699
recall: 0.649692393736018
F1: 0
VALIDATION FEAT ACCURACY: 0.8903799213864015
