In [78]:
import os
import numpy as np
from collections import Counter
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel

from generator import Generator
from corpus import ConllCorpusReaderX

import warnings
warnings.filterwarnings('ignore')

TRAINSET_PATH = "./factrueval_trainset.npz"
TESTSET_PATH = "./factrueval_testset.npz"

if os.path.exists(TRAINSET_PATH):
    os.remove(TRAINSET_PATH)
    print("Removed "+ TRAINSET_PATH)
    
if os.path.exists(TESTSET_PATH):
    os.remove(TESTSET_PATH)
    print("Removed "+ TESTSET_PATH)

Removed ./factrueval_trainset.npz
Removed ./factrueval_testset.npz


In [79]:
factrueval_devset = ConllCorpusReaderX('./factrueval2016_dataset/',
                                       fileids='devset.txt', 
                                       columntypes=['words', 'offset', 'len', 'ne'])

factrueval_testset = ConllCorpusReaderX('./factrueval2016_dataset/', 
                                        fileids='testset.txt', 
                                        columntypes=['words', 'offset', 'len', 'ne'])

In [80]:
gen = Generator(column_types=['WORD'], context_len=2)

Y_train = [el[1] for el in factrueval_devset.get_ne()]
Y_test = [el[1] for el in factrueval_testset.get_ne()] 

X_train = gen.fit_transform([[el] for el in factrueval_devset.words()], 
                            Y_train, 
                            path=TRAINSET_PATH)
X_test = gen.transform([[el] for el in factrueval_testset.words()], 
                       path=TESTSET_PATH)

# One classifier for all classes (with prefixes)

In [81]:
clf = LogisticRegression()

clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

In [82]:
# Избавляет данные от случаев O : O #
def clean(Y_pred, Y_test):
    Y_pred = np.array(Y_pred)
    Y_test = np.array(Y_test)

    Y_pred_i = np.array([Y_pred != 'O'])
    Y_test_i = np.array([Y_test != 'O'])

    indexes = (Y_pred_i | Y_test_i).reshape(Y_pred.shape)

    Y_pred_fixed = Y_pred[indexes]
    Y_test_fixed = Y_test[indexes]
    return Y_pred_fixed, Y_test_fixed

In [83]:
Y_pred_c, Y_test_c = clean(Y_pred, Y_test)

### Strict evaluation of results:

In [84]:
counter = Counter(Y_test_c)
labels = list(counter.keys())
labels.remove("O")
results = f1_score(Y_test_c, Y_pred_c, average=None, labels=labels)
for a, b in zip(labels, results):
    print('F1 for {} == {}, with {} entities'.format(a, b, counter[a]))

F1 for B-Person == 0.8549422336328627, with 694 entities
F1 for E-Person == 0.8731294729993494, with 692 entities
F1 for S-Person == 0.5106746370623398, with 697 entities
F1 for S-Location == 0.644524236983842, with 554 entities
F1 for B-Location == 0.25925925925925924, with 114 entities
F1 for I-Location == 0.023809523809523808, with 74 entities
F1 for S-Org == 0.4990157480314961, with 1300 entities
F1 for B-Org == 0.31221719457013575, with 646 entities
F1 for I-Org == 0.295786758383491, with 903 entities
F1 for E-Org == 0.3545568039950062, with 600 entities
F1 for S-LocOrg == 0.5797317436661699, with 666 entities
F1 for I-Person == 0.15, with 27 entities
F1 for E-Location == 0.20454545454545453, with 70 entities
F1 for B-LocOrg == 0.35616438356164387, with 49 entities
F1 for E-LocOrg == 0.0888888888888889, with 40 entities
F1 for I-LocOrg == 0.0, with 13 entities
F1 for B-Project == 0.0, with 16 entities
F1 for I-Project == 0.0, with 12 entities
F1 for S-Project == 0.0, with 11 entit

In [85]:
print(f1_score(Y_test_c, Y_pred_c, average="weighted", labels=list(counter.keys())))

0.494593238884


### Not strict evaluation of results:

In [86]:
def get_el(el):
    if el == "O":
        return el
    else:
        return el[2:]
    
Y_pred_c_light = [get_el(el) for el in Y_pred_c]
Y_test_c_light = [get_el(el) for el in Y_test_c]

In [87]:
light_counter = Counter(Y_test_c_light)
light_labels = list(light_counter.keys())
light_labels.remove("O")
print(light_counter)
light_results = f1_score(Y_test_c_light, Y_pred_c_light, average=None, labels=light_labels)
for a, b in zip(light_labels, light_results):
    print('F1 for {} == {}, with {} entities'.format(a, b, light_counter[a]))

Counter({'Org': 3449, 'Person': 2110, 'Location': 812, 'LocOrg': 768, 'O': 281, 'Project': 54, 'Facility': 2})
F1 for Person == 0.8049233627496515, with 2110 entities
F1 for Location == 0.5580110497237569, with 812 entities
F1 for Org == 0.46639344262295085, with 3449 entities
F1 for LocOrg == 0.5637221847606204, with 768 entities
F1 for Project == 0.0, with 54 entities
F1 for Facility == 0.0, with 2 entities


In [88]:
print(f1_score(Y_test_c_light, Y_pred_c_light, average="weighted", labels=light_labels))

0.582768990866


# One classifier for all classes (without prefixes)

In [89]:
def get_el(el):
    if el == "O":
        return el
    else:
        return el[2:]

Y_train = [get_el(el[1]) for el in factrueval_devset.get_ne()]
Y_test = [get_el(el[1]) for el in factrueval_testset.get_ne()] 

clf = LogisticRegression()

clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

In [90]:
Y_pred_c, Y_test_c = clean(Y_pred, Y_test)

### Not strict evaluation of results

In [91]:
light_counter = Counter(Y_test_c)
light_labels = list(light_counter.keys())
print(light_counter)
light_results = f1_score(Y_test_c, Y_pred_c, average=None, labels=light_labels)
for a, b in zip(light_labels, light_results):
    print('F1 for {} == {}, with {} entities'.format(a, b, light_counter[a]))

Counter({'Org': 3449, 'Person': 2110, 'Location': 812, 'LocOrg': 768, 'O': 347, 'Project': 54, 'Facility': 2})
F1 for Person == 0.8110307414104884, with 2110 entities
F1 for O == 0.0, with 347 entities
F1 for Location == 0.5777777777777777, with 812 entities
F1 for Org == 0.5122047244094489, with 3449 entities
F1 for LocOrg == 0.5821325648414986, with 768 entities
F1 for Project == 0.0, with 54 entities
F1 for Facility == 0.0, with 2 entities


In [92]:
print(f1_score(Y_test_c, Y_pred_c, average="weighted", labels=light_labels))

0.58261765105


# Different classifiers for different classes (without prefixes and with prefixes)

In [93]:
def run_diff_classes(template, prefixes=False):
    def get_only(el):
        if (el[2:] == template):
            return el[2:]
        else:
            return "O"
        
    Y_train = [get_only(el[1]) for el in factrueval_devset.get_ne()]
    Y_test = [get_only(el[1]) for el in factrueval_testset.get_ne()] 
    
    clf = LogisticRegression()

    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_test)

    Y_pred_c, Y_test_c = clean(Y_pred, Y_test)
    
    light_counter = Counter(Y_test_c)
    light_counter_2 = Counter(Y_pred_c)
    labels = list(light_counter.keys())
    labels.remove("O")
    print(labels)
    light_result = f1_score(Y_test_c, Y_pred_c, average="binary", pos_label=template)
    print('F1 for {} == {}, with {} entities'.format(template, light_result, light_counter[template]))
        
    return light_result, light_counter[template]

In [94]:
result1, weight1 = run_diff_classes("Person")

['Person']
F1 for Person == 0.7972642892037127, with 2110 entities


In [95]:
result2, weight2 = run_diff_classes("Org")

['Org']
F1 for Org == 0.42703862660944203, with 3449 entities


In [96]:
result3, weight3 = run_diff_classes("LocOrg")

['LocOrg']
F1 for LocOrg == 0.5375972342264478, with 768 entities


In [97]:
result4, weight4 = run_diff_classes("Location")

['Location']
F1 for Location == 0.540090771558245, with 812 entities


In [98]:
total_weight = weight1 + weight2 + weight3 + weight4
total_result = (result1 * weight1 + result2 * weight2 + result3 * weight3 + result4 * weight4) / total_weight

In [99]:
print(total_result)

0.561214771787
