In [1]:
# Enable IPython's autoreload extension so that edits to the project modules
# imported further down are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from os.path import realpath

# Resolve the sibling ``src`` directory to an absolute path and put it at the
# front of the module search path, so the project imports in later cells
# are looked up there first.
src_dir = realpath('../src')
sys.path.insert(0, src_dir)

## Load Datasets

In [3]:
from common.event_script import ScriptCorpus

2018-04-25 20:30:52,894 - INFO - 'pattern' package not found; tag filters are not available for English


In [4]:
# Read the OnShort script file and parse its text into a ScriptCorpus.
with open('../data/ontonotes/on_short_scripts.txt', 'r') as fin:
    on_short_scripts = ScriptCorpus.from_text(fin.read())
# A parenthesized single-argument print emits the same text under Python 2
# and also parses under Python 3 (the bare print statement does not).
print('Found {} scripts in OnShort dataset'.format(on_short_scripts.num_scripts))

Found 1027 scripts in OnShort dataset


In [5]:
# Read the OnLong script file and parse its text into a ScriptCorpus.
with open('../data/ontonotes/on_long_scripts.txt', 'r') as fin:
    on_long_scripts = ScriptCorpus.from_text(fin.read())
# A parenthesized single-argument print emits the same text under Python 2
# and also parses under Python 3 (the bare print statement does not).
print('Found {} scripts in OnLong dataset'.format(on_long_scripts.num_scripts))

Found 597 scripts in OnLong dataset


## Create MostFreq Evaluator

In [6]:
import os
from os.path import join

from evaluate import MostFreqEntityEvaluator
from utils import Word2VecModel

# Directory holding the pre-trained word2vec space. The default is the path
# used for the run recorded in this notebook; set the WORD2VEC_DIR environment
# variable to point at a local copy instead of editing the notebook.
word2vec_dir = os.environ.get(
    'WORD2VEC_DIR',
    '/home/pengxiang/workspace/corpora/enwiki-20160901/word2vec/spaces/sample_1e-4_min_500')

# Load the binary vectors together with their vocabulary file.
word2vec = Word2VecModel.load_model(
    join(word2vec_dir, 'min_500_dim300vecs.bin'),
    fvocab=join(word2vec_dir, 'min_500_dim300vecs.vocab'))

# Baseline evaluator; these options are echoed in the evaluator's log output
# when evaluate() is called.
most_freq_evaluator = MostFreqEntityEvaluator(
    use_lemma=True, include_type=True, include_all_pobj=False, ignore_first_mention=False, filter_stop_events=True)

# Attach the embedding model loaded above to the evaluator.
most_freq_evaluator.set_model(word2vec)

2018-04-25 20:35:42,714 - INFO - loading word counts from /home/pengxiang/workspace/corpora/enwiki-20160901/word2vec/spaces/sample_1e-4_min_500/min_500_dim300vecs.vocab
2018-04-25 20:35:42,919 - INFO - loading projection weights from /home/pengxiang/workspace/corpora/enwiki-20160901/word2vec/spaces/sample_1e-4_min_500/min_500_dim300vecs.bin
2018-04-25 20:35:43,408 - INFO - loaded (53345, 300) matrix from /home/pengxiang/workspace/corpora/enwiki-20160901/word2vec/spaces/sample_1e-4_min_500/min_500_dim300vecs.bin
2018-04-25 20:35:43,412 - INFO - precomputing L2-norms of word weight vectors
2018-04-25 20:35:43,657 - INFO - set embedding model: min_500_dim300vecs


## Create EventWord2vec Evaluator

In [7]:
from evaluate import Word2VecEvaluator

# Options are grouped the way the evaluator's own log output reports them.
word2vec_evaluator = Word2VecEvaluator(
    # embedding configs
    use_lemma=True,
    include_type=True,
    # general configs
    include_all_pobj=False,
    ignore_first_mention=False,
    filter_stop_events=True,
    # evaluator specific configs
    use_max_score=True,
)

# Reuse the word2vec model that was loaded for the MostFreq evaluator.
word2vec_evaluator.set_model(word2vec)

2018-04-25 20:37:40,623 - INFO - set embedding model: min_500_dim300vecs


## Evaluate Baselines on OnShort

In [8]:
# Run the most-frequent-entity baseline over every script in the OnShort corpus;
# the evaluator logs its configuration and prints the accuracy tables.
most_freq_evaluator.evaluate(on_short_scripts.scripts)

2018-04-25 20:38:21,004 - INFO - evaluation based on most_freq_entity, with embedding model min_500_dim300vecs
2018-04-25 20:38:21,006 - INFO - embedding configs: use_lemma = True, include_type = True
2018-04-25 20:38:21,007 - INFO - general configs: include_all_pobj = False, ignore_first_mention = False, filter_stop_events = True
Processed: 100%|███████████████████████████████████████████████| 1027/1027 [00:01<00:00, 901.59it/s]


+-------------------------------------------------------------------------------+
|                    # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|       All           13018         2963           22.76             12.06      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|    Arg Type        # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      SUBJ            7759         2097           27.03             12.08      |
|       OBJ            3110         586            18.84             12.23      |
|      POBJ            2149         280            13.03             11.74      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|       POS          # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      Noun    




In [10]:
# Run the word2vec baseline over every script in the OnShort corpus;
# the evaluator logs its configuration and prints the accuracy tables.
word2vec_evaluator.evaluate(on_short_scripts.scripts)

2018-04-25 20:40:00,102 - INFO - evaluation based on word2vec, with embedding model min_500_dim300vecs
2018-04-25 20:40:00,103 - INFO - embedding configs: use_lemma = True, include_type = True
2018-04-25 20:40:00,104 - INFO - general configs: include_all_pobj = False, ignore_first_mention = False, filter_stop_events = True
2018-04-25 20:40:00,106 - INFO - evaluator specific configs: use_max_score = True
Processed: 100%|████████████████████████████████████████████████| 1027/1027 [00:49<00:00, 20.95it/s]


+-------------------------------------------------------------------------------+
|                    # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|       All           13018         4991           38.34             12.06      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|    Arg Type        # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      SUBJ            7759         3043           39.22             12.08      |
|       OBJ            3110         1250           40.19             12.23      |
|      POBJ            2149         698            32.48             11.74      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|       POS          # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      Noun    




## Evaluate Baselines on OnLong

In [11]:
# Run the most-frequent-entity baseline over every script in the OnLong corpus;
# the evaluator logs its configuration and prints the accuracy tables.
most_freq_evaluator.evaluate(on_long_scripts.scripts)

2018-04-25 20:52:50,364 - INFO - evaluation based on most_freq_entity, with embedding model min_500_dim300vecs
2018-04-25 20:52:50,366 - INFO - embedding configs: use_lemma = True, include_type = True
2018-04-25 20:52:50,367 - INFO - general configs: include_all_pobj = False, ignore_first_mention = False, filter_stop_events = True
Processed: 100%|█████████████████████████████████████████████████| 597/597 [00:02<00:00, 203.96it/s]


+-------------------------------------------------------------------------------+
|                    # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|       All           18208         3138           17.23             36.95      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|    Arg Type        # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      SUBJ           11663         2527           21.67             35.73      |
|       OBJ            4349         452            10.39             38.69      |
|      POBJ            2196         159            7.24              40.00      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|       POS          # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      Noun    




In [12]:
# Run the word2vec baseline over every script in the OnLong corpus.
# NOTE: this is the slow cell -- the recorded run's progress bar shows 09:14
# for the 597 scripts.
word2vec_evaluator.evaluate(on_long_scripts.scripts)

2018-04-25 20:53:02,088 - INFO - evaluation based on word2vec, with embedding model min_500_dim300vecs
2018-04-25 20:53:02,089 - INFO - embedding configs: use_lemma = True, include_type = True
2018-04-25 20:53:02,091 - INFO - general configs: include_all_pobj = False, ignore_first_mention = False, filter_stop_events = True
2018-04-25 20:53:02,092 - INFO - evaluator specific configs: use_max_score = True
Processed: 100%|██████████████████████████████████████████████████| 597/597 [09:14<00:00,  1.08it/s]


+-------------------------------------------------------------------------------+
|                    # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|       All           18208         3927           21.57             36.95      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|    Arg Type        # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      SUBJ           11663         2679           22.97             35.73      |
|       OBJ            4349         878            20.19             38.69      |
|      POBJ            2196         370            16.85             40.00      |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
|       POS          # Cases     # Correct     Accuracy (%)      Avg # Choices  |
|      Noun    


