In [2]:
import numpy as np
import scipy.sparse
import pickle
import xgboost as xgb
import csv
from collections import defaultdict
import redis
import json
import time
import sklearn.utils
import psycopg2
from psycopg2.sql import Identifier, SQL
from datetime import datetime
import os

In [3]:
# NOTE(review): the Redis and PostgreSQL passwords are hard-coded below and will leak
# with every copy of this notebook; consider loading them from environment variables
# (os.environ) or a secrets store instead.
# Redis client; decode_responses=True makes replies come back as str rather than bytes.
conn = redis.Redis(password='3oSYTdtZjsuSigRWLcG6VJt9gm4IMvYjQiqsSuGcAc-U4gMNpWGERAevXi9_SHNrn19piz7bBJG0iTLgx7DvknLHTECcHYrqmWb2rsuCWs89svKmhKDD_aMYaXq8IhSeg_89ooPZb0AqLRyR1-fa1zVjrh2UuV0sWFGSk5SjtW0', 
                   host='pangu', port=6379, decode_responses=True)
# PostgreSQL connection to the 'aida' database (used by the wikipedia link and
# mention-dictionary queries in this notebook).
conn_psql_kongzi2 = psycopg2.connect(database='aida', user='zding', password='dingzishuo', host='localhost',
                                     port=5432)
# PostgreSQL connection to the 'zding' database (used by get_total_mentions and fetch_q_ids_docs).
conn_psql = psycopg2.connect(database='zding', user='zding', password='dingzishuo', host='localhost',
                                     port=5432)

In [4]:
def get_feature_results(exp_id, d_t, data_source='aida_conll'):
    """Read all stored feature records of one experiment / data split from Redis.

    The Redis list key is 'result', exp_id, data_source and d_t joined by ':::::'.
    Every list entry is decoded with json.loads and the decoded records are
    returned in list order.
    """
    redis_key = 'result:::::' + str(exp_id) + ':::::' + data_source + ':::::' + str(d_t)
    decoded_records = []
    for raw_record in conn.lrange(redis_key, 0, -1):
        decoded_records.append(json.loads(raw_record))
    return decoded_records

def get_dataset_info(exp_id, d_t, data_source='aida_conll', can_size=50):
    """Return the sizes of four bookkeeping Redis sets for one experiment / split.

    Returns a tuple ordered as
    (valid_qry_ids, no_candidate, no_g_candidate_info, missed_g_candidate);
    only the missed_g_candidate key additionally carries can_size.
    """
    # common tail shared by all four keys
    key_tail = str(exp_id) + ':::::' + data_source + ':::::' + str(d_t)

    valid_mens_size = conn.scard('valid_qry_ids:::::' + key_tail)
    no_g_can_info_size = conn.scard('no_g_candidate_info:::::' + key_tail)
    missed_key = 'missed_g_candidate:::::' + key_tail + ':::::' + str(can_size)
    missed_g_candidate_size = conn.scard(missed_key)
    no_candidate_size = conn.scard('no_candidate:::::' + key_tail)

    return valid_mens_size, no_candidate_size, no_g_can_info_size, missed_g_candidate_size

def fetch_all_features(exp_id, data_type, data_source='aida_conll'):
    """Fetch the feature records of a split and return them as a float64 matrix.

    The first field of each record is a string; only the text before the first
    ', ' (with leading '(' stripped) is kept, so it must parse as a number.
    All remaining fields are copied unchanged.
    """
    rows = []
    for record in get_feature_results(exp_id, data_type, data_source):
        # presumably record[0] looks like "(<query id>, <entity>)" -- keep the id part only
        query_id_text = record[0].strip('(').split(', ')[0]
        rows.append([query_id_text] + record[1:-1] + [record[-1]])
    return np.array(rows, dtype=np.float64)

def fetch_all_features_delete_max_prior(exp_id, data_type, data_source='aida_conll'):
    """Like fetch_all_features, but backfill field 2 from field 1 when field 2 is 0.

    Returns a float64 matrix whose first column is the numeric id parsed from
    the record's leading string field.
    """
    rows = []
    for record in get_feature_results(exp_id, data_type, data_source):
        query_id_text = record[0].strip('(').split(', ')[0]
        # a zero in position 2 is replaced by the value in position 1
        if record[2] != 0:
            second_value = record[2]
        else:
            second_value = record[1]
        rows.append([query_id_text] + record[1:2] + [second_value] + record[3:])
    return np.array(rows, dtype=np.float64)

def trans_data(data):
    """Turn a feature matrix into a grouped xgboost DMatrix.

    Column 0 is dropped, the last column is used as the label and the columns in
    between are the features. Every row whose label equals 1 is treated as the
    first row of a new ranking group; the group sizes are the distances between
    consecutive such rows (the last group runs to the end of the data).
    """
    labels = data[:, -1]
    features = data[:, 1:-1]

    group_starts = np.where(labels == 1)[0]
    # start of the following group (or end of data) for every group
    group_ends = np.append(np.delete(group_starts, 0), len(labels))
    group_sizes = group_ends - group_starts

    dmatrix = xgb.DMatrix(data=features, label=labels)
    dmatrix.set_group(group_sizes)
    return dmatrix

def combine_features(original_feas, new_features):
    """Append the columns of new_features to original_feas, group by group.

    Both inputs are 2-D arrays whose column 0 is the mention id. For every run of
    consecutive rows in original_feas that share a mention id, the rows of
    new_features with the same id are looked up and their columns (without the id
    column) are appended to the original rows (without their last column).

    The previous-id sentinel is None rather than 0, so a first group whose
    mention id is 0 is no longer skipped.

    Raises whatever numpy raises when a mention id has no rows in new_features or
    when original_feas is empty (nothing to concatenate).
    """
    men_id_feas_dict = defaultdict(list)
    print("Building idx for new features...")
    for fea in new_features:
        men_id_feas_dict[fea[0]].append(fea)

    comb_feas = []
    pre_men_id = None  # None = "no group seen yet"; 0 would collide with a real id
    print("Combine original and new features...")
    for fea_idx, fea in enumerate(original_feas):
        men_id = fea[0]
        if pre_men_id is not None and pre_men_id == men_id:
            # still inside the group that was already merged at its first row
            continue
        pre_men_id = men_id
        new_rows = men_id_feas_dict[men_id]
        fea_size = len(new_rows)
        # take as many original rows as there are new rows for this mention
        res = np.append(original_feas[fea_idx: fea_idx + fea_size, :-1],
                        np.array(new_rows)[:, 1:], axis=1)
        comb_feas.append(res)
    return np.concatenate(comb_feas, axis=0)

def evalerror(preds, dt, d_tal_size):
    """Score ranking predictions per group and return (correct, precision, recall, f1).

    Groups are derived from the labels of dt: every row labelled 1 starts a new
    group, so the first row of each group is the gold candidate. A group counts
    as correct only when the gold row holds the strictly highest score (ties with
    the maximum do not count).

    precision = correct / number of groups, recall = correct / d_tal_size.
    f1 is 0.0 when precision + recall is 0 (previously a ZeroDivisionError).
    """
    d_l = dt.get_label()
    idxs = np.where(d_l == 1)[0]
    d_groups = np.append(np.delete(idxs, 0), len(d_l)) - idxs

    matched = 0
    q_id = 0  # offset of the current group's first (gold) row
    for group_size in d_groups:
        pre_res = preds[q_id: q_id + group_size]
        gold_score = preds[q_id]
        if gold_score == max(pre_res) and sum(1 for s in pre_res if s == gold_score) == 1:
            matched += 1
        q_id += group_size

    precision = float(matched) / len(d_groups)
    recall = float(matched) / d_tal_size
    denom = precision + recall
    f1 = 2 * precision * recall / denom if denom else 0.0
    return matched, precision, recall, f1

from collections import defaultdict
def evalerror_detail_log(preds, dt, d_tal_size):
    """Like evalerror, but also return the per-group score slices.

    Returns (correct, precision, recall, f1, correct_results, wrong_results,
    duplicates_results). The three dicts map the offset of a group's first (gold)
    row to that group's scores:
      * correct_results    -- gold score equals the group maximum (ties included)
      * duplicates_results -- subset of the above where the maximum is tied
      * wrong_results      -- gold score is below the group maximum
    Only untied groups count towards `correct`.

    f1 is 0.0 when precision + recall is 0 (previously a ZeroDivisionError).
    """
    d_l = dt.get_label()
    idxs = np.where(d_l == 1)[0]
    d_groups = np.append(np.delete(idxs, 0), len(d_l)) - idxs

    correct_results = {}
    wrong_results = {}
    duplicates_results = {}
    matched_ids = []
    q_id = 0  # offset of the current group's first (gold) row
    for group_size in d_groups:
        pre_res = preds[q_id: group_size + q_id]
        gold_score = preds[q_id]
        if gold_score == max(pre_res):
            correct_results[q_id] = pre_res
            if sum(1 for s in pre_res if s == gold_score) == 1:
                matched_ids.append(q_id)
            else:
                duplicates_results[q_id] = pre_res
        else:
            wrong_results[q_id] = pre_res
        q_id += group_size

    precision = float(len(matched_ids)) / len(d_groups)
    recall = float(len(matched_ids)) / d_tal_size
    denom = precision + recall
    f1 = 2 * precision * recall / denom if denom else 0.0
    return len(matched_ids), precision, recall, f1, correct_results, wrong_results, duplicates_results

from collections import defaultdict
from ast import literal_eval
def get_groups_results(preds, dt, res_features, top_k=None):
    """Split the top prediction of every group into correct / wrong lists.

    Groups come from the labels of dt (each row labelled 1 starts a group, so the
    first row of a group is the gold candidate). For each group the first field
    of the best-scoring row in res_features is parsed with literal_eval into
    (query id, entity) and stored as [query id, entity] in the correct list when
    the gold row reaches the group maximum, otherwise in the wrong list.

    When top_k is given, the absolute row indices of the top_k highest scores of
    every group are collected as well; otherwise the third return value is None.

    Returns (correct_res_groups, wrong_res_groups, top_k_indices).
    """
    labels = dt.get_label()
    starts = np.where(labels == 1)[0]
    sizes = np.append(np.delete(starts, 0), len(labels)) - starts

    want_top_k = top_k is not None
    correct_res_groups, wrong_res_groups = [], []
    top_k_indices = [] if want_top_k else None

    offset = 0  # absolute index of the current group's first row
    for size in sizes:
        scores = preds[offset: size + offset]
        best_row = res_features[offset + np.argmax(scores)]
        pred_q_id, pred_ent = literal_eval(best_row[0])

        if preds[offset] == max(scores):
            bucket = correct_res_groups
        else:
            bucket = wrong_res_groups
        bucket.append([pred_q_id, pred_ent])

        if want_top_k:
            # stable descending sort keeps earlier rows first among equal scores
            ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
            for pos, _score in ranked[:top_k]:
                top_k_indices.append(pos + offset)

        offset += size

    return correct_res_groups, wrong_res_groups, top_k_indices

def fetch_inlinks_by_ent(ent):
    """Return the rows (one-element tuples of `_id`) of wikipedia_links_2014 whose target is ent.

    The cursor is used as a context manager so it is closed even when
    execute/fetchall raises.
    """
    sql = "SELECT _id FROM wikipedia_links_2014 WHERE target=%s;"
    with conn_psql_kongzi2.cursor() as cur:
        cur.execute(sql, (ent,))
        return cur.fetchall()

# fetch an entity's outlinks with duplicates
def fetch_outlinks_by_ent(ent):
    """Return the rows (one-element tuples of `target`) of wikipedia_links_2014 whose _id is ent.

    No DISTINCT is applied, so repeated links appear repeatedly. The cursor is
    used as a context manager so it is closed even when execute/fetchall raises.
    """
    sql = "SELECT target FROM wikipedia_links_2014 WHERE _id=%s;"
    with conn_psql_kongzi2.cursor() as cur:
        cur.execute(sql, (ent,))
        return cur.fetchall()

def fetch_entity_by_mention_emnlp17(mention):
    """Return (entity, prior) rows for a mention from men_ent_dict_emnlp2017, highest prior first.

    The mention is passed as a bound query parameter instead of being spliced
    into the SQL text: the previous hand-rolled quote escaping did not handle
    backslashes and was open to SQL injection. The cursor is closed even when
    the query raises.
    """
    sql = ("SELECT entity, prior FROM men_ent_dict_emnlp2017 "
           "WHERE men_ent_dict_emnlp2017.mention = %s ORDER BY prior DESC;")
    with conn_psql_kongzi2.cursor() as cur:
        cur.execute(sql, (mention,))
        return cur.fetchall()

def fetch_inlinks_redis(ent, link_type='inlinks'):
    """Return the JSON-decoded value stored for ent in the Redis hash link_type, or [] if none."""
    cached = conn.hmget(link_type, ent)[0]
    if not cached:
        return []
    return json.loads(cached)

def has_inlinks_redis(ent, link_type='inlinks'):
    """Tell whether the Redis hash link_type has a field for ent."""
    return conn.hexists(link_type, ent)

def save_inlinks_redis(ent, inlinks, link_type='inlinks'):
    """Store inlinks, JSON-encoded, under field ent of the Redis hash link_type."""
    encoded = json.dumps(inlinks)
    conn.hset(link_type, ent, encoded)
    
def fetch_outlinks_redis(ent, link_type='outlinks'):
    """Return the JSON-decoded value stored for ent in the Redis hash link_type, or [] if none."""
    cached = conn.hmget(link_type, ent)[0]
    if not cached:
        return []
    return json.loads(cached)

def has_outlinks_redis(ent, link_type='outlinks'):
    """Tell whether the Redis hash link_type has a field for ent."""
    return conn.hexists(link_type, ent)

def save_outlinks_redis(ent, outlinks, link_type='outlinks'):
    """Store outlinks, JSON-encoded, under field ent of the Redis hash link_type."""
    encoded = json.dumps(outlinks)
    conn.hset(link_type, ent, encoded)
    
def _inlinks_with_cache(ent, wiki_pre_str):
    """Return ent's inlinks from the Redis cache, filling it from PostgreSQL on a miss."""
    inlinks = fetch_inlinks_redis(ent, link_type='inlinks')
    # an empty list that IS cached means "no inlinks" and must not trigger a DB query
    if inlinks or has_inlinks_redis(ent):
        return inlinks
    print("PostgreSQL: fetching inlinks for entity {}...".format(ent))
    rows = fetch_inlinks_by_ent(wiki_pre_str + ent)
    inlinks = [row[0].replace(wiki_pre_str, '') for row in rows]
    print("Redis: caching inlinks for entity {}...".format(ent))
    save_inlinks_redis(ent, inlinks)
    return inlinks


def check_links_between_ents(ent_1, ent_2, bidirection=False):
    """Tell whether two entities are connected through their inlink lists.

    With bidirection=False (default) it is enough that one entity appears in the
    other's inlinks; with bidirection=True each must appear in the other's
    inlinks. Inlinks are read from Redis and, on a cache miss, fetched from
    PostgreSQL (with the wiki URL prefix added for the query and stripped from
    the results) and cached.

    The per-entity cache-miss logic used to be written out twice; it now lives
    in the private helper _inlinks_with_cache.
    """
    wiki_pre_str = 'en.wikipedia.org/wiki/'
    inlinks_ent_1 = _inlinks_with_cache(ent_1, wiki_pre_str)
    inlinks_ent_2 = _inlinks_with_cache(ent_2, wiki_pre_str)

    ent_1_links_to_2 = ent_1 in inlinks_ent_2
    ent_2_links_to_1 = ent_2 in inlinks_ent_1
    if bidirection:
        return ent_1_links_to_2 and ent_2_links_to_1
    return ent_1_links_to_2 or ent_2_links_to_1

def get_links_by_ent(ent, link_type='inlinks'):
    """Return the inlinks or outlinks of ent, using Redis as a cache in front of PostgreSQL.

    On a cache miss the links are queried with the wiki URL prefix added to ent,
    the prefix is stripped from every result, and the list is cached. For any
    link_type other than 'inlinks' / 'outlinks' the function returns None.
    """
    wiki_pre_str = 'en.wikipedia.org/wiki/'

    if link_type == 'inlinks':
        links = fetch_inlinks_redis(ent, link_type='inlinks')
        # a cached (possibly empty) entry short-circuits the database lookup
        if links or has_inlinks_redis(ent):
            return links
        db_rows = fetch_inlinks_by_ent(wiki_pre_str + ent)
        links = [row[0].replace(wiki_pre_str, '') for row in db_rows]
        print("Redis: caching inlinks for entity {}...".format(ent))
        save_inlinks_redis(ent, links)
        return links

    if link_type == 'outlinks':
        links = fetch_outlinks_redis(ent)
        if links or has_outlinks_redis(ent):
            return links
        db_rows = fetch_outlinks_by_ent(wiki_pre_str + ent)
        links = [row[0].replace(wiki_pre_str, '') for row in db_rows]
        print("Redis: caching outlinks for entity {}...".format(ent))
        save_outlinks_redis(ent, links)
        return links

    return None
    
def fetch_ents_by_doc_redis(doc_id):
    """Return the JSON-decoded entry of doc_id in the Redis hash 'doc-predicted-ents-coref-new', or []."""
    stored = conn.hmget('doc-predicted-ents-coref-new', doc_id)[0]
    if not stored:
        return []
    return json.loads(stored)

## Normalized Google Distance
import math
def ngd_similarity(ents_s, ents_t, index_size = 6274625):
    """Similarity of two link collections as 1 minus their Normalized Google Distance.

    Duplicates are ignored (both inputs are turned into sets). With max/min being
    the larger/smaller set size and com the size of the intersection, the result is
        1 - (log(max) - log(com)) / (log(index_size) - log(min))
    and 0 when either set or the intersection is empty.
    """
    source, target = set(ents_s), set(ents_t)
    min_links, max_links = sorted((len(source), len(target)))
    com_links = len(source & target)

    if not (min_links and max_links and com_links):
        return 0

    distance = (math.log(max_links) - math.log(com_links)) / (math.log(index_size) - math.log(min_links))
    return 1 - distance
    
# PMI
def pmi_similarity(ents_s, ents_t, index_size = 6274625, normalize=False):
    """Ratio p(s, t) / (p(s) * p(t)) of two link collections (no logarithm is taken).

    Probabilities are set sizes divided by index_size; duplicates are ignored.
    With normalize=True the ratio is additionally divided by min(1/p(s), 1/p(t)).
    Returns 0 when either set or the intersection is empty. The three
    probabilities are printed on every call.
    """
    source, target = set(ents_s), set(ents_t)
    p_s = len(source) / index_size
    p_t = len(target) / index_size
    p_c = len(source & target) / index_size
    print(p_s, p_t, p_c)

    if not (p_s and p_t and p_c):
        return 0

    ratio = p_c / (p_s * p_t)
    if normalize:
        return ratio / min(1/p_s, 1/p_t)
    return ratio

In [20]:
# Directory (relative to the working directory) where pickled models are stored.
model_dir_path = './new_models_14_Aug'
def save_model(model, name):
    """Pickle model to '<model_dir_path>/<name>.mdl', creating the directory if needed.

    os.makedirs(..., exist_ok=True) replaces the separate exists-check, which
    could race with another process creating the directory in between.
    """
    os.makedirs(model_dir_path, exist_ok=True)
    model_path = os.path.join(model_dir_path, '%s.mdl' % name)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
        
def load_model(name):
    """Unpickle and return the model stored at '<model_dir_path>/<name>.mdl'."""
    file_name = '%s.mdl' % name
    # NOTE: pickle.load can execute arbitrary code from the file -- only load trusted model files.
    with open(os.path.join(model_dir_path, file_name), 'rb') as model_file:
        loaded = pickle.load(model_file)
    return loaded
    
def get_total_mentions(data_source, data_type) -> int:
    """Count the rows of table data_source with the given type whose annotation is neither 'NIL' nor 'none'."""
    with conn_psql.cursor() as cur:
        # the table name is quoted as an identifier; data_type is a bound parameter
        count_query = SQL(
            "select count(*) from {} where annotation != 'NIL' and annotation !='none' and type=%s"
        ).format(Identifier(data_source))
        cur.execute(count_query, (data_type,))
        first_row = cur.fetchone()
        return first_row[0]
    
def process(process_name, test_set, test_total,
            n_estimators, max_depths, test_filter=None,
            eval_func=evalerror_detail_log):
    """Evaluate every saved '<n_estimators>_<max_depth>_<process_name>' model on test_set.

    test_set is optionally restricted to the rows selected by test_filter before
    being converted with trans_data. For each (n_estimators, max_depth) pair the
    model is loaded with load_model, scored with eval_func(preds, dmatrix,
    test_total), and precision / recall / f1 / correct count are printed.
    Nothing is returned.
    """
    eval_rows = test_set if test_filter is None else test_set[test_filter]
    dtest_xgboost = trans_data(eval_rows)

    for num_round in n_estimators:
        for dep in max_depths:
            model_name = '%d_%d_%s' % (num_round, dep, process_name)
            print(datetime.now(), 'Loading model: %s' % model_name)
            bst = load_model(model_name)

            print(datetime.now(), 'Start evaluation')
            preds = bst.predict(dtest_xgboost)
            metrics = eval_func(preds, dtest_xgboost, test_total)
            # eval_func result layout: (corr_num, precision, recall, f1, ...)
            summary = "n_estimators: {}, max_depth: {}, precision: {}, recall: {}, f1: {}, corr_num: {}".format(
                num_round, dep, metrics[1], metrics[2], metrics[3], metrics[0])
            print(summary)
            print(datetime.now(), 'Evaluation finished')
            

def process_model(model, test_set, test_total, test_filter=None, eval_func=evalerror_detail_log):
    """Evaluate an already loaded model on test_set and print its metrics.

    test_set is optionally restricted to the rows selected by test_filter, then
    converted with trans_data. eval_func(preds, dmatrix, test_total) must return
    (corr_num, precision, recall, f1, ...). Nothing is returned.
    """
    eval_rows = test_set if test_filter is None else test_set[test_filter]
    dtest_xgboost = trans_data(eval_rows)

    print(datetime.now(), 'Start evaluation')
    preds = model.predict(dtest_xgboost)
    metrics = eval_func(preds, dtest_xgboost, test_total)
    summary = "precision: {}, recall: {}, f1: {}, corr_num: {}".format(
        metrics[1], metrics[2], metrics[3], metrics[0])
    print(summary)
    print(datetime.now(), 'Evaluation finished')
    
    
def process_iterative_filtering(data_source):
    """Evaluate the chain of six saved models, filtering each stage by the previous stage's top-k.

    All inputs are looked up by name in the notebook's global namespace:
    'd_<data_source>_ctx_emnlp17_raw', 'docs_dict_<data_source>',
    'd_<data_source>_total' and one feature matrix per stage. For top_k in 3 and 5,
    stage i (1..5) is evaluated on the rows that stage i-1 ranked in its top_k.
    Raises KeyError when one of the expected globals is missing.
    """
    # dynamic variable lookup goes through the module globals
    global_vals = globals()

    ctx_raw = global_vals['d_%s_ctx_emnlp17_raw' % data_source]
    docs_dict = global_vals['docs_dict_%s' % data_source]
    total = global_vals['d_%s_total' % data_source]

    def _stage_names(stage):
        """Return (model name, feature variable name) for a stage index."""
        if stage == 0:  # local
            return '4900_6_ctx_coref', 'd_%s_ctx_coref_emnlp17' % data_source
        if stage == 1:  # initial global
            return '4900_6_ctx_coref_coh', 'd_%s_ctx_coref_coh_emnlp17' % data_source
        if stage == 2:  # first iterated global
            return '4900_6_ctx_coref_coh_global', 'd_%s_ctx_coref_coh_emnlp17_global' % data_source
        # next iterated global
        return ('4900_6_ctx_coref_coh_global%d' % (stage-1),
                'd_%s_ctx_coref_coh_emnlp17_global%d' % (data_source, (stage-1)))

    model_names, models, features = [], [], []
    for stage in range(0, 6):
        model_name, feature_name = _stage_names(stage)
        model_names.append(model_name)
        models.append(load_model(model_name))
        features.append(global_vals[feature_name])

    for top_k in [3, 5]:
        print('=== Top K=%d ===' % top_k)

        for stage in range(1, 6):
            previous = stage - 1
            print(datetime.now(), 'Processing model: %s' % model_names[stage])

            # rows kept = top-k predictions of the previous stage's model
            test_filter = get_top_k_prediction_indices(
                models[previous], features[previous], ctx_raw, docs_dict, top_k)
            print(datetime.now(), 'test_filter', len(test_filter))

            # evaluate the current stage's model on the filtered rows
            process_model(models[stage], features[stage], total, test_filter=test_filter)
            

def fetch_q_ids_docs(data_source):
    """Return a dict mapping id -> doc_id for the rows of table data_source
    whose annotation is neither 'NIL' nor 'none'.

    The table name is now composed with psycopg2.sql.Identifier (as in
    get_total_mentions) instead of '%' string interpolation, and the cursor is
    closed even when the query raises.
    """
    sql = SQL(
        "SELECT id, doc_id FROM {} WHERE annotation != 'NIL' and annotation != 'none';"
    ).format(Identifier(data_source))
    with conn_psql.cursor() as cur:
        cur.execute(sql)
        rows = cur.fetchall()
    return dict(rows)
        
def get_top_k_prediction_indices(model, d_test, raw_test, docs_dict, top_k):
    """Return the row indices of the top_k highest-scored candidates of every group.

    d_test is converted with trans_data and scored by model; raw_test supplies
    the matching raw feature records for get_groups_results. docs_dict is
    accepted but not used here.
    """
    dmatrix = trans_data(d_test)
    scores = model.predict(dmatrix)
    _correct, _wrong, top_k_indices = get_groups_results(scores, dmatrix, raw_test, top_k)
    return top_k_indices
        
def get_true_labels(data_set):
    """Count the rows of data_set whose last column is non-zero."""
    label_column = data_set[:, -1]
    nonzero_positions = label_column.nonzero()[0]
    return nonzero_positions.size

In [6]:
def evalerror_detail_log_xl(preds, dt, d_tal_size):
    """evalerror_detail_log plus top-2 / top-3 accuracy and score-margin diagnostics.

    Groups come from the labels of dt (each row labelled 1 starts a group, so the
    first row of a group is the gold candidate). Besides the usual return value
    (correct, precision, recall, f1, correct_results, wrong_results,
    duplicates_results) two lines are printed:
      * top-2 / top-3 hit counts and their ratio to d_tal_size
      * average gap between the best and second-best score, separately for
        correctly and wrongly ranked groups that have at least two candidates
    The per-group score arrays stored in the result dicts are sorted descending.

    Undefined ratios no longer raise ZeroDivisionError: f1 falls back to 0.0 and
    an average over zero groups is reported as nan.
    """
    top2_correct = 0
    top3_correct = 0
    correct_top2_diff_total = 0
    wrong_top2_diff_total = 0
    correct_has_top2 = 0
    wrong_has_top2 = 0

    d_l = dt.get_label()
    idxs = np.where(d_l == 1)[0]
    d_groups = np.append(np.delete(idxs, 0), len(d_l)) - idxs
    correct_results = {}
    wrong_results = {}
    duplicates_results = {}
    matched_ids = []
    q_id = 0  # offset of the current group's first (gold) row
    for group_size in d_groups:
        gold_score = preds[q_id]
        # scores of this group, highest first
        pre_res = np.sort(preds[q_id: group_size + q_id])[::-1]
        if (max(pre_res) != pre_res[0]):
            # sanity check on the sort; presumably only reachable with NaN scores
            print("ERROR")
            break

        # gold candidate within the best two / three scores of its group
        if len(pre_res) == 1 or gold_score == pre_res[0] or gold_score == pre_res[1]:
            top2_correct += 1
            top3_correct += 1
        elif len(pre_res) == 2 or gold_score == pre_res[2]:
            top3_correct += 1

        if gold_score == max(pre_res):
            correct_results[q_id] = pre_res
            if len([s for s in pre_res if s == gold_score]) == 1:
                matched_ids.append(q_id)
                if len(pre_res) > 1:
                    correct_top2_diff_total += (pre_res[0] - pre_res[1])
                    correct_has_top2 += 1
            else:
                # gold score is tied with another candidate
                duplicates_results[q_id] = pre_res
        else:
            wrong_results[q_id] = pre_res
            if len(pre_res) > 1:
                wrong_top2_diff_total += (pre_res[0] - pre_res[1])
                wrong_has_top2 += 1

        q_id += group_size

    precision = float(len(matched_ids)) / len(d_groups)
    recall = float(len(matched_ids)) / d_tal_size
    denom = precision + recall
    f1 = 2 * precision * recall / denom if denom else 0.0
    top2_acc = top2_correct / d_tal_size
    top3_acc = top3_correct / d_tal_size
    print (top2_correct, top3_correct, top2_acc, top3_acc)

    # an average over zero groups is undefined -> report nan instead of crashing
    undefined = float('nan')
    correct_top2_diff_avg = correct_top2_diff_total / correct_has_top2 if correct_has_top2 else undefined
    wrong_top2_diff_avg = wrong_top2_diff_total / wrong_has_top2 if wrong_has_top2 else undefined
    print (correct_top2_diff_avg, wrong_top2_diff_avg)

    return len(matched_ids), precision, recall, f1, correct_results, wrong_results, duplicates_results

In [7]:
# Build the id -> doc_id lookup dicts for the two evaluation tables.
docs_dict_msnbc = fetch_q_ids_docs('msnbc_new')
docs_dict_aquaint = fetch_q_ids_docs('aquaint_new')

In [139]:
# Load the 'test' split of both data sets: numeric ctx / coref feature matrices,
# the raw (undecoded-id) ctx records, the total mention count from PostgreSQL and
# the number of rows carrying a non-zero label.
d_msnbc_ctx = fetch_all_features('basic_fea_ctx', 'test', 'msnbc_new')
d_msnbc_ctx_raw = get_feature_results('basic_fea_ctx', 'test', 'msnbc_new')
d_msnbc_coref = fetch_all_features('basic_fea_coref', 'test', 'msnbc_new')
d_msnbc_total = get_total_mentions('msnbc_new', 'test')
d_msnbc_true_labels = get_true_labels(d_msnbc_ctx)

d_aquaint_ctx = fetch_all_features('basic_fea_ctx', 'test', 'aquaint_new')
d_aquaint_ctx_raw = get_feature_results('basic_fea_ctx', 'test', 'aquaint_new')
d_aquaint_coref = fetch_all_features('basic_fea_coref', 'test', 'aquaint_new')
d_aquaint_total = get_total_mentions('aquaint_new', 'test')
d_aquaint_true_labels = get_true_labels(d_aquaint_ctx)

# 'upper bound' = labelled rows / total mentions, i.e. the share of mentions
# that have a positively labelled candidate in the feature data at all.
print(d_msnbc_ctx.shape)
print(d_msnbc_coref.shape)
print(d_msnbc_total)
print(d_msnbc_true_labels)
print('upper bound', d_msnbc_true_labels / d_msnbc_total)
print()
print(d_aquaint_ctx.shape)
print(d_aquaint_coref.shape)
print(d_aquaint_total)
print(d_aquaint_true_labels)
print('upper bound', d_aquaint_true_labels / d_aquaint_total)

(10316, 21)
(10316, 13)
656
543
upper bound 0.8277439024390244

(7410, 21)
(7410, 13)
727
422
upper bound 0.5804676753782668


In [140]:
# Append the coref feature columns to the ctx features (combine_features drops the
# last column of the first argument and the id column of the second).
d_msnbc_ctx_coref = combine_features(d_msnbc_ctx, d_msnbc_coref)
d_aquaint_ctx_coref = combine_features(d_aquaint_ctx, d_aquaint_coref)

print(d_msnbc_ctx_coref.shape)
print(d_aquaint_ctx_coref.shape)

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(10316, 32)
(7410, 32)


In [18]:
# Evaluate the saved '4900_6_ctx' and '4900_6_ctx_coref' models on the MSNBC test features.
# NOTE(review): this cell's execution count (18) is lower than that of the cell that
# builds d_msnbc_ctx_coref (140), and the captured output uses an 'acc_validation'
# label that process() does not print -- the output looks stale; re-run top to bottom.
process('ctx', d_msnbc_ctx, d_msnbc_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref', d_msnbc_ctx_coref, d_msnbc_total, n_estimators=[4900], max_depths=[6])

2019-08-19 14:49:08.349804 Loading model: 4900_6_ctx
2019-08-19 14:49:08.370359 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.6278755074424899, corr_num: 464
2019-08-19 14:49:08.662315 Evaluation finished
2019-08-19 14:49:08.667954 Loading model: 4900_6_ctx_coref
2019-08-19 14:49:08.693380 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7117726657645467, corr_num: 526
2019-08-19 14:49:09.003979 Evaluation finished


In [19]:
# Evaluate the saved '4900_6_ctx' and '4900_6_ctx_coref' models on the AQUAINT test features.
process('ctx', d_aquaint_ctx, d_aquaint_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref', d_aquaint_ctx_coref, d_aquaint_total, n_estimators=[4900], max_depths=[6])

2019-08-19 14:49:24.595869 Loading model: 4900_6_ctx
2019-08-19 14:49:24.616475 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.53232462173315, corr_num: 387
2019-08-19 14:49:24.859625 Evaluation finished
2019-08-19 14:49:24.865911 Loading model: 4900_6_ctx_coref
2019-08-19 14:49:24.884611 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5281980742778541, corr_num: 384
2019-08-19 14:49:25.100430 Evaluation finished


In [37]:
# Store the predictions of the local (ctx + coref) model for MSNBC, without top-k filtering.
# NOTE(review): save_local_model_predictions is not defined in the visible part of this
# notebook -- confirm where it comes from before relying on Restart & Run All.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_msnbc_ctx_coref, d_msnbc_ctx_raw, docs_dict_msnbc, top_k=None)

Number of groups: 543


In [141]:
# Load the MSNBC coherence features and append them to the ctx + coref matrix.
d_msnbc_coh = fetch_all_features('basic_fea_coh', 'test', 'msnbc_new')
print(d_msnbc_coh.shape)
d_msnbc_ctx_coref_coh = combine_features(d_msnbc_ctx_coref, d_msnbc_coh)
print(d_msnbc_ctx_coref_coh.shape)

(10316, 24)
Building idx for new features...
Combine original and new features...
(10316, 54)


In [56]:
# Evaluate the four saved coherence models (initial and iterated 'global' variants)
# on the unfiltered MSNBC ctx + coref + coh features.
process('ctx_coref_coh', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref_coh_global', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref_coh_global2', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref_coh_global3', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6])

2019-08-19 20:05:38.915382 Loading model: 4900_6_ctx_coref_coh
2019-08-19 20:05:38.969239 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7090663058186739, corr_num: 524
2019-08-19 20:05:39.317069 Evaluation finished
2019-08-19 20:05:39.322345 Loading model: 4900_6_ctx_coref_coh_global
2019-08-19 20:05:39.357196 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7009472259810555, corr_num: 518
2019-08-19 20:05:39.685902 Evaluation finished
2019-08-19 20:05:39.698341 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-19 20:05:39.729205 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7036535859269283, corr_num: 520
2019-08-19 20:05:40.099459 Evaluation finished
2019-08-19 20:05:40.106258 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-19 20:05:40.139155 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7036535859269283, corr_num: 520
2019-08-19 20:05:40.460328 Evaluation finished


In [43]:
# Store the predictions of the local (ctx + coref) model for AQUAINT, without top-k filtering.
# NOTE(review): save_local_model_predictions is not defined in the visible part of this
# notebook -- confirm where it comes from.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_aquaint_ctx_coref, d_aquaint_ctx_raw, docs_dict_aquaint, top_k=None)

Number of groups: 422


In [142]:
# Load the AQUAINT coherence features and append them to the ctx + coref matrix.
d_aquaint_coh = fetch_all_features('basic_fea_coh', 'test', 'aquaint_new')
print(d_aquaint_coh.shape)
d_aquaint_ctx_coref_coh = combine_features(d_aquaint_ctx_coref, d_aquaint_coh)
print(d_aquaint_ctx_coref_coh.shape)

(7410, 24)
Building idx for new features...
Combine original and new features...
(7410, 54)


In [57]:
# Evaluate the four saved coherence models (initial and iterated 'global' variants)
# on the unfiltered AQUAINT ctx + coref + coh features.
process('ctx_coref_coh', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref_coh_global', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref_coh_global2', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6])
process('ctx_coref_coh_global3', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6])

2019-08-19 20:06:04.207108 Loading model: 4900_6_ctx_coref_coh
2019-08-19 20:06:04.229556 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.546079779917469, corr_num: 397
2019-08-19 20:06:04.496667 Evaluation finished
2019-08-19 20:06:04.504233 Loading model: 4900_6_ctx_coref_coh_global
2019-08-19 20:06:04.525355 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5433287482806052, corr_num: 395
2019-08-19 20:06:04.761073 Evaluation finished
2019-08-19 20:06:04.766377 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-19 20:06:04.797776 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5433287482806052, corr_num: 395
2019-08-19 20:06:05.042151 Evaluation finished
2019-08-19 20:06:05.048701 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-19 20:06:05.071005 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5433287482806052, corr_num: 395
2019-08-19 20:06:05.322700 Evaluation finished


In [58]:
# MSNBC, top_k=10: run the local model, then evaluate the coherence models only on the
# rows listed in the filter read back from Redis.
# NOTE(review): save_local_model_predictions is not defined in the visible part of this
# notebook; presumably it writes the 'doc-predicted-ents-top-k' / 'test' entry that is
# read on the next statement -- confirm.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_msnbc_ctx_coref, d_msnbc_ctx_raw, docs_dict_msnbc, top_k=10)

# row indices to keep, stored as JSON in the Redis hash
test_filter=json.loads(conn.hget('doc-predicted-ents-top-k', 'test'))
print('test_filter', len(test_filter))

process('ctx_coref_coh', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global2', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global3', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)

Number of groups: 543
test_filter 3154
2019-08-19 20:06:41.339525 Loading model: 4900_6_ctx_coref_coh
2019-08-19 20:06:41.361468 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7090663058186739, corr_num: 524
2019-08-19 20:06:41.509527 Evaluation finished
2019-08-19 20:06:41.514514 Loading model: 4900_6_ctx_coref_coh_global
2019-08-19 20:06:41.538876 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7023004059539919, corr_num: 519
2019-08-19 20:06:41.673800 Evaluation finished
2019-08-19 20:06:41.678678 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-19 20:06:41.715452 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7050067658998647, corr_num: 521
2019-08-19 20:06:41.863569 Evaluation finished
2019-08-19 20:06:41.868628 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-19 20:06:41.889333 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.7036535859269283, corr_num: 520
2019-08-19 20:06:42.036787 Evalua

In [21]:
# MSNBC, top_k=5: same procedure as the top_k=10 run, with a tighter candidate filter.
# NOTE(review): save_local_model_predictions is not defined in the visible part of this
# notebook; presumably it writes the Redis entry read below -- confirm.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_msnbc_ctx_coref, d_msnbc_ctx_raw, docs_dict_msnbc, top_k=5)

# row indices to keep, stored as JSON in the Redis hash
test_filter=json.loads(conn.hget('doc-predicted-ents-top-k', 'test'))
print('test_filter', len(test_filter))

process('ctx_coref_coh', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global2', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global3', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)

  "because it will generate extra copies and increase memory consumption")


Number of groups: 543
test_filter 1888
2019-08-20 15:55:45.590694 Loading model: 4900_6_ctx_coref_coh
2019-08-20 15:55:45.621928 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9704251386321626, recall: 0.7104194857916103, f1: 0.8203125, corr_num: 525
2019-08-20 15:55:45.721348 Evaluation finished
2019-08-20 15:55:45.725157 Loading model: 4900_6_ctx_coref_coh_global
2019-08-20 15:55:45.746560 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9611829944547134, recall: 0.7036535859269283, f1: 0.8125, corr_num: 520
2019-08-20 15:55:45.826134 Evaluation finished
2019-08-20 15:55:45.829885 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-20 15:55:45.859472 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9648798521256932, recall: 0.706359945872801, f1: 0.8156249999999999, corr_num: 522
2019-08-20 15:55:45.928826 Evaluation finished
2019-08-20 15:55:45.932619 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-20 15:55:45.950334 Start evaluatio

In [23]:
# Re-evaluate the initial coherence model on unfiltered MSNBC data with the extended
# evaluator, which additionally prints top-2 / top-3 hit statistics and score margins.
process('ctx_coref_coh', d_msnbc_ctx_coref_coh, d_msnbc_total, n_estimators=[4900], max_depths=[6],
       eval_func=evalerror_detail_log_xl)

  "because it will generate extra copies and increase memory consumption")


2019-08-20 16:06:55.150812 Loading model: 4900_6_ctx_coref_coh
2019-08-20 16:06:55.181316 Start evaluation
536 539 0.7253044654939107 0.7293640054127198
6.3292312818621825 2.353678960549204
n_estimators: 4900, max_depth: 6, precision: 0.9650092081031307, recall: 0.7090663058186739, f1: 0.8174726989079564, corr_num: 524
2019-08-20 16:06:55.488000 Evaluation finished


In [60]:
# AQUAINT, top_k=10: run the local model, then evaluate the coherence models only on the
# rows listed in the filter read back from Redis.
# NOTE(review): save_local_model_predictions is not defined in the visible part of this
# notebook; presumably it writes the Redis entry read below -- confirm.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_aquaint_ctx_coref, d_aquaint_ctx_raw, docs_dict_aquaint, top_k=10)

# row indices to keep, stored as JSON in the Redis hash
test_filter=json.loads(conn.hget('doc-predicted-ents-top-k', 'test'))
print('test_filter', len(test_filter))

process('ctx_coref_coh', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global2', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)
process('ctx_coref_coh_global3', d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6],
       test_filter=test_filter)

Number of groups: 422
test_filter 2449
2019-08-19 20:07:13.622427 Loading model: 4900_6_ctx_coref_coh
2019-08-19 20:07:13.644330 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5433287482806052, corr_num: 395
2019-08-19 20:07:13.747977 Evaluation finished
2019-08-19 20:07:13.751916 Loading model: 4900_6_ctx_coref_coh_global
2019-08-19 20:07:13.772372 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5488308115543329, corr_num: 399
2019-08-19 20:07:13.875349 Evaluation finished
2019-08-19 20:07:13.879932 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-19 20:07:13.910235 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5433287482806052, corr_num: 395
2019-08-19 20:07:14.021755 Evaluation finished
2019-08-19 20:07:14.027908 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-19 20:07:14.047841 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.547455295735901, corr_num: 398
2019-08-19 20:07:14.158286 Evaluat

In [61]:
# Load the saved ctx+coref model and pass it, with the AQUAINT features, to
# save_local_model_predictions using top_k=5; then evaluate the coherence
# models with the stored filter.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_aquaint_ctx_coref, d_aquaint_ctx_raw, docs_dict_aquaint, top_k=5)

# hget returns None when the hash field is absent; fail with an explicit message
# instead of letting json.loads raise a TypeError about NoneType.
# NOTE(review): presumably this field is written by save_local_model_predictions - confirm.
test_filter_json = conn.hget('doc-predicted-ents-top-k', 'test')
if test_filter_json is None:
    raise LookupError("Redis hash 'doc-predicted-ents-top-k' has no 'test' field to load test_filter from")
test_filter = json.loads(test_filter_json)
print('test_filter', len(test_filter))

# NOTE(review): all four models are evaluated on the same d_aquaint_ctx_coref_coh
# matrix; confirm the *_global models are not meant to receive their own features.
for coh_model_name in ('ctx_coref_coh', 'ctx_coref_coh_global', 'ctx_coref_coh_global2',
                       'ctx_coref_coh_global3'):
    process(coh_model_name, d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6],
            test_filter=test_filter)

Number of groups: 422
test_filter 1459
2019-08-19 20:07:26.549779 Loading model: 4900_6_ctx_coref_coh
2019-08-19 20:07:26.572371 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5447042640990372, corr_num: 396
2019-08-19 20:07:26.644762 Evaluation finished
2019-08-19 20:07:26.648254 Loading model: 4900_6_ctx_coref_coh_global
2019-08-19 20:07:26.668812 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.547455295735901, corr_num: 398
2019-08-19 20:07:26.733905 Evaluation finished
2019-08-19 20:07:26.737629 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-19 20:07:26.768811 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5433287482806052, corr_num: 395
2019-08-19 20:07:26.837858 Evaluation finished
2019-08-19 20:07:26.841833 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-19 20:07:26.860268 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.546079779917469, corr_num: 397
2019-08-19 20:07:26.930322 Evaluati

In [12]:
# Load the saved ctx+coref model and pass it, with the AQUAINT features, to
# save_local_model_predictions using top_k=1; then evaluate the coherence
# models with the stored filter.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_aquaint_ctx_coref, d_aquaint_ctx_raw, docs_dict_aquaint, top_k=1)

# hget returns None when the hash field is absent; fail with an explicit message
# instead of letting json.loads raise a TypeError about NoneType.
# NOTE(review): presumably this field is written by save_local_model_predictions - confirm.
test_filter_json = conn.hget('doc-predicted-ents-top-k', 'test')
if test_filter_json is None:
    raise LookupError("Redis hash 'doc-predicted-ents-top-k' has no 'test' field to load test_filter from")
test_filter = json.loads(test_filter_json)
print('test_filter', len(test_filter))

# NOTE(review): all four models are evaluated on the same d_aquaint_ctx_coref_coh
# matrix; confirm the *_global models are not meant to receive their own features.
for coh_model_name in ('ctx_coref_coh', 'ctx_coref_coh_global', 'ctx_coref_coh_global2',
                       'ctx_coref_coh_global3'):
    process(coh_model_name, d_aquaint_ctx_coref_coh, d_aquaint_total, n_estimators=[4900], max_depths=[6],
            test_filter=test_filter)

Number of groups: 422
test_filter 422
2019-08-20 15:30:42.316719 Loading model: 4900_6_ctx_coref_coh
2019-08-20 15:30:42.342411 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5144429160935351, corr_num: 374
2019-08-20 15:30:42.371565 Evaluation finished
2019-08-20 15:30:42.374310 Loading model: 4900_6_ctx_coref_coh_global
2019-08-20 15:30:42.394737 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5185694635488308, corr_num: 377
2019-08-20 15:30:42.423970 Evaluation finished
2019-08-20 15:30:42.426259 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-20 15:30:42.456364 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.515818431911967, corr_num: 375
2019-08-20 15:30:42.481416 Evaluation finished
2019-08-20 15:30:42.484440 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-20 15:30:42.502135 Start evaluation
n_estimators: 4900, max_depth: 6, acc_validation: 0.5185694635488308, corr_num: 377
2019-08-20 15:30:42.531739 Evaluati

In [31]:
# 'basic_fea_ctx_all_can' features for the 'msnbc_new' and 'aquaint_new' test sets:
# the feature matrix, the raw JSON-decoded rows, and the value of get_true_labels.
# d_msnbc_total / d_aquaint_total are read here but not defined in this cell, so
# they must already be bound in the kernel.
d_msnbc_ctx_all_can, d_msnbc_ctx_all_can_raw = (
    fetch_all_features('basic_fea_ctx_all_can', 'test', 'msnbc_new'),
    get_feature_results('basic_fea_ctx_all_can', 'test', 'msnbc_new'),
)
d_msnbc_all_can_true_labels = get_true_labels(d_msnbc_ctx_all_can)

d_aquaint_ctx_all_can, d_aquaint_ctx_all_can_raw = (
    fetch_all_features('basic_fea_ctx_all_can', 'test', 'aquaint_new'),
    get_feature_results('basic_fea_ctx_all_can', 'test', 'aquaint_new'),
)
d_aquaint_all_can_true_labels = get_true_labels(d_aquaint_ctx_all_can)

# Per dataset: matrix shape, true-label count, and true labels / total mentions.
print(d_msnbc_ctx_all_can.shape, d_msnbc_all_can_true_labels, sep='\n')
print('upper bound', d_msnbc_all_can_true_labels / d_msnbc_total, end='\n\n')
print(d_aquaint_ctx_all_can.shape, d_aquaint_all_can_true_labels, sep='\n')
print('upper bound', d_aquaint_all_can_true_labels / d_aquaint_total)

(44901, 21)
573
upper bound 0.7753721244925575

(14417, 21)
422
upper bound 0.5804676753782668


In [30]:
# Evaluate the saved 'ctx' model (logged as 4900_6_ctx) on the all-candidate
# MSNBC features; the recorded output reports precision, recall, F1 and corr_num.
process('ctx', d_msnbc_ctx_all_can, d_msnbc_total, n_estimators=[4900], max_depths=[6])

2019-08-20 20:57:46.296654 Loading model: 4900_6_ctx
2019-08-20 20:57:46.326381 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.806282722513089, recall: 0.625169147496617, f1: 0.7042682926829268, corr_num: 462
2019-08-20 20:57:47.476605 Evaluation finished


In [10]:
# 'basic_fea_ctx_emnlp17' features for the 'msnbc_new' and 'aquaint_new' test sets,
# together with the raw rows, the get_true_labels value and the total mention count.
d_msnbc_ctx_emnlp17, d_msnbc_ctx_emnlp17_raw = (
    fetch_all_features('basic_fea_ctx_emnlp17', 'test', 'msnbc_new'),
    get_feature_results('basic_fea_ctx_emnlp17', 'test', 'msnbc_new'),
)
d_msnbc_ctx_emnlp17_true_labels, d_msnbc_total = (
    get_true_labels(d_msnbc_ctx_emnlp17),
    get_total_mentions('msnbc_new', 'test'),
)

d_aquaint_ctx_emnlp17, d_aquaint_ctx_emnlp17_raw = (
    fetch_all_features('basic_fea_ctx_emnlp17', 'test', 'aquaint_new'),
    get_feature_results('basic_fea_ctx_emnlp17', 'test', 'aquaint_new'),
)
d_aquaint_ctx_emnlp17_true_labels, d_aquaint_total = (
    get_true_labels(d_aquaint_ctx_emnlp17),
    get_total_mentions('aquaint_new', 'test'),
)

# Per dataset: matrix shape, true-label count, and true labels / total mentions.
print(d_msnbc_ctx_emnlp17.shape, d_msnbc_ctx_emnlp17_true_labels, sep='\n')
print('upper bound', d_msnbc_ctx_emnlp17_true_labels / d_msnbc_total, end='\n\n')
print(d_aquaint_ctx_emnlp17.shape, d_aquaint_ctx_emnlp17_true_labels, sep='\n')
print('upper bound', d_aquaint_ctx_emnlp17_true_labels / d_aquaint_total)

(14287, 21)
625
upper bound 0.9527439024390244

(15255, 21)
674
upper bound 0.9270976616231087


In [11]:
# 'basic_fea_coref_emnlp17' features for both test sets, fetched MSNBC first.
d_msnbc_coref_emnlp17, d_aquaint_coref_emnlp17 = [
    fetch_all_features('basic_fea_coref_emnlp17', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new')
]

# One shape per line, in the same order as the assignments.
print(d_msnbc_coref_emnlp17.shape, d_aquaint_coref_emnlp17.shape, sep='\n')

(14287, 13)
(15255, 13)


In [12]:
# Combine each dataset's context features with its coreference features
# (MSNBC first, then AQUAINT).
d_msnbc_ctx_coref_emnlp17, d_aquaint_ctx_coref_emnlp17 = [
    combine_features(ctx_part, coref_part)
    for ctx_part, coref_part in (
        (d_msnbc_ctx_emnlp17, d_msnbc_coref_emnlp17),
        (d_aquaint_ctx_emnlp17, d_aquaint_coref_emnlp17),
    )
]

# One shape per line, in the same order as the assignments.
print(d_msnbc_ctx_coref_emnlp17.shape, d_aquaint_ctx_coref_emnlp17.shape, sep='\n')

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(14287, 32)
(15255, 32)


In [41]:
# Evaluate the saved 'ctx' and 'ctx_coref' models on their matching MSNBC
# feature matrices, in that order.
for eval_tag, eval_features in (
    ('ctx', d_msnbc_ctx_emnlp17),
    ('ctx_coref', d_msnbc_ctx_coref_emnlp17),
):
    process(eval_tag, eval_features, d_msnbc_total, n_estimators=[4900], max_depths=[6])

  "because it will generate extra copies and increase memory consumption")


2019-08-22 02:28:00.157089 Loading model: 4900_6_ctx
2019-08-22 02:28:00.198489 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.848, recall: 0.8079268292682927, f1: 0.8274785323965652, corr_num: 530
2019-08-22 02:28:00.588304 Evaluation finished
2019-08-22 02:28:00.596332 Loading model: 4900_6_ctx_coref
2019-08-22 02:28:00.636391 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9424, recall: 0.8978658536585366, f1: 0.9195940671350508, corr_num: 589
2019-08-22 02:28:01.028469 Evaluation finished


In [55]:
# Evaluate the saved 'ctx' and 'ctx_coref' models on their matching AQUAINT
# feature matrices, in that order.
for eval_tag, eval_features in (
    ('ctx', d_aquaint_ctx_emnlp17),
    ('ctx_coref', d_aquaint_ctx_coref_emnlp17),
):
    process(eval_tag, eval_features, d_aquaint_total, n_estimators=[4900], max_depths=[6])

2019-08-22 05:18:54.067484 Loading model: 4900_6_ctx
2019-08-22 05:18:54.103691 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8768545994065282, recall: 0.81292984869326, f1: 0.8436830835117772, corr_num: 591
2019-08-22 05:18:54.507712 Evaluation finished
2019-08-22 05:18:54.512944 Loading model: 4900_6_ctx_coref
2019-08-22 05:18:54.529308 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8961424332344213, recall: 0.8308115543328748, f1: 0.8622412562455389, corr_num: 604
2019-08-22 05:18:54.913751 Evaluation finished


In [45]:
# Load the saved ctx+coref model and pass it to save_local_model_predictions with
# the MSNBC ctx+coref matrix, the raw ctx rows and docs_dict_msnbc.
# top_k is not passed, so the function's default applies.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_msnbc_ctx_coref_emnlp17, d_msnbc_ctx_emnlp17_raw, docs_dict_msnbc)

  "because it will generate extra copies and increase memory consumption")


Number of groups: 625


In [15]:
# Fetch the 'basic_fea_coh_emnlp17' features for the 'msnbc_new' test set and show their shape.
d_msnbc_coh_emnlp17 = fetch_all_features('basic_fea_coh_emnlp17', 'test', 'msnbc_new')
print(d_msnbc_coh_emnlp17.shape)
# Combine them with the ctx+coref matrix and show the shape of the result.
d_msnbc_ctx_coref_coh_emnlp17 = combine_features(d_msnbc_ctx_coref_emnlp17, d_msnbc_coh_emnlp17)
print(d_msnbc_ctx_coref_coh_emnlp17.shape)

(14287, 24)
Building idx for new features...
Combine original and new features...
(14287, 54)


In [13]:
# Fetch the four global coherence feature variants for the 'msnbc_new' test set.
(
    d_msnbc_coh_emnlp17_global,
    d_msnbc_coh_emnlp17_global2,
    d_msnbc_coh_emnlp17_global3,
    d_msnbc_coh_emnlp17_global4,
) = [
    fetch_all_features(experiment_id, 'test', 'msnbc_new')
    for experiment_id in (
        'basic_fea_coh_emnlp17_global',
        'basic_fea_coh_emnlp17_global2',
        'basic_fea_coh_emnlp17_global3',
        'basic_fea_coh_emnlp17_global4',
    )
]

# Combine each variant with the same ctx+coref matrix, in the same order.
(
    d_msnbc_ctx_coref_coh_emnlp17_global,
    d_msnbc_ctx_coref_coh_emnlp17_global2,
    d_msnbc_ctx_coref_coh_emnlp17_global3,
    d_msnbc_ctx_coref_coh_emnlp17_global4,
) = [
    combine_features(d_msnbc_ctx_coref_emnlp17, coh_variant)
    for coh_variant in (
        d_msnbc_coh_emnlp17_global,
        d_msnbc_coh_emnlp17_global2,
        d_msnbc_coh_emnlp17_global3,
        d_msnbc_coh_emnlp17_global4,
    )
]

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...


In [16]:
# Evaluate each saved coherence model on its matching MSNBC feature matrix,
# in the listed order.
for eval_tag, eval_features in (
    ('ctx_coref_coh', d_msnbc_ctx_coref_coh_emnlp17),
    ('ctx_coref_coh_global', d_msnbc_ctx_coref_coh_emnlp17_global),
    ('ctx_coref_coh_global2', d_msnbc_ctx_coref_coh_emnlp17_global2),
    ('ctx_coref_coh_global3', d_msnbc_ctx_coref_coh_emnlp17_global3),
    ('ctx_coref_coh_global4', d_msnbc_ctx_coref_coh_emnlp17_global4),
):
    process(eval_tag, eval_features, d_msnbc_total, n_estimators=[4900], max_depths=[6])

  "because it will generate extra copies and increase memory consumption")


2019-08-26 06:56:54.121861 Loading model: 4900_6_ctx_coref_coh
2019-08-26 06:56:54.169507 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9632, recall: 0.9176829268292683, f1: 0.9398907103825137, corr_num: 602
2019-08-26 06:56:54.722508 Evaluation finished
2019-08-26 06:56:54.729642 Loading model: 4900_6_ctx_coref_coh_global
2019-08-26 06:56:54.753766 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9488, recall: 0.9039634146341463, f1: 0.9258391881342701, corr_num: 593
2019-08-26 06:56:55.227299 Evaluation finished
2019-08-26 06:56:55.234578 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-26 06:56:55.260403 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9552, recall: 0.9100609756097561, f1: 0.9320843091334894, corr_num: 597
2019-08-26 06:56:55.729951 Evaluation finished
2019-08-26 06:56:55.737569 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-26 06:56:55.756187 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9568

In [37]:
# Run process_iterative_filtering for the 'msnbc' key; the recorded output shows,
# per model, the test_filter size and precision / recall / F1 under "Top K=3".
process_iterative_filtering('msnbc')

=== Top K=3 ===
2019-08-26 14:46:09.167239 Processing model: 4900_6_ctx_coref_coh
2019-08-26 14:46:09.551352 test_filter 1599
2019-08-26 14:46:09.552966 Start evaluation
precision: 0.9824, recall: 0.9359756097560976, f1: 0.9586260733801717, corr_num: 614
2019-08-26 14:46:09.614585 Evaluation finished
2019-08-26 14:46:09.615152 Processing model: 4900_6_ctx_coref_coh_global
2019-08-26 14:46:09.983221 test_filter 1599
2019-08-26 14:46:09.984651 Start evaluation
precision: 0.9759615384615384, recall: 0.9283536585365854, f1: 0.9515625, corr_num: 609
2019-08-26 14:46:10.048960 Evaluation finished
2019-08-26 14:46:10.049404 Processing model: 4900_6_ctx_coref_coh_global2
2019-08-26 14:46:10.413397 test_filter 1599
2019-08-26 14:46:10.415058 Start evaluation
precision: 0.9871794871794872, recall: 0.9390243902439024, f1: 0.9625, corr_num: 616
2019-08-26 14:46:10.478534 Evaluation finished
2019-08-26 14:46:10.479373 Processing model: 4900_6_ctx_coref_coh_global3
2019-08-26 14:46:10.863881 test_fi

In [50]:
# Load the saved ctx+coref model and pass it to save_local_model_predictions with
# the AQUAINT ctx+coref matrix, the raw ctx rows and docs_dict_aquaint.
# top_k is not passed, so the function's default applies.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_aquaint_ctx_coref_emnlp17, d_aquaint_ctx_emnlp17_raw, docs_dict_aquaint)

Number of groups: 674


In [42]:
# Fetch the 'basic_fea_coh_emnlp17' features for the 'aquaint_new' test set and show their shape.
d_aquaint_coh_emnlp17 = fetch_all_features('basic_fea_coh_emnlp17', 'test', 'aquaint_new')
print(d_aquaint_coh_emnlp17.shape)
# Combine them with the ctx+coref matrix and show the shape of the result.
d_aquaint_ctx_coref_coh_emnlp17 = combine_features(d_aquaint_ctx_coref_emnlp17, d_aquaint_coh_emnlp17)
print(d_aquaint_ctx_coref_coh_emnlp17.shape)

(15255, 24)
Building idx for new features...
Combine original and new features...
(15255, 54)


In [43]:
# Fetch the four global coherence feature variants for the 'aquaint_new' test set.
(
    d_aquaint_coh_emnlp17_global,
    d_aquaint_coh_emnlp17_global2,
    d_aquaint_coh_emnlp17_global3,
    d_aquaint_coh_emnlp17_global4,
) = [
    fetch_all_features(experiment_id, 'test', 'aquaint_new')
    for experiment_id in (
        'basic_fea_coh_emnlp17_global',
        'basic_fea_coh_emnlp17_global2',
        'basic_fea_coh_emnlp17_global3',
        'basic_fea_coh_emnlp17_global4',
    )
]

# Combine each variant with the same ctx+coref matrix, in the same order.
(
    d_aquaint_ctx_coref_coh_emnlp17_global,
    d_aquaint_ctx_coref_coh_emnlp17_global2,
    d_aquaint_ctx_coref_coh_emnlp17_global3,
    d_aquaint_ctx_coref_coh_emnlp17_global4,
) = [
    combine_features(d_aquaint_ctx_coref_emnlp17, coh_variant)
    for coh_variant in (
        d_aquaint_coh_emnlp17_global,
        d_aquaint_coh_emnlp17_global2,
        d_aquaint_coh_emnlp17_global3,
        d_aquaint_coh_emnlp17_global4,
    )
]

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...


In [44]:
# Evaluate each saved coherence model on its matching AQUAINT feature matrix,
# in the listed order.
for eval_tag, eval_features in (
    ('ctx_coref_coh', d_aquaint_ctx_coref_coh_emnlp17),
    ('ctx_coref_coh_global', d_aquaint_ctx_coref_coh_emnlp17_global),
    ('ctx_coref_coh_global2', d_aquaint_ctx_coref_coh_emnlp17_global2),
    ('ctx_coref_coh_global3', d_aquaint_ctx_coref_coh_emnlp17_global3),
    ('ctx_coref_coh_global4', d_aquaint_ctx_coref_coh_emnlp17_global4),
):
    process(eval_tag, eval_features, d_aquaint_total, n_estimators=[4900], max_depths=[6])

2019-08-26 14:57:56.690012 Loading model: 4900_6_ctx_coref_coh
2019-08-26 14:57:56.716544 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9109792284866469, recall: 0.844566712517194, f1: 0.8765167737330479, corr_num: 614
2019-08-26 14:57:57.145629 Evaluation finished
2019-08-26 14:57:57.152432 Loading model: 4900_6_ctx_coref_coh_global
2019-08-26 14:57:57.168327 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9169139465875371, recall: 0.8500687757909215, f1: 0.8822269807280514, corr_num: 618
2019-08-26 14:57:57.573218 Evaluation finished
2019-08-26 14:57:57.580772 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-26 14:57:57.597046 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9183976261127597, recall: 0.8514442916093535, f1: 0.8836545324768023, corr_num: 619
2019-08-26 14:57:57.997194 Evaluation finished
2019-08-26 14:57:58.004542 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-26 14:57:58.020674 Start evaluation
n_estimators: 49

In [45]:
# Run process_iterative_filtering for the 'aquaint' key; the recorded output shows,
# per model, the test_filter size and precision / recall / F1 under "Top K=3".
process_iterative_filtering('aquaint')

=== Top K=3 ===
2019-08-26 14:59:11.641162 Processing model: 4900_6_ctx_coref_coh


  "because it will generate extra copies and increase memory consumption")


2019-08-26 14:59:12.029981 test_filter 1732
2019-08-26 14:59:12.031547 Start evaluation
precision: 0.9484066767830045, recall: 0.859697386519945, f1: 0.9018759018759018, corr_num: 625
2019-08-26 14:59:12.107318 Evaluation finished
2019-08-26 14:59:12.107791 Processing model: 4900_6_ctx_coref_coh_global
2019-08-26 14:59:12.483615 test_filter 1732
2019-08-26 14:59:12.485193 Start evaluation
precision: 0.9636363636363636, recall: 0.874828060522696, f1: 0.9170872386445567, corr_num: 636
2019-08-26 14:59:12.550547 Evaluation finished
2019-08-26 14:59:12.551320 Processing model: 4900_6_ctx_coref_coh_global2
2019-08-26 14:59:12.947782 test_filter 1732
2019-08-26 14:59:12.949313 Start evaluation
precision: 0.9653092006033183, recall: 0.8803301237964236, f1: 0.9208633093525179, corr_num: 640
2019-08-26 14:59:13.016273 Evaluation finished
2019-08-26 14:59:13.016662 Processing model: 4900_6_ctx_coref_coh_global3
2019-08-26 14:59:13.404597 test_filter 1732
2019-08-26 14:59:13.406234 Start evaluati

In [143]:
# Context features, raw rows and coreference features for the 'ace2014_uiuc' test set.
d_ace_ctx_emnlp17, d_ace_ctx_emnlp17_raw, d_ace_coref_emnlp17 = (
    fetch_all_features('basic_fea_ctx_emnlp17', 'test', 'ace2014_uiuc'),
    get_feature_results('basic_fea_ctx_emnlp17', 'test', 'ace2014_uiuc'),
    fetch_all_features('basic_fea_coref_emnlp17', 'test', 'ace2014_uiuc'),
)
# True-label count, total mention count and the document lookup for the same source.
d_ace_ctx_emnlp17_true_labels, d_ace_total, docs_dict_ace = (
    get_true_labels(d_ace_ctx_emnlp17),
    get_total_mentions('ace2014_uiuc', 'test'),
    fetch_q_ids_docs('ace2014_uiuc'),
)

# Shapes, total mentions and true labels on separate lines, then their ratio.
print(
    d_ace_ctx_emnlp17.shape,
    d_ace_coref_emnlp17.shape,
    d_ace_total,
    d_ace_ctx_emnlp17_true_labels,
    sep='\n',
)
print('upper bound', d_ace_ctx_emnlp17_true_labels / d_ace_total)

(6733, 21)
(6733, 13)
257
225
upper bound 0.8754863813229572


In [144]:
# Combine the ACE context features with the coreference features and show the
# shape of the combined matrix.
d_ace_ctx_coref_emnlp17 = combine_features(d_ace_ctx_emnlp17, d_ace_coref_emnlp17)
print(d_ace_ctx_coref_emnlp17.shape)

Building idx for new features...
Combine original and new features...
(6733, 32)


In [60]:
# Evaluate the saved 'ctx' and 'ctx_coref' models on their matching ACE
# feature matrices, in that order.
for eval_tag, eval_features in (
    ('ctx', d_ace_ctx_emnlp17),
    ('ctx_coref', d_ace_ctx_coref_emnlp17),
):
    process(eval_tag, eval_features, d_ace_total, n_estimators=[4900], max_depths=[6])

  "because it will generate extra copies and increase memory consumption")


2019-08-22 13:02:05.423324 Loading model: 4900_6_ctx
2019-08-22 13:02:05.472597 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9377777777777778, recall: 0.8210116731517509, f1: 0.8755186721991702, corr_num: 211
2019-08-22 13:02:05.678417 Evaluation finished
2019-08-22 13:02:05.682331 Loading model: 4900_6_ctx_coref
2019-08-22 13:02:05.706784 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9377777777777778, recall: 0.8210116731517509, f1: 0.8755186721991702, corr_num: 211
2019-08-22 13:02:05.888451 Evaluation finished


In [62]:
# Load the saved ctx+coref model and pass it to save_local_model_predictions with
# the ACE ctx+coref matrix, the raw ctx rows and docs_dict_ace.
# top_k is not passed, so the function's default applies.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_ace_ctx_coref_emnlp17, d_ace_ctx_emnlp17_raw, docs_dict_ace)

Number of groups: 225


In [145]:
# Fetch the 'basic_fea_coh_emnlp17' features for the 'ace2014_uiuc' test set and show their shape.
d_ace_coh_emnlp17 = fetch_all_features('basic_fea_coh_emnlp17', 'test', 'ace2014_uiuc')
print(d_ace_coh_emnlp17.shape)

# Combine them with the ctx+coref matrix and show the shape of the result.
d_ace_ctx_coref_coh_emnlp17 = combine_features(d_ace_ctx_coref_emnlp17, d_ace_coh_emnlp17)
print(d_ace_ctx_coref_coh_emnlp17.shape)

(6733, 24)
Building idx for new features...
Combine original and new features...
(6733, 54)


In [49]:
# Fetch the four global coherence feature variants for the 'ace2014_uiuc' test set.
(
    d_ace_coh_emnlp17_global,
    d_ace_coh_emnlp17_global2,
    d_ace_coh_emnlp17_global3,
    d_ace_coh_emnlp17_global4,
) = [
    fetch_all_features(experiment_id, 'test', 'ace2014_uiuc')
    for experiment_id in (
        'basic_fea_coh_emnlp17_global',
        'basic_fea_coh_emnlp17_global2',
        'basic_fea_coh_emnlp17_global3',
        'basic_fea_coh_emnlp17_global4',
    )
]

# Combine each variant with the same ctx+coref matrix, in the same order.
(
    d_ace_ctx_coref_coh_emnlp17_global,
    d_ace_ctx_coref_coh_emnlp17_global2,
    d_ace_ctx_coref_coh_emnlp17_global3,
    d_ace_ctx_coref_coh_emnlp17_global4,
) = [
    combine_features(d_ace_ctx_coref_emnlp17, coh_variant)
    for coh_variant in (
        d_ace_coh_emnlp17_global,
        d_ace_coh_emnlp17_global2,
        d_ace_coh_emnlp17_global3,
        d_ace_coh_emnlp17_global4,
    )
]

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...


In [50]:
# Evaluate each saved coherence model on its matching ACE feature matrix,
# in the listed order.
for eval_tag, eval_features in (
    ('ctx_coref_coh', d_ace_ctx_coref_coh_emnlp17),
    ('ctx_coref_coh_global', d_ace_ctx_coref_coh_emnlp17_global),
    ('ctx_coref_coh_global2', d_ace_ctx_coref_coh_emnlp17_global2),
    ('ctx_coref_coh_global3', d_ace_ctx_coref_coh_emnlp17_global3),
    ('ctx_coref_coh_global4', d_ace_ctx_coref_coh_emnlp17_global4),
):
    process(eval_tag, eval_features, d_ace_total, n_estimators=[4900], max_depths=[6])

2019-08-26 15:04:26.663873 Loading model: 4900_6_ctx_coref_coh
2019-08-26 15:04:26.692278 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9333333333333333, recall: 0.8171206225680934, f1: 0.871369294605809, corr_num: 210
2019-08-26 15:04:26.914122 Evaluation finished
2019-08-26 15:04:26.918788 Loading model: 4900_6_ctx_coref_coh_global
2019-08-26 15:04:26.936052 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9377777777777778, recall: 0.8210116731517509, f1: 0.8755186721991702, corr_num: 211
2019-08-26 15:04:27.120944 Evaluation finished
2019-08-26 15:04:27.125794 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-26 15:04:27.142407 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9377777777777778, recall: 0.8210116731517509, f1: 0.8755186721991702, corr_num: 211
2019-08-26 15:04:27.325318 Evaluation finished
2019-08-26 15:04:27.331678 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-26 15:04:27.348558 Start evaluation
n_estimators: 49

In [51]:
# Run process_iterative_filtering for the 'ace' key; the recorded output shows,
# per model, the test_filter size and precision / recall / F1 under "Top K=3".
process_iterative_filtering('ace')

=== Top K=3 ===
2019-08-26 15:05:14.770370 Processing model: 4900_6_ctx_coref_coh
2019-08-26 15:05:14.966795 test_filter 607
2019-08-26 15:05:14.968378 Start evaluation


  "because it will generate extra copies and increase memory consumption")


precision: 0.9596412556053812, recall: 0.8326848249027238, f1: 0.8916666666666667, corr_num: 214
2019-08-26 15:05:14.998707 Evaluation finished
2019-08-26 15:05:14.999358 Processing model: 4900_6_ctx_coref_coh_global
2019-08-26 15:05:15.188781 test_filter 607
2019-08-26 15:05:15.190097 Start evaluation
precision: 0.9681818181818181, recall: 0.8287937743190662, f1: 0.8930817610062893, corr_num: 213
2019-08-26 15:05:15.217604 Evaluation finished
2019-08-26 15:05:15.217894 Processing model: 4900_6_ctx_coref_coh_global2
2019-08-26 15:05:15.405608 test_filter 607
2019-08-26 15:05:15.406703 Start evaluation
precision: 0.9726027397260274, recall: 0.8287937743190662, f1: 0.8949579831932774, corr_num: 213
2019-08-26 15:05:15.433971 Evaluation finished
2019-08-26 15:05:15.434256 Processing model: 4900_6_ctx_coref_coh_global3
2019-08-26 15:05:15.625145 test_filter 607
2019-08-26 15:05:15.626339 Start evaluation
precision: 0.9772727272727273, recall: 0.8365758754863813, f1: 0.9014675052410902, cor

In [146]:
# Context features, raw rows and coreference features for the 'clueweb12' test set.
d_clueweb_ctx_emnlp17, d_clueweb_ctx_emnlp17_raw, d_clueweb_coref_emnlp17 = (
    fetch_all_features('basic_fea_ctx_emnlp17', 'test', 'clueweb12'),
    get_feature_results('basic_fea_ctx_emnlp17', 'test', 'clueweb12'),
    fetch_all_features('basic_fea_coref_emnlp17', 'test', 'clueweb12'),
)
# True-label count, total mention count and the document lookup for the same source.
d_clueweb_ctx_emnlp17_true_labels, d_clueweb_total, docs_dict_clueweb = (
    get_true_labels(d_clueweb_ctx_emnlp17),
    get_total_mentions('clueweb12', 'test'),
    fetch_q_ids_docs('clueweb12'),
)

# Shapes, total mentions and true labels on separate lines, then their ratio.
print(
    d_clueweb_ctx_emnlp17.shape,
    d_clueweb_coref_emnlp17.shape,
    d_clueweb_total,
    d_clueweb_ctx_emnlp17_true_labels,
    sep='\n',
)
print('upper bound', d_clueweb_ctx_emnlp17_true_labels / d_clueweb_total)

(277802, 21)
(277802, 13)
11154
10372
upper bound 0.9298906221983145


In [147]:
# Combine the ClueWeb context features with the coreference features and show
# the shape of the combined matrix.
d_clueweb_ctx_coref_emnlp17 = combine_features(d_clueweb_ctx_emnlp17, d_clueweb_coref_emnlp17)
print(d_clueweb_ctx_coref_emnlp17.shape)

Building idx for new features...
Combine original and new features...
(277802, 32)


In [76]:
# Evaluate the saved 'ctx' and 'ctx_coref' models on their matching ClueWeb
# feature matrices, in that order.
for eval_tag, eval_features in (
    ('ctx', d_clueweb_ctx_emnlp17),
    ('ctx_coref', d_clueweb_ctx_coref_emnlp17),
):
    process(eval_tag, eval_features, d_clueweb_total, n_estimators=[4900], max_depths=[6])

  "because it will generate extra copies and increase memory consumption")


2019-08-23 16:43:34.544666 Loading model: 4900_6_ctx
2019-08-23 16:43:34.575550 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.7525067489394524, recall: 0.6997489689797383, f1: 0.7251695623896683, corr_num: 7805
2019-08-23 16:43:42.251803 Evaluation finished
2019-08-23 16:43:42.370171 Loading model: 4900_6_ctx_coref
2019-08-23 16:43:42.421077 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.7827805630543772, recall: 0.7279003048233817, f1: 0.7543435845024622, corr_num: 8119
2019-08-23 16:43:49.961039 Evaluation finished


In [77]:
# Load the saved ctx+coref model and pass it to save_local_model_predictions with
# the ClueWeb ctx+coref matrix, the raw ctx rows and docs_dict_clueweb.
# top_k is not passed, so the function's default applies.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_clueweb_ctx_coref_emnlp17, d_clueweb_ctx_emnlp17_raw, docs_dict_clueweb)

Number of groups: 10372


In [148]:
# Fetch the 'basic_fea_coh_emnlp17' features for the 'clueweb12' test set and show their shape.
d_clueweb_coh_emnlp17 = fetch_all_features('basic_fea_coh_emnlp17', 'test', 'clueweb12')
print(d_clueweb_coh_emnlp17.shape)

# Combine them with the ctx+coref matrix and show the shape of the result.
d_clueweb_ctx_coref_coh_emnlp17 = combine_features(d_clueweb_ctx_coref_emnlp17, d_clueweb_coh_emnlp17)
print(d_clueweb_ctx_coref_coh_emnlp17.shape)

(277802, 24)
Building idx for new features...
Combine original and new features...
(277802, 54)


In [55]:
# Fetch the four global coherence feature variants for the 'clueweb12' test set.
(
    d_clueweb_coh_emnlp17_global,
    d_clueweb_coh_emnlp17_global2,
    d_clueweb_coh_emnlp17_global3,
    d_clueweb_coh_emnlp17_global4,
) = [
    fetch_all_features(experiment_id, 'test', 'clueweb12')
    for experiment_id in (
        'basic_fea_coh_emnlp17_global',
        'basic_fea_coh_emnlp17_global2',
        'basic_fea_coh_emnlp17_global3',
        'basic_fea_coh_emnlp17_global4',
    )
]

# Combine each variant with the same ctx+coref matrix, in the same order.
(
    d_clueweb_ctx_coref_coh_emnlp17_global,
    d_clueweb_ctx_coref_coh_emnlp17_global2,
    d_clueweb_ctx_coref_coh_emnlp17_global3,
    d_clueweb_ctx_coref_coh_emnlp17_global4,
) = [
    combine_features(d_clueweb_ctx_coref_emnlp17, coh_variant)
    for coh_variant in (
        d_clueweb_coh_emnlp17_global,
        d_clueweb_coh_emnlp17_global2,
        d_clueweb_coh_emnlp17_global3,
        d_clueweb_coh_emnlp17_global4,
    )
]

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...


In [56]:
# Evaluate each saved coherence model on its matching ClueWeb feature matrix,
# in the listed order.
for eval_tag, eval_features in (
    ('ctx_coref_coh', d_clueweb_ctx_coref_coh_emnlp17),
    ('ctx_coref_coh_global', d_clueweb_ctx_coref_coh_emnlp17_global),
    ('ctx_coref_coh_global2', d_clueweb_ctx_coref_coh_emnlp17_global2),
    ('ctx_coref_coh_global3', d_clueweb_ctx_coref_coh_emnlp17_global3),
    ('ctx_coref_coh_global4', d_clueweb_ctx_coref_coh_emnlp17_global4),
):
    process(eval_tag, eval_features, d_clueweb_total, n_estimators=[4900], max_depths=[6])

2019-08-26 15:08:34.983044 Loading model: 4900_6_ctx_coref_coh
2019-08-26 15:08:34.999386 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8230813729271115, recall: 0.7653756499910346, f1: 0.7931803400538883, corr_num: 8537
2019-08-26 15:08:42.549075 Evaluation finished
2019-08-26 15:08:42.695944 Loading model: 4900_6_ctx_coref_coh_global
2019-08-26 15:08:42.712014 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8349402236791361, recall: 0.7764030840953918, f1: 0.8046083805630401, corr_num: 8660
2019-08-26 15:08:50.223081 Evaluation finished
2019-08-26 15:08:50.370131 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-26 15:08:50.386258 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8349402236791361, recall: 0.7764030840953918, f1: 0.8046083805630401, corr_num: 8660
2019-08-26 15:08:57.652245 Evaluation finished
2019-08-26 15:08:57.799302 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-26 15:08:57.816136 Start evaluation
n_estimators

In [57]:
# Run process_iterative_filtering for the 'clueweb' key; the recorded output shows,
# per model, the test_filter size and precision / recall / F1 under "Top K=3".
process_iterative_filtering('clueweb')

=== Top K=3 ===
2019-08-26 15:10:13.261103 Processing model: 4900_6_ctx_coref_coh


  "because it will generate extra copies and increase memory consumption")


2019-08-26 15:10:21.012128 test_filter 27342
2019-08-26 15:10:21.030459 Start evaluation
precision: 0.9040250914793518, recall: 0.7752375829298906, f1: 0.8346927940537672, corr_num: 8647
2019-08-26 15:10:21.967892 Evaluation finished
2019-08-26 15:10:21.969875 Processing model: 4900_6_ctx_coref_coh_global
2019-08-26 15:10:29.543511 test_filter 27342
2019-08-26 15:10:29.557366 Start evaluation
precision: 0.91439332247557, recall: 0.8053613053613053, f1: 0.8564210124892745, corr_num: 8983
2019-08-26 15:10:30.447095 Evaluation finished
2019-08-26 15:10:30.449245 Processing model: 4900_6_ctx_coref_coh_global2
2019-08-26 15:10:37.668362 test_filter 27342
2019-08-26 15:10:37.682066 Start evaluation
precision: 0.9243594957299716, recall: 0.8151335843643536, f1: 0.8663172939494997, corr_num: 9092
2019-08-26 15:10:38.577835 Evaluation finished
2019-08-26 15:10:38.580632 Processing model: 4900_6_ctx_coref_coh_global3
2019-08-26 15:10:45.870292 test_filter 27342
2019-08-26 15:10:45.884201 Start e

In [149]:
# Context features, raw rows and coreference features for the 'wiki_wned' test set.
d_wiki_wned_ctx_emnlp17, d_wiki_wned_ctx_emnlp17_raw, d_wiki_wned_coref_emnlp17 = (
    fetch_all_features('basic_fea_ctx_emnlp17', 'test', 'wiki_wned'),
    get_feature_results('basic_fea_ctx_emnlp17', 'test', 'wiki_wned'),
    fetch_all_features('basic_fea_coref_emnlp17', 'test', 'wiki_wned'),
)
# True-label count, total mention count and the document lookup for the same source.
d_wiki_wned_ctx_emnlp17_true_labels, d_wiki_wned_total, docs_dict_wiki_wned = (
    get_true_labels(d_wiki_wned_ctx_emnlp17),
    get_total_mentions('wiki_wned', 'test'),
    fetch_q_ids_docs('wiki_wned'),
)

# Shapes, total mentions and true labels on separate lines, then their ratio.
print(
    d_wiki_wned_ctx_emnlp17.shape,
    d_wiki_wned_coref_emnlp17.shape,
    d_wiki_wned_total,
    d_wiki_wned_ctx_emnlp17_true_labels,
    sep='\n',
)
print('upper bound', d_wiki_wned_ctx_emnlp17_true_labels / d_wiki_wned_total)

(119788, 21)
(119788, 13)
6821
6224
upper bound 0.9124761765137077


In [150]:
# Combine the wiki_wned context features with the coreference features and show
# the shape of the combined matrix.
d_wiki_wned_ctx_coref_emnlp17 = combine_features(d_wiki_wned_ctx_emnlp17, d_wiki_wned_coref_emnlp17)
print(d_wiki_wned_ctx_coref_emnlp17.shape)

Building idx for new features...
Combine original and new features...
(119788, 32)


In [93]:
# Evaluate the saved 'ctx' and 'ctx_coref' models on their matching wiki_wned
# feature matrices, in that order.
for eval_tag, eval_features in (
    ('ctx', d_wiki_wned_ctx_emnlp17),
    ('ctx_coref', d_wiki_wned_ctx_coref_emnlp17),
):
    process(eval_tag, eval_features, d_wiki_wned_total, n_estimators=[4900], max_depths=[6])

2019-08-24 16:40:46.985497 Loading model: 4900_6_ctx
2019-08-24 16:40:47.012461 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.7802056555269923, recall: 0.7119190734496408, f1: 0.7444998083556917, corr_num: 4856
2019-08-24 16:40:50.146120 Evaluation finished
2019-08-24 16:40:50.171767 Loading model: 4900_6_ctx_coref
2019-08-24 16:40:50.193264 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.7840616966580977, recall: 0.7154376191174314, f1: 0.7481793790724416, corr_num: 4880
2019-08-24 16:40:53.655632 Evaluation finished


In [90]:
# Load the saved ctx+coref model and pass it to save_local_model_predictions with
# the wiki_wned ctx+coref matrix, the raw ctx rows and docs_dict_wiki_wned.
# top_k is not passed, so the function's default applies.
model = load_model('4900_6_ctx_coref')
save_local_model_predictions(model, d_wiki_wned_ctx_coref_emnlp17, d_wiki_wned_ctx_emnlp17_raw, 
                             docs_dict_wiki_wned)

Number of groups: 6224


In [151]:
# Fetch the 'basic_fea_coh_emnlp17' features for the wiki_wned test split.
d_wiki_wned_coh_emnlp17 = fetch_all_features(
    'basic_fea_coh_emnlp17', 'test', 'wiki_wned')
print(d_wiki_wned_coh_emnlp17.shape)

# Merge them into the ctx+coref matrix and report the resulting shape.
d_wiki_wned_ctx_coref_coh_emnlp17 = combine_features(
    d_wiki_wned_ctx_coref_emnlp17,
    d_wiki_wned_coh_emnlp17,
)
print(d_wiki_wned_ctx_coref_coh_emnlp17.shape)

(119788, 24)
Building idx for new features...
Combine original and new features...
(119788, 54)


In [61]:
# Fetch the four "global" coherence feature variants for the wiki_wned test
# split, in the order global, global2, global3, global4.
(
    d_wiki_wned_coh_emnlp17_global,
    d_wiki_wned_coh_emnlp17_global2,
    d_wiki_wned_coh_emnlp17_global3,
    d_wiki_wned_coh_emnlp17_global4,
) = (
    fetch_all_features(exp_id, 'test', 'wiki_wned')
    for exp_id in (
        'basic_fea_coh_emnlp17_global',
        'basic_fea_coh_emnlp17_global2',
        'basic_fea_coh_emnlp17_global3',
        'basic_fea_coh_emnlp17_global4',
    )
)

# Merge each variant, in the same order, into the shared ctx+coref matrix.
(
    d_wiki_wned_ctx_coref_coh_emnlp17_global,
    d_wiki_wned_ctx_coref_coh_emnlp17_global2,
    d_wiki_wned_ctx_coref_coh_emnlp17_global3,
    d_wiki_wned_ctx_coref_coh_emnlp17_global4,
) = (
    combine_features(d_wiki_wned_ctx_coref_emnlp17, coh_variant)
    for coh_variant in (
        d_wiki_wned_coh_emnlp17_global,
        d_wiki_wned_coh_emnlp17_global2,
        d_wiki_wned_coh_emnlp17_global3,
        d_wiki_wned_coh_emnlp17_global4,
    )
)

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...


In [62]:
# Evaluate wiki_wned once per coherence variant; each model tag is paired with
# the feature matrix built from the same variant.
for model_tag, eval_features in (
    ('ctx_coref_coh', d_wiki_wned_ctx_coref_coh_emnlp17),
    ('ctx_coref_coh_global', d_wiki_wned_ctx_coref_coh_emnlp17_global),
    ('ctx_coref_coh_global2', d_wiki_wned_ctx_coref_coh_emnlp17_global2),
    ('ctx_coref_coh_global3', d_wiki_wned_ctx_coref_coh_emnlp17_global3),
    ('ctx_coref_coh_global4', d_wiki_wned_ctx_coref_coh_emnlp17_global4),
):
    process(model_tag, eval_features, d_wiki_wned_total,
            n_estimators=[4900], max_depths=[6])

2019-08-26 15:13:48.607568 Loading model: 4900_6_ctx_coref_coh
2019-08-26 15:13:48.637531 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8550771208226221, recall: 0.7802375018325759, f1: 0.8159448064392488, corr_num: 5322
2019-08-26 15:13:51.784403 Evaluation finished
2019-08-26 15:13:51.843664 Loading model: 4900_6_ctx_coref_coh_global
2019-08-26 15:13:51.867369 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8639138817480719, recall: 0.7883008356545961, f1: 0.8243771559984668, corr_num: 5377
2019-08-26 15:13:54.983183 Evaluation finished
2019-08-26 15:13:55.042788 Loading model: 4900_6_ctx_coref_coh_global2
2019-08-26 15:13:55.067164 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8642352185089974, recall: 0.7885940477935787, f1: 0.8246837868915293, corr_num: 5379
2019-08-26 15:13:58.089868 Evaluation finished
2019-08-26 15:13:58.148941 Loading model: 4900_6_ctx_coref_coh_global3
2019-08-26 15:13:58.173386 Start evaluation
n_estimators

In [63]:
# Run the iterative-filtering evaluation for wiki_wned. The helper is defined
# elsewhere in this notebook; judging by the captured output it reports, per
# top-K setting and per model, the filtered candidate count and P/R/F1.
process_iterative_filtering('wiki_wned')

=== Top K=3 ===
2019-08-26 15:14:40.647852 Processing model: 4900_6_ctx_coref_coh


  "because it will generate extra copies and increase memory consumption")


2019-08-26 15:14:43.950725 test_filter 14827
2019-08-26 15:14:43.958090 Start evaluation
precision: 0.9201943095072866, recall: 0.7775985925817329, f1: 0.8429082240762813, corr_num: 5304
2019-08-26 15:14:44.483611 Evaluation finished
2019-08-26 15:14:44.485048 Processing model: 4900_6_ctx_coref_coh_global
2019-08-26 15:14:47.703420 test_filter 14827
2019-08-26 15:14:47.710742 Start evaluation
precision: 0.9270157938487116, recall: 0.8174754434833602, f1: 0.8688064817700218, corr_num: 5576
2019-08-26 15:14:48.235299 Evaluation finished
2019-08-26 15:14:48.236655 Processing model: 4900_6_ctx_coref_coh_global2
2019-08-26 15:14:51.449763 test_filter 14827
2019-08-26 15:14:51.457345 Start evaluation
precision: 0.9266886326194399, recall: 0.8246591408884327, f1: 0.8727018850360716, corr_num: 5625
2019-08-26 15:14:51.997465 Evaluation finished
2019-08-26 15:14:51.998865 Processing model: 4900_6_ctx_coref_coh_global3
2019-08-26 15:14:55.243425 test_filter 14827
2019-08-26 15:14:55.250991 Start

In [93]:
# Evaluate msnbc with the 'ctx_coref_coh_gt' model on the full candidate set.
process(
    'ctx_coref_coh_gt',
    d_msnbc_ctx_coref_coh_emnlp17,
    d_msnbc_total,
    n_estimators=[4900],
    max_depths=[6],
)

2019-08-26 17:19:52.621753 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:19:52.649353 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9616, recall: 0.9161585365853658, f1: 0.9383294301327088, corr_num: 601
2019-08-26 17:19:53.064080 Evaluation finished


In [94]:
# Use the ctx+coref model to pick each mention's top-k candidates, then evaluate
# the 'ctx_coref_coh_gt' model on msnbc restricted to that selection.
model = load_model('4900_6_ctx_coref')

for top_k in (3, 5):
    print('=== Top K=%d ===' % top_k)
    test_filter = get_top_k_prediction_indices(
        model,
        d_msnbc_ctx_coref_emnlp17,
        d_msnbc_ctx_emnlp17_raw,
        docs_dict_msnbc,
        top_k,
    )
    print('test_filter', len(test_filter))

    process(
        'ctx_coref_coh_gt',
        d_msnbc_ctx_coref_coh_emnlp17,
        d_msnbc_total,
        n_estimators=[4900],
        max_depths=[6],
        test_filter=test_filter,
    )

=== Top K=3 ===
test_filter 1599
2019-08-26 17:20:08.327171 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:20:08.344111 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9808, recall: 0.9344512195121951, f1: 0.957064793130367, corr_num: 613
2019-08-26 17:20:08.416684 Evaluation finished
=== Top K=5 ===
test_filter 2432
2019-08-26 17:20:08.831479 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:20:08.848586 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9776, recall: 0.9314024390243902, f1: 0.9539422326307573, corr_num: 611
2019-08-26 17:20:08.933964 Evaluation finished


In [91]:
# Evaluate wiki_wned with the 'ctx_coref_coh_gt' model on the full candidate set.
process(
    'ctx_coref_coh_gt',
    d_wiki_wned_ctx_coref_coh_emnlp17,
    d_wiki_wned_total,
    n_estimators=[4900],
    max_depths=[6],
)

2019-08-26 17:17:30.376205 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:17:30.396524 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8631105398457584, recall: 0.7875678053071398, f1: 0.8236105787658107, corr_num: 5372
2019-08-26 17:17:33.495488 Evaluation finished


In [92]:
# Use the ctx+coref model to pick each mention's top-k candidates, then evaluate
# the 'ctx_coref_coh_gt' model on wiki_wned restricted to that selection.
model = load_model('4900_6_ctx_coref')

for top_k in (3, 5):
    print('=== Top K=%d ===' % top_k)
    test_filter = get_top_k_prediction_indices(
        model,
        d_wiki_wned_ctx_coref_emnlp17,
        d_wiki_wned_ctx_emnlp17_raw,
        docs_dict_wiki_wned,
        top_k,
    )
    print('test_filter', len(test_filter))

    process(
        'ctx_coref_coh_gt',
        d_wiki_wned_ctx_coref_coh_emnlp17,
        d_wiki_wned_total,
        n_estimators=[4900],
        max_depths=[6],
        test_filter=test_filter,
    )

=== Top K=3 ===
test_filter 14827
2019-08-26 17:18:17.538507 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:18:17.554571 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9200208188757807, recall: 0.7774519865122416, f1: 0.8427493047278506, corr_num: 5303
2019-08-26 17:18:18.099190 Evaluation finished
=== Top K=5 ===
test_filter 22076
2019-08-26 17:18:21.376689 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:18:21.392769 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9076278290025147, recall: 0.7937252602257734, f1: 0.8468637572344753, corr_num: 5414
2019-08-26 17:18:22.151045 Evaluation finished


In [141]:
def print_ground_truth_statistics(dataset_name, dataset_type, total):
    """Print how often the candidate dictionary covers the ground truth.

    Reads the cardinality of two Redis sets through the module-level ``conn``:
    ``dict-has-ground-truth:<dataset_name>:<dataset_type>`` and
    ``dict-top-is-ground-truth:<dataset_name>:<dataset_type>``, and prints each
    count together with its share of ``total``.

    Args:
        dataset_name: Dataset identifier used in the Redis keys (e.g. 'aida_conll').
        dataset_type: Split identifier used in the Redis keys (e.g. 'testa').
        total: Denominator for the percentages; callers in this notebook pass
            the result of ``get_total_mentions``.

    Returns:
        None. The statistics are written to stdout.

    A ``total`` of 0 (or None) used to raise ``ZeroDivisionError``/``TypeError``;
    the percentage is now reported as ``n/a`` in that case. Output for any
    non-zero total is unchanged.
    """
    def _share(count):
        # Guard the division so an empty or unknown dataset still gets a report.
        if not total:
            return 'n/a'
        return '%.2f%%' % (100 * count / total)

    count_has_ground_truth = conn.scard('dict-has-ground-truth:%s:%s' % (dataset_name, dataset_type))
    count_top_is_ground_truth = conn.scard('dict-top-is-ground-truth:%s:%s' % (dataset_name, dataset_type))

    print('%s %s' % (dataset_name, dataset_type))
    print('- dict has ground truth: %d (%s)' % (count_has_ground_truth, _share(count_has_ground_truth)))
    print('- dict top is ground truth: %d (%s)' % (count_top_is_ground_truth, _share(count_top_is_ground_truth)))

In [132]:
# Mention totals for the three aida_conll splits, in train / testa / testb order.
(
    d_aida_conll_train_total,
    d_aida_conll_test_a_total,
    d_aida_conll_test_b_total,
) = (
    get_total_mentions('aida_conll', split_name)
    for split_name in ('train', 'testa', 'testb')
)
print(
    d_aida_conll_train_total,
    d_aida_conll_test_a_total,
    d_aida_conll_test_b_total,
)

18541 4791 4485


In [142]:
# Dictionary-coverage report for every dataset/split, each paired with the
# mention total used as the percentage denominator.
for gt_dataset_name, gt_dataset_type, gt_mention_total in (
    ('aida_conll', 'train', d_aida_conll_train_total),
    ('aida_conll', 'testa', d_aida_conll_test_a_total),
    ('aida_conll', 'testb', d_aida_conll_test_b_total),
    ('msnbc_new', 'test', d_msnbc_total),
    ('aquaint_new', 'test', d_aquaint_total),
    ('ace2014_uiuc', 'test', d_ace_total),
    ('clueweb12', 'test', d_clueweb_total),
    ('wiki_wned', 'test', d_wiki_wned_total),
):
    print_ground_truth_statistics(gt_dataset_name, gt_dataset_type, gt_mention_total)

aida_conll train
- dict has ground truth: 18479 (99.67%)
- dict top is ground truth: 13666 (73.71%)
aida_conll testa
- dict has ground truth: 4780 (99.77%)
- dict top is ground truth: 3377 (70.49%)
aida_conll testb
- dict has ground truth: 4475 (99.78%)
- dict top is ground truth: 3060 (68.23%)
msnbc_new test
- dict has ground truth: 641 (97.71%)
- dict top is ground truth: 493 (75.15%)
aquaint_new test
- dict has ground truth: 677 (93.12%)
- dict top is ground truth: 611 (84.04%)
ace2014_uiuc test
- dict has ground truth: 228 (88.72%)
- dict top is ground truth: 209 (81.32%)
clueweb12 test
- dict has ground truth: 10565 (94.72%)
- dict top is ground truth: 7436 (66.67%)
wiki_wned test
- dict has ground truth: 6406 (93.92%)
- dict top is ground truth: 4357 (63.88%)


In [89]:
# Evaluate clueweb with the 'ctx_coref_coh_gt' model on the full candidate set.
process(
    'ctx_coref_coh_gt',
    d_clueweb_ctx_coref_coh_emnlp17,
    d_clueweb_total,
    n_estimators=[4900],
    max_depths=[6],
)

2019-08-26 17:15:48.332295 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:15:48.348604 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.827998457385268, recall: 0.7699480007172315, f1: 0.7979187958747562, corr_num: 8588
2019-08-26 17:15:55.422135 Evaluation finished


In [90]:
# Use the ctx+coref model to pick each mention's top-k candidates, then evaluate
# the 'ctx_coref_coh_gt' model on clueweb restricted to that selection.
model = load_model('4900_6_ctx_coref')

for top_k in (3, 5):
    print('=== Top K=%d ===' % top_k)
    test_filter = get_top_k_prediction_indices(
        model,
        d_clueweb_ctx_coref_emnlp17,
        d_clueweb_ctx_emnlp17_raw,
        docs_dict_clueweb,
        top_k,
    )
    print('test_filter', len(test_filter))

    process(
        'ctx_coref_coh_gt',
        d_clueweb_ctx_coref_coh_emnlp17,
        d_clueweb_total,
        n_estimators=[4900],
        max_depths=[6],
        test_filter=test_filter,
    )

=== Top K=3 ===
test_filter 27342
2019-08-26 17:16:23.523512 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:16:23.539974 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9029796131730267, recall: 0.7743410435718128, f1: 0.8337274965007964, corr_num: 8637
2019-08-26 17:16:24.412521 Evaluation finished
=== Top K=5 ===
test_filter 42649
2019-08-26 17:16:32.239568 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:16:32.256228 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8811981380287391, recall: 0.7807064730141653, f1: 0.8279140521011599, corr_num: 8708
2019-08-26 17:16:33.504030 Evaluation finished


In [85]:
# Evaluate ace with the 'ctx_coref_coh_gt' model on the full candidate set.
process(
    'ctx_coref_coh_gt',
    d_ace_ctx_coref_coh_emnlp17,
    d_ace_total,
    n_estimators=[4900],
    max_depths=[6],
)

2019-08-26 17:09:03.581973 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:09:03.611694 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9422222222222222, recall: 0.8249027237354085, f1: 0.879668049792531, corr_num: 212
2019-08-26 17:09:03.820831 Evaluation finished


In [86]:
# Use the ctx+coref model to pick each mention's top-k candidates, then evaluate
# the 'ctx_coref_coh_gt' model on ace restricted to that selection.
model = load_model('4900_6_ctx_coref')

for top_k in (3, 5):
    print('=== Top K=%d ===' % top_k)
    test_filter = get_top_k_prediction_indices(
        model,
        d_ace_ctx_coref_emnlp17,
        d_ace_ctx_emnlp17_raw,
        docs_dict_ace,
        top_k,
    )
    print('test_filter', len(test_filter))

    process(
        'ctx_coref_coh_gt',
        d_ace_ctx_coref_coh_emnlp17,
        d_ace_total,
        n_estimators=[4900],
        max_depths=[6],
        test_filter=test_filter,
    )

=== Top K=3 ===
test_filter 607
2019-08-26 17:09:44.331742 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:09:44.349502 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9596412556053812, recall: 0.8326848249027238, f1: 0.8916666666666667, corr_num: 214
2019-08-26 17:09:44.383515 Evaluation finished
=== Top K=5 ===
test_filter 951
2019-08-26 17:09:44.576043 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:09:44.593605 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9596412556053812, recall: 0.8326848249027238, f1: 0.8916666666666667, corr_num: 214
2019-08-26 17:09:44.631508 Evaluation finished


In [87]:
# Evaluate aquaint with the 'ctx_coref_coh_gt' model on the full candidate set.
process(
    'ctx_coref_coh_gt',
    d_aquaint_ctx_coref_coh_emnlp17,
    d_aquaint_total,
    n_estimators=[4900],
    max_depths=[6],
)

2019-08-26 17:12:28.328204 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:12:28.355620 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9169139465875371, recall: 0.8500687757909215, f1: 0.8822269807280514, corr_num: 618
2019-08-26 17:12:28.761684 Evaluation finished


In [88]:
# Use the ctx+coref model to pick each mention's top-k candidates, then evaluate
# the 'ctx_coref_coh_gt' model on aquaint restricted to that selection.
model = load_model('4900_6_ctx_coref')

for top_k in (3, 5):
    print('=== Top K=%d ===' % top_k)
    test_filter = get_top_k_prediction_indices(
        model,
        d_aquaint_ctx_coref_emnlp17,
        d_aquaint_ctx_emnlp17_raw,
        docs_dict_aquaint,
        top_k,
    )
    print('test_filter', len(test_filter))

    process(
        'ctx_coref_coh_gt',
        d_aquaint_ctx_coref_coh_emnlp17,
        d_aquaint_total,
        n_estimators=[4900],
        max_depths=[6],
        test_filter=test_filter,
    )

=== Top K=3 ===
test_filter 1732
2019-08-26 17:12:38.644514 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:12:38.660718 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9453717754172989, recall: 0.8569463548830811, f1: 0.898989898989899, corr_num: 623
2019-08-26 17:12:38.728560 Evaluation finished
=== Top K=5 ===
test_filter 2647
2019-08-26 17:12:39.172496 Loading model: 4900_6_ctx_coref_coh_gt
2019-08-26 17:12:39.188736 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9400299850074962, recall: 0.8624484181568088, f1: 0.8995695839311334, corr_num: 627
2019-08-26 17:12:39.285363 Evaluation finished


In [100]:
# Fetch the 'basic_fea_coh_ext' test features for the five evaluation datasets.
# Dataset order (msnbc, aquaint, ace, clueweb, wiki_wned) is kept throughout.
(
    d_msnbc_coh_ext,
    d_aquaint_coh_ext,
    d_ace_coh_ext,
    d_clueweb_coh_ext,
    d_wiki_wned_coh_ext,
) = (
    fetch_all_features('basic_fea_coh_ext', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2014_uiuc', 'clueweb12', 'wiki_wned')
)

# Shapes of the freshly fetched matrices, one per line.
print(
    d_msnbc_coh_ext.shape,
    d_aquaint_coh_ext.shape,
    d_ace_coh_ext.shape,
    d_clueweb_coh_ext.shape,
    d_wiki_wned_coh_ext.shape,
    sep='\n',
)

# Merge each dataset's coh_ext features into its ctx+coref matrix.
(
    d_msnbc_ctx_coref_coh_ext,
    d_aquaint_ctx_coref_coh_ext,
    d_ace_ctx_coref_coh_ext,
    d_clueweb_ctx_coref_coh_ext,
    d_wiki_wned_ctx_coref_coh_ext,
) = (
    combine_features(base_features, extra_features)
    for base_features, extra_features in (
        (d_msnbc_ctx_coref_emnlp17, d_msnbc_coh_ext),
        (d_aquaint_ctx_coref_emnlp17, d_aquaint_coh_ext),
        (d_ace_ctx_coref_emnlp17, d_ace_coh_ext),
        (d_clueweb_ctx_coref_emnlp17, d_clueweb_coh_ext),
        (d_wiki_wned_ctx_coref_emnlp17, d_wiki_wned_coh_ext),
    )
)

# Shapes after merging, one per line.
print(
    d_msnbc_ctx_coref_coh_ext.shape,
    d_aquaint_ctx_coref_coh_ext.shape,
    d_ace_ctx_coref_coh_ext.shape,
    d_clueweb_ctx_coref_coh_ext.shape,
    d_wiki_wned_ctx_coref_coh_ext.shape,
    sep='\n',
)

(14287, 35)
(15255, 35)
(6733, 35)
(277802, 35)
(119788, 35)
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(14287, 65)
(15255, 65)
(6733, 65)
(277802, 65)
(119788, 65)


In [101]:
# Evaluate the 'ctx_coref_coh_ext' model on each dataset's merged features,
# paired with that dataset's mention total.
for eval_features, mention_total in (
    (d_msnbc_ctx_coref_coh_ext, d_msnbc_total),
    (d_aquaint_ctx_coref_coh_ext, d_aquaint_total),
    (d_ace_ctx_coref_coh_ext, d_ace_total),
    (d_clueweb_ctx_coref_coh_ext, d_clueweb_total),
    (d_wiki_wned_ctx_coref_coh_ext, d_wiki_wned_total),
):
    process('ctx_coref_coh_ext', eval_features, mention_total,
            n_estimators=[4900], max_depths=[6])

2019-08-27 15:42:20.819096 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 15:42:20.842563 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.928, recall: 0.8841463414634146, f1: 0.9055425448868072, corr_num: 580
2019-08-27 15:42:21.460525 Evaluation finished
2019-08-27 15:42:21.470022 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 15:42:21.489455 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9154302670623146, recall: 0.8486932599724897, f1: 0.8807994289793005, corr_num: 617
2019-08-27 15:42:22.135302 Evaluation finished
2019-08-27 15:42:22.143328 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 15:42:22.163014 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9377777777777778, recall: 0.8210116731517509, f1: 0.8755186721991702, corr_num: 211
2019-08-27 15:42:22.422456 Evaluation finished
2019-08-27 15:42:22.609306 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 15:42:22.628698 Start evaluation
n_estimators: 4900, max_depth: 6, p

In [106]:
# Top-k filtered evaluation of the 'ctx_coref_coh_ext' model on all five datasets.
#
# For every k, the ctx+coref model selects candidates via
# get_top_k_prediction_indices and the result is passed to process() as
# test_filter. The five per-dataset stanzas were copy-pasted and differed only
# in the variables they named; they are now rows of one table so a change to
# the procedure is made once. Printed output and call order are unchanged.
model = load_model('4900_6_ctx_coref')

# Each row: (label printed after '> ',
#            ctx+coref features used for candidate selection,
#            raw ctx feature results,
#            per-dataset docs dict,
#            ctx+coref+coh_ext features to evaluate,
#            total mention count)
top_k_eval_sets = [
    ('msnbc', d_msnbc_ctx_coref_emnlp17, d_msnbc_ctx_emnlp17_raw,
     docs_dict_msnbc, d_msnbc_ctx_coref_coh_ext, d_msnbc_total),
    ('aquaint', d_aquaint_ctx_coref_emnlp17, d_aquaint_ctx_emnlp17_raw,
     docs_dict_aquaint, d_aquaint_ctx_coref_coh_ext, d_aquaint_total),
    ('ace', d_ace_ctx_coref_emnlp17, d_ace_ctx_emnlp17_raw,
     docs_dict_ace, d_ace_ctx_coref_coh_ext, d_ace_total),
    ('clueweb', d_clueweb_ctx_coref_emnlp17, d_clueweb_ctx_emnlp17_raw,
     docs_dict_clueweb, d_clueweb_ctx_coref_coh_ext, d_clueweb_total),
    ('wiki', d_wiki_wned_ctx_coref_emnlp17, d_wiki_wned_ctx_emnlp17_raw,
     docs_dict_wiki_wned, d_wiki_wned_ctx_coref_coh_ext, d_wiki_wned_total),
]

for top_k in [3, 5]:
    print('=== Top K=%d ===' % top_k)

    for (eval_label, filter_features, raw_results,
         docs_by_dataset, eval_features, mention_total) in top_k_eval_sets:
        print('> ' + eval_label)
        test_filter = get_top_k_prediction_indices(
            model, filter_features, raw_results, docs_by_dataset, top_k)
        print('test_filter', len(test_filter))
        process('ctx_coref_coh_ext', eval_features, mention_total,
                n_estimators=[4900], max_depths=[6], test_filter=test_filter)

=== Top K=3 ===
> msnbc


  "because it will generate extra copies and increase memory consumption")


test_filter 1599
2019-08-27 16:37:28.024610 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 16:37:28.043869 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9552, recall: 0.9100609756097561, f1: 0.9320843091334894, corr_num: 597
2019-08-27 16:37:28.140216 Evaluation finished
> aquaint
test_filter 1732
2019-08-27 16:37:28.664585 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 16:37:28.683532 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9499241274658573, recall: 0.8610729023383769, f1: 0.9033189033189032, corr_num: 626
2019-08-27 16:37:28.772121 Evaluation finished
> ace
test_filter 607
2019-08-27 16:37:29.028715 Loading model: 4900_6_ctx_coref_coh_ext
2019-08-27 16:37:29.047822 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9641255605381166, recall: 0.8365758754863813, f1: 0.8958333333333334, corr_num: 215
2019-08-27 16:37:29.093110 Evaluation finished
> clueweb
test_filter 27342
2019-08-27 16:37:38.604979 Loading model: 4900_6_ct

In [110]:
# Fetch the 'basic_fea_coh_ext_global' test features for the five evaluation
# datasets. Dataset order (msnbc, aquaint, ace, clueweb, wiki_wned) is kept throughout.
(
    d_msnbc_coh_ext_global,
    d_aquaint_coh_ext_global,
    d_ace_coh_ext_global,
    d_clueweb_coh_ext_global,
    d_wiki_wned_coh_ext_global,
) = (
    fetch_all_features('basic_fea_coh_ext_global', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2014_uiuc', 'clueweb12', 'wiki_wned')
)

# Shapes of the freshly fetched matrices, one per line.
print(
    d_msnbc_coh_ext_global.shape,
    d_aquaint_coh_ext_global.shape,
    d_ace_coh_ext_global.shape,
    d_clueweb_coh_ext_global.shape,
    d_wiki_wned_coh_ext_global.shape,
    sep='\n',
)

# Merge each dataset's coh_ext_global features into its ctx+coref matrix.
(
    d_msnbc_ctx_coref_coh_ext_global,
    d_aquaint_ctx_coref_coh_ext_global,
    d_ace_ctx_coref_coh_ext_global,
    d_clueweb_ctx_coref_coh_ext_global,
    d_wiki_wned_ctx_coref_coh_ext_global,
) = (
    combine_features(base_features, extra_features)
    for base_features, extra_features in (
        (d_msnbc_ctx_coref_emnlp17, d_msnbc_coh_ext_global),
        (d_aquaint_ctx_coref_emnlp17, d_aquaint_coh_ext_global),
        (d_ace_ctx_coref_emnlp17, d_ace_coh_ext_global),
        (d_clueweb_ctx_coref_emnlp17, d_clueweb_coh_ext_global),
        (d_wiki_wned_ctx_coref_emnlp17, d_wiki_wned_coh_ext_global),
    )
)

# Shapes after merging, one per line.
print(
    d_msnbc_ctx_coref_coh_ext_global.shape,
    d_aquaint_ctx_coref_coh_ext_global.shape,
    d_ace_ctx_coref_coh_ext_global.shape,
    d_clueweb_ctx_coref_coh_ext_global.shape,
    d_wiki_wned_ctx_coref_coh_ext_global.shape,
    sep='\n',
)

(14287, 35)
(15255, 35)
(6733, 35)
(277802, 35)
(119788, 35)
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(14287, 65)
(15255, 65)
(6733, 65)
(277802, 65)
(119788, 65)


In [111]:
# Evaluate the 'ctx_coref_coh_ext_global' model on each dataset's merged
# features, paired with that dataset's mention total.
for eval_features, mention_total in (
    (d_msnbc_ctx_coref_coh_ext_global, d_msnbc_total),
    (d_aquaint_ctx_coref_coh_ext_global, d_aquaint_total),
    (d_ace_ctx_coref_coh_ext_global, d_ace_total),
    (d_clueweb_ctx_coref_coh_ext_global, d_clueweb_total),
    (d_wiki_wned_ctx_coref_coh_ext_global, d_wiki_wned_total),
):
    process('ctx_coref_coh_ext_global', eval_features, mention_total,
            n_estimators=[4900], max_depths=[6])

2019-08-29 18:01:29.908649 Loading model: 4900_6_ctx_coref_coh_ext_global
2019-08-29 18:01:29.987417 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.944, recall: 0.899390243902439, f1: 0.9211553473848556, corr_num: 590
2019-08-29 18:01:30.916386 Evaluation finished
2019-08-29 18:01:30.926792 Loading model: 4900_6_ctx_coref_coh_ext_global
2019-08-29 18:01:30.956182 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9183976261127597, recall: 0.8514442916093535, f1: 0.8836545324768023, corr_num: 619
2019-08-29 18:01:31.841295 Evaluation finished
2019-08-29 18:01:31.848710 Loading model: 4900_6_ctx_coref_coh_ext_global
2019-08-29 18:01:31.874226 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9333333333333333, recall: 0.8171206225680934, f1: 0.871369294605809, corr_num: 210
2019-08-29 18:01:32.338649 Evaluation finished
2019-08-29 18:01:32.578414 Loading model: 4900_6_ctx_coref_coh_ext_global
2019-08-29 18:01:32.603426 Start evaluation
n_estimat

In [113]:
# Fetch the 'basic_ctx_can_fixed' test features for the five evaluation datasets.
# NOTE(review): ACE is read from 'ace2004_wned' here, whereas the coh_ext cells
# in this notebook read 'ace2014_uiuc' -- confirm this is intentional.
(
    d_msnbc_ctx_can_fixed,
    d_aquaint_ctx_can_fixed,
    d_ace_ctx_can_fixed,
    d_clueweb_ctx_can_fixed,
    d_wiki_wned_ctx_can_fixed,
) = (
    fetch_all_features('basic_ctx_can_fixed', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2004_wned', 'clueweb12', 'wiki_wned')
)

# Shapes of the fetched matrices, one per line.
print(
    d_msnbc_ctx_can_fixed.shape,
    d_aquaint_ctx_can_fixed.shape,
    d_ace_ctx_can_fixed.shape,
    d_clueweb_ctx_can_fixed.shape,
    d_wiki_wned_ctx_can_fixed.shape,
    sep='\n',
)

(14304, 21)
(15288, 21)
(6846, 21)
(277997, 21)
(123551, 21)


In [114]:
# Fetch the 'basic_coref_can_fixed' test features for the five evaluation datasets.
# NOTE(review): ACE is read from 'ace2004_wned' here, whereas the coh_ext cells
# in this notebook read 'ace2014_uiuc' -- confirm this is intentional.
(
    d_msnbc_coref_can_fixed,
    d_aquaint_coref_can_fixed,
    d_ace_coref_can_fixed,
    d_clueweb_coref_can_fixed,
    d_wiki_wned_coref_can_fixed,
) = (
    fetch_all_features('basic_coref_can_fixed', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2004_wned', 'clueweb12', 'wiki_wned')
)

# Shapes of the fetched matrices, one per line.
print(
    d_msnbc_coref_can_fixed.shape,
    d_aquaint_coref_can_fixed.shape,
    d_ace_coref_can_fixed.shape,
    d_clueweb_coref_can_fixed.shape,
    d_wiki_wned_coref_can_fixed.shape,
    sep='\n',
)

(14304, 13)
(15288, 13)
(6846, 13)
(277997, 13)
(123551, 13)


In [115]:
# Merge each dataset's can_fixed coreference features into its can_fixed
# context features (dataset order: msnbc, aquaint, ace, clueweb, wiki_wned).
(
    d_msnbc_ctx_coref_can_fixed,
    d_aquaint_ctx_coref_can_fixed,
    d_ace_ctx_coref_can_fixed,
    d_clueweb_ctx_coref_can_fixed,
    d_wiki_wned_ctx_coref_can_fixed,
) = (
    combine_features(base_features, extra_features)
    for base_features, extra_features in (
        (d_msnbc_ctx_can_fixed, d_msnbc_coref_can_fixed),
        (d_aquaint_ctx_can_fixed, d_aquaint_coref_can_fixed),
        (d_ace_ctx_can_fixed, d_ace_coref_can_fixed),
        (d_clueweb_ctx_can_fixed, d_clueweb_coref_can_fixed),
        (d_wiki_wned_ctx_can_fixed, d_wiki_wned_coref_can_fixed),
    )
)

# Shapes after merging, one per line.
print(
    d_msnbc_ctx_coref_can_fixed.shape,
    d_aquaint_ctx_coref_can_fixed.shape,
    d_ace_ctx_coref_can_fixed.shape,
    d_clueweb_ctx_coref_can_fixed.shape,
    d_wiki_wned_ctx_coref_can_fixed.shape,
    sep='\n',
)

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(14304, 32)
(15288, 32)
(6846, 32)
(277997, 32)
(123551, 32)


In [121]:
# Print the test-split mention totals of the two ACE dataset keys side by side.
print(*(
    get_total_mentions(ace_source, 'test')
    for ace_source in ('ace2004_wned', 'ace2014_uiuc')
))

257 257


In [116]:
# Evaluate the existing 'ctx_coref' model on the can_fixed ctx+coref features of
# each dataset, paired with that dataset's mention total.
for eval_features, mention_total in (
    (d_msnbc_ctx_coref_can_fixed, d_msnbc_total),
    (d_aquaint_ctx_coref_can_fixed, d_aquaint_total),
    (d_ace_ctx_coref_can_fixed, d_ace_total),
    (d_clueweb_ctx_coref_can_fixed, d_clueweb_total),
    (d_wiki_wned_ctx_coref_can_fixed, d_wiki_wned_total),
):
    process('ctx_coref', eval_features, mention_total,
            n_estimators=[4900], max_depths=[6])

2019-08-29 21:49:15.570573 Loading model: 4900_6_ctx_coref
2019-08-29 21:49:15.602251 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9429477020602218, recall: 0.9070121951219512, f1: 0.9246309246309246, corr_num: 595
2019-08-29 21:49:16.139383 Evaluation finished
2019-08-29 21:49:16.147823 Loading model: 4900_6_ctx_coref
2019-08-29 21:49:16.164728 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8985507246376812, recall: 0.8528198074277854, f1: 0.8750882145377559, corr_num: 620
2019-08-29 21:49:16.721732 Evaluation finished
2019-08-29 21:49:16.728121 Loading model: 4900_6_ctx_coref
2019-08-29 21:49:16.747115 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9404255319148936, recall: 0.8599221789883269, f1: 0.8983739837398375, corr_num: 221
2019-08-29 21:49:17.014528 Evaluation finished
2019-08-29 21:49:17.123492 Loading model: 4900_6_ctx_coref
2019-08-29 21:49:17.154269 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.7833668

In [128]:
# Evaluate the 'ctx_coref_can_fixed' model on the can_fixed ctx+coref features
# of each dataset, paired with that dataset's mention total.
for eval_features, mention_total in (
    (d_msnbc_ctx_coref_can_fixed, d_msnbc_total),
    (d_aquaint_ctx_coref_can_fixed, d_aquaint_total),
    (d_ace_ctx_coref_can_fixed, d_ace_total),
    (d_clueweb_ctx_coref_can_fixed, d_clueweb_total),
    (d_wiki_wned_ctx_coref_can_fixed, d_wiki_wned_total),
):
    process('ctx_coref_can_fixed', eval_features, mention_total,
            n_estimators=[4900], max_depths=[6])

2019-08-30 03:26:01.405142 Loading model: 4900_6_ctx_coref_can_fixed
2019-08-30 03:26:01.437742 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9255150554675119, recall: 0.8902439024390244, f1: 0.9075369075369076, corr_num: 584
2019-08-30 03:26:01.964892 Evaluation finished
2019-08-30 03:26:01.974609 Loading model: 4900_6_ctx_coref_can_fixed
2019-08-30 03:26:01.999686 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8971014492753623, recall: 0.8514442916093535, f1: 0.8736767819336626, corr_num: 619
2019-08-30 03:26:02.523069 Evaluation finished
2019-08-30 03:26:02.528705 Loading model: 4900_6_ctx_coref_can_fixed
2019-08-30 03:26:02.546755 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9446808510638298, recall: 0.8638132295719845, f1: 0.9024390243902439, corr_num: 222
2019-08-30 03:26:02.793726 Evaluation finished
2019-08-30 03:26:02.890505 Loading model: 4900_6_ctx_coref_can_fixed
2019-08-30 03:26:02.912656 Start evaluation
n_estimators: 

In [125]:
# Fetch the 'basic_coh_can_fixed' test features for the five evaluation datasets.
# NOTE(review): ACE is read from 'ace2004_wned' here, whereas the coh_ext cells
# in this notebook read 'ace2014_uiuc' -- confirm this is intentional.
(
    d_msnbc_coh_can_fixed,
    d_aquaint_coh_can_fixed,
    d_ace_coh_can_fixed,
    d_clueweb_coh_can_fixed,
    d_wiki_wned_coh_can_fixed,
) = (
    fetch_all_features('basic_coh_can_fixed', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2004_wned', 'clueweb12', 'wiki_wned')
)

# Shapes of the fetched matrices, one per line.
print(
    d_msnbc_coh_can_fixed.shape,
    d_aquaint_coh_can_fixed.shape,
    d_ace_coh_can_fixed.shape,
    d_clueweb_coh_can_fixed.shape,
    d_wiki_wned_coh_can_fixed.shape,
    sep='\n',
)

(14304, 24)
(15288, 24)
(6846, 24)
(277997, 24)
(123551, 24)


In [126]:
# Merge each dataset's can_fixed coherence features into its can_fixed
# ctx+coref matrix (dataset order: msnbc, aquaint, ace, clueweb, wiki_wned).
(
    d_msnbc_ctx_coref_coh_can_fixed,
    d_aquaint_ctx_coref_coh_can_fixed,
    d_ace_ctx_coref_coh_can_fixed,
    d_clueweb_ctx_coref_coh_can_fixed,
    d_wiki_wned_ctx_coref_coh_can_fixed,
) = (
    combine_features(base_features, extra_features)
    for base_features, extra_features in (
        (d_msnbc_ctx_coref_can_fixed, d_msnbc_coh_can_fixed),
        (d_aquaint_ctx_coref_can_fixed, d_aquaint_coh_can_fixed),
        (d_ace_ctx_coref_can_fixed, d_ace_coh_can_fixed),
        (d_clueweb_ctx_coref_can_fixed, d_clueweb_coh_can_fixed),
        (d_wiki_wned_ctx_coref_can_fixed, d_wiki_wned_coh_can_fixed),
    )
)

# Shapes after merging, one per line.
print(
    d_msnbc_ctx_coref_coh_can_fixed.shape,
    d_aquaint_ctx_coref_coh_can_fixed.shape,
    d_ace_ctx_coref_coh_can_fixed.shape,
    d_clueweb_ctx_coref_coh_can_fixed.shape,
    d_wiki_wned_ctx_coref_coh_can_fixed.shape,
    sep='\n',
)

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(14304, 54)
(15288, 54)
(6846, 54)
(277997, 54)
(123551, 54)


In [127]:
# Evaluate the 'ctx_coref_coh' model on every data source with the single
# (n_estimators=4900, max_depth=6) configuration. The list literals are built
# inside the loop so each call receives its own fresh lists.
for eval_feats, eval_total in (
    (d_msnbc_ctx_coref_coh_can_fixed, d_msnbc_total),
    (d_aquaint_ctx_coref_coh_can_fixed, d_aquaint_total),
    (d_ace_ctx_coref_coh_can_fixed, d_ace_total),
    (d_clueweb_ctx_coref_coh_can_fixed, d_clueweb_total),
    (d_wiki_wned_ctx_coref_coh_can_fixed, d_wiki_wned_total),
):
    process('ctx_coref_coh', eval_feats, eval_total, n_estimators=[4900], max_depths=[6])

2019-08-29 23:40:48.992106 Loading model: 4900_6_ctx_coref_coh
2019-08-29 23:40:49.054345 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9603803486529319, recall: 0.9237804878048781, f1: 0.9417249417249418, corr_num: 606
2019-08-29 23:40:49.859409 Evaluation finished
2019-08-29 23:40:49.873004 Loading model: 4900_6_ctx_coref_coh
2019-08-29 23:40:49.896739 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9144927536231884, recall: 0.8679504814305364, f1: 0.8906139731827806, corr_num: 631
2019-08-29 23:40:50.668417 Evaluation finished
2019-08-29 23:40:50.675374 Loading model: 4900_6_ctx_coref_coh
2019-08-29 23:40:50.698184 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9404255319148936, recall: 0.8599221789883269, f1: 0.8983739837398375, corr_num: 221
2019-08-29 23:40:51.050069 Evaluation finished
2019-08-29 23:40:51.243811 Loading model: 4900_6_ctx_coref_coh
2019-08-29 23:40:51.268761 Start evaluation
n_estimators: 4900, max_depth: 6, prec

In [130]:
# Load the 'basic_fea_coh_can_fixed_retrained' test-split features for the
# five data sources. The generator is consumed left to right, so the fetches
# happen in the same order as the names on the left-hand side.
(
    d_msnbc_coh_can_fixed_retrained,
    d_aquaint_coh_can_fixed_retrained,
    d_ace_coh_can_fixed_retrained,
    d_clueweb_coh_can_fixed_retrained,
    d_wiki_wned_coh_can_fixed_retrained,
) = (
    fetch_all_features('basic_fea_coh_can_fixed_retrained', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2004_wned', 'clueweb12', 'wiki_wned')
)

# Report one shape per line, in the same data-source order as above.
for retrained_feats in (
    d_msnbc_coh_can_fixed_retrained,
    d_aquaint_coh_can_fixed_retrained,
    d_ace_coh_can_fixed_retrained,
    d_clueweb_coh_can_fixed_retrained,
    d_wiki_wned_coh_can_fixed_retrained,
):
    print(retrained_feats.shape)

(14304, 24)
(15288, 24)
(6846, 24)
(277997, 24)
(123551, 24)


In [131]:
# Merge, per data source, the ctx+coref ('can_fixed') features with the
# retrained coherence features. Pairs are processed in order, so
# combine_features is called once per source, msnbc first.
(
    d_msnbc_ctx_coref_coh_can_fixed_retrained,
    d_aquaint_ctx_coref_coh_can_fixed_retrained,
    d_ace_ctx_coref_coh_can_fixed_retrained,
    d_clueweb_ctx_coref_coh_can_fixed_retrained,
    d_wiki_wned_ctx_coref_coh_can_fixed_retrained,
) = (
    combine_features(base_feats, retrained_feats)
    for base_feats, retrained_feats in (
        (d_msnbc_ctx_coref_can_fixed, d_msnbc_coh_can_fixed_retrained),
        (d_aquaint_ctx_coref_can_fixed, d_aquaint_coh_can_fixed_retrained),
        (d_ace_ctx_coref_can_fixed, d_ace_coh_can_fixed_retrained),
        (d_clueweb_ctx_coref_can_fixed, d_clueweb_coh_can_fixed_retrained),
        (d_wiki_wned_ctx_coref_can_fixed, d_wiki_wned_coh_can_fixed_retrained),
    )
)

# Report the combined shapes, one per line, in the same order.
for combined_feats in (
    d_msnbc_ctx_coref_coh_can_fixed_retrained,
    d_aquaint_ctx_coref_coh_can_fixed_retrained,
    d_ace_ctx_coref_coh_can_fixed_retrained,
    d_clueweb_ctx_coref_coh_can_fixed_retrained,
    d_wiki_wned_ctx_coref_coh_can_fixed_retrained,
):
    print(combined_feats.shape)

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
(14304, 54)
(15288, 54)
(6846, 54)
(277997, 54)
(123551, 54)


In [133]:
# Evaluate the 'ctx_coref_coh_can_fixed' model on the retrained-coherence
# feature sets, one data source at a time, with the single
# (n_estimators=4900, max_depth=6) configuration. The list literals are built
# inside the loop so each call receives its own fresh lists.
for eval_feats, eval_total in (
    (d_msnbc_ctx_coref_coh_can_fixed_retrained, d_msnbc_total),
    (d_aquaint_ctx_coref_coh_can_fixed_retrained, d_aquaint_total),
    (d_ace_ctx_coref_coh_can_fixed_retrained, d_ace_total),
    (d_clueweb_ctx_coref_coh_can_fixed_retrained, d_clueweb_total),
    (d_wiki_wned_ctx_coref_coh_can_fixed_retrained, d_wiki_wned_total),
):
    process('ctx_coref_coh_can_fixed', eval_feats, eval_total, n_estimators=[4900], max_depths=[6])

2019-08-30 17:08:34.832694 Loading model: 4900_6_ctx_coref_coh_can_fixed
2019-08-30 17:08:34.863812 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9619651347068146, recall: 0.9253048780487805, f1: 0.9432789432789432, corr_num: 607
2019-08-30 17:08:35.330130 Evaluation finished
2019-08-30 17:08:35.341745 Loading model: 4900_6_ctx_coref_coh_can_fixed
2019-08-30 17:08:35.368079 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9130434782608695, recall: 0.8665749656121046, f1: 0.8892025405786873, corr_num: 630
2019-08-30 17:08:35.901558 Evaluation finished
2019-08-30 17:08:35.909211 Loading model: 4900_6_ctx_coref_coh_can_fixed
2019-08-30 17:08:35.928147 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.9404255319148936, recall: 0.8599221789883269, f1: 0.8983739837398375, corr_num: 221
2019-08-30 17:08:36.172224 Evaluation finished
2019-08-30 17:08:36.328483 Loading model: 4900_6_ctx_coref_coh_can_fixed
2019-08-30 17:08:36.345667 Start evaluatio

In [136]:
# Load the 'basic_fea_coh_ext_global2' test-split features for the five data
# sources. The generator is consumed left to right, so the fetches happen in
# the same order as the names on the left-hand side.
# NOTE(review): the ACE source here is 'ace2014_uiuc', whereas the other ACE
# fetches in this part of the notebook use 'ace2004_wned' -- confirm this is
# intentional.
(
    d_msnbc_coh_ext_global2,
    d_aquaint_coh_ext_global2,
    d_ace_coh_ext_global2,
    d_clueweb_coh_ext_global2,
    d_wiki_wned_coh_ext_global2,
) = (
    fetch_all_features('basic_fea_coh_ext_global2', 'test', source_name)
    for source_name in ('msnbc_new', 'aquaint_new', 'ace2014_uiuc', 'clueweb12', 'wiki_wned')
)

# Report one shape per line, in the same data-source order as above.
for ext_global_feats in (
    d_msnbc_coh_ext_global2,
    d_aquaint_coh_ext_global2,
    d_ace_coh_ext_global2,
    d_clueweb_coh_ext_global2,
    d_wiki_wned_coh_ext_global2,
):
    print(ext_global_feats.shape)

(14304, 35)
(15288, 35)
(6741, 35)
(204669, 35)
(123551, 35)


In [154]:
# Show the shape of each emnlp17 ctx+coref feature set, one per line.
# These arrays are created outside this part of the notebook.
for emnlp17_feats in (
    d_msnbc_ctx_coref_emnlp17,
    d_aquaint_ctx_coref_emnlp17,
    d_ace_ctx_coref_emnlp17,
    d_clueweb_ctx_coref_emnlp17,
    d_wiki_wned_ctx_coref_emnlp17,
):
    print(emnlp17_feats.shape)

(14287, 32)
(15255, 32)
(6733, 32)
(277802, 32)
(119788, 32)


In [152]:
# Merge, per data source, the emnlp17 ctx+coref features with the
# 'coh_ext_global2' coherence features, then print the combined shapes.
#
# NOTE(review): the output recorded under this cell shows four
# "Building idx..." / "Combine..." message pairs followed by
# `IndexError: too many indices for array`, and none of the shapes below were
# printed. From the log alone it is not clear whether the fourth (clueweb) or
# the fifth (wiki_wned) call raised. The two inputs have different row counts
# per source in the recorded shape printouts (e.g. clueweb 277802 vs 204669),
# which may be related -- TODO confirm inside combine_features.
d_msnbc_ctx_coref_coh_ext_global2 = combine_features(d_msnbc_ctx_coref_emnlp17, d_msnbc_coh_ext_global2)
d_aquaint_ctx_coref_coh_ext_global2 = combine_features(d_aquaint_ctx_coref_emnlp17, d_aquaint_coh_ext_global2)
d_ace_ctx_coref_coh_ext_global2 = combine_features(d_ace_ctx_coref_emnlp17, d_ace_coh_ext_global2)
d_clueweb_ctx_coref_coh_ext_global2 = combine_features(d_clueweb_ctx_coref_emnlp17, d_clueweb_coh_ext_global2)
d_wiki_wned_ctx_coref_coh_ext_global2 = combine_features(d_wiki_wned_ctx_coref_emnlp17, d_wiki_wned_coh_ext_global2)

# Not reached in the recorded run because of the exception above.
print(d_msnbc_ctx_coref_coh_ext_global2.shape)
print(d_aquaint_ctx_coref_coh_ext_global2.shape)
print(d_ace_ctx_coref_coh_ext_global2.shape)
print(d_clueweb_ctx_coref_coh_ext_global2.shape)
print(d_wiki_wned_ctx_coref_coh_ext_global2.shape)

Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...
Building idx for new features...
Combine original and new features...


IndexError: too many indices for array

In [9]:
# Fetch the 'kore' documents via fetch_q_ids_docs (defined elsewhere in the
# notebook) and print how many entries came back.
# NOTE(review): presumably a mapping keyed by query/mention id, judging by the
# helper and variable names -- confirm against fetch_q_ids_docs.
docs_dict_kore = fetch_q_ids_docs('kore')
print(len(docs_dict_kore))

144


In [13]:
# KORE test split: parsed context features, the raw context feature rows and
# the coreference features. The tuple is evaluated left to right, so the
# three loads happen in this order.
d_kore_ctx, d_kore_ctx_raw, d_kore_coref = (
    fetch_all_features('exp1910:fea_ctx', 'test', 'kore'),
    get_feature_results('exp1910:fea_ctx', 'test', 'kore'),
    fetch_all_features('exp1910:fea_coref', 'test', 'kore'),
)
d_kore_total = get_total_mentions('kore', 'test')
# Presumably the number of mentions whose gold entity is present in the
# candidate rows (it is the numerator of the "upper bound" below) -- confirm
# against get_true_labels.
d_kore_true_labels = get_true_labels(d_kore_ctx)

# Summary: both shapes, total mentions and true-label count (one per line),
# followed by their ratio and a blank separator line.
print(d_kore_ctx.shape, d_kore_coref.shape, d_kore_total, d_kore_true_labels, sep='\n')
print('upper bound', d_kore_true_labels / d_kore_total)
print()

# Join context and coreference features into a single feature set.
d_kore_ctx_coref = combine_features(d_kore_ctx, d_kore_coref)
print(d_kore_ctx_coref.shape)

(4900, 21)
(4900, 13)
144
132
upper bound 0.9166666666666666

Building idx for new features...
Combine original and new features...
(4900, 32)


In [11]:
# Evaluate the 'ctx' and then the 'ctx_coref' model on KORE with the single
# (n_estimators=4900, max_depth=6) configuration. The list literals are built
# inside the loop so each call receives its own fresh lists.
for model_tag, kore_feats in (('ctx', d_kore_ctx), ('ctx_coref', d_kore_ctx_coref)):
    process(model_tag, kore_feats, d_kore_total, n_estimators=[4900], max_depths=[6])

  "because it will generate extra copies and increase memory consumption")


2019-10-18 17:48:31.199877 Loading model: 4900_6_ctx
2019-10-18 17:48:31.263067 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.5918367346938775, recall: 0.40559440559440557, f1: 0.4813278008298755, corr_num: 58
2019-10-18 17:48:31.424013 Evaluation finished
2019-10-18 17:48:31.428862 Loading model: 4900_6_ctx_coref
2019-10-18 17:48:31.459753 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.6224489795918368, recall: 0.42657342657342656, f1: 0.5062240663900415, corr_num: 61
2019-10-18 17:48:31.564378 Evaluation finished


In [14]:
# RSS test split: parsed context features, the raw context feature rows and
# the coreference features. The tuple is evaluated left to right, so the
# three loads happen in this order.
d_rss_ctx, d_rss_ctx_raw, d_rss_coref = (
    fetch_all_features('exp1910:fea_ctx', 'test', 'rss'),
    get_feature_results('exp1910:fea_ctx', 'test', 'rss'),
    fetch_all_features('exp1910:fea_coref', 'test', 'rss'),
)
d_rss_total = get_total_mentions('rss', 'test')
# Presumably the number of mentions whose gold entity is present in the
# candidate rows (it is the numerator of the "upper bound" below) -- confirm
# against get_true_labels.
d_rss_true_labels = get_true_labels(d_rss_ctx)

# Summary: both shapes, total mentions and true-label count (one per line),
# followed by their ratio and a blank separator line.
print(d_rss_ctx.shape, d_rss_coref.shape, d_rss_total, d_rss_true_labels, sep='\n')
print('upper bound', d_rss_true_labels / d_rss_total)
print()

# Join context and coreference features into a single feature set.
d_rss_ctx_coref = combine_features(d_rss_ctx, d_rss_coref)
print(d_rss_ctx_coref.shape)

(8780, 21)
(8780, 13)
524
448
upper bound 0.8549618320610687

Building idx for new features...
Combine original and new features...
(8780, 32)


In [18]:
# Reuters test split: context features first, then coreference features (the
# generator is consumed left to right, preserving that order).
d_reuters_ctx, d_reuters_coref = (
    fetch_all_features(experiment_id, 'test', 'reuters')
    for experiment_id in ('exp2110reuters:fea_ctx', 'exp2110reuters:fea_coref')
)
d_reuters_total = get_total_mentions('reuters', 'test')
d_reuters_true_labels = get_true_labels(d_reuters_ctx)

# Summary: both shapes, total mentions and true-label count, one per line.
print(d_reuters_ctx.shape, d_reuters_coref.shape, d_reuters_total, d_reuters_true_labels, sep='\n')

# Join context and coreference features into a single feature set.
d_reuters_ctx_coref = combine_features(d_reuters_ctx, d_reuters_coref)
print(d_reuters_ctx_coref.shape)

(10117, 21)
(10117, 13)
650
456
Building idx for new features...
Combine original and new features...
(10117, 32)


In [19]:
# Evaluate the 'ctx_coref' model on the Reuters features with the single
# (n_estimators=4900, max_depth=6) configuration. Per the recorded log,
# process loads model '4900_6_ctx_coref' and prints precision/recall/F1.
process('ctx_coref', d_reuters_ctx_coref, d_reuters_total, n_estimators=[4900], max_depths=[6])

  "because it will generate extra copies and increase memory consumption")


2019-10-21 12:25:59.880015 Loading model: 4900_6_ctx_coref
2019-10-21 12:25:59.929725 Start evaluation
n_estimators: 4900, max_depth: 6, precision: 0.8464912280701754, recall: 0.5938461538461538, f1: 0.698010849909584, corr_num: 386
2019-10-21 12:26:00.221932 Evaluation finished
