In [1]:
from wikidata.client import Client
import pandas as pd

In [2]:
# Wikidata API client; used below to fetch each relation's label and description.
client = Client()

In [3]:
# Headerless two-column TSV; the columns are named 'label' (relation label) and 'id' (relation id).
rel2id = pd.read_csv('dataset/relation_to_wiki_id.tsv', sep='\t', header=None, names=['label', 'id'])

### Build Dataset Description

In [None]:
def _describe_relation(rel_id):
    """Fetch one relation via the Wikidata client as an (id, label, description) tuple of strings."""
    rel = client.get(rel_id)
    return rel_id, str(rel.label), str(rel.description)


# One lookup per listed relation id (duplicates included; they are removed in a later cell).
rel_ids = rel2id['id'].tolist()
rel_list = [_describe_relation(rel_id) for rel_id in rel_ids]
rel_list_df = pd.DataFrame.from_records(
    rel_list,
    columns=['id', 'label', 'description'],
)

In [None]:
# Keep the first row for each relation id, then persist the table as a
# headerless, index-free TSV.
rel_list_df = rel_list_df.drop_duplicates(subset='id')
rel_list_df.to_csv(
    'dataset/relation_desc.tsv',
    sep='\t',
    header=None,
    index=False,
)

### Build Dataset Triples

In [4]:
from SPARQLWrapper import SPARQLWrapper, JSON

In [5]:
# Headerless TSV with seven columns, named here. NOTE: the variable is called `train`
# but the file loaded is the *test* split (dataset/orig/test.txt).
train = pd.read_csv('dataset/orig/test.txt', sep='\t', header=None, names=['e1_id', 'e2_id', 'e1', 'e2', 'rel_label', 'sent', 'end_token'])

In [6]:
# Drop unnecessary columns
# Order matters: rows with any missing value are removed while 'sent' and
# 'end_token' are still present; only then are those two columns dropped,
# exact duplicate rows collapsed, and the rows renumbered from 0.
train_filt = (
    train
    .dropna()
    .drop(columns=['sent', 'end_token'])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [7]:
# Add rel_id column
# Lookup table: relation label -> relation id (on duplicate labels the last row wins).
rel2id_dict = dict(zip(rel2id['label'], rel2id['id']))


def label_relation(row):
    """Return the relation id registered for this row's 'rel_label'.

    Raises KeyError when the label is not present in ``rel2id_dict``.
    """
    return rel2id_dict[row['rel_label']]


train_filt['rel_id'] = train_filt.apply(label_relation, axis=1)

In [8]:
# Convert freebase id to wikidata id
# Single SPARQLWrapper client bound to the Wikidata query endpoint; the query
# helper functions in this notebook reuse it (each call overwrites its current query).
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

def get_wiki_info(fb_id):
    """Look up the Wikidata entity whose P646 value equals ``fb_id``.

    Sends a SELECT query through the module-level ``sparql`` client and reads
    the first result binding only.

    Args:
        fb_id: identifier substituted verbatim into the quoted P646 literal.

    Returns:
        Tuple ``(wiki_id, wiki_label)``: the entity URI with the
        ``http://www.wikidata.org/entity/`` prefix removed, and the value of
        ``?itemLabel`` (English label service).

    Raises:
        IndexError: when the query returns no bindings.
    """
    query_template = """
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    SELECT ?item ?itemLabel WHERE {
      ?item wdt:P646 "%s" .

      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
      }
    }
    """
    sparql.setQuery(query_template % fb_id)
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()
    # Only the first binding is used; an empty result raises IndexError here.
    first_hit = response['results']['bindings'][0]
    entity_uri = first_hit['item']['value']
    entity_label = first_hit['itemLabel']['value']
    return entity_uri.replace('http://www.wikidata.org/entity/', ''), entity_label

In [9]:
# Replace the Freebase ids and labels of both entities in every row with the
# values returned by get_wiki_info. If either lookup has no result
# (IndexError), the row is reported and left completely unchanged.
len_data = len(train_filt)
for i in range(len_data):
    print(i, 'of', len_data-1)
    try:
        e1_wiki_id, e1_wiki_label = get_wiki_info(train_filt.at[i, 'e1_id'])
        e2_wiki_id, e2_wiki_label = get_wiki_info(train_filt.at[i, 'e2_id'])
    except IndexError:
        print('IndexError:', i)
        continue
    # Write through .at so the assignment lands in train_filt itself. The
    # chained form train_filt[col][i] = ... may update a temporary copy
    # (SettingWithCopyWarning) and does nothing under pandas Copy-on-Write.
    train_filt.at[i, 'e1_id'] = e1_wiki_id
    train_filt.at[i, 'e2_id'] = e2_wiki_id
    train_filt.at[i, 'e1'] = e1_wiki_label
    train_filt.at[i, 'e2'] = e2_wiki_label

0 of 5344
1 of 5344
2 of 5344
3 of 5344
4 of 5344
5 of 5344
6 of 5344
7 of 5344
8 of 5344
9 of 5344
10 of 5344
11 of 5344
12 of 5344
13 of 5344
14 of 5344
15 of 5344
16 of 5344
17 of 5344
18 of 5344
19 of 5344
20 of 5344
21 of 5344
22 of 5344
23 of 5344
24 of 5344
25 of 5344
26 of 5344
27 of 5344
28 of 5344
29 of 5344
30 of 5344
31 of 5344
32 of 5344
33 of 5344
34 of 5344
35 of 5344
36 of 5344
37 of 5344
38 of 5344
39 of 5344
40 of 5344
41 of 5344
42 of 5344
43 of 5344
44 of 5344
45 of 5344
46 of 5344
47 of 5344
48 of 5344
49 of 5344
50 of 5344
51 of 5344
52 of 5344
53 of 5344
54 of 5344
55 of 5344
56 of 5344
57 of 5344
58 of 5344
59 of 5344
60 of 5344
61 of 5344
62 of 5344
63 of 5344
64 of 5344
65 of 5344
66 of 5344
67 of 5344
68 of 5344
69 of 5344
70 of 5344
71 of 5344
72 of 5344
73 of 5344
74 of 5344
75 of 5344
76 of 5344
77 of 5344
78 of 5344
79 of 5344
80 of 5344
81 of 5344
82 of 5344
83 of 5344
84 of 5344
85 of 5344
86 of 5344
87 of 5344
88 of 5344
89 of 5344
90 of 5344
91 of 534

691 of 5344
692 of 5344
693 of 5344
694 of 5344
695 of 5344
696 of 5344
697 of 5344
698 of 5344
699 of 5344
700 of 5344
701 of 5344
702 of 5344
703 of 5344
704 of 5344
705 of 5344
706 of 5344
707 of 5344
708 of 5344
709 of 5344
710 of 5344
711 of 5344
712 of 5344
713 of 5344
714 of 5344
715 of 5344
IndexError: 715
716 of 5344
717 of 5344
718 of 5344
719 of 5344
720 of 5344
721 of 5344
722 of 5344
723 of 5344
724 of 5344
725 of 5344
726 of 5344
727 of 5344
728 of 5344
729 of 5344
730 of 5344
731 of 5344
732 of 5344
733 of 5344
734 of 5344
735 of 5344
736 of 5344
737 of 5344
738 of 5344
739 of 5344
740 of 5344
741 of 5344
742 of 5344
743 of 5344
744 of 5344
745 of 5344
746 of 5344
747 of 5344
748 of 5344
749 of 5344
750 of 5344
751 of 5344
752 of 5344
753 of 5344
754 of 5344
755 of 5344
756 of 5344
757 of 5344
758 of 5344
759 of 5344
760 of 5344
761 of 5344
762 of 5344
763 of 5344
764 of 5344
765 of 5344
766 of 5344
767 of 5344
768 of 5344
769 of 5344
770 of 5344
771 of 5344
772 of 5344


1343 of 5344
1344 of 5344
1345 of 5344
1346 of 5344
1347 of 5344
1348 of 5344
1349 of 5344
1350 of 5344
1351 of 5344
1352 of 5344
1353 of 5344
1354 of 5344
1355 of 5344
1356 of 5344
1357 of 5344
1358 of 5344
1359 of 5344
1360 of 5344
1361 of 5344
1362 of 5344
1363 of 5344
1364 of 5344
1365 of 5344
1366 of 5344
1367 of 5344
1368 of 5344
1369 of 5344
1370 of 5344
1371 of 5344
1372 of 5344
1373 of 5344
1374 of 5344
1375 of 5344
1376 of 5344
1377 of 5344
1378 of 5344
1379 of 5344
1380 of 5344
1381 of 5344
1382 of 5344
1383 of 5344
1384 of 5344
1385 of 5344
1386 of 5344
1387 of 5344
1388 of 5344
1389 of 5344
1390 of 5344
1391 of 5344
1392 of 5344
1393 of 5344
1394 of 5344
1395 of 5344
1396 of 5344
1397 of 5344
1398 of 5344
1399 of 5344
1400 of 5344
1401 of 5344
1402 of 5344
1403 of 5344
1404 of 5344
1405 of 5344
1406 of 5344
1407 of 5344
1408 of 5344
1409 of 5344
1410 of 5344
1411 of 5344
1412 of 5344
1413 of 5344
1414 of 5344
1415 of 5344
1416 of 5344
1417 of 5344
1418 of 5344
1419 of 5344

1974 of 5344
1975 of 5344
1976 of 5344
1977 of 5344
1978 of 5344
1979 of 5344
1980 of 5344
1981 of 5344
1982 of 5344
1983 of 5344
1984 of 5344
1985 of 5344
1986 of 5344
1987 of 5344
1988 of 5344
1989 of 5344
1990 of 5344
1991 of 5344
1992 of 5344
1993 of 5344
1994 of 5344
1995 of 5344
1996 of 5344
1997 of 5344
1998 of 5344
1999 of 5344
2000 of 5344
2001 of 5344
2002 of 5344
2003 of 5344
2004 of 5344
2005 of 5344
2006 of 5344
2007 of 5344
2008 of 5344
2009 of 5344
2010 of 5344
2011 of 5344
2012 of 5344
2013 of 5344
2014 of 5344
2015 of 5344
2016 of 5344
2017 of 5344
2018 of 5344
2019 of 5344
2020 of 5344
2021 of 5344
2022 of 5344
2023 of 5344
2024 of 5344
2025 of 5344
2026 of 5344
2027 of 5344
2028 of 5344
2029 of 5344
2030 of 5344
2031 of 5344
2032 of 5344
2033 of 5344
2034 of 5344
2035 of 5344
2036 of 5344
2037 of 5344
2038 of 5344
2039 of 5344
2040 of 5344
2041 of 5344
2042 of 5344
2043 of 5344
2044 of 5344
2045 of 5344
2046 of 5344
2047 of 5344
2048 of 5344
2049 of 5344
2050 of 5344

2602 of 5344
2603 of 5344
2604 of 5344
2605 of 5344
2606 of 5344
2607 of 5344
2608 of 5344
2609 of 5344
2610 of 5344
2611 of 5344
2612 of 5344
2613 of 5344
2614 of 5344
2615 of 5344
2616 of 5344
2617 of 5344
2618 of 5344
2619 of 5344
2620 of 5344
2621 of 5344
2622 of 5344
2623 of 5344
2624 of 5344
2625 of 5344
2626 of 5344
2627 of 5344
2628 of 5344
2629 of 5344
2630 of 5344
2631 of 5344
2632 of 5344
2633 of 5344
2634 of 5344
2635 of 5344
2636 of 5344
2637 of 5344
2638 of 5344
2639 of 5344
2640 of 5344
2641 of 5344
2642 of 5344
2643 of 5344
2644 of 5344
2645 of 5344
2646 of 5344
2647 of 5344
2648 of 5344
2649 of 5344
2650 of 5344
2651 of 5344
2652 of 5344
2653 of 5344
2654 of 5344
2655 of 5344
2656 of 5344
2657 of 5344
2658 of 5344
2659 of 5344
2660 of 5344
2661 of 5344
2662 of 5344
2663 of 5344
2664 of 5344
2665 of 5344
2666 of 5344
2667 of 5344
2668 of 5344
2669 of 5344
2670 of 5344
2671 of 5344
2672 of 5344
2673 of 5344
2674 of 5344
2675 of 5344
2676 of 5344
2677 of 5344
2678 of 5344

3233 of 5344
3234 of 5344
3235 of 5344
3236 of 5344
3237 of 5344
3238 of 5344
3239 of 5344
3240 of 5344
3241 of 5344
3242 of 5344
3243 of 5344
3244 of 5344
3245 of 5344
3246 of 5344
3247 of 5344
3248 of 5344
3249 of 5344
3250 of 5344
3251 of 5344
3252 of 5344
3253 of 5344
3254 of 5344
3255 of 5344
3256 of 5344
3257 of 5344
3258 of 5344
3259 of 5344
3260 of 5344
3261 of 5344
3262 of 5344
3263 of 5344
3264 of 5344
3265 of 5344
3266 of 5344
3267 of 5344
3268 of 5344
3269 of 5344
3270 of 5344
3271 of 5344
3272 of 5344
3273 of 5344
3274 of 5344
3275 of 5344
3276 of 5344
3277 of 5344
3278 of 5344
3279 of 5344
3280 of 5344
3281 of 5344
3282 of 5344
3283 of 5344
3284 of 5344
3285 of 5344
3286 of 5344
3287 of 5344
3288 of 5344
3289 of 5344
3290 of 5344
3291 of 5344
3292 of 5344
3293 of 5344
3294 of 5344
3295 of 5344
3296 of 5344
3297 of 5344
3298 of 5344
3299 of 5344
3300 of 5344
3301 of 5344
3302 of 5344
3303 of 5344
3304 of 5344
3305 of 5344
3306 of 5344
3307 of 5344
3308 of 5344
3309 of 5344

3864 of 5344
3865 of 5344
3866 of 5344
3867 of 5344
3868 of 5344
3869 of 5344
3870 of 5344
3871 of 5344
3872 of 5344
3873 of 5344
3874 of 5344
3875 of 5344
3876 of 5344
3877 of 5344
3878 of 5344
3879 of 5344
3880 of 5344
3881 of 5344
3882 of 5344
3883 of 5344
3884 of 5344
3885 of 5344
3886 of 5344
3887 of 5344
3888 of 5344
3889 of 5344
3890 of 5344
3891 of 5344
3892 of 5344
3893 of 5344
3894 of 5344
3895 of 5344
3896 of 5344
3897 of 5344
3898 of 5344
3899 of 5344
3900 of 5344
3901 of 5344
3902 of 5344
3903 of 5344
3904 of 5344
3905 of 5344
3906 of 5344
3907 of 5344
3908 of 5344
3909 of 5344
3910 of 5344
3911 of 5344
3912 of 5344
3913 of 5344
3914 of 5344
3915 of 5344
3916 of 5344
3917 of 5344
3918 of 5344
3919 of 5344
3920 of 5344
3921 of 5344
3922 of 5344
3923 of 5344
3924 of 5344
3925 of 5344
3926 of 5344
3927 of 5344
3928 of 5344
3929 of 5344
3930 of 5344
3931 of 5344
3932 of 5344
3933 of 5344
3934 of 5344
3935 of 5344
3936 of 5344
3937 of 5344
3938 of 5344
3939 of 5344
3940 of 5344

4495 of 5344
4496 of 5344
4497 of 5344
4498 of 5344
4499 of 5344
4500 of 5344
4501 of 5344
4502 of 5344
4503 of 5344
4504 of 5344
4505 of 5344
4506 of 5344
4507 of 5344
4508 of 5344
4509 of 5344
4510 of 5344
4511 of 5344
4512 of 5344
4513 of 5344
4514 of 5344
4515 of 5344
4516 of 5344
4517 of 5344
4518 of 5344
4519 of 5344
4520 of 5344
4521 of 5344
4522 of 5344
4523 of 5344
4524 of 5344
4525 of 5344
4526 of 5344
4527 of 5344
4528 of 5344
4529 of 5344
4530 of 5344
4531 of 5344
4532 of 5344
4533 of 5344
4534 of 5344
4535 of 5344
4536 of 5344
4537 of 5344
4538 of 5344
4539 of 5344
4540 of 5344
4541 of 5344
4542 of 5344
4543 of 5344
4544 of 5344
4545 of 5344
4546 of 5344
4547 of 5344
4548 of 5344
4549 of 5344
4550 of 5344
4551 of 5344
4552 of 5344
4553 of 5344
4554 of 5344
4555 of 5344
4556 of 5344
4557 of 5344
4558 of 5344
4559 of 5344
4560 of 5344
4561 of 5344
4562 of 5344
4563 of 5344
4564 of 5344
4565 of 5344
4566 of 5344
4567 of 5344
4568 of 5344
4569 of 5344
4570 of 5344
4571 of 5344

5124 of 5344
5125 of 5344
5126 of 5344
5127 of 5344
5128 of 5344
5129 of 5344
5130 of 5344
5131 of 5344
5132 of 5344
5133 of 5344
5134 of 5344
5135 of 5344
5136 of 5344
5137 of 5344
5138 of 5344
5139 of 5344
5140 of 5344
5141 of 5344
5142 of 5344
5143 of 5344
5144 of 5344
5145 of 5344
5146 of 5344
5147 of 5344
5148 of 5344
5149 of 5344
5150 of 5344
5151 of 5344
5152 of 5344
5153 of 5344
5154 of 5344
5155 of 5344
5156 of 5344
5157 of 5344
5158 of 5344
5159 of 5344
5160 of 5344
5161 of 5344
5162 of 5344
5163 of 5344
5164 of 5344
5165 of 5344
5166 of 5344
5167 of 5344
5168 of 5344
5169 of 5344
5170 of 5344
5171 of 5344
5172 of 5344
5173 of 5344
5174 of 5344
5175 of 5344
5176 of 5344
5177 of 5344
5178 of 5344
5179 of 5344
5180 of 5344
5181 of 5344
5182 of 5344
5183 of 5344
5184 of 5344
5185 of 5344
5186 of 5344
5187 of 5344
5188 of 5344
5189 of 5344
5190 of 5344
5191 of 5344
5192 of 5344
5193 of 5344
5194 of 5344
5195 of 5344
5196 of 5344
5197 of 5344
5198 of 5344
5199 of 5344
5200 of 5344

In [10]:
# Checkpoint the table after the lookup loop as a headerless, index-free TSV
# (it is read back in the next cell).
train_filt.to_csv('dataset/test_triples_all.tsv', sep='\t', header=None, index=False)

In [11]:
# Reload the checkpoint; the file has no header row, so the column names are supplied here.
train_filt = pd.read_csv('dataset/test_triples_all.tsv', sep='\t', header=None, names=['e1_id', 'e2_id', 'e1', 'e2', 'rel_label', 'rel_id'])

In [12]:
# Discard rows whose e1_id still contains "/m/", i.e. rows that the
# Freebase -> Wikidata conversion loop left unchanged.
still_freebase = train_filt['e1_id'].str.contains("/m/")
train_filt = train_filt.loc[still_freebase.eq(False)]

In [13]:
# Remove the dataset's own relation label; a 'rel_label' column is rebuilt
# from relation_desc.tsv in a later cell.
train_filt = train_filt.drop(columns=['rel_label'])

In [14]:
# Load the saved relation table and keep only its 'id' and 'label' columns.
rel_labels = (
    pd.read_csv(
        'dataset/relation_desc.tsv',
        sep='\t',
        header=None,
        names=['id', 'label', 'desc'],
    )
    .drop(columns=['desc'])
)

In [15]:
# Despite its name, this table maps relation id -> label
# (on duplicate ids the last row wins).
rel2id_wiki_dict = dict(zip(rel_labels['id'], rel_labels['label']))


def label_relation_wiki(row):
    """Return the label stored in ``rel2id_wiki_dict`` for this row's 'rel_id'.

    Raises KeyError when the id is not present in the table.
    """
    return rel2id_wiki_dict[row['rel_id']]


train_filt['rel_label'] = train_filt.apply(label_relation_wiki, axis=1)

In [16]:
# Renumber the surviving rows 0..n-1 and put the columns in their final order.
# reset_index(drop=True) relabels the rows that exist. The previous
# reindex(range(n)) instead *selected* rows by their old labels: labels that
# had been filtered out came back as all-NaN rows, and valid rows whose old
# label was >= n were silently discarded.
train_filt = train_filt.reset_index(drop=True)
train_filt = train_filt[['e1_id', 'e2_id', 'rel_id', 'e1', 'e2', 'rel_label']]

In [17]:
# Display the table after the columns were put in their final order.
train_filt

Unnamed: 0,e1_id,e2_id,rel_id,e1,e2,rel_label
0,Q27401,Q3380760,P793,Bugatti,selling,significant event
1,Q1515481,Q18188982,P47,Roy,Sunset,shares border with
2,Q422360,Q14647,P178,Secreted protein acidic and cysteine rich,Sun Microsystems,developer
3,Q1342533,Q188984,P54,Michael Nylander,New York Rangers,member of sports team
4,Q16560,Q2604149,P17,palace,Italy Pavilion,country
5,Q10847270,Q2129498,P495,Gold,Germany Pavilion,country of origin
6,Q692417,Q1041292,P641,New York Mets,Baseball,sport
7,Q180377,Q1860,P103,Raymond Chandler,English,native language
8,Q1009718,Q664,P17,Queenstown,New Zealand,country
9,Q103784,Q421957,P106,Donald Sutherland,Actor,occupation


In [18]:
def get_object_id(subject, predicate):
    """Return the id of the first object found for ``wd:<subject> wdt:<predicate> ?item``.

    The query is sent through the module-level ``sparql`` client and only the
    first result binding is inspected.

    Args:
        subject: entity id substituted after the ``wd:`` prefix.
        predicate: property id substituted after the ``wdt:`` prefix.

    Returns:
        The binding's value with the ``http://www.wikidata.org/entity/``
        prefix removed, or the string ``'N/A'`` when there are no bindings.
    """
    query_template = """
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    SELECT ?item WHERE {
      wd:%s wdt:%s ?item .
    }
    """
    sparql.setQuery(query_template % (subject, predicate))
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()
    try:
        first_hit = response['results']['bindings'][0]
    except IndexError:
        # Empty result set: the statement has no object.
        return 'N/A'
    return first_hit['item']['value'].replace('http://www.wikidata.org/entity/', '')

def get_object_label(subject, predicate):
    """Return the English label of the first object found for ``wd:<subject> wdt:<predicate> ?item``.

    The query is sent through the module-level ``sparql`` client and only the
    first result binding is inspected.

    Args:
        subject: entity id substituted after the ``wd:`` prefix.
        predicate: property id substituted after the ``wdt:`` prefix.

    Returns:
        The ``?itemLabel`` value (with any ``http://www.wikidata.org/entity/``
        prefix removed), or the string ``'N/A'`` when there are no bindings.
    """
    query_template = """
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    SELECT ?itemLabel WHERE {
      wd:%s wdt:%s ?item .
      
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
      }
    }
    """
    sparql.setQuery(query_template % (subject, predicate))
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()
    try:
        first_hit = response['results']['bindings'][0]
    except IndexError:
        # Empty result set: the statement has no object.
        return 'N/A'
    return first_hit['itemLabel']['value'].replace('http://www.wikidata.org/entity/', '')

In [19]:
# Manual check of both helpers against the live endpoint for (Q18394838, P31).
get_object_id('Q18394838', 'P31'), get_object_label('Q18394838', 'P31')

('Q482994', 'album')

In [20]:
# Check every (e1, rel) pair against Wikidata:
#   - object matches e2_id      -> row kept as is
#   - no object found ('N/A')   -> row number recorded in rem_idx for removal
#   - a different object found  -> e2_id / e2 overwritten with that object
# NOTE(review): get_object_id returns only the first object of the statement,
# so an e2 that is a valid but non-first object gets overwritten — confirm
# this is intended.
n_triples = len(train_filt)
# Must be (re)initialised here: it was previously commented out, which raises
# NameError on a fresh kernel and accumulates stale entries on a re-run.
rem_idx = []
for i in range(n_triples):
    print(i, 'of', n_triples - 1)
    subject_id = train_filt.at[i, 'e1_id']
    relation_id = train_filt.at[i, 'rel_id']
    obj_id = get_object_id(subject_id, relation_id)
    if obj_id == train_filt.at[i, 'e2_id']:
        continue
    if obj_id == 'N/A':
        rem_idx.append(i)
        continue
    obj_label = get_object_label(subject_id, relation_id)
    # .at writes into train_filt itself; chained train_filt[col][i] = ... may
    # update a temporary copy and does nothing under pandas Copy-on-Write.
    train_filt.at[i, 'e2_id'] = obj_id
    train_filt.at[i, 'e2'] = obj_label

0 of 5338
1 of 5338
2 of 5338
3 of 5338
4 of 5338
5 of 5338
6 of 5338
7 of 5338
8 of 5338
9 of 5338
10 of 5338
11 of 5338
12 of 5338
13 of 5338
14 of 5338
15 of 5338
16 of 5338
17 of 5338
18 of 5338
19 of 5338
20 of 5338
21 of 5338
22 of 5338
23 of 5338
24 of 5338
25 of 5338
26 of 5338
27 of 5338
28 of 5338
29 of 5338
30 of 5338
31 of 5338
32 of 5338
33 of 5338
34 of 5338
35 of 5338
36 of 5338
37 of 5338
38 of 5338
39 of 5338
40 of 5338
41 of 5338
42 of 5338
43 of 5338
44 of 5338
45 of 5338
46 of 5338
47 of 5338
48 of 5338
49 of 5338
50 of 5338
51 of 5338
52 of 5338
53 of 5338
54 of 5338
55 of 5338
56 of 5338
57 of 5338
58 of 5338
59 of 5338
60 of 5338
61 of 5338
62 of 5338
63 of 5338
64 of 5338
65 of 5338
66 of 5338
67 of 5338
68 of 5338
69 of 5338
70 of 5338
71 of 5338
72 of 5338
73 of 5338
74 of 5338
75 of 5338
76 of 5338
77 of 5338
78 of 5338
79 of 5338
80 of 5338
81 of 5338
82 of 5338
83 of 5338
84 of 5338
85 of 5338
86 of 5338
87 of 5338
88 of 5338
89 of 5338
90 of 5338
91 of 533

692 of 5338
693 of 5338
694 of 5338
695 of 5338
696 of 5338
697 of 5338
698 of 5338
699 of 5338
700 of 5338
701 of 5338
702 of 5338
703 of 5338
704 of 5338
705 of 5338
706 of 5338
707 of 5338
708 of 5338
709 of 5338
710 of 5338
711 of 5338
712 of 5338
713 of 5338
714 of 5338
715 of 5338
716 of 5338
717 of 5338
718 of 5338
719 of 5338
720 of 5338
721 of 5338
722 of 5338
723 of 5338
724 of 5338
725 of 5338
726 of 5338
727 of 5338
728 of 5338
729 of 5338
730 of 5338
731 of 5338
732 of 5338
733 of 5338
734 of 5338
735 of 5338
736 of 5338
737 of 5338
738 of 5338
739 of 5338
740 of 5338
741 of 5338
742 of 5338
743 of 5338
744 of 5338
745 of 5338
746 of 5338
747 of 5338
748 of 5338
749 of 5338
750 of 5338
751 of 5338
752 of 5338
753 of 5338
754 of 5338
755 of 5338
756 of 5338
757 of 5338
758 of 5338
759 of 5338
760 of 5338
761 of 5338
762 of 5338
763 of 5338
764 of 5338
765 of 5338
766 of 5338
767 of 5338
768 of 5338
769 of 5338
770 of 5338
771 of 5338
772 of 5338
773 of 5338
774 of 5338
775 

1346 of 5338
1347 of 5338
1348 of 5338
1349 of 5338
1350 of 5338
1351 of 5338
1352 of 5338
1353 of 5338
1354 of 5338
1355 of 5338
1356 of 5338
1357 of 5338
1358 of 5338
1359 of 5338
1360 of 5338
1361 of 5338
1362 of 5338
1363 of 5338
1364 of 5338
1365 of 5338
1366 of 5338
1367 of 5338
1368 of 5338
1369 of 5338
1370 of 5338
1371 of 5338
1372 of 5338
1373 of 5338
1374 of 5338
1375 of 5338
1376 of 5338
1377 of 5338
1378 of 5338
1379 of 5338
1380 of 5338
1381 of 5338
1382 of 5338
1383 of 5338
1384 of 5338
1385 of 5338
1386 of 5338
1387 of 5338
1388 of 5338
1389 of 5338
1390 of 5338
1391 of 5338
1392 of 5338
1393 of 5338
1394 of 5338
1395 of 5338
1396 of 5338
1397 of 5338
1398 of 5338
1399 of 5338
1400 of 5338
1401 of 5338
1402 of 5338
1403 of 5338
1404 of 5338
1405 of 5338
1406 of 5338
1407 of 5338
1408 of 5338
1409 of 5338
1410 of 5338
1411 of 5338
1412 of 5338
1413 of 5338
1414 of 5338
1415 of 5338
1416 of 5338
1417 of 5338
1418 of 5338
1419 of 5338
1420 of 5338
1421 of 5338
1422 of 5338

1977 of 5338
1978 of 5338
1979 of 5338
1980 of 5338
1981 of 5338
1982 of 5338
1983 of 5338
1984 of 5338
1985 of 5338
1986 of 5338
1987 of 5338
1988 of 5338
1989 of 5338
1990 of 5338
1991 of 5338
1992 of 5338
1993 of 5338
1994 of 5338
1995 of 5338
1996 of 5338
1997 of 5338
1998 of 5338
1999 of 5338
2000 of 5338
2001 of 5338
2002 of 5338
2003 of 5338
2004 of 5338
2005 of 5338
2006 of 5338
2007 of 5338
2008 of 5338
2009 of 5338
2010 of 5338
2011 of 5338
2012 of 5338
2013 of 5338
2014 of 5338
2015 of 5338
2016 of 5338
2017 of 5338
2018 of 5338
2019 of 5338
2020 of 5338
2021 of 5338
2022 of 5338
2023 of 5338
2024 of 5338
2025 of 5338
2026 of 5338
2027 of 5338
2028 of 5338
2029 of 5338
2030 of 5338
2031 of 5338
2032 of 5338
2033 of 5338
2034 of 5338
2035 of 5338
2036 of 5338
2037 of 5338
2038 of 5338
2039 of 5338
2040 of 5338
2041 of 5338
2042 of 5338
2043 of 5338
2044 of 5338
2045 of 5338
2046 of 5338
2047 of 5338
2048 of 5338
2049 of 5338
2050 of 5338
2051 of 5338
2052 of 5338
2053 of 5338

2608 of 5338
2609 of 5338
2610 of 5338
2611 of 5338
2612 of 5338
2613 of 5338
2614 of 5338
2615 of 5338
2616 of 5338
2617 of 5338
2618 of 5338
2619 of 5338
2620 of 5338
2621 of 5338
2622 of 5338
2623 of 5338
2624 of 5338
2625 of 5338
2626 of 5338
2627 of 5338
2628 of 5338
2629 of 5338
2630 of 5338
2631 of 5338
2632 of 5338
2633 of 5338
2634 of 5338
2635 of 5338
2636 of 5338
2637 of 5338
2638 of 5338
2639 of 5338
2640 of 5338
2641 of 5338
2642 of 5338
2643 of 5338
2644 of 5338
2645 of 5338
2646 of 5338
2647 of 5338
2648 of 5338
2649 of 5338
2650 of 5338
2651 of 5338
2652 of 5338
2653 of 5338
2654 of 5338
2655 of 5338
2656 of 5338
2657 of 5338
2658 of 5338
2659 of 5338
2660 of 5338
2661 of 5338
2662 of 5338
2663 of 5338
2664 of 5338
2665 of 5338
2666 of 5338
2667 of 5338
2668 of 5338
2669 of 5338
2670 of 5338
2671 of 5338
2672 of 5338
2673 of 5338
2674 of 5338
2675 of 5338
2676 of 5338
2677 of 5338
2678 of 5338
2679 of 5338
2680 of 5338
2681 of 5338
2682 of 5338
2683 of 5338
2684 of 5338

3239 of 5338
3240 of 5338
3241 of 5338
3242 of 5338
3243 of 5338
3244 of 5338
3245 of 5338
3246 of 5338
3247 of 5338
3248 of 5338
3249 of 5338
3250 of 5338
3251 of 5338
3252 of 5338
3253 of 5338
3254 of 5338
3255 of 5338
3256 of 5338
3257 of 5338
3258 of 5338
3259 of 5338
3260 of 5338
3261 of 5338
3262 of 5338
3263 of 5338
3264 of 5338
3265 of 5338
3266 of 5338
3267 of 5338
3268 of 5338
3269 of 5338
3270 of 5338
3271 of 5338
3272 of 5338
3273 of 5338
3274 of 5338
3275 of 5338
3276 of 5338
3277 of 5338
3278 of 5338
3279 of 5338
3280 of 5338
3281 of 5338
3282 of 5338
3283 of 5338
3284 of 5338
3285 of 5338
3286 of 5338
3287 of 5338
3288 of 5338
3289 of 5338
3290 of 5338
3291 of 5338
3292 of 5338
3293 of 5338
3294 of 5338
3295 of 5338
3296 of 5338
3297 of 5338
3298 of 5338
3299 of 5338
3300 of 5338
3301 of 5338
3302 of 5338
3303 of 5338
3304 of 5338
3305 of 5338
3306 of 5338
3307 of 5338
3308 of 5338
3309 of 5338
3310 of 5338
3311 of 5338
3312 of 5338
3313 of 5338
3314 of 5338
3315 of 5338

3870 of 5338
3871 of 5338
3872 of 5338
3873 of 5338
3874 of 5338
3875 of 5338
3876 of 5338
3877 of 5338
3878 of 5338
3879 of 5338
3880 of 5338
3881 of 5338
3882 of 5338
3883 of 5338
3884 of 5338
3885 of 5338
3886 of 5338
3887 of 5338
3888 of 5338
3889 of 5338
3890 of 5338
3891 of 5338
3892 of 5338
3893 of 5338
3894 of 5338
3895 of 5338
3896 of 5338
3897 of 5338
3898 of 5338
3899 of 5338
3900 of 5338
3901 of 5338
3902 of 5338
3903 of 5338
3904 of 5338
3905 of 5338
3906 of 5338
3907 of 5338
3908 of 5338
3909 of 5338
3910 of 5338
3911 of 5338
3912 of 5338
3913 of 5338
3914 of 5338
3915 of 5338
3916 of 5338
3917 of 5338
3918 of 5338
3919 of 5338
3920 of 5338
3921 of 5338
3922 of 5338
3923 of 5338
3924 of 5338
3925 of 5338
3926 of 5338
3927 of 5338
3928 of 5338
3929 of 5338
3930 of 5338
3931 of 5338
3932 of 5338
3933 of 5338
3934 of 5338
3935 of 5338
3936 of 5338
3937 of 5338
3938 of 5338
3939 of 5338
3940 of 5338
3941 of 5338
3942 of 5338
3943 of 5338
3944 of 5338
3945 of 5338
3946 of 5338

4501 of 5338
4502 of 5338
4503 of 5338
4504 of 5338
4505 of 5338
4506 of 5338
4507 of 5338
4508 of 5338
4509 of 5338
4510 of 5338
4511 of 5338
4512 of 5338
4513 of 5338
4514 of 5338
4515 of 5338
4516 of 5338
4517 of 5338
4518 of 5338
4519 of 5338
4520 of 5338
4521 of 5338
4522 of 5338
4523 of 5338
4524 of 5338
4525 of 5338
4526 of 5338
4527 of 5338
4528 of 5338
4529 of 5338
4530 of 5338
4531 of 5338
4532 of 5338
4533 of 5338
4534 of 5338
4535 of 5338
4536 of 5338
4537 of 5338
4538 of 5338
4539 of 5338
4540 of 5338
4541 of 5338
4542 of 5338
4543 of 5338
4544 of 5338
4545 of 5338
4546 of 5338
4547 of 5338
4548 of 5338
4549 of 5338
4550 of 5338
4551 of 5338
4552 of 5338
4553 of 5338
4554 of 5338
4555 of 5338
4556 of 5338
4557 of 5338
4558 of 5338
4559 of 5338
4560 of 5338
4561 of 5338
4562 of 5338
4563 of 5338
4564 of 5338
4565 of 5338
4566 of 5338
4567 of 5338
4568 of 5338
4569 of 5338
4570 of 5338
4571 of 5338
4572 of 5338
4573 of 5338
4574 of 5338
4575 of 5338
4576 of 5338
4577 of 5338

5132 of 5338
5133 of 5338
5134 of 5338
5135 of 5338
5136 of 5338
5137 of 5338
5138 of 5338
5139 of 5338
5140 of 5338
5141 of 5338
5142 of 5338
5143 of 5338
5144 of 5338
5145 of 5338
5146 of 5338
5147 of 5338
5148 of 5338
5149 of 5338
5150 of 5338
5151 of 5338
5152 of 5338
5153 of 5338
5154 of 5338
5155 of 5338
5156 of 5338
5157 of 5338
5158 of 5338
5159 of 5338
5160 of 5338
5161 of 5338
5162 of 5338
5163 of 5338
5164 of 5338
5165 of 5338
5166 of 5338
5167 of 5338
5168 of 5338
5169 of 5338
5170 of 5338
5171 of 5338
5172 of 5338
5173 of 5338
5174 of 5338
5175 of 5338
5176 of 5338
5177 of 5338
5178 of 5338
5179 of 5338
5180 of 5338
5181 of 5338
5182 of 5338
5183 of 5338
5184 of 5338
5185 of 5338
5186 of 5338
5187 of 5338
5188 of 5338
5189 of 5338
5190 of 5338
5191 of 5338
5192 of 5338
5193 of 5338
5194 of 5338
5195 of 5338
5196 of 5338
5197 of 5338
5198 of 5338
5199 of 5338
5200 of 5338
5201 of 5338
5202 of 5338
5203 of 5338
5204 of 5338
5205 of 5338
5206 of 5338
5207 of 5338
5208 of 5338

In [21]:
# Number of rows flagged for removal because no object was found for (e1, rel).
len(rem_idx)

1233

In [22]:
# Display the table after the validation loop; flagged rows are still present at this point.
train_filt

Unnamed: 0,e1_id,e2_id,rel_id,e1,e2,rel_label
0,Q27401,Q3380760,P793,Bugatti,selling,significant event
1,Q1515481,Q18188982,P47,Roy,Sunset,shares border with
2,Q422360,Q14647,P178,Secreted protein acidic and cysteine rich,Sun Microsystems,developer
3,Q1342533,Q36930,P54,Michael Nylander,EHC Kloten,member of sports team
4,Q16560,Q2604149,P17,palace,Italy Pavilion,country
5,Q10847270,Q2129498,P495,Gold,Germany Pavilion,country of origin
6,Q692417,Q5369,P641,New York Mets,baseball,sport
7,Q180377,Q1860,P103,Raymond Chandler,English,native language
8,Q1009718,Q408,P17,Queenstown,Australia,country
9,Q103784,Q17221,P106,Donald Sutherland,spokesperson,occupation


In [23]:
# rem_idx holds positional row numbers; index[rem_idx] converts them to index
# labels, which is what drop() expects.
train_filt = train_filt.drop(train_filt.index[rem_idx])

In [24]:
# Display the rows that remain after the flagged rows were dropped.
train_filt

Unnamed: 0,e1_id,e2_id,rel_id,e1,e2,rel_label
3,Q1342533,Q36930,P54,Michael Nylander,EHC Kloten,member of sports team
6,Q692417,Q5369,P641,New York Mets,baseball,sport
7,Q180377,Q1860,P103,Raymond Chandler,English,native language
8,Q1009718,Q408,P17,Queenstown,Australia,country
9,Q103784,Q17221,P106,Donald Sutherland,spokesperson,occupation
11,Q852190,Q11446,P279,shipwreck,ship,subclass of
12,Q318835,Q1344,P136,Elliott Carter,opera,genre
13,Q371710,Q18424,P19,Cliff Gorman,Queens,place of birth
14,Q41571,Q576349,P279,epilepsy,brain disease,subclass of
16,Q23027,Q183,P17,Schopp,Germany,country


In [25]:
# Write the remaining triples as a headerless, index-free TSV.
train_filt.to_csv('dataset/test_triples.tsv', sep='\t', header=None, index=False)