In [1]:
import  os
import  re
import glob

import  pandas as pd
import  numpy as np

from src.utils import *
from src.models import TransE, rTransE

  from .autonotebook import tqdm as notebook_tqdm


#### OWL2Bench

In [2]:
# OWL2Bench splits to run: one entry per dataset, giving the directory
# ('path') and the train/test file names that get appended to it.
OWL2Bench_dbs = [
    dict(
        path='./datasets/OWL2Bench/OWL2Bench1/',
        train_file='_train_OWL2Bench1',
        test_file='_test_OWL2Bench1',
    ),
    dict(
        path='./datasets/OWL2Bench/OWL2Bench2/',
        train_file='_train_OWL2Bench2',
        test_file='_test_OWL2Bench2',
    ),
]


# Train and evaluate TransE and rTransE on every split listed in OWL2Bench_dbs.
for db_ in OWL2Bench_dbs:
    path, train_file, test_file = db_['path'], db_['train_file'], db_['test_file']
    print('Running...', train_file, test_file)

    # --- training split ------------------------------------------------------
    # Keep only the 'ClassAssertion' rows and expose their s/o columns under
    # the names 'class' / 'assertion'.
    df_train = load_ore_files(path + train_file)
    data_subclass_train = (
        df_train.loc[df_train['p'] == 'ClassAssertion', ['s', 'o']]
        .rename(columns={'s': 'class', 'o': 'assertion'})
    )

    # Self-join: pair every row whose 'assertion' equals another row's 'class'.
    # The right join leaves class_x as NaN for right-hand rows without a
    # left-hand partner; dropna() removes those. class_y duplicates the join
    # key (assertion_x) on the remaining rows, so it is dropped.
    transitive_classes = (
        data_subclass_train.merge(
            data_subclass_train,
            how='right',
            left_on=['assertion'],
            right_on=['class'],
        )
        .dropna(subset=['class_x'])
        .drop(columns=['class_y'])
    )
    # Remaining columns are (class_x, assertion_x, assertion_y), in that order.
    transitive_classes.columns = ['class_0', 'class_1', 'assertion']
    transitive_classes = transitive_classes.drop_duplicates(
        subset=['class_0', 'class_1', 'assertion']
    )
    data_subclass_train_quads = transitive_classes.reset_index(drop=True)

    # --- test split ----------------------------------------------------------
    df_test = load_ore_files(path + test_file)
    data_subclass_test = (
        df_test.loc[df_test['p'] == 'ClassAssertion', ['s', 'o']]
        .rename(columns={'s': 'class', 'o': 'assertion'})
    )

    # --- encode both splits --------------------------------------------------
    res = prepare_subclass_data(
        data_subclass_train,
        data_subclass_train_quads,
        tc1='class',
        tc2='assertion',
        qc1='assertion',
        qc2='class_1',
        qc3='class_0',
    )
    node_dict, node_count, train_trips, train_quads = res

    # NOTE(review): the node dictionary and count returned for the test split
    # are discarded. Confirm in src.utils that prepare_subclass_data gives test
    # entities the same ids as the training split; otherwise the evaluation
    # below indexes the wrong embeddings.
    res = prepare_subclass_data(
        data_subclass_test,
        transitive_classes=None,
        tc1='class',
        tc2='assertion',
    )
    _, _, test_trips, test_quads = res
    print(len(train_trips), len(train_quads))

    # --- TransE: train, then evaluate on the test triples --------------------
    # NOTE(review): the meaning of the second constructor argument (1) is not
    # visible here -- presumably the relation count; confirm in src.models.
    print('')
    model_ORE_TransE = TransE(node_count, 1)
    model_ORE_TransE._train(train_trips, train_quads)
    model_ORE_TransE._eval(test_trips)

    # --- rTransE: train for 300 epochs, then evaluate on the same triples ----
    model_ORE_rTransE = rTransE(node_count, 1)
    model_ORE_rTransE._train(train_trips, train_quads, num_epoches=300)
    model_ORE_rTransE._eval(test_trips)

    print()

Running... _train_OWL2Bench1 _test_OWL2Bench1
7989 157

epoch 0,	 train loss 0.93
epoch 50,	 train loss 1.21
epoch 100,	 train loss 1.02
hits@1  tensor(0.0039) ,hits@10  tensor(0.1231) ,MR  tensor(37.9864) ,MRR  tensor(0.0552)
epoch 0,	 train loss 8.82
epoch 50,	 train loss 5.68
epoch 150,	 train loss 4.58
epoch 200,	 train loss 1.12
epoch 250,	 train loss 4.32
hits@1  tensor(0.3868) ,hits@10  tensor(0.5707) ,MR  tensor(19.8327) ,MRR  tensor(0.4475)

Running... _train_OWL2Bench2 _test_OWL2Bench2
15526 146

epoch 0,	 train loss 1.00
epoch 100,	 train loss 1.01
hits@1  tensor(0.0023) ,hits@10  tensor(0.1688) ,MR  tensor(53.0631) ,MRR  tensor(0.0532)
epoch 50,	 train loss 4.45
epoch 100,	 train loss 3.88
epoch 150,	 train loss 0.95
epoch 200,	 train loss 1.00
hits@1  tensor(0.2042) ,hits@10  tensor(0.3063) ,MR  tensor(40.5172) ,MRR  tensor(0.2551)



#### ORE

In [3]:
# ORE splits to run: one entry per dataset, giving the directory ('path')
# and the train/test file names that get appended to it.
ORE_dbs = [
    dict(
        path='./datasets/ORE/ORE1/',
        train_file='_train_ORE1',
        test_file='_test_ORE1',
    ),
    dict(
        path='./datasets/ORE/ORE2/',
        train_file='_train_ORE2',
        test_file='_test_ORE2',
    ),
    dict(
        path='./datasets/ORE/ORE3/',
        train_file='_train_ORE3',
        test_file='_test_ORE3',
    ),
]

In [4]:
# Train and evaluate TransE and rTransE on every split listed in ORE_dbs.
for db_ in ORE_dbs:
    path, train_file, test_file = db_['path'], db_['train_file'], db_['test_file']
    print('Running...', train_file, test_file)

    # --- training split ------------------------------------------------------
    # Keep only the 'ClassAssertion' rows and expose their s/o columns under
    # the names 'class' / 'assertion'.
    df_train = load_ore_files(path + train_file)
    data_subclass_train = (
        df_train.loc[df_train['p'] == 'ClassAssertion', ['s', 'o']]
        .rename(columns={'s': 'class', 'o': 'assertion'})
    )

    # Self-join: pair every row whose 'assertion' equals another row's 'class'.
    # The right join leaves class_x as NaN for right-hand rows without a
    # left-hand partner; dropna() removes those. class_y duplicates the join
    # key (assertion_x) on the remaining rows, so it is dropped.
    transitive_classes = (
        data_subclass_train.merge(
            data_subclass_train,
            how='right',
            left_on=['assertion'],
            right_on=['class'],
        )
        .dropna(subset=['class_x'])
        .drop(columns=['class_y'])
    )
    # Remaining columns are (class_x, assertion_x, assertion_y), in that order.
    transitive_classes.columns = ['class_0', 'class_1', 'assertion']
    transitive_classes = transitive_classes.drop_duplicates(
        subset=['class_0', 'class_1', 'assertion']
    )
    data_subclass_train_quads = transitive_classes.reset_index(drop=True)

    # --- test split ----------------------------------------------------------
    df_test = load_ore_files(path + test_file)
    data_subclass_test = (
        df_test.loc[df_test['p'] == 'ClassAssertion', ['s', 'o']]
        .rename(columns={'s': 'class', 'o': 'assertion'})
    )

    # --- encode both splits --------------------------------------------------
    res = prepare_subclass_data(
        data_subclass_train,
        data_subclass_train_quads,
        tc1='class',
        tc2='assertion',
        qc1='assertion',
        qc2='class_1',
        qc3='class_0',
    )
    node_dict, node_count, train_trips, train_quads = res

    # NOTE(review): the node dictionary and count returned for the test split
    # are discarded. Confirm in src.utils that prepare_subclass_data gives test
    # entities the same ids as the training split; otherwise the evaluation
    # below indexes the wrong embeddings.
    res = prepare_subclass_data(
        data_subclass_test,
        transitive_classes=None,
        tc1='class',
        tc2='assertion',
    )
    _, _, test_trips, test_quads = res
    print(len(train_trips), len(train_quads))

    # --- TransE: train, then evaluate on the test triples --------------------
    # NOTE(review): the meaning of the second constructor argument (1) is not
    # visible here -- presumably the relation count; confirm in src.models.
    print('')
    model_ORE_TransE = TransE(node_count, 1)
    model_ORE_TransE._train(train_trips, train_quads)
    model_ORE_TransE._eval(test_trips)

    # --- rTransE: train for 300 epochs, then evaluate on the same triples ----
    model_ORE_rTransE = rTransE(node_count, 1)
    model_ORE_rTransE._train(train_trips, train_quads, num_epoches=300)
    model_ORE_rTransE._eval(test_trips)

    print()


Running... _train_ORE1 _test_ORE1
53048 42851

epoch 0,	 train loss 0.99
epoch 50,	 train loss 1.10
epoch 100,	 train loss 1.03
hits@1  tensor(0.0071) ,hits@10  tensor(0.1456) ,MR  tensor(43.3990) ,MRR  tensor(0.0589)
epoch 0,	 train loss 5.09
epoch 50,	 train loss 11.33
epoch 100,	 train loss 10.52
epoch 150,	 train loss 9.19
epoch 200,	 train loss 7.76
hits@1  tensor(0.1248) ,hits@10  tensor(0.3693) ,MR  tensor(31.8564) ,MRR  tensor(0.2076)

Running... _train_ORE2 _test_ORE2
53081 42432

epoch 0,	 train loss 1.00
epoch 50,	 train loss 1.13
epoch 100,	 train loss 0.95
hits@1  tensor(0.0070) ,hits@10  tensor(0.0919) ,MR  tensor(51.0719) ,MRR  tensor(0.0465)
epoch 0,	 train loss 11.47
epoch 50,	 train loss 9.91
epoch 150,	 train loss 6.70
epoch 200,	 train loss 6.29
hits@1  tensor(0.0437) ,hits@10  tensor(0.1728) ,MR  tensor(43.3212) ,MRR  tensor(0.0965)

Running... _train_ORE3 _test_ORE3
53014 43181

epoch 0,	 train loss 0.98
epoch 50,	 train loss 1.25
epoch 100,	 train loss 1.02
hits@

#### CaLiGraph

In [82]:
# CaLiGraph splits to run: one entry per dataset, giving the directory
# ('path') and the train/test file names that get appended to it.
CLG_dbs = [
    dict(
        path='datasets/clg/clg_10e4/',
        train_file='clg_10e4-train.nt',
        # NOTE(review): the trailing '-e' looks like a leftover `sed -i -e`
        # backup name -- confirm this is the intended test file.
        test_file='clg_10e4-test.nt-e',
    ),
    dict(
        path='datasets/clg/clg_10e5/',
        train_file='clg_10e5-train.nt',
        test_file='clg_10e5-test.nt',
    ),
]

In [85]:
# Upper bound on how many CaLiGraph test triples are handed to `_eval`
# (the evaluation loop slices test_trips[:max_test_batch_size]).
max_test_batch_size = 10_000

In [88]:
# Train and evaluate TransE and rTransE on every split listed in CLG_dbs.
for db_ in CLG_dbs:
    path, train_file, test_file = db_['path'], db_['train_file'], db_['test_file']
    print('Running...', train_file, test_file)

    # --- training split ------------------------------------------------------
    # Keep only the rdf:type rows and expose their s/o columns under the names
    # 'type' / 'class'.
    df_train = load_clg_files(path + train_file)
    data_subclass_train = (
        df_train.loc[
            df_train['p'] == '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>',
            ['s', 'o'],
        ]
        .rename(columns={'s': 'type', 'o': 'class'})
    )

    # Self-join: pair every row whose 'class' equals another row's 'type'.
    # The left join leaves class_y as NaN for left-hand rows without a
    # right-hand partner; dropna() removes those. class_x duplicates the join
    # key (type_y) on the remaining rows, so it is dropped.
    transitive_classes = (
        data_subclass_train.merge(
            data_subclass_train,
            how='left',
            left_on=['class'],
            right_on=['type'],
        )
        .dropna(subset=['class_y'])
        .drop(columns=['class_x'])
    )
    # Remaining columns are (type_x, type_y, class_y), in that order.
    transitive_classes.columns = ['type_0', 'class_0', 'class_1']
    transitive_classes = transitive_classes.drop_duplicates(
        subset=['type_0', 'class_0', 'class_1']
    )
    data_subclass_train_quads = transitive_classes.reset_index(drop=True)

    res = prepare_subclass_data(
        data_subclass_train,
        data_subclass_train_quads,
        tc1='class',
        tc2='type',
        qc1='type_0',
        qc2='class_0',
        qc3='class_1',
    )
    node_dict, node_count, train_trips, train_quads = res

    # --- test split ----------------------------------------------------------
    df_test = load_clg_files(path + test_file)
    data_type_test = (
        df_test.loc[
            df_test['p'] == '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>',
            ['s', 'o'],
        ]
        .rename(columns={'s': 'type', 'o': 'class'})
    )

    # NOTE(review): the node dictionary and count returned for the test split
    # are discarded. Confirm in src.utils that prepare_subclass_data gives test
    # entities the same ids as the training split; otherwise the evaluation
    # below indexes the wrong embeddings.
    _, _, test_trips, test_quads = prepare_subclass_data(
        data_type_test,
        tc1='class',
        tc2='type',
    )

    print(len(train_trips), len(train_quads))

    # --- TransE: train, then evaluate on a capped slice of the test triples --
    # NOTE(review): the meaning of the second constructor argument (1) is not
    # visible here -- presumably the relation count; confirm in src.models.
    print('')
    model_ORE_TransE = TransE(node_count, 1)
    model_ORE_TransE._train(train_trips, train_quads)
    model_ORE_TransE._eval(test_trips[:max_test_batch_size])

    # --- rTransE: train for 300 epochs, then evaluate on the same slice ------
    model_ORE_rTransE = rTransE(node_count, 1)
    model_ORE_rTransE._train(train_trips, train_quads, num_epoches=300)
    model_ORE_rTransE._eval(test_trips[:max_test_batch_size])

    print()

Running... clg_10e4-train.nt clg_10e4-test.nt-e
51577 59923

epoch 0,	 train loss 0.85
epoch 100,	 train loss 1.09
hits@1  tensor(0.0164) ,hits@10  tensor(0.1731) ,MR  tensor(41.7982) ,MRR  tensor(0.0739)
epoch 50,	 train loss 13.32
epoch 100,	 train loss 11.97
epoch 200,	 train loss 9.05
epoch 250,	 train loss 7.41
hits@1  tensor(0.4237) ,hits@10  tensor(0.6039) ,MR  tensor(20.1118) ,MRR  tensor(0.4994)

Running... clg_10e5-train.nt clg_10e5-test.nt
29973 143

epoch 50,	 train loss 1.18
epoch 100,	 train loss 1.04
hits@1  tensor(0.0079) ,hits@10  tensor(0.0914) ,MR  tensor(47.1705) ,MRR  tensor(0.0503)
epoch 50,	 train loss 3.53
epoch 100,	 train loss 0.94
epoch 150,	 train loss 0.96
epoch 200,	 train loss 1.02
epoch 250,	 train loss 1.04
hits@1  tensor(0.0173) ,hits@10  tensor(0.0953) ,MR  tensor(54.0183) ,MRR  tensor(0.0561)

