#### Os testes foram realizados com a proporção de 2 exemplos negativos para cada exemplo positivo. O dataset foi dividido em 3 folds; cada nó tem no máximo 2 literais, cada árvore tem no máximo 8 cláusulas e profundidade máxima 3. O domínio utilizado foi o NELL Finances e foram aprendidas 10 boosted trees.


In [1]:
import os
import sys
import time

from datasets.get_datasets import *
from revision import *
from transfer import *
from mapping import *
from boostsrl import boostsrl
import numpy as np
import random
import json

# Experiment configuration shared by every cell below.

# Passed to datasets.load as `balanced`; the notebook header describes it as
# the number of negative examples kept for each positive example.
balanced = 2
# NOTE(review): firstRun and n_runs are not referenced in the visible part of
# this notebook -- confirm whether later cells still need them.
firstRun = False
n_runs = 2
# Number of cross-validation folds.
n_folds = 3

# Structure-learning limits handed to boostsrl.modes.
nodeSize = 2        # maximum number of literals per tree node
numOfClauses = 8    # maximum number of clauses
maxTreeDepth = 3    # maximum depth of each tree

# Seed passed to dataset loading and fold splitting.
seed = 0
# Name of the dataset to load.
target = 'nell_finances'
# Predicate to be learned.
new_target = 'companyeconomicsector'

#### A seguir têm-se os modes utilizados no aprendizado. O predicado artificial ceoeconomicsector(person, sector) foi removido. Sem o predicado inverso do target companyeconomicsector, nenhum outro predicado possui o tipo sector; portanto, não era possível aprender nenhuma regra relevante e, com isso, o resultado alternava em torno de AUC ROC 0.5.

#### Com o predicado inverso economicsectorcompany presente nos modes, o aprendizado da regra companyeconomicsector(A, B) :- economicsectorcompany(B, A) seria possível resultando em AUC ROC 1.0. Para evitar o aprendizado dessa regra foi utilizado o operador mode (\`) que impede que o algoritmo use variáveis da cabeça da cláusula.

In [2]:
# Mode declarations (background knowledge) handed to boostsrl.modes and to
# datasets.load. Each entry is `predicate(<mode><type>, <mode><type>).`.
# Presumably `+` marks an argument that must reuse an existing variable and
# `-` one that may introduce a new variable (standard BoostSRL modes) --
# confirm against the BoostSRL documentation.
bk = ['countryhascompanyoffice(+country,+company).',
        'countryhascompanyoffice(+country,-company).',
        'countryhascompanyoffice(-country,+company).',
        # Modes of the target predicate itself.
        'companyeconomicsector(+company,+sector).',
        'companyeconomicsector(+company,-sector).',
        'companyeconomicsector(-company,+sector).',
        # Inverse of the target. The fully-bound (+,+) mode stays disabled and
        # the backtick mode is used instead so that, per the notebook notes,
        # the learner cannot use the head variables in that position; this
        # avoids the trivial rule
        # companyeconomicsector(A, B) :- economicsectorcompany(B, A).
        #'economicsectorcompany(+sector,+company).',
        'economicsectorcompany(+sector,`company).',
        'economicsectorcompany(`sector,+company).',
        # Artificial predicate intentionally removed from the experiment.
        #'ceoeconomicsector(+person,+sector).',
        #'ceoeconomicsector(+person,-sector).',
        #'ceoeconomicsector(-person,+sector).',
        'companyceo(+company,+person).',
        'companyceo(+company,-person).',
        'companyceo(-company,+person).',
        'companyalsoknownas(+company,+company).',
        'companyalsoknownas(+company,-company).',
        'companyalsoknownas(-company,+company).',
        'cityhascompanyoffice(+city,+company).',
        'cityhascompanyoffice(+city,-company).',
        'cityhascompanyoffice(-city,+company).',
        'acquired(+company,+company).',
        'acquired(+company,-company).',
        'acquired(-company,+company).',
        # Disabled; presumably the inverse of companyceo -- TODO confirm.
        #'ceoof(+person,+company).',
        #'ceoof(+person,-company).',
        #'ceoof(-person,+company).',
        # NOTE(review): the first argument is typed `person` although the
        # predicate name suggests a bank -- confirm against the dataset.
        'bankbankincountry(+person,+country).',
        'bankbankincountry(+person,-country).',
        'bankbankincountry(-person,+country).',
        'bankboughtbank(+company,+company).',
        'bankboughtbank(+company,-company).',
        'bankboughtbank(-company,+company).',
        'bankchiefexecutiveceo(+company,+person).',
        'bankchiefexecutiveceo(+company,-person).',
        'bankchiefexecutiveceo(-company,+person).']

In [3]:
# Build the BoostSRL background (modes plus structure-learning limits) for the
# predicate being learned.
background = boostsrl.modes(
    bk,
    [new_target],
    useStdLogicVariables=False,
    maxTreeDepth=maxTreeDepth,
    nodeSize=nodeSize,
    numOfClauses=numOfClauses,
)

#### Aprendizado do zero com as árvores do primeiro fold

In [11]:
# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees from scratch (no refine) and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( acquired(C, A), economicsectorcompany(B, C) )
%   then return 0.8581489350995123;  // std dev = 0,000, 16,000 (wgt'ed) examples reached here.  /* #pos=16 */
%   else if ( acquired(D, A) )
%   | then if ( companyalsoknownas(A, E), economicsectorcompany(B, E) )
%   | | then return 0.8581489350995121;  // std dev = 2,11e-08, 3,000 (wgt'ed) examples reached here.  /* #pos=3 */
%   | | else if ( acquired(F, D), cityhascompanyoffice(G, D) )
%   | | | then return 0.2581489350995123;  // std dev = 1,549, 10,000 (wgt'ed) examples reached here.  /* #neg=6 #pos=4 */
%   | | | else return 0.010926712877290107;  // std dev = 3,053, 72,000 (wgt'ed) examples reached here.  /* #neg=61 #pos=11 */
%   | else if ( acquired(A, H), economicsectorcompany(B, H) )
%   | | then if ( cityhascompanyoffice(I, A) )
%   | | | then return 0.8581489350995123;  // std dev = 0,000, 6,000 (wgt'ed) examples reached here.  /* #pos=6 */
%   | | | else return

#### A variável refine contém os parâmetros que passo para o algoritmo, determinando como quero que suas árvores sejam formadas.

#### id da árvore; caminho do nó; target(A,B) :- literais do nó.; aprende uma subárvore na esquerda?; aprende uma subárvore na direita?

#### Com esse refine portanto estou forçando um único nó economicsectorcompany(B, A) que não é permitido pelos modes e que resulta num AUC ROC 1.0.

In [13]:
# One refine entry per boosted tree (ids 0-9), each forcing the same single
# root node with no subtree learning on either branch.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + ';;companyeconomicsector(A, B) :- economicsectorcompany(B, A).;false;false'
    for tree_id in range(10)
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( economicsectorcompany(B, A) )
%   then return 0.8581489350995087;  // std dev = 1,07e-06, 260,000 (wgt'ed) examples reached here.  /* #pos=260 */
%   else return -0.1418510649004879;  // std dev = 0,000, 519,000 (wgt'ed) examples reached here.  /* #neg=519 */
WILL Produced-Tree #2
% FOR companyeconomicsector(A, B):
%   if ( economicsectorcompany(B, A) )
%   then return 0.7194734122109575;  // std dev = 0,000, 260,000 (wgt'ed) examples reached here.  /* #pos=260 */
%   else return -0.1254446385283923;  // std dev = 0,000, 519,000 (wgt'ed) examples reached here.  /* #neg=519 */
WILL Produced-Tree #3
% FOR companyeconomicsector(A, B):
%   if ( economicsectorcompany(B, A) )
%   then return 0.5553664367462832;  // std dev = 3,95e-07, 260,000 (wgt'ed) examples reached here.  /* #pos=260 */
%   else return -0.11231637819360576;  // std dev = 0,000, 519,000 (wgt'ed) examples reached here.  /* #neg=519 */
WILL Produced-Tree #4
% 

#### Apenas o nó companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D).

In [14]:
# One refine entry per boosted tree (ids 0-9), each forcing the same
# three-literal root node with no subtree learning on either branch.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + ';;companyeconomicsector(A, B) :- companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D).;false;false'
    for tree_id in range(10)
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D) )
%   then return 0.8581489350995121;  // std dev = 2,06e-07, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   else return 0.15009982186622042;  // std dev = 12,309, 733,000 (wgt'ed) examples reached here.  /* #neg=519 #pos=214 */
WILL Produced-Tree #2
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D) )
%   then return 0.719473412210955;  // std dev = 0,000, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   else return 0.13082844552342424;  // std dev = 12,309, 733,000 (wgt'ed) examples reached here.  /* #neg=519 #pos=214 */
WILL Produced-Tree #3
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D) )
%   then return 0.5553664367462839;  // std dev = 0,000, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   else return 0.112

#### O nó companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D). podendo aprender subárvores no ramo da esquerda (TRUE) ou direita (FALSE).

In [15]:
# One refine entry per boosted tree (ids 0-9), each forcing the same
# three-literal root node while allowing subtrees on both branches.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + ';;companyeconomicsector(A, B) :- companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D).;true;true'
    for tree_id in range(10)
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C), economicsectorcompany(B, D) )
%   then return 0.8581489350995121;  // std dev = 2,06e-07, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   else if ( companyceo(A, E) )
%   | then return -0.14185106490048766;  // std dev = 7,88e-08, 85,000 (wgt'ed) examples reached here.  /* #neg=85 */
%   | else if ( companyalsoknownas(A, F), companyceo(F, G) )
%   | | then if ( acquired(F, H), acquired(H, I) )
%   | | | then return 0.28672036367094084;  // std dev = 1,309, 7,000 (wgt'ed) examples reached here.  /* #neg=4 #pos=3 */
%   | | | else return 0.6081489350995122;  // std dev = 0,866, 4,000 (wgt'ed) examples reached here.  /* #neg=1 #pos=3 */
%   | | else if ( acquired(A, J), cityhascompanyoffice(K, J) )
%   | | | then return -0.0168510649004878;  // std dev = 1,323, 16,000 (wgt'ed) examples reached here.  /* #neg=14 #pos=2 */
%   | | | else return 0.18987196247470933;  // std dev = 1

#### O nó companyceo(A, C), companyceo(D, C), podendo aprender subárvores em ambas as direções. O algoritmo não foi capaz de aprender o nó economicsectorcompany(B, D) no ramo TRUE, cláusula que mostrou uma precisão maior.

In [16]:
# One refine entry per boosted tree (ids 0-9), each forcing the two-literal
# companyceo root node while allowing subtrees on both branches.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + ';;companyeconomicsector(A, B) :- companyceo(A, C), companyceo(D, C).;true;true'
    for tree_id in range(10)
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C) )
%   then if ( acquired(E, A), companyceo(E, F) )
%   | then if ( economicsectorcompany(B, E) )
%   | | then return 0.8581489350995122;  // std dev = 0,000, 5,000 (wgt'ed) examples reached here.  /* #pos=5 */
%   | | else if ( cityhascompanyoffice(G, E) )
%   | | | then return 0.13087620782678497;  // std dev = 1,477, 11,000 (wgt'ed) examples reached here.  /* #neg=8 #pos=3 */
%   | | | else return 0.5248156017661788;  // std dev = 0,816, 3,000 (wgt'ed) examples reached here.  /* #neg=1 #pos=2 */
%   | else if ( acquired(A, H), economicsectorcompany(B, H) )
%   | | then return 0.7152917922423694;  // std dev = 0,350, 7,000 (wgt'ed) examples reached here.  /* #neg=1 #pos=6 */
%   | | else return 0.14386322081379807;  // std dev = 0,452, 105,000 (wgt'ed) examples reached here.  /* #neg=75 #pos=30 */
%   else if ( acquired(A, I), cityhascompanyoffice(J, I) )
%   | then return -0.016851064

In [17]:
# Two refine entries per boosted tree (ids 0-9), in tree order: first the
# companyceo root node, then its TRUE-branch child economicsectorcompany(B, D).
# No further subtree learning is allowed below the child or on the root's
# right branch.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + node_spec
    for tree_id in range(10)
    for node_spec in (
        ';;companyeconomicsector(A, B) :- companyceo(A, C), companyceo(D, C).;true;false',
        ';true;economicsectorcompany(B, D).;false;false',
    )
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C) )
%   then if ( economicsectorcompany(B, D) )
%   | then return 0.8581489350995121;  // std dev = 2,06e-07, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   | else return -0.14185106490048766;  // std dev = 7,88e-08, 85,000 (wgt'ed) examples reached here.  /* #neg=85 */
%   else return 0.18839584867975465;  // std dev = 11,972, 648,000 (wgt'ed) examples reached here.  /* #neg=434 #pos=214 */
WILL Produced-Tree #2
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C) )
%   then if ( economicsectorcompany(B, D) )
%   | then return 0.719473412210955;  // std dev = 0,000, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   | else return -0.12544463852839144;  // std dev = 0,000, 85,000 (wgt'ed) examples reached here.  /* #neg=85 */
%   else return 0.16388089621524768;  // std dev = 11,972, 648,000 (wgt'ed) examples reached here.  /* #neg=434 #pos=214 

#### Este padrão com o nó raiz companyceo(A, C), companyceo(D, C) e seu nó TRUE como economicsectorcompany(B, D) apresentou um resultado melhor.

In [23]:
# Two refine entries per boosted tree (ids 0-9), in tree order: first the
# companyceo root node, then its TRUE-branch child economicsectorcompany(B, D).
# Both nodes allow further subtrees on either branch.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + node_spec
    for tree_id in range(10)
    for node_spec in (
        ';;companyeconomicsector(A, B) :- companyceo(A, C), companyceo(D, C).;true;true',
        ';true;economicsectorcompany(B, D).;true;true',
    )
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyceo(D, C) )
%   then if ( economicsectorcompany(B, D) )
%   | then return 0.8581489350995121;  // std dev = 2,06e-07, 46,000 (wgt'ed) examples reached here.  /* #pos=46 */
%   | else return -0.14185106490048766;  // std dev = 7,88e-08, 85,000 (wgt'ed) examples reached here.  /* #neg=85 */
%   else if ( cityhascompanyoffice(E, A) )
%   | then if ( cityhascompanyoffice(E, F), economicsectorcompany(B, F) )
%   | | then if ( companyalsoknownas(F, G), economicsectorcompany(B, G) )
%   | | | then return 0.8581489350995123;  // std dev = 0,000, 17,000 (wgt'ed) examples reached here.  /* #pos=17 */
%   | | | else return 0.3364098046647297;  // std dev = 2,396, 23,000 (wgt'ed) examples reached here.  /* #neg=12 #pos=11 */
%   | | else return -0.14185106490048766;  // std dev = 5,37e-08, 52,000 (wgt'ed) examples reached here.  /* #neg=52 */
%   | else if ( acquired(A, H) )
%   | | then if ( economicsectorc

In [19]:
# One refine entry per boosted tree (ids 0-9), each forcing the same
# companyceo / companyalsoknownas / economicsectorcompany root node while
# allowing subtrees on both branches.
# Entry format: tree id; node path; clause; learn left subtree?; learn right subtree?
refine = [
    str(tree_id) + ';;companyeconomicsector(A, B) :- companyceo(A, C), companyalsoknownas(C, D), economicsectorcompany(B, D).;true;true'
    for tree_id in range(10)
]

# Load the dataset for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# The facts and the fold partition depend only on the loaded data, n_folds and
# seed, none of which change between iterations, so they are computed once
# here instead of once per fold. The same facts serve training and testing.
tar_train_facts = tar_test_facts = tar_data[0][0]
to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)

for i in range(n_folds):
    # Pick the train/test positive and negative examples of fold i.
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    # Learn 10 trees constrained by `refine` and collect the fold metrics.
    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Only the trees learned on the first fold are displayed.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i+1))
    print(t_results)
print('\n')

# Report each metric over the folds as mean +/- two standard deviations.
auc_roc = np.array([item['AUC ROC'] for item in results])
auc_pr = np.array([item['AUC PR'] for item in results])
cll = np.array([item['CLL'] for item in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyalsoknownas(C, D), economicsectorcompany(B, D) )
%   then return 0;  // std dev = 0,000, 0,000 (wgt'ed) examples reached here.
%   else if ( acquired(A, E), economicsectorcompany(B, E) )
%   | then if ( cityhascompanyoffice(F, A) )
%   | | then return 0.8581489350995123;  // std dev = 0,000, 6,000 (wgt'ed) examples reached here.  /* #pos=6 */
%   | | else return 0.6581489350995122;  // std dev = 0,894, 5,000 (wgt'ed) examples reached here.  /* #neg=1 #pos=4 */
%   | else if ( acquired(A, G), cityhascompanyoffice(H, G) )
%   | | then if ( companyceo(A, I) )
%   | | | then return 0.024815601766178853;  // std dev = 1,291, 12,000 (wgt'ed) examples reached here.  /* #neg=10 #pos=2 */
%   | | | else return -0.1418510649004878;  // std dev = 0,000, 14,000 (wgt'ed) examples reached here.  /* #neg=14 */
%   | | else if ( acquired(J, A), cityhascompanyoffice(K, J) )
%   | | | then return 0.252885777204775

In [20]:
# Revision theory handed to the learner: the same nodes are supplied for every
# boosted tree. Each entry is '<tree>;<branch path>;<clause>;true;true'.
# Judging by the printed trees, <branch path> is the comma-separated list of
# true/false branches leading from the root to the node ('' for the root).
# NOTE(review): the meaning of the two trailing 'true' flags is not visible
# here -- confirm against revision.learn_test_model.
n_trees = 10  # one group of entries per tree; the run below uses trees=10
refine_nodes = [
    ('', 'companyeconomicsector(A, B) :- companyceo(A, C), companyalsoknownas(C, D).'),
    ('true', 'economicsectorcompany(B, D).'),
]
refine = ['%d;%s;%s;true;true' % (tree, path, clause)
          for tree in range(n_trees)
          for path, clause in refine_nodes]

# Load the examples and facts for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# Cross-validation: learn and evaluate one boosted model per fold, passing the
# current `refine` theory to the learner.
for i in range(n_folds):
    # Training and testing use the same fact base.
    tar_train_facts, tar_test_facts = tar_data[0][0], tar_data[0][0]

    # Split positive and negative examples into folds, then pick the
    # train/test partition belonging to fold i.
    to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
    to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Dump the learned trees for the first fold only.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i + 1))
    print(t_results)
print('\n')

# Gather each metric across folds and report mean +/- two standard deviations.
auc_roc = np.array([fold_result['AUC ROC'] for fold_result in results])
auc_pr = np.array([fold_result['AUC PR'] for fold_result in results])
cll = np.array([fold_result['CLL'] for fold_result in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyalsoknownas(C, D) )
%   then return 0;  // std dev = 0,000, 0,000 (wgt'ed) examples reached here.
%   else if ( countryhascompanyoffice(E, A), bankbankincountry(F, E) )
%   | then if ( companyalsoknownas(A, G), economicsectorcompany(B, G) )
%   | | then return 0.8581489350995121;  // std dev = 2,11e-08, 3,000 (wgt'ed) examples reached here.  /* #pos=3 */
%   | | else if ( acquired(A, H), companyceo(H, I) )
%   | | | then return -0.1418510649004878;  // std dev = 0,000, 10,000 (wgt'ed) examples reached here.  /* #neg=10 */
%   | | | else return 0.10452574669371523;  // std dev = 3,579, 69,000 (wgt'ed) examples reached here.  /* #neg=52 #pos=17 */
%   | else if ( acquired(J, A), economicsectorcompany(B, J) )
%   | | then return 0.8581489350995123;  // std dev = 0,000, 15,000 (wgt'ed) examples reached here.  /* #pos=15 */
%   | | else if ( acquired(A, K), companyceo(A, L) )
%   | | | then return 0.2

In [21]:
# Revision theory handed to the learner: the same root node is supplied for
# every boosted tree. Each entry is '<tree>;<branch path>;<clause>;true;true',
# with an empty <branch path> for the root node.
# NOTE(review): the meaning of the two trailing 'true' flags is not visible
# here -- confirm against revision.learn_test_model.
n_trees = 10  # one entry per tree; the run below uses trees=10
refine_nodes = [
    ('', 'companyeconomicsector(A, B) :- companyceo(A, C), companyalsoknownas(C, D).'),
]
refine = ['%d;%s;%s;true;true' % (tree, path, clause)
          for tree in range(n_trees)
          for path, clause in refine_nodes]

# Load the examples and facts for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# Cross-validation: learn and evaluate one boosted model per fold, passing the
# current `refine` theory to the learner.
for i in range(n_folds):
    # Training and testing use the same fact base.
    tar_train_facts, tar_test_facts = tar_data[0][0], tar_data[0][0]

    # Split positive and negative examples into folds, then pick the
    # train/test partition belonging to fold i.
    to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
    to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Dump the learned trees for the first fold only.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i + 1))
    print(t_results)
print('\n')

# Gather each metric across folds and report mean +/- two standard deviations.
auc_roc = np.array([fold_result['AUC ROC'] for fold_result in results])
auc_pr = np.array([fold_result['AUC PR'] for fold_result in results])
cll = np.array([fold_result['CLL'] for fold_result in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyceo(A, C), companyalsoknownas(C, D) )
%   then return 0;  // std dev = 0,000, 0,000 (wgt'ed) examples reached here.
%   else if ( countryhascompanyoffice(E, A), bankbankincountry(F, E) )
%   | then if ( companyalsoknownas(A, G), economicsectorcompany(B, G) )
%   | | then return 0.8581489350995121;  // std dev = 2,11e-08, 3,000 (wgt'ed) examples reached here.  /* #pos=3 */
%   | | else if ( acquired(A, H), companyceo(H, I) )
%   | | | then return -0.1418510649004878;  // std dev = 0,000, 10,000 (wgt'ed) examples reached here.  /* #neg=10 */
%   | | | else return 0.10452574669371523;  // std dev = 3,579, 69,000 (wgt'ed) examples reached here.  /* #neg=52 #pos=17 */
%   | else if ( acquired(A, J), companyceo(A, K) )
%   | | then if ( economicsectorcompany(B, J) )
%   | | | then return 0.8581489350995122;  // std dev = 0,000, 5,000 (wgt'ed) examples reached here.  /* #pos=5 */
%   | | | else return 0.22353355048412768

In [4]:
# Revision theory handed to the learner: the same root node is supplied for
# every boosted tree. Each entry is '<tree>;<branch path>;<clause>;true;true',
# with an empty <branch path> for the root node.
# NOTE(review): the meaning of the two trailing 'true' flags is not visible
# here -- confirm against revision.learn_test_model.
n_trees = 10  # one entry per tree; the run below uses trees=10
refine_nodes = [
    ('', 'companyeconomicsector(A, B) :- companyalsoknownas(A, C), economicsectorcompany(B, C).'),
]
refine = ['%d;%s;%s;true;true' % (tree, path, clause)
          for tree in range(n_trees)
          for path, clause in refine_nodes]

# Load the examples and facts for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# Cross-validation: learn and evaluate one boosted model per fold, passing the
# current `refine` theory to the learner.
for i in range(n_folds):
    # Training and testing use the same fact base.
    tar_train_facts, tar_test_facts = tar_data[0][0], tar_data[0][0]

    # Split positive and negative examples into folds, then pick the
    # train/test partition belonging to fold i.
    to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
    to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Dump the learned trees for the first fold only.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i + 1))
    print(t_results)
print('\n')

# Gather each metric across folds and report mean +/- two standard deviations.
auc_roc = np.array([fold_result['AUC ROC'] for fold_result in results])
auc_pr = np.array([fold_result['AUC PR'] for fold_result in results])
cll = np.array([fold_result['CLL'] for fold_result in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( companyalsoknownas(A, C), economicsectorcompany(B, C) )
%   then if ( companyalsoknownas(A, D), countryhascompanyoffice(E, D) )
%   | then return 0.4581489350995122;  // std dev = 1,095, 5,000 (wgt'ed) examples reached here.  /* #neg=2 #pos=3 */
%   | else return 0.8581489350995122;  // std dev = 0,000, 4,000 (wgt'ed) examples reached here.  /* #pos=4 */
%   else if ( acquired(A, F), cityhascompanyoffice(G, F) )
%   | then if ( economicsectorcompany(B, F) )
%   | | then return 0.8581489350995122;  // std dev = 0,000, 4,000 (wgt'ed) examples reached here.  /* #pos=4 */
%   | | else if ( companyceo(A, H) )
%   | | | then return 0.024815601766178853;  // std dev = 1,291, 12,000 (wgt'ed) examples reached here.  /* #neg=10 #pos=2 */
%   | | | else return -0.1418510649004878;  // std dev = 0,000, 14,000 (wgt'ed) examples reached here.  /* #neg=14 */
%   | else if ( countryhascompanyoffice(I, A), bankbankincountry(J, I) )
%   |

In [5]:
# Revision theory handed to the learner: the same root node is supplied for
# every boosted tree. Each entry is '<tree>;<branch path>;<clause>;true;true',
# with an empty <branch path> for the root node.
# NOTE(review): the meaning of the two trailing 'true' flags is not visible
# here -- confirm against revision.learn_test_model.
n_trees = 10  # one entry per tree; the run below uses trees=10
refine_nodes = [
    ('', 'companyeconomicsector(A, B) :- acquired(A, C), economicsectorcompany(B, C).'),
]
refine = ['%d;%s;%s;true;true' % (tree, path, clause)
          for tree in range(n_trees)
          for path, clause in refine_nodes]

# Load the examples and facts for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# Cross-validation: learn and evaluate one boosted model per fold, passing the
# current `refine` theory to the learner.
for i in range(n_folds):
    # Training and testing use the same fact base.
    tar_train_facts, tar_test_facts = tar_data[0][0], tar_data[0][0]

    # Split positive and negative examples into folds, then pick the
    # train/test partition belonging to fold i.
    to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
    to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Dump the learned trees for the first fold only.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i + 1))
    print(t_results)
print('\n')

# Gather each metric across folds and report mean +/- two standard deviations.
auc_roc = np.array([fold_result['AUC ROC'] for fold_result in results])
auc_pr = np.array([fold_result['AUC PR'] for fold_result in results])
cll = np.array([fold_result['CLL'] for fold_result in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( acquired(A, C), economicsectorcompany(B, C) )
%   then return 0.8581489350995122;  // std dev = 0,000, 10,000 (wgt'ed) examples reached here.  /* #pos=10 */
%   else if ( countryhascompanyoffice(D, A), bankbankincountry(E, D) )
%   | then if ( acquired(A, F) )
%   | | then if ( companyalsoknownas(A, G), economicsectorcompany(B, G) )
%   | | | then return 0.5248156017661788;  // std dev = 0,816, 3,000 (wgt'ed) examples reached here.  /* #neg=1 #pos=2 */
%   | | | else return -0.06185106490048781;  // std dev = 1,356, 25,000 (wgt'ed) examples reached here.  /* #neg=23 #pos=2 */
%   | | else if ( companyalsoknownas(A, H) )
%   | | | then return 0.28672036367094084;  // std dev = 1,309, 7,000 (wgt'ed) examples reached here.  /* #neg=4 #pos=3 */
%   | | | else return 0.11901850031690367;  // std dev = 2,978, 46,000 (wgt'ed) examples reached here.  /* #neg=34 #pos=12 */
%   | else if ( acquired(I, A), companyceo(A, J) )
%   | 

In [11]:
# Revision theory handed to the learner: the same three nodes are supplied for
# every boosted tree. Each entry is '<tree>;<branch path>;<clause>;true;true'.
# Judging by the printed trees, <branch path> is the comma-separated list of
# true/false branches leading from the root to the node ('' for the root), so
# the three nodes here form a chain down the 'false' (else) side.
# NOTE(review): the meaning of the two trailing 'true' flags is not visible
# here -- confirm against revision.learn_test_model.
n_trees = 10  # one group of entries per tree; the run below uses trees=10
refine_nodes = [
    ('', 'companyeconomicsector(A, B) :- acquired(C, A), economicsectorcompany(B, C).'),
    ('false', 'acquired(A, D), economicsectorcompany(B, D).'),
    ('false,false', 'companyceo(A, E), companyceo(F, E), economicsectorcompany(B, F).'),
]
refine = ['%d;%s;%s;true;true' % (tree, path, clause)
          for tree in range(n_trees)
          for path, clause in refine_nodes]

# Load the examples and facts for the new target predicate.
tar_data = datasets.load(target, bk, target=new_target, balanced=balanced, seed=seed)
results = []

# Cross-validation: learn and evaluate one boosted model per fold, passing the
# current `refine` theory to the learner.
for i in range(n_folds):
    # Training and testing use the same fact base.
    tar_train_facts, tar_test_facts = tar_data[0][0], tar_data[0][0]

    # Split positive and negative examples into folds, then pick the
    # train/test partition belonging to fold i.
    to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=seed)
    to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=seed)
    tar_train_pos, tar_test_pos = datasets.get_kfold_small(i, to_folds_pos)
    tar_train_neg, tar_test_neg = datasets.get_kfold_small(i, to_folds_neg)

    model, t_results, structured, will, variances = revision.learn_test_model(
        background, boostsrl, new_target,
        tar_train_pos, tar_train_neg, tar_train_facts,
        tar_test_pos, tar_test_neg, tar_test_facts,
        refine=refine, trees=10, print_function=None)
    results.append(t_results)

    # Dump the learned trees for the first fold only.
    if i == 0:
        for line in will:
            print(line)
    print('\n')
    print('Fold ' + str(i + 1))
    print(t_results)
print('\n')

# Gather each metric across folds and report mean +/- two standard deviations.
auc_roc = np.array([fold_result['AUC ROC'] for fold_result in results])
auc_pr = np.array([fold_result['AUC PR'] for fold_result in results])
cll = np.array([fold_result['CLL'] for fold_result in results])
print('AUC ROC: %.3f +/- %.3f' % (auc_roc.mean(), 2 * auc_roc.std()))
print('AUC PR: %.3f +/- %.3f' % (auc_pr.mean(), 2 * auc_pr.std()))
print('CLL: %.3f +/- %.3f' % (cll.mean(), 2 * cll.std()))

WILL Produced-Tree #1
% FOR companyeconomicsector(A, B):
%   if ( acquired(C, A), economicsectorcompany(B, C) )
%   then return 0.8581489350995123;  // std dev = 0,000, 16,000 (wgt'ed) examples reached here.  /* #pos=16 */
%   else if ( acquired(A, D), economicsectorcompany(B, D) )
%   | then return 0.8581489350995122;  // std dev = 0,000, 10,000 (wgt'ed) examples reached here.  /* #pos=10 */
%   | else if ( companyceo(A, E), companyceo(F, E), economicsectorcompany(B, F) )
%   | | then return 0.8581489350995115;  // std dev = 2,38e-07, 34,000 (wgt'ed) examples reached here.  /* #pos=34 */
%   | | else if ( companyceo(A, G) )
%   | | | then return -0.14185106490048766;  // std dev = 7,88e-08, 85,000 (wgt'ed) examples reached here.  /* #neg=85 */
%   | | | else return 0.1736063483487191;  // std dev = 11,701, 634,000 (wgt'ed) examples reached here.  /* #neg=434 #pos=200 */
WILL Produced-Tree #2
% FOR companyeconomicsector(A, B):
%   if ( acquired(C, A), economicsectorcompany(B, C) )
%   