In [1]:
import pandas as pd
import numpy as np
import pickle
import pdb
import time
import tqdm
import random

In [2]:
def save_obj(obj, path):
    """Pickle ``obj`` to the file at ``path``.

    The file is opened in binary write mode (an existing file is overwritten)
    and the object is serialised with ``pickle.HIGHEST_PROTOCOL``.
    """
    with open(path, mode='wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(path):
    """Read the file at ``path`` in binary mode and return the unpickled object.

    Note: unpickling can execute arbitrary code, so only use this on files
    you produced yourself or otherwise trust.
    """
    with open(path, mode='rb') as handle:
        obj = pickle.load(handle)
    return obj

In [3]:
# Default number of distinct queries the samplers below collect for the
# train / test splits (used as the default value of their ``k`` parameter).
K_train = 10000
K_test = 2000

# Seed the `random` module so the sampled query sets can be reproduced when the
# notebook is re-run top to bottom. Caveat: the samplers draw from
# ``list(some_set)``, and set iteration order for str elements also depends on
# PYTHONHASHSEED, so fix that too if the entities are strings.
SEED = 0
random.seed(SEED)

# Intermediate artefacts are read from `root`; the generated query files are
# written to `save_root`.
root = '../../data/DBpedia/mid/'
save_root = '../../data/DBpedia/input/'

# NOTE: load_obj unpickles these files -- only point `root` at trusted data.
kg_dict_all = load_obj(root + 'kg_dict_all.pkl')
is_dict_all = load_obj(root + 'is_dict_all.pkl')
kg_dict_train = load_obj(root + 'kg_dict_train.pkl')
is_dict_train = load_obj(root + 'is_dict_train.pkl')

# Tabular versions of the same data; the first CSV column is used as the index.
kg_data_all = pd.read_csv(root + 'kg_data_all.csv', index_col=0)
is_data_all = pd.read_csv(root + 'is_data_all.csv', index_col=0)
kg_data_train = pd.read_csv(root + 'kg_data_train.csv', index_col=0)
is_data_train = pd.read_csv(root + 'is_data_train.csv', index_col=0)
ot_data = pd.read_csv(root + 'ot.csv', index_col=0)

In [4]:
def get_answers_1p(kg_dict, is_dict):
    """Build the entity and concept answer sets for every key of ``kg_dict``.

    Args:
        kg_dict: dict mapping a query key to an iterable of answer entities.
        is_dict: dict mapping an entity to an iterable of concepts
            (presumably its "is-a" types -- confirm against the data).

    Returns:
        ``(answers_e, answers_c)``. ``answers_e[key]`` is the set of entities
        in ``kg_dict[key]``. ``answers_c[key]`` is the union of the concepts
        of those entities; keys whose concept set is empty are left out of
        ``answers_c``.
    """
    answers_e, answers_c = {}, {}
    for query in tqdm.tqdm(kg_dict):
        entities = kg_dict[query]
        # Entities missing from is_dict simply contribute no concepts.
        concepts = {
            concept
            for entity in entities
            for concept in is_dict.get(entity, [])
        }
        answers_e[query] = set(entities)
        if concepts:
            answers_c[query] = concepts
    return answers_e, answers_c

def answers_difference(all_answers_e, all_answers_c, train_answers_e, train_answers_c):
    """Return the answers that are in the "all" mappings but not in the "train" ones.

    Iterates over the keys of ``all_answers_e``. For each key the train
    entity / concept sets (empty when the key is absent) are subtracted from
    the corresponding "all" sets.

    Returns:
        ``(test_answers_e, test_answers_c)``; a key is only present in a
        result dict when its remaining set is non-empty.
    """
    test_answers_e, test_answers_c = {}, {}
    nothing = set()  # only ever used as a read-only operand of `-`
    for query, full_entities in all_answers_e.items():
        unseen_entities = full_entities - train_answers_e.get(query, nothing)
        unseen_concepts = (
            all_answers_c.get(query, nothing) - train_answers_c.get(query, nothing)
        )
        if unseen_entities:
            test_answers_e[query] = unseen_entities
        if unseen_concepts:
            test_answers_c[query] = unseen_concepts
    return test_answers_e, test_answers_c

def get_filter_train_e_1p(train_answers_1p_e):
    """Return the filter sets for 1p training queries (entity answers).

    The filter of a training query is its own training answer set, so the
    result is a new dict with the same keys and the same value objects as
    ``train_answers_1p_e``.
    """
    return {query: train_answers_1p_e[query] for query in train_answers_1p_e}

def get_filter_train_c_1p(train_answers_1p_c):
    """Return the filter sets for 1p training queries (concept answers).

    The filter of a training query is its own training answer set, so the
    result is a new dict with the same keys and the same value objects as
    ``train_answers_1p_c``.
    """
    return {query: train_answers_1p_c[query] for query in train_answers_1p_c}

def get_filter_test_e_1p(train_answers_1p_e, test_answers_1p_e):
    """Return the filter sets for 1p test queries (entity answers).

    For every key of ``test_answers_1p_e`` the filter is the training answer
    set stored under the same key, or a fresh empty set when the key does not
    occur in ``train_answers_1p_e``.
    """
    return {
        query_test: train_answers_1p_e.get(query_test, set())
        for query_test in test_answers_1p_e
    }

def get_filter_test_c_1p(train_answers_1p_c, test_answers_1p_c):
    """Return the filter sets for 1p test queries (concept answers).

    For every key of ``test_answers_1p_c`` the filter is the training concept
    set stored under the same key, or a fresh empty set when the key does not
    occur in ``train_answers_1p_c``.
    """
    return {
        query_test: train_answers_1p_c.get(query_test, set())
        for query_test in test_answers_1p_c
    }

In [5]:
# 1p answers computed from the full data and from the training data.
all_answers_1p_e, all_answers_1p_c = get_answers_1p(kg_dict_all, is_dict=is_dict_all)
train_answers_1p_e, train_answers_1p_c = get_answers_1p(kg_dict_train, is_dict=is_dict_train)

# Test answers are whatever the full data adds on top of the training data.
test_answers_1p_e, test_answers_1p_c = answers_difference(
    all_answers_1p_e,
    all_answers_1p_c,
    train_answers_1p_e,
    train_answers_1p_c,
)

# Filter sets: training answers stored alongside each train / test query.
train_filter_answers_1p_e = get_filter_train_e_1p(train_answers_1p_e)
train_filter_answers_1p_c = get_filter_train_c_1p(train_answers_1p_c)
test_filter_answers_1p_e = get_filter_test_e_1p(train_answers_1p_e, test_answers_1p_e)
test_filter_answers_1p_c = get_filter_test_c_1p(train_answers_1p_c, test_answers_1p_c)

ret_1p = {
    'train': {'e': train_answers_1p_e, 'c': train_answers_1p_c},
    'test': {'e': test_answers_1p_e, 'c': test_answers_1p_c},
    'train_filter': {'e': train_filter_answers_1p_e, 'c': train_filter_answers_1p_c},
    'test_filter': {'e': test_filter_answers_1p_e, 'c': test_filter_answers_1p_c},
}
save_obj(ret_1p, save_root + '1p.pkl')
print('Done 1p')

100%|██████████| 76230/76230 [00:00<00:00, 153110.14it/s]
100%|██████████| 73836/73836 [00:00<00:00, 163407.37it/s]


Done 1p


In [6]:
# Report the total number of (query, answer) pairs in each 1p answer mapping.
# One data-driven loop replaces six copy-pasted accumulation loops; the printed
# lines are identical to the original ones.
for count_label, answer_sets in (
    ('All Count E', all_answers_1p_e),
    ('All Count C', all_answers_1p_c),
    ('Train Count E', train_answers_1p_e),
    ('Train Count C', train_answers_1p_c),
    ('Test Count E', test_answers_1p_e),
    ('Test Count C', test_answers_1p_c),
):
    count = sum(len(answer_set) for answer_set in answer_sets.values())
    print(f'{count_label}: {count}')

All Count E: 144034
All Count C: 507192
Train Count E: 136821
Train Count C: 473924
Test Count E: 7213
Test Count C: 33268


In [7]:
def get_h_2_rt(dict_1):
    """Re-index a ``(key0, key1) -> answers`` mapping by its first key element.

    Args:
        dict_1: dict whose keys are indexable with at least two elements
            (presumably ``(head, relation)``) and whose values are iterables
            of answers.

    Returns:
        dict mapping ``key[0]`` to the set of ``(key[1], answer)`` pairs
        collected over all entries of ``dict_1``.
    """
    dict_2 = {}
    for key, es in dict_1.items():
        for e in es:
            # setdefault replaces the former try / bare-except initialisation,
            # which replaced an existing set whenever *any* exception
            # (including KeyboardInterrupt) was raised during the add.
            dict_2.setdefault(key[0], set()).add((key[1], e))
    return dict_2
def get_t_2_hr(dict_1):
    """Invert a ``(key0, key1) -> answers`` mapping so it is indexed by answer.

    Args:
        dict_1: dict whose keys are indexable with at least two elements
            (presumably ``(head, relation)``) and whose values are iterables
            of answers.

    Returns:
        dict mapping each answer to the set of ``(key[0], key[1])`` pairs
        under which it appears in ``dict_1``.
    """
    dict_2 = {}
    for key, es in dict_1.items():
        for e in es:
            # setdefault replaces the former try / bare-except initialisation,
            # which replaced an existing set whenever *any* exception
            # (including KeyboardInterrupt) was raised during the add.
            dict_2.setdefault(e, set()).add((key[0], key[1]))
    return dict_2

In [8]:
def get_train_answers_2p_e(train_answers_1p, k=K_train):
    """Randomly sample ``k`` distinct 2-hop (2p) training queries with entity answers.

    Each draw picks a random 1p query ``(h, r1)`` and one of its answers
    ``a1``, then a random ``(r2, t)`` pair that ``get_h_2_rt`` lists for
    ``a1``. Draws are rejected when ``a1`` has no such pair or when ``h``,
    ``a1`` and ``t`` are not three distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2)`` to the single sampled answer ``t``;
        drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
        except (KeyError, IndexError):
            # Dead end. Only lookup misses are skipped; a bare `except` would
            # also swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if len({query_1[0], answer_1, query_2[1]}) != 3:
            continue
        ret[(query_1[0], query_1[1], query_2[0])] = query_2[1]
    return ret

def get_train_answers_2p_c(train_answers_1p, is_dict, k=K_train):
    """Randomly sample ``k`` distinct 2-hop (2p) training queries with concept answers.

    Each draw picks a random 1p query ``(h, r1)`` and one of its answers
    ``a1``, then a random ``(r2, t)`` pair that ``get_h_2_rt`` lists for
    ``a1``, and finally one random element of ``is_dict[t]``. Draws are
    rejected when a lookup fails, when ``is_dict[t]`` is empty, or when
    ``h``, ``a1`` and ``t`` are not three distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2)`` to the single sampled concept; drawing
        the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            if len({query_1[0], answer_1, query_2[1]}) != 3:
                continue
            concept = random.choice(list(is_dict[query_2[1]]))
        except (KeyError, IndexError):
            # Missing edge / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0])] = concept
    return ret

def get_test_answers_2p_e(train_answers_1p, test_answers_1p, k=K_test):
    """Randomly sample ``k`` distinct 2-hop (2p) test queries with entity answers.

    The first hop ``(h, r1) -> a1`` is drawn from ``train_answers_1p``; the
    second hop ``(r2, t)`` is drawn from the pairs that ``get_h_2_rt`` builds
    out of ``test_answers_1p`` for ``a1``. Draws are rejected when ``a1`` has
    no such pair or when ``h``, ``a1`` and ``t`` are not three distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p: dict of the same shape holding the test answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2)`` to the single sampled answer ``t``;
        drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(test_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
        except (KeyError, IndexError):
            # Dead end. Only lookup misses are skipped; a bare `except` would
            # also swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if len({query_1[0], answer_1, query_2[1]}) != 3:
            continue
        ret[(query_1[0], query_1[1], query_2[0])] = query_2[1]
    return ret

def get_test_answers_2p_c(train_answers_1p, test_answers_1p, is_dict, k=K_test):
    """Randomly sample ``k`` distinct 2-hop (2p) test queries with concept answers.

    The first hop ``(h, r1) -> a1`` is drawn from ``train_answers_1p``; the
    second hop ``(r2, t)`` is drawn from the pairs that ``get_h_2_rt`` builds
    out of ``test_answers_1p`` for ``a1``; the stored answer is one random
    element of ``is_dict[t]``. Draws are rejected when a lookup fails, when
    ``is_dict[t]`` is empty, or when ``h``, ``a1`` and ``t`` are not three
    distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p: dict of the same shape holding the test answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2)`` to the single sampled concept; drawing
        the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(test_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            if len({query_1[0], answer_1, query_2[1]}) != 3:
                continue
            concept = random.choice(list(is_dict[query_2[1]]))
        except (KeyError, IndexError):
            # Missing edge / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0])] = concept
    return ret

def get_train_filter_e_2p(train_answers_1p_e, train_answers_2p_e):
    """Compute, per 2p training query, every entity reachable via the training answers.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` by looking up ``(answer, q[2])`` in ``train_answers_1p_e``;
    missing keys contribute nothing.

    Returns:
        dict mapping each key of ``train_answers_2p_e`` to a new set of
        reachable entities (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    return {
        query: {
            tail
            for mid in one_hop((query[0], query[1]), ())
            for tail in one_hop((mid, query[2]), ())
        }
        for query in train_answers_2p_e
    }

def get_train_filter_c_2p(train_answers_1p_e, train_answers_2p_c, is_dict_train):
    """Compute, per 2p training query, every concept of the reachable entities.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` using ``train_answers_1p_e``; the concepts that
    ``is_dict_train`` lists for the resulting entities are collected.
    Missing keys in either mapping contribute nothing.

    Returns:
        dict mapping each key of ``train_answers_2p_c`` to a new set of
        concepts (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query: {
            concept
            for mid in one_hop((query[0], query[1]), ())
            for tail in one_hop((mid, query[2]), ())
            for concept in concepts_of(tail, ())
        }
        for query in train_answers_2p_c
    }

def get_test_filter_e_2p(train_answers_1p_e, test_answers_2p_e):
    """Compute, per 2p test query, every entity reachable via the training answers.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` by looking up ``(answer, q[2])`` in ``train_answers_1p_e``;
    missing keys contribute nothing.

    Returns:
        dict mapping each key of ``test_answers_2p_e`` to a new set of
        reachable entities (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    return {
        query_test: {
            tail
            for mid in one_hop((query_test[0], query_test[1]), ())
            for tail in one_hop((mid, query_test[2]), ())
        }
        for query_test in test_answers_2p_e
    }

def get_test_filter_c_2p(train_answers_1p_e, test_answers_2p_c, is_dict_train):
    """Compute, per 2p test query, every concept of the entities reachable in training.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` using ``train_answers_1p_e``; the concepts that
    ``is_dict_train`` lists for the resulting entities are collected.
    Missing keys in either mapping contribute nothing.

    Returns:
        dict mapping each key of ``test_answers_2p_c`` to a new set of
        concepts (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query_test: {
            concept
            for mid in one_hop((query_test[0], query_test[1]), ())
            for tail in one_hop((mid, query_test[2]), ())
            for concept in concepts_of(tail, ())
        }
        for query_test in test_answers_2p_c
    }

In [9]:
# Sample 2p queries. Test concept answers are drawn with is_dict_all, whereas
# every filter below is computed from training data only.
train_answers_2p_e = get_train_answers_2p_e(train_answers_1p_e)
train_answers_2p_c = get_train_answers_2p_c(train_answers_1p_e, is_dict=is_dict_train)
test_answers_2p_e = get_test_answers_2p_e(train_answers_1p_e, test_answers_1p_e)
test_answers_2p_c = get_test_answers_2p_c(
    train_answers_1p_e, test_answers_1p_e, is_dict=is_dict_all
)

train_filter_answers_2p_e = get_train_filter_e_2p(train_answers_1p_e, train_answers_2p_e)
train_filter_answers_2p_c = get_train_filter_c_2p(
    train_answers_1p_e, train_answers_2p_c, is_dict_train=is_dict_train
)
test_filter_answers_2p_e = get_test_filter_e_2p(train_answers_1p_e, test_answers_2p_e)
test_filter_answers_2p_c = get_test_filter_c_2p(
    train_answers_1p_e, test_answers_2p_c, is_dict_train=is_dict_train
)

ret_2p = {
    'train': {'e': train_answers_2p_e, 'c': train_answers_2p_c},
    'test': {'e': test_answers_2p_e, 'c': test_answers_2p_c},
    'train_filter': {'e': train_filter_answers_2p_e, 'c': train_filter_answers_2p_c},
    'test_filter': {'e': test_filter_answers_2p_e, 'c': test_filter_answers_2p_c},
}
save_obj(ret_2p, save_root + '2p.pkl')
print('Done 2p')

Done 2p


In [10]:
def get_train_answers_3p_e(train_answers_1p, k=K_train):
    """Randomly sample ``k`` distinct 3-hop (3p) training queries with entity answers.

    Each draw walks ``h -r1-> a1 -r2-> a2 -r3-> t``: the first hop is a
    random 1p query and answer, the next two hops are random pairs that
    ``get_h_2_rt`` lists for ``a1`` and ``a2``. Draws are rejected when a hop
    is missing or when ``h``, ``a1``, ``a2`` and ``t`` are not four distinct
    values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2, r3)`` to the single sampled answer ``t``;
        drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            answer_2 = query_2[1]
            query_3 = random.choice(list(h_2_rt[answer_2]))
        except (KeyError, IndexError):
            # Dead end. Only lookup misses are skipped; a bare `except` would
            # also swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if len({query_1[0], answer_1, answer_2, query_3[1]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0])] = query_3[1]
    return ret

def get_train_answers_3p_c(train_answers_1p, is_dict, k=K_train):
    """Randomly sample ``k`` distinct 3-hop (3p) training queries with concept answers.

    Each draw walks ``h -r1-> a1 -r2-> a2 -r3-> t`` (first hop from a random
    1p query, the next two from the pairs ``get_h_2_rt`` lists for ``a1`` and
    ``a2``) and stores one random element of ``is_dict[t]``. Draws are
    rejected when a lookup fails, when ``is_dict[t]`` is empty, or when
    ``h``, ``a1``, ``a2`` and ``t`` are not four distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2, r3)`` to the single sampled concept;
        drawing the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            answer_2 = query_2[1]
            query_3 = random.choice(list(h_2_rt[answer_2]))
            if len({query_1[0], answer_1, answer_2, query_3[1]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_3[1]]))
        except (KeyError, IndexError):
            # Missing edge / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0])] = concept
    return ret

def get_test_answers_3p_e(train_answers_1p, test_answers_1p, k=K_test):
    """Randomly sample ``k`` distinct 3-hop (3p) test queries with entity answers.

    Each draw walks ``h -r1-> a1 -r2-> a2 -r3-> t``. The first two hops use
    ``train_answers_1p`` (directly and through ``get_h_2_rt``); the last hop
    uses the pairs ``get_h_2_rt`` builds from ``test_answers_1p``. Draws are
    rejected when a hop is missing or when ``h``, ``a1``, ``a2`` and ``t``
    are not four distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p: dict of the same shape holding the test answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2, r3)`` to the single sampled answer ``t``;
        drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt_train = get_h_2_rt(train_answers_1p)
    h_2_rt_test = get_h_2_rt(test_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt_train[answer_1]))
            answer_2 = query_2[1]
            query_3 = random.choice(list(h_2_rt_test[answer_2]))
        except (KeyError, IndexError):
            # Dead end. Only lookup misses are skipped; a bare `except` would
            # also swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if len({query_1[0], answer_1, answer_2, query_3[1]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0])] = query_3[1]
    return ret

def get_test_answers_3p_c(train_answers_1p, test_answers_1p, is_dict, k=K_test):
    """Randomly sample ``k`` distinct 3-hop (3p) test queries with concept answers.

    Each draw walks ``h -r1-> a1 -r2-> a2 -r3-> t``. The first two hops use
    ``train_answers_1p`` (directly and through ``get_h_2_rt``); the last hop
    uses the pairs ``get_h_2_rt`` builds from ``test_answers_1p``. The stored
    answer is one random element of ``is_dict[t]``. Draws are rejected when a
    lookup fails, when ``is_dict[t]`` is empty, or when ``h``, ``a1``, ``a2``
    and ``t`` are not four distinct values.

    Args:
        train_answers_1p: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p: dict of the same shape holding the test answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2, r3)`` to the single sampled concept;
        drawing the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt_train = get_h_2_rt(train_answers_1p)
    h_2_rt_test = get_h_2_rt(test_answers_1p)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt_train[answer_1]))
            answer_2 = query_2[1]
            query_3 = random.choice(list(h_2_rt_test[answer_2]))
            if len({query_1[0], answer_1, answer_2, query_3[1]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_3[1]]))
        except (KeyError, IndexError):
            # Missing edge / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0])] = concept
    return ret

def get_train_filter_e_3p(train_answers_1p_e, train_answers_3p_e):
    """Compute, per 3p training query, every entity reachable via the training answers.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` and then ``q[3]`` by repeated lookups in ``train_answers_1p_e``;
    missing keys contribute nothing.

    Returns:
        dict mapping each key of ``train_answers_3p_e`` to a new set of
        reachable entities (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    return {
        query: {
            tail
            for first in one_hop((query[0], query[1]), ())
            for second in one_hop((first, query[2]), ())
            for tail in one_hop((second, query[3]), ())
        }
        for query in train_answers_3p_e
    }

def get_train_filter_c_3p(train_answers_1p_e, train_answers_3p_c, is_dict_train):
    """Compute, per 3p training query, every concept of the reachable entities.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` and then ``q[3]`` using ``train_answers_1p_e``; the concepts
    that ``is_dict_train`` lists for the final entities are collected.
    Missing keys in either mapping contribute nothing.

    Returns:
        dict mapping each key of ``train_answers_3p_c`` to a new set of
        concepts (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query: {
            concept
            for first in one_hop((query[0], query[1]), ())
            for second in one_hop((first, query[2]), ())
            for tail in one_hop((second, query[3]), ())
            for concept in concepts_of(tail, ())
        }
        for query in train_answers_3p_c
    }

def get_test_filter_e_3p(train_answers_1p_e, test_answers_3p_e):
    """Compute, per 3p test query, every entity reachable via the training answers.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` and then ``q[3]`` by repeated lookups in ``train_answers_1p_e``;
    missing keys contribute nothing.

    Returns:
        dict mapping each key of ``test_answers_3p_e`` to a new set of
        reachable entities (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    return {
        query_test: {
            tail
            for first in one_hop((query_test[0], query_test[1]), ())
            for second in one_hop((first, query_test[2]), ())
            for tail in one_hop((second, query_test[3]), ())
        }
        for query_test in test_answers_3p_e
    }

def get_test_filter_c_3p(train_answers_1p_e, test_answers_3p_c, is_dict_train):
    """Compute, per 3p test query, every concept of the entities reachable in training.

    For a query ``q`` the 1p answers of ``(q[0], q[1])`` are expanded through
    ``q[2]`` and then ``q[3]`` using ``train_answers_1p_e``; the concepts
    that ``is_dict_train`` lists for the final entities are collected.
    Missing keys in either mapping contribute nothing.

    Returns:
        dict mapping each key of ``test_answers_3p_c`` to a new set of
        concepts (possibly empty).
    """
    one_hop = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query_test: {
            concept
            for first in one_hop((query_test[0], query_test[1]), ())
            for second in one_hop((first, query_test[2]), ())
            for tail in one_hop((second, query_test[3]), ())
            for concept in concepts_of(tail, ())
        }
        for query_test in test_answers_3p_c
    }

In [11]:
# Sample 3p queries. Test concept answers are drawn with is_dict_all, whereas
# every filter below is computed from training data only.
train_answers_3p_e = get_train_answers_3p_e(train_answers_1p_e)
train_answers_3p_c = get_train_answers_3p_c(train_answers_1p_e, is_dict=is_dict_train)
test_answers_3p_e = get_test_answers_3p_e(train_answers_1p_e, test_answers_1p_e)
test_answers_3p_c = get_test_answers_3p_c(
    train_answers_1p_e, test_answers_1p_e, is_dict=is_dict_all
)

train_filter_answers_3p_e = get_train_filter_e_3p(train_answers_1p_e, train_answers_3p_e)
train_filter_answers_3p_c = get_train_filter_c_3p(
    train_answers_1p_e, train_answers_3p_c, is_dict_train=is_dict_train
)
test_filter_answers_3p_e = get_test_filter_e_3p(train_answers_1p_e, test_answers_3p_e)
test_filter_answers_3p_c = get_test_filter_c_3p(
    train_answers_1p_e, test_answers_3p_c, is_dict_train=is_dict_train
)

ret_3p = {
    'train': {'e': train_answers_3p_e, 'c': train_answers_3p_c},
    'test': {'e': test_answers_3p_e, 'c': test_answers_3p_c},
    'train_filter': {'e': train_filter_answers_3p_e, 'c': train_filter_answers_3p_c},
    'test_filter': {'e': test_filter_answers_3p_e, 'c': test_filter_answers_3p_c},
}
save_obj(ret_3p, save_root + '3p.pkl')
print('Done 3p')

Done 3p


In [12]:
def get_train_answers_2i_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` distinct 2-way intersection (2i) training queries with entity answers.

    Each draw picks a random 1p query ``q1`` and one of its answers ``a``,
    then a random second 1p query ``q2`` among those ``get_t_2_hr`` lists as
    also having ``a`` as an answer. Draws with ``q1 == q2`` are rejected.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(q1[0], q1[1], q2[0], q2[1])`` to the shared answer
        ``a``; drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
        except (KeyError, IndexError):
            # Only lookup misses are skipped; a bare `except` would also
            # swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if query_1 == query_2:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = answer_1
    return ret
        
def get_train_answers_2i_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` distinct 2-way intersection (2i) training queries with concept answers.

    Each draw picks a random 1p query ``q1`` and one of its answers ``a``,
    then a random second 1p query ``q2`` among those ``get_t_2_hr`` lists as
    also having ``a`` as an answer, and stores one random element of
    ``is_dict[a]``. Draws are rejected when ``q1 == q2``, when ``a`` is not
    in ``is_dict``, or when ``is_dict[a]`` is empty.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(q1[0], q1[1], q2[0], q2[1])`` to the sampled concept;
        drawing the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
            if query_1 == query_2:
                continue
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Missing entry / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = concept
    return ret

def get_test_answers_2i_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` distinct 2-way intersection (2i) test queries with entity answers.

    Each draw picks a random query ``q1`` and answer ``a`` from
    ``test_answers_1p_e``, then a random query ``q2`` among those that
    ``get_t_2_hr`` (built from ``train_answers_1p_e``) lists as having ``a``
    as an answer. Draws are rejected when ``a`` has no such training query or
    when ``q1 == q2``.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p_e: dict of the same shape holding the test answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(q1[0], q1[1], q2[0], q2[1])`` to the shared answer
        ``a``; drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
        except (KeyError, IndexError):
            # Only lookup misses are skipped; a bare `except` would also
            # swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if query_1 == query_2:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = answer_1
    return ret
        
def get_test_answers_2i_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` distinct 2-way intersection (2i) test queries with concept answers.

    Each draw picks a random query ``q1`` and answer ``a`` from
    ``test_answers_1p_e``, then a random query ``q2`` among those that
    ``get_t_2_hr`` (built from ``train_answers_1p_e``) lists as having ``a``
    as an answer, and stores one random element of ``is_dict[a]``. Draws are
    rejected when a lookup fails, when ``is_dict[a]`` is empty, or when
    ``q1 == q2``.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p_e: dict of the same shape holding the test answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(q1[0], q1[1], q2[0], q2[1])`` to the sampled concept;
        drawing the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
            if query_1 == query_2:
                continue
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Missing entry / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = concept
    return ret

def get_train_filter_e_2i(train_answers_1p_e, train_answers_2i_e):
    """Compute, per 2i training query, the entities answering both branches in training.

    For a query ``q`` the result is the intersection of the training answer
    sets stored under ``(q[0], q[1])`` and ``(q[2], q[3])``; a branch missing
    from ``train_answers_1p_e`` counts as an empty set.

    Returns:
        dict mapping each key of ``train_answers_2i_e`` to a new set.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    return {
        query: branch((query[0], query[1]), no_answers)
        & branch((query[2], query[3]), no_answers)
        for query in train_answers_2i_e
    }

def get_train_filter_c_2i(train_answers_1p_e, train_answers_2i_c, is_dict_train):
    """Compute, per 2i training query, the concepts of entities answering both branches.

    For a query ``q`` the training answer sets of ``(q[0], q[1])`` and
    ``(q[2], q[3])`` are intersected (a missing branch counts as empty) and
    the concepts ``is_dict_train`` lists for the shared entities are
    collected; entities missing from ``is_dict_train`` contribute nothing.

    Returns:
        dict mapping each key of ``train_answers_2i_c`` to a new set of concepts.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query: {
            concept
            for shared in (
                branch((query[0], query[1]), no_answers)
                & branch((query[2], query[3]), no_answers)
            )
            for concept in concepts_of(shared, ())
        }
        for query in train_answers_2i_c
    }

def get_test_filter_e_2i(train_answers_1p_e, test_answers_2i_e):
    """Compute, per 2i test query, the entities answering both branches in training.

    For a query ``q`` the result is the intersection of the training answer
    sets stored under ``(q[0], q[1])`` and ``(q[2], q[3])``; a branch missing
    from ``train_answers_1p_e`` counts as an empty set.

    Returns:
        dict mapping each key of ``test_answers_2i_e`` to a new set.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    return {
        query_test: branch((query_test[0], query_test[1]), no_answers)
        & branch((query_test[2], query_test[3]), no_answers)
        for query_test in test_answers_2i_e
    }

def get_test_filter_c_2i(train_answers_1p_e, test_answers_2i_c, is_dict_train):
    """Compute, per 2i test query, the concepts of entities answering both branches in training.

    For a query ``q`` the training answer sets of ``(q[0], q[1])`` and
    ``(q[2], q[3])`` are intersected (a missing branch counts as empty) and
    the concepts ``is_dict_train`` lists for the shared entities are
    collected; entities missing from ``is_dict_train`` contribute nothing.

    Returns:
        dict mapping each key of ``test_answers_2i_c`` to a new set of concepts.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query_test: {
            concept
            for shared in (
                branch((query_test[0], query_test[1]), no_answers)
                & branch((query_test[2], query_test[3]), no_answers)
            )
            for concept in concepts_of(shared, ())
        }
        for query_test in test_answers_2i_c
    }

In [13]:
# Sample 2i queries. Test concept answers are drawn with is_dict_all, whereas
# every filter below is computed from training data only.
train_answers_2i_e = get_train_answers_2i_e(train_answers_1p_e)
train_answers_2i_c = get_train_answers_2i_c(train_answers_1p_e, is_dict=is_dict_train)
test_answers_2i_e = get_test_answers_2i_e(train_answers_1p_e, test_answers_1p_e)
test_answers_2i_c = get_test_answers_2i_c(
    train_answers_1p_e, test_answers_1p_e, is_dict=is_dict_all
)

train_filter_answers_2i_e = get_train_filter_e_2i(train_answers_1p_e, train_answers_2i_e)
train_filter_answers_2i_c = get_train_filter_c_2i(
    train_answers_1p_e, train_answers_2i_c, is_dict_train=is_dict_train
)
test_filter_answers_2i_e = get_test_filter_e_2i(train_answers_1p_e, test_answers_2i_e)
test_filter_answers_2i_c = get_test_filter_c_2i(
    train_answers_1p_e, test_answers_2i_c, is_dict_train=is_dict_train
)

ret_2i = {
    'train': {'e': train_answers_2i_e, 'c': train_answers_2i_c},
    'test': {'e': test_answers_2i_e, 'c': test_answers_2i_c},
    'train_filter': {'e': train_filter_answers_2i_e, 'c': train_filter_answers_2i_c},
    'test_filter': {'e': test_filter_answers_2i_e, 'c': test_filter_answers_2i_c},
}
save_obj(ret_2i, save_root + '2i.pkl')
print('Done 2i')

Done 2i


In [14]:
def get_train_answers_3i_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` distinct 3-way intersection (3i) training queries with entity answers.

    Each draw picks a random 1p query ``q1`` and one of its answers ``a``,
    then two independent random queries ``q2`` and ``q3`` among those
    ``get_t_2_hr`` lists as also having ``a`` as an answer. Draws where any
    two of ``q1``, ``q2``, ``q3`` are equal are rejected.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping the 6-tuple ``q1 + q2 + q3`` (first two elements of
        each) to the shared answer ``a``; drawing the same key again
        overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            # Both extra branches are drawn from the same candidate list.
            candidates = list(t_2_hr[answer_1])
            query_2 = random.choice(candidates)
            query_3 = random.choice(candidates)
        except (KeyError, IndexError):
            # Only lookup misses are skipped; a bare `except` would also
            # swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if query_1 == query_2 or query_2 == query_3 or query_1 == query_3:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = answer_1
    return ret

def get_train_answers_3i_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` distinct 3-way intersection (3i) training queries with concept answers.

    Each draw picks a random 1p query ``q1`` and one of its answers ``a``,
    then two independent random queries ``q2`` and ``q3`` among those
    ``get_t_2_hr`` lists as also having ``a`` as an answer, and stores one
    random element of ``is_dict[a]``. Draws are rejected when any two of
    ``q1``, ``q2``, ``q3`` are equal, when ``a`` is not in ``is_dict``, or
    when ``is_dict[a]`` is empty.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping the 6-tuple ``q1 + q2 + q3`` (first two elements of
        each) to the sampled concept; drawing the same key again overwrites
        the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            # Both extra branches are drawn from the same candidate list.
            candidates = list(t_2_hr[answer_1])
            query_2 = random.choice(candidates)
            query_3 = random.choice(candidates)
            if query_1 == query_2 or query_2 == query_3 or query_1 == query_3:
                continue
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Missing entry / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = concept
    return ret

def get_test_answers_3i_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` distinct 3-way intersection (3i) test queries with entity answers.

    Each draw picks a random query ``q1`` and answer ``a`` from
    ``test_answers_1p_e``, then two independent random queries ``q2`` and
    ``q3`` among those that ``get_t_2_hr`` (built from
    ``train_answers_1p_e``) lists as having ``a`` as an answer. Draws are
    rejected when ``a`` has no such training query or when any two of
    ``q1``, ``q2``, ``q3`` are equal.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p_e: dict of the same shape holding the test answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping the 6-tuple ``q1 + q2 + q3`` (first two elements of
        each) to the shared answer ``a``; drawing the same key again
        overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            # Both extra branches are drawn from the same candidate list.
            candidates = list(t_2_hr[answer_1])
            query_2 = random.choice(candidates)
            query_3 = random.choice(candidates)
        except (KeyError, IndexError):
            # Only lookup misses are skipped; a bare `except` would also
            # swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if query_1 == query_2 or query_2 == query_3 or query_1 == query_3:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = answer_1
    return ret

def get_test_answers_3i_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` distinct 3-way intersection (3i) test queries with concept answers.

    Each draw picks a random query ``q1`` and answer ``a`` from
    ``test_answers_1p_e``, then two independent random queries ``q2`` and
    ``q3`` among those that ``get_t_2_hr`` (built from
    ``train_answers_1p_e``) lists as having ``a`` as an answer, and stores
    one random element of ``is_dict[a]``. Draws are rejected when a lookup
    fails, when ``is_dict[a]`` is empty, or when any two of ``q1``, ``q2``,
    ``q3`` are equal.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p_e: dict of the same shape holding the test answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping the 6-tuple ``q1 + q2 + q3`` (first two elements of
        each) to the sampled concept; drawing the same key again overwrites
        the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            # Both extra branches are drawn from the same candidate list.
            candidates = list(t_2_hr[answer_1])
            query_2 = random.choice(candidates)
            query_3 = random.choice(candidates)
            if query_1 == query_2 or query_2 == query_3 or query_1 == query_3:
                continue
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Missing entry / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = concept
    return ret

def get_train_filter_e_3i(train_answers_1p_e, train_answers_3i_e):
    """Compute, per 3i training query, the entities answering all three branches in training.

    For a query ``q`` the result is the intersection of the training answer
    sets stored under ``(q[0], q[1])``, ``(q[2], q[3])`` and
    ``(q[4], q[5])``; a branch missing from ``train_answers_1p_e`` counts as
    an empty set.

    Returns:
        dict mapping each key of ``train_answers_3i_e`` to a new set.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    return {
        query: (
            branch((query[0], query[1]), no_answers)
            & branch((query[2], query[3]), no_answers)
            & branch((query[4], query[5]), no_answers)
        )
        for query in train_answers_3i_e
    }

def get_train_filter_c_3i(train_answers_1p_e, train_answers_3i_c, is_dict_train):
    """Compute, per 3i training query, the concepts of entities answering all three branches.

    For a query ``q`` the training answer sets of ``(q[0], q[1])``,
    ``(q[2], q[3])`` and ``(q[4], q[5])`` are intersected (a missing branch
    counts as empty) and the concepts ``is_dict_train`` lists for the shared
    entities are collected; entities missing from ``is_dict_train``
    contribute nothing.

    Returns:
        dict mapping each key of ``train_answers_3i_c`` to a new set of concepts.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query: {
            concept
            for shared in (
                branch((query[0], query[1]), no_answers)
                & branch((query[2], query[3]), no_answers)
                & branch((query[4], query[5]), no_answers)
            )
            for concept in concepts_of(shared, ())
        }
        for query in train_answers_3i_c
    }

def get_test_filter_e_3i(train_answers_1p_e, test_answers_3i_e):
    """Compute, per 3i test query, the entities answering all three branches in training.

    For a query ``q`` the result is the intersection of the training answer
    sets stored under ``(q[0], q[1])``, ``(q[2], q[3])`` and
    ``(q[4], q[5])``; a branch missing from ``train_answers_1p_e`` counts as
    an empty set.

    Returns:
        dict mapping each key of ``test_answers_3i_e`` to a new set.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    return {
        query_test: (
            branch((query_test[0], query_test[1]), no_answers)
            & branch((query_test[2], query_test[3]), no_answers)
            & branch((query_test[4], query_test[5]), no_answers)
        )
        for query_test in test_answers_3i_e
    }

def get_test_filter_c_3i(train_answers_1p_e, test_answers_3i_c, is_dict_train):
    """Compute, per 3i test query, the concepts of entities answering all three branches in training.

    For a query ``q`` the training answer sets of ``(q[0], q[1])``,
    ``(q[2], q[3])`` and ``(q[4], q[5])`` are intersected (a missing branch
    counts as empty) and the concepts ``is_dict_train`` lists for the shared
    entities are collected; entities missing from ``is_dict_train``
    contribute nothing.

    Returns:
        dict mapping each key of ``test_answers_3i_c`` to a new set of concepts.
    """
    no_answers = set()  # read-only operand; `&` always builds a new set
    branch = train_answers_1p_e.get
    concepts_of = is_dict_train.get
    return {
        query_test: {
            concept
            for shared in (
                branch((query_test[0], query_test[1]), no_answers)
                & branch((query_test[2], query_test[3]), no_answers)
                & branch((query_test[4], query_test[5]), no_answers)
            )
            for concept in concepts_of(shared, ())
        }
        for query_test in test_answers_3i_c
    }

In [15]:
# Sample 3i queries. Test concept answers are drawn with is_dict_all, whereas
# every filter below is computed from training data only.
train_answers_3i_e = get_train_answers_3i_e(train_answers_1p_e)
train_answers_3i_c = get_train_answers_3i_c(train_answers_1p_e, is_dict=is_dict_train)
test_answers_3i_e = get_test_answers_3i_e(train_answers_1p_e, test_answers_1p_e)
test_answers_3i_c = get_test_answers_3i_c(
    train_answers_1p_e, test_answers_1p_e, is_dict=is_dict_all
)

train_filter_answers_3i_e = get_train_filter_e_3i(train_answers_1p_e, train_answers_3i_e)
train_filter_answers_3i_c = get_train_filter_c_3i(
    train_answers_1p_e, train_answers_3i_c, is_dict_train=is_dict_train
)
test_filter_answers_3i_e = get_test_filter_e_3i(train_answers_1p_e, test_answers_3i_e)
test_filter_answers_3i_c = get_test_filter_c_3i(
    train_answers_1p_e, test_answers_3i_c, is_dict_train=is_dict_train
)

ret_3i = {
    'train': {'e': train_answers_3i_e, 'c': train_answers_3i_c},
    'test': {'e': test_answers_3i_e, 'c': test_answers_3i_c},
    'train_filter': {'e': train_filter_answers_3i_e, 'c': train_filter_answers_3i_c},
    'test_filter': {'e': test_filter_answers_3i_e, 'c': test_filter_answers_3i_c},
}
save_obj(ret_3i, save_root + '3i.pkl')
print('Done 3i')

Done 3i


In [16]:
def get_test_answers_pi_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` distinct path-then-intersection (pi) test queries with entity answers.

    Each draw picks a random query ``q1 = (h, r1)`` and answer ``a1`` from
    ``test_answers_1p_e``, a random ``(r2, t)`` pair that ``get_h_2_rt``
    (built from ``train_answers_1p_e``) lists for ``a1``, and a random
    training query ``q3 = (h3, r3)`` that ``get_t_2_hr`` lists as having
    ``t`` as an answer. Draws are rejected when a lookup fails or when ``h``,
    ``a1``, ``t`` and ``h3`` are not four distinct values.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p_e: dict of the same shape holding the test answers.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2, h3, r3)`` to the sampled answer ``t``;
        drawing the same key again overwrites the stored answer.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            query_3 = random.choice(list(t_2_hr[query_2[1]]))
        except (KeyError, IndexError):
            # Dead end. Only lookup misses are skipped; a bare `except` would
            # also swallow KeyboardInterrupt and make the loop unstoppable.
            continue
        if len({query_1[0], answer_1, query_2[1], query_3[0]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0], query_3[1])] = query_2[1]
    return ret

def get_test_answers_pi_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` distinct path-then-intersection (pi) test queries with concept answers.

    Each draw picks a random query ``q1 = (h, r1)`` and answer ``a1`` from
    ``test_answers_1p_e``, a random ``(r2, t)`` pair that ``get_h_2_rt``
    (built from ``train_answers_1p_e``) lists for ``a1``, and a random
    training query ``q3 = (h3, r3)`` that ``get_t_2_hr`` lists as having
    ``t`` as an answer; the stored answer is one random element of
    ``is_dict[t]``. Draws are rejected when a lookup fails, when
    ``is_dict[t]`` is empty, or when ``h``, ``a1``, ``t`` and ``h3`` are not
    four distinct values.

    Args:
        train_answers_1p_e: dict mapping 2-tuple query keys to collections of answers.
        test_answers_1p_e: dict of the same shape holding the test answers.
        is_dict: dict mapping an entity to a collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(h, r1, r2, h3, r3)`` to the sampled concept; drawing
        the same key again overwrites the stored concept.

    Note:
        The loop only stops once ``k`` distinct keys exist, so it does not
        terminate if fewer than ``k`` can be formed from the data.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Build the key list once instead of on every draw.
    queries_1 = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            query_3 = random.choice(list(t_2_hr[query_2[1]]))
            if len({query_1[0], answer_1, query_2[1], query_3[0]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_2[1]]))
        except (KeyError, IndexError):
            # Missing edge / entity without concepts. Only lookup misses are
            # skipped; a bare `except` would also swallow KeyboardInterrupt.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0], query_3[1])] = concept
    return ret

def get_test_filter_e_pi(train_answers_1p_e, test_answers_pi_e):
    """Collect, per 'pi' query, the entity answers already derivable from training data.

    For a query ``q`` the first hop ``(q[0], q[1])`` is expanded through relation
    ``q[2]`` and the result is intersected with the answers of ``(q[3], q[4])``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_pi_e: dict whose keys are the 5-item 'pi' queries.

    Returns:
        dict mapping every query to its set of known entity answers.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_pi_e:
        known_by_query[pattern] = {
            entity
            for mid in lookup((pattern[0], pattern[1]), set())
            for entity in (lookup((mid, pattern[2]), set())
                           & lookup((pattern[3], pattern[4]), set()))
        }
    return known_by_query

def get_test_filter_c_pi(train_answers_1p_e, test_answers_pi_c, is_dict_train):
    """Collect, per 'pi' query, the concept answers already derivable from training data.

    The entity answers are computed as in the entity variant (two-hop path
    intersected with the answers of ``(q[3], q[4])``) and then mapped to their
    concepts through ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_pi_c: dict whose keys are the 5-item 'pi' queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of known concept answers.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_pi_c:
        known_by_query[pattern] = {
            concept
            for mid in lookup((pattern[0], pattern[1]), set())
            for entity in (lookup((mid, pattern[2]), set())
                           & lookup((pattern[3], pattern[4]), set()))
            for concept in is_dict_train.get(entity, set())
        }
    return known_by_query

In [17]:
# Build the 'pi' test split: sample entity- and concept-answer queries, then
# compute for each the answers already derivable from the training graph.
test_answers_pi_e = get_test_answers_pi_e(train_answers_1p_e, test_answers_1p_e)
# Concept answers for test queries are drawn from the full instance-of dict.
test_answers_pi_c = get_test_answers_pi_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_pi_e = get_test_filter_e_pi(train_answers_1p_e, test_answers_pi_e)
# Filters use only the training instance-of dict.
test_filter_answers_pi_c = get_test_filter_c_pi(train_answers_1p_e, test_answers_pi_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/pi.pkl.
ret_pi = {'test': {'e': test_answers_pi_e, 'c': test_answers_pi_c},
          'test_filter': {'e': test_filter_answers_pi_e, 'c': test_filter_answers_pi_c}}
save_obj(ret_pi, save_root + 'pi.pkl')
print('Done pi')

Done pi


In [18]:
def get_test_answers_ip_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` 'ip' test queries whose answer is an entity.

    Each sample starts from a held-out 1p fact ``query_1 -> answer_1``, picks a
    second 1p query ``query_2`` from ``t_2_hr[answer_1]`` and then an entry
    ``query_3`` from ``h_2_rt[answer_1]`` whose second item is the final answer.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to the answer entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of once per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(list(h_2_rt[answer_1]))
        except (KeyError, IndexError):
            # No index entry (or an empty one) for this entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        # Reject degenerate samples in which two of the four entities coincide.
        if len({query_1[0], query_2[0], answer_1, query_3[1]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = query_3[1]
    return ret

def get_test_answers_ip_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` 'ip' test queries whose answer is a concept.

    Same sampling as ``get_test_answers_ip_e``, but the stored answer is a random
    concept of the answer entity, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to one concept of the entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of once per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(list(h_2_rt[answer_1]))
            # Reject degenerate samples in which two of the four entities coincide.
            if len({query_1[0], query_2[0], answer_1, query_3[1]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_3[1]]))
        except (KeyError, IndexError):
            # Missing/empty index entry or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = concept
    return ret

def get_test_filter_e_ip(train_answers_1p_e, test_answers_pi_e):
    """Collect, per 'ip' query, the entity answers already derivable from training data.

    The answers of ``(q[0], q[1])`` and ``(q[2], q[3])`` are intersected and every
    shared entity is then expanded through relation ``q[4]``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_pi_e: dict whose keys are the 5-item queries to filter
            (despite the parameter name, the notebook passes the 'ip' queries).

    Returns:
        dict mapping every query to its set of known entity answers.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_pi_e:
        shared = (lookup((pattern[0], pattern[1]), set())
                  & lookup((pattern[2], pattern[3]), set()))
        known_by_query[pattern] = {
            entity
            for mid in shared
            for entity in lookup((mid, pattern[4]), set())
        }
    return known_by_query

def get_test_filter_c_ip(train_answers_1p_e, test_answers_pi_e, is_dict_train):
    """Collect, per 'ip' query, the concept answers already derivable from training data.

    Entity answers are computed as in the entity variant (intersection of the
    two 1p queries, expanded through relation ``q[4]``) and mapped to concepts
    via ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_pi_e: dict whose keys are the 5-item queries to filter
            (despite the parameter name, the notebook passes the concept-answer
            'ip' queries).
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of known concept answers.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_pi_e:
        shared = (lookup((pattern[0], pattern[1]), set())
                  & lookup((pattern[2], pattern[3]), set()))
        known_by_query[pattern] = {
            concept
            for mid in shared
            for entity in lookup((mid, pattern[4]), set())
            for concept in is_dict_train.get(entity, set())
        }
    return known_by_query

In [19]:
# Build the 'ip' test split: sample entity- and concept-answer queries, then
# compute for each the answers already derivable from the training graph.
test_answers_ip_e = get_test_answers_ip_e(train_answers_1p_e, test_answers_1p_e)
# Concept answers for test queries are drawn from the full instance-of dict.
test_answers_ip_c = get_test_answers_ip_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_ip_e = get_test_filter_e_ip(train_answers_1p_e, test_answers_ip_e)
# Filters use only the training instance-of dict.
test_filter_answers_ip_c = get_test_filter_c_ip(train_answers_1p_e, test_answers_ip_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/ip.pkl.
ret_ip = {'test': {'e': test_answers_ip_e, 'c': test_answers_ip_c},
          'test_filter': {'e': test_filter_answers_ip_e, 'c': test_filter_answers_ip_c}}
save_obj(ret_ip, save_root + 'ip.pkl')
print('Done ip')

Done ip


In [20]:
def get_test_answers_2u_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` '2u' (union) test queries whose answer is an entity.

    ``query_1`` is a held-out 1p query and supplies the answer; ``query_2`` is
    any different training 1p query forming the other branch of the union.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_1p_e: held-out 1p answers with the same layout.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1])`` to one
        held-out answer of ``query_1``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    # Both key lists are loop-invariant; build them once instead of per draw.
    test_queries = list(test_answers_1p_e.keys())
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        query_2 = random.choice(train_queries)
        if query_1 == query_2:
            continue
        try:
            answer = random.choice(list(test_answers_1p_e[query_1]))
        except (KeyError, IndexError):
            # Empty answer set: resample. A bare ``except`` would also swallow
            # KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = answer
    return ret

def get_test_answers_2u_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` '2u' (union) test queries whose answer is a concept.

    Same sampling as ``get_test_answers_2u_e``, but the stored answer is a random
    concept of the sampled answer entity, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_1p_e: held-out 1p answers with the same layout.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1])`` to a concept.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    # Both key lists are loop-invariant; build them once instead of per draw.
    test_queries = list(test_answers_1p_e.keys())
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        query_2 = random.choice(train_queries)
        if query_1 == query_2:
            continue
        try:
            answer = random.choice(list(test_answers_1p_e[query_1]))
            concept = random.choice(list(is_dict[answer]))
        except (KeyError, IndexError):
            # Empty answer set or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = concept
    return ret

def get_test_filter_e_2u(train_answers_1p_e, test_answers_2u_e):
    """Collect, per '2u' query, the entity answers already derivable from training data.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_2u_e: dict whose keys are the 4-item union queries.

    Returns:
        dict mapping every query to the union of the training answers of its
        two branches ``(q[0], q[1])`` and ``(q[2], q[3])``.
    """
    lookup = train_answers_1p_e.get
    return {
        pattern: (lookup((pattern[0], pattern[1]), set())
                  | lookup((pattern[2], pattern[3]), set()))
        for pattern in test_answers_2u_e
    }

def get_test_filter_c_2u(train_answers_1p_e, test_answers_2u_c, is_dict_train):
    """Collect, per '2u' query, the concept answers already derivable from training data.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_2u_c: dict whose keys are the 4-item union queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to the concepts of all entities answering
        either branch ``(q[0], q[1])`` or ``(q[2], q[3])`` in training.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_2u_c:
        either_branch = (lookup((pattern[0], pattern[1]), set())
                         | lookup((pattern[2], pattern[3]), set()))
        known_by_query[pattern] = {
            concept
            for entity in either_branch
            for concept in is_dict_train.get(entity, set())
        }
    return known_by_query

In [21]:
# Build the '2u' (union) test split: sample entity- and concept-answer queries,
# then compute for each the answers already derivable from the training graph.
test_answers_2u_e = get_test_answers_2u_e(train_answers_1p_e, test_answers_1p_e)
# Concept answers for test queries are drawn from the full instance-of dict.
test_answers_2u_c = get_test_answers_2u_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_2u_e = get_test_filter_e_2u(train_answers_1p_e, test_answers_2u_e)
# Filters use only the training instance-of dict.
test_filter_answers_2u_c = get_test_filter_c_2u(train_answers_1p_e, test_answers_2u_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/2u.pkl.
ret_2u = {'test': {'e': test_answers_2u_e, 'c': test_answers_2u_c},
          'test_filter': {'e': test_filter_answers_2u_e, 'c': test_filter_answers_2u_c}}
save_obj(ret_2u, save_root + '2u.pkl')
print('Done 2u')

Done 2u


In [22]:
def get_test_answers_up_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` 'up' (union then projection) test queries with entity answers.

    ``query_1`` is a held-out 1p query, ``query_2`` any different training 1p
    query. One held-out answer of ``query_1`` is followed through an entry
    ``query_3`` of ``h_2_rt`` whose second item is the final answer.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to the answer entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # Both key lists are loop-invariant; build them once instead of per draw.
    test_queries = list(test_answers_1p_e.keys())
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        query_2 = random.choice(train_queries)
        if query_1 == query_2:
            continue
        try:
            answer = random.choice(list(test_answers_1p_e[query_1]))
            query_3 = random.choice(list(h_2_rt[answer]))
        except (KeyError, IndexError):
            # Empty answers or no index entry for the entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        # Reject degenerate samples in which two of the four entities coincide.
        if len({query_1[0], query_2[0], answer, query_3[1]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = query_3[1]
    return ret

def get_test_answers_up_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` 'up' (union then projection) test queries with concept answers.

    Same sampling as ``get_test_answers_up_e``, but the stored answer is a random
    concept of the answer entity, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to one concept of the entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # Both key lists are loop-invariant; build them once instead of per draw.
    test_queries = list(test_answers_1p_e.keys())
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        query_2 = random.choice(train_queries)
        if query_1 == query_2:
            continue
        try:
            answer = random.choice(list(test_answers_1p_e[query_1]))
            query_3 = random.choice(list(h_2_rt[answer]))
            # Reject degenerate samples in which two of the four entities coincide.
            if len({query_1[0], query_2[0], answer, query_3[1]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_3[1]]))
        except (KeyError, IndexError):
            # Missing/empty index entry or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = concept
    return ret

def get_test_filter_e_up(train_answers_1p_e, test_answers_up_e):
    """Collect, per 'up' query, the entity answers already derivable from training data.

    The training answers of ``(q[0], q[1])`` and ``(q[2], q[3])`` are united and
    each entity of the union is expanded through relation ``q[4]``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_up_e: dict whose keys are the 5-item 'up' queries.

    Returns:
        dict mapping every query to its set of known entity answers.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_up_e:
        either_branch = (lookup((pattern[0], pattern[1]), set())
                         | lookup((pattern[2], pattern[3]), set()))
        known_by_query[pattern] = {
            entity
            for mid in either_branch
            for entity in lookup((mid, pattern[4]), set())
        }
    return known_by_query

def get_test_filter_c_up(train_answers_1p_e, test_answers_up_e, is_dict_train):
    """Collect, per 'up' query, the concept answers already derivable from training data.

    Entity answers are computed as in the entity variant (union of the two 1p
    branches, expanded through relation ``q[4]``) and mapped to concepts via
    ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_up_e: dict whose keys are the 5-item 'up' queries (the
            notebook passes the concept-answer queries here).
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of known concept answers.
    """
    lookup = train_answers_1p_e.get
    known_by_query = {}
    for pattern in test_answers_up_e:
        either_branch = (lookup((pattern[0], pattern[1]), set())
                         | lookup((pattern[2], pattern[3]), set()))
        known_by_query[pattern] = {
            concept
            for mid in either_branch
            for entity in lookup((mid, pattern[4]), set())
            for concept in is_dict_train.get(entity, set())
        }
    return known_by_query

In [23]:
# Build the 'up' test split: sample entity- and concept-answer queries, then
# compute for each the answers already derivable from the training graph.
test_answers_up_e = get_test_answers_up_e(train_answers_1p_e, test_answers_1p_e)
# Concept answers for test queries are drawn from the full instance-of dict.
test_answers_up_c = get_test_answers_up_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_up_e = get_test_filter_e_up(train_answers_1p_e, test_answers_up_e)
# Filters use only the training instance-of dict.
test_filter_answers_up_c = get_test_filter_c_up(train_answers_1p_e, test_answers_up_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/up.pkl.
ret_up = {'test': {'e': test_answers_up_e, 'c': test_answers_up_c},
          'test_filter': {'e': test_filter_answers_up_e, 'c': test_filter_answers_up_c}}
save_obj(ret_up, save_root + 'up.pkl')
print('Done up')

Done up


In [24]:
# Universe of entities for the negation queries: every entity that appears as
# an answer of some training 1p query. Built with a single union call because
# `all_e = all_e | s` in a loop copies the growing set on every iteration.
all_e = set().union(*train_answers_1p_e.values())

In [25]:
def get_train_answers_2in_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` '2in' (intersection with negation) training queries.

    The answer is an entity that answers ``query_1`` but does NOT answer the
    negated branch ``query_2``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1])`` to the
        sampled answer entity.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    # The key list is loop-invariant; build it once instead of twice per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        # Positive branch: a random 1p query and one of its answers.
        query_1 = random.choice(train_queries)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        # Negated branch: any 1p query that does not have answer_1 as an answer.
        query_2 = random.choice(train_queries)
        if answer_1 in train_answers_1p_e[query_2]:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = answer_1
    return ret
        
def get_train_answers_2in_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` '2in' training queries whose answer is a concept.

    An entity answering ``query_1`` but not the negated branch ``query_2`` is
    sampled, and a random concept of that entity (from ``is_dict``) is stored.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1])`` to a concept.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    # The key list is loop-invariant; build it once instead of twice per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        # Positive branch: a random 1p query and one of its answers.
        query_1 = random.choice(train_queries)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        # Negated branch: any 1p query that does not have answer_1 as an answer.
        query_2 = random.choice(train_queries)
        if answer_1 in train_answers_1p_e[query_2]:
            continue
        try:
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Entity has no concepts: resample. A bare ``except`` would also
            # swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = concept
    return ret

def get_test_answers_2in_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` '2in' (intersection with negation) test queries.

    Both branches are drawn from the held-out 1p queries; the answer is a
    held-out answer of ``query_1`` that is not a held-out answer of ``query_2``.

    Args:
        train_answers_1p_e: training 1p answers; accepted for signature parity
            with the other samplers but not used by the sampling itself.
        test_answers_1p_e: held-out 1p answers (2-tuple key -> set of entities).
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1])`` to the
        sampled answer entity.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    # The key list is loop-invariant; build it once instead of twice per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        # Negated branch: any query that does not have answer_1 as an answer.
        query_2 = random.choice(test_queries)
        if answer_1 in test_answers_1p_e[query_2]:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = answer_1
    return ret
        
def get_test_answers_2in_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` '2in' test queries whose answer is a concept.

    Same sampling as ``get_test_answers_2in_e``, but the stored answer is a
    random concept of the sampled entity, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers; accepted for signature parity
            with the other samplers but not used by the sampling itself.
        test_answers_1p_e: held-out 1p answers (2-tuple key -> set of entities).
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1])`` to a concept.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    # The key list is loop-invariant; build it once instead of twice per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        # Negated branch: any query that does not have answer_1 as an answer.
        query_2 = random.choice(test_queries)
        if answer_1 in test_answers_1p_e[query_2]:
            continue
        try:
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Entity has no concepts: resample. A bare ``except`` would also
            # swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1])] = concept
    return ret

def get_train_filter_e_2in(train_answers_1p_e, train_answers_2in_e):
    """Compute, per '2in' query, all training entities that satisfy it.

    A '2in' query keeps the answers of ``(q[0], q[1])`` that are not answers of
    the negated branch ``(q[2], q[3])``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        train_answers_2in_e: dict whose keys are the 4-item '2in' queries.

    Returns:
        dict mapping every query to its set of satisfying entities.
    """
    ret = {}
    for query in train_answers_2in_e:
        answers_1 = train_answers_1p_e.get((query[0], query[1]), set())
        answers_2 = train_answers_1p_e.get((query[2], query[3]), set())
        # answers_1 intersected with the complement of answers_2. Because
        # answers_1 is already a subset of the entity universe, this is a plain
        # set difference and the universe never has to be materialised.
        ret[query] = answers_1 - answers_2
    return ret

def get_train_filter_c_2in(train_answers_1p_e, train_answers_2in_c, is_dict_train):
    """Compute, per '2in' query, all training concepts that satisfy it.

    Entities answering ``(q[0], q[1])`` but not the negated branch
    ``(q[2], q[3])`` are mapped to their concepts via ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        train_answers_2in_c: dict whose keys are the 4-item '2in' queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of satisfying concepts.
    """
    ret = {}
    for query in train_answers_2in_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query[0], query[1]), set())
        answers_2 = train_answers_1p_e.get((query[2], query[3]), set())
        # Plain set difference: equivalent to intersecting with the complement
        # of answers_2 in the entity universe, without needing a notebook-level
        # global universe set.
        for answer in answers_1 - answers_2:
            filters.update(is_dict_train.get(answer, set()))
        ret[query] = filters
    return ret

def get_test_filter_e_2in(train_answers_1p_e, test_answers_2in_e):
    """Collect, per '2in' test query, the entity answers derivable from training data.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_2in_e: dict whose keys are the 4-item '2in' queries.

    Returns:
        dict mapping every query to the training answers of ``(q[0], q[1])``
        that are not training answers of the negated branch ``(q[2], q[3])``.
    """
    ret = {}
    for query_test in test_answers_2in_e:
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        # Plain set difference: equivalent to intersecting with the complement
        # of answers_2 in the entity universe, without needing a notebook-level
        # global universe set.
        ret[query_test] = answers_1 - answers_2
    return ret

def get_test_filter_c_2in(train_answers_1p_e, test_answers_2in_c, is_dict_train):
    """Collect, per '2in' test query, the concept answers derivable from training data.

    Entities answering ``(q[0], q[1])`` but not the negated branch
    ``(q[2], q[3])`` in training are mapped to their concepts via
    ``is_dict_train``; the concepts of all such entities are united.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_2in_c: dict whose keys are the 4-item '2in' queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of known concept answers.
    """
    ret = {}
    for query_test in test_answers_2in_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        # Plain set difference: equivalent to intersecting with the complement
        # of answers_2 in the entity universe.
        for answer in answers_1 - answers_2:
            # Accumulate with a union. Intersecting with the initially empty
            # set (as the previous code did) always produced an empty filter.
            filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret

In [26]:
# Build the '2in' split. Training concept answers come from the training
# instance-of dict, test concept answers from the full one.
train_answers_2in_e = get_train_answers_2in_e(train_answers_1p_e)
train_answers_2in_c = get_train_answers_2in_c(train_answers_1p_e, is_dict_train)
test_answers_2in_e = get_test_answers_2in_e(train_answers_1p_e, test_answers_1p_e)
test_answers_2in_c = get_test_answers_2in_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
# Filters are always computed from training data only.
train_filter_answers_2in_e = get_train_filter_e_2in(train_answers_1p_e, train_answers_2in_e)
train_filter_answers_2in_c = get_train_filter_c_2in(train_answers_1p_e, train_answers_2in_c, is_dict_train)
test_filter_answers_2in_e = get_test_filter_e_2in(train_answers_1p_e, test_answers_2in_e)
test_filter_answers_2in_c = get_test_filter_c_2in(train_answers_1p_e, test_answers_2in_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/2in.pkl.
ret_2in = {'train': {'e': train_answers_2in_e, 'c': train_answers_2in_c}, 
          'test': {'e': test_answers_2in_e, 'c': test_answers_2in_c},
          'train_filter': {'e': train_filter_answers_2in_e, 'c': train_filter_answers_2in_c},
          'test_filter': {'e': test_filter_answers_2in_e, 'c': test_filter_answers_2in_c}}
save_obj(ret_2in, save_root + '2in.pkl')
print('Done 2in')

Done 2in


In [27]:
def get_train_answers_3in_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` '3in' training queries whose answer is an entity.

    ``query_1`` and ``query_2`` are two different 1p queries that both reach
    ``answer_1`` (``query_2`` comes from ``t_2_hr[answer_1]``); ``query_3`` is a
    negated branch that does not have ``answer_1`` among its answers.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        k: number of distinct queries to collect.

    Returns:
        dict mapping the 6-item key
        ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])``
        to ``answer_1``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(train_queries)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
        except (KeyError, IndexError):
            # No index entry (or an empty one) for this entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        query_3 = random.choice(train_queries)
        # Need two distinct positive branches and a negated branch that
        # excludes the answer.
        if query_1 == query_2 or answer_1 in train_answers_1p_e[query_3]:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = answer_1
    return ret

def get_train_answers_3in_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` '3in' training queries whose answer is a concept.

    Same sampling as ``get_train_answers_3in_e``, but the stored answer is a
    random concept of ``answer_1``, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping the 6-item key
        ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])``
        to a concept.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(train_queries)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(train_queries)
            # Need two distinct positive branches and a negated branch that
            # excludes the answer.
            if query_1 == query_2 or answer_1 in train_answers_1p_e[query_3]:
                continue
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Missing/empty index entry or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = concept
    return ret

def get_test_answers_3in_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` '3in' test queries whose answer is an entity.

    ``query_1 -> answer_1`` is a held-out 1p fact, ``query_2`` a query from
    ``t_2_hr[answer_1]`` (index built from the training answers) and ``query_3``
    a held-out query that does not have ``answer_1`` among its held-out answers.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        k: number of distinct queries to collect.

    Returns:
        dict mapping the 6-item key
        ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])``
        to ``answer_1``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
        except (KeyError, IndexError):
            # No index entry (or an empty one) for this entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        query_3 = random.choice(test_queries)
        # Need two distinct positive branches and a negated branch that
        # excludes the answer.
        if query_1 == query_2 or answer_1 in test_answers_1p_e[query_3]:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = answer_1
    return ret

def get_test_answers_3in_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` '3in' test queries whose answer is a concept.

    Same sampling as ``get_test_answers_3in_e``, but the stored answer is a
    random concept of ``answer_1``, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping the 6-item key
        ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])``
        to a concept.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(test_queries)
            # Need two distinct positive branches and a negated branch that
            # excludes the answer.
            if query_1 == query_2 or answer_1 in test_answers_1p_e[query_3]:
                continue
            concept = random.choice(list(is_dict[answer_1]))
        except (KeyError, IndexError):
            # Missing/empty index entry or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0], query_3[1])] = concept
    return ret

def get_train_filter_e_3in(train_answers_1p_e, train_answers_3in_e):
    """Compute, per '3in' query, all training entities that satisfy it.

    A '3in' query keeps entities answering both ``(q[0], q[1])`` and
    ``(q[2], q[3])`` that do not answer the negated branch ``(q[4], q[5])``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        train_answers_3in_e: dict whose keys are the 6-item '3in' queries.

    Returns:
        dict mapping every query to its set of satisfying entities.
    """
    ret = {}
    for query in train_answers_3in_e:
        answers_1 = train_answers_1p_e.get((query[0], query[1]), set())
        answers_2 = train_answers_1p_e.get((query[2], query[3]), set())
        answers_3 = train_answers_1p_e.get((query[4], query[5]), set())
        # Set difference instead of intersecting with the complement of
        # answers_3: same result, no notebook-level universe set required.
        ret[query] = (answers_1 & answers_2) - answers_3
    return ret

def get_train_filter_c_3in(train_answers_1p_e, train_answers_3in_c, is_dict_train):
    """Compute, per '3in' query, all training concepts that satisfy it.

    Entities answering both ``(q[0], q[1])`` and ``(q[2], q[3])`` but not the
    negated branch ``(q[4], q[5])`` are mapped to concepts via ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        train_answers_3in_c: dict whose keys are the 6-item '3in' queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of satisfying concepts.
    """
    ret = {}
    for query in train_answers_3in_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query[0], query[1]), set())
        answers_2 = train_answers_1p_e.get((query[2], query[3]), set())
        answers_3 = train_answers_1p_e.get((query[4], query[5]), set())
        # Set difference instead of intersecting with the complement of
        # answers_3: same result, no notebook-level universe set required.
        for answer in (answers_1 & answers_2) - answers_3:
            filters.update(is_dict_train.get(answer, set()))
        ret[query] = filters
    return ret

def get_test_filter_e_3in(train_answers_1p_e, test_answers_3in_e):
    """Collect, per '3in' test query, the entity answers derivable from training data.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_3in_e: dict whose keys are the 6-item '3in' queries.

    Returns:
        dict mapping every query to the entities that answer both
        ``(q[0], q[1])`` and ``(q[2], q[3])`` but not ``(q[4], q[5])`` in training.
    """
    ret = {}
    for query_test in test_answers_3in_e:
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        answers_3 = train_answers_1p_e.get((query_test[4], query_test[5]), set())
        # Set difference instead of intersecting with the complement of
        # answers_3: same result, no notebook-level universe set required.
        ret[query_test] = (answers_1 & answers_2) - answers_3
    return ret

def get_test_filter_c_3in(train_answers_1p_e, test_answers_3in_c, is_dict_train):
    """Collect, per '3in' test query, the concept answers derivable from training data.

    Entities answering both ``(q[0], q[1])`` and ``(q[2], q[3])`` but not the
    negated branch ``(q[4], q[5])`` in training are mapped to concepts via
    ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_3in_c: dict whose keys are the 6-item '3in' queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of known concept answers.
    """
    ret = {}
    for query_test in test_answers_3in_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        answers_3 = train_answers_1p_e.get((query_test[4], query_test[5]), set())
        # Set difference instead of intersecting with the complement of
        # answers_3: same result, no notebook-level universe set required.
        for answer in (answers_1 & answers_2) - answers_3:
            filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret

In [28]:
# Build the '3in' split. Training concept answers come from the training
# instance-of dict, test concept answers from the full one.
train_answers_3in_e = get_train_answers_3in_e(train_answers_1p_e)
train_answers_3in_c = get_train_answers_3in_c(train_answers_1p_e, is_dict_train)
test_answers_3in_e = get_test_answers_3in_e(train_answers_1p_e, test_answers_1p_e)
test_answers_3in_c = get_test_answers_3in_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
# Filters are always computed from training data only.
train_filter_answers_3in_e = get_train_filter_e_3in(train_answers_1p_e, train_answers_3in_e)
train_filter_answers_3in_c = get_train_filter_c_3in(train_answers_1p_e, train_answers_3in_c, is_dict_train)
test_filter_answers_3in_e = get_test_filter_e_3in(train_answers_1p_e, test_answers_3in_e)
test_filter_answers_3in_c = get_test_filter_c_3in(train_answers_1p_e, test_answers_3in_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/3in.pkl.
ret_3in = {'train': {'e': train_answers_3in_e, 'c': train_answers_3in_c}, 
          'test': {'e': test_answers_3in_e, 'c': test_answers_3in_c},
          'train_filter': {'e': train_filter_answers_3in_e, 'c': train_filter_answers_3in_c},
          'test_filter': {'e': test_filter_answers_3in_e, 'c': test_filter_answers_3in_c}}
save_obj(ret_3in, save_root + '3in.pkl')
print('Done 3in')

Done 3in


In [29]:
def get_train_answers_inp_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` 'inp' training queries whose answer is an entity.

    ``answer_1`` answers ``query_1`` but not the negated branch ``query_2``; it
    is then followed through an entry ``query_3`` of ``h_2_rt[answer_1]`` whose
    second item is the final answer.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to the answer entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(train_queries)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        # Negated branch must not contain the intermediate answer.
        query_2 = random.choice(train_queries)
        if answer_1 in train_answers_1p_e[query_2]:
            continue
        try:
            query_3 = random.choice(list(h_2_rt[answer_1]))
        except (KeyError, IndexError):
            # No index entry (or an empty one) for this entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        # Reject degenerate samples in which two of the four entities coincide.
        if len({query_1[0], query_2[0], answer_1, query_3[1]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = query_3[1]
    return ret

def get_train_answers_inp_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` 'inp' training queries whose answer is a concept.

    Same sampling as ``get_train_answers_inp_e``, but the stored answer is a
    random concept of the final entity, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to one concept of the entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(train_queries)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        # Negated branch must not contain the intermediate answer.
        query_2 = random.choice(train_queries)
        if answer_1 in train_answers_1p_e[query_2]:
            continue
        try:
            query_3 = random.choice(list(h_2_rt[answer_1]))
            # Reject degenerate samples in which two of the four entities coincide.
            if len({query_1[0], query_2[0], answer_1, query_3[1]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_3[1]]))
        except (KeyError, IndexError):
            # Missing/empty index entry or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = concept
    return ret

def get_train_filter_e_inp(train_answers_1p_e, train_answers_inp_e):
    """Compute, per 'inp' query, all training entities that satisfy it.

    Entities answering ``(q[0], q[1])`` but not the negated branch
    ``(q[2], q[3])`` are expanded through relation ``q[4]``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        train_answers_inp_e: dict whose keys are the 5-item 'inp' queries.

    Returns:
        dict mapping every query to its set of satisfying entities.
    """
    ret = {}
    for query_test in train_answers_inp_e:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        # Set difference instead of intersecting with the complement of
        # answers_2: same result, no notebook-level universe set required.
        for answer_12 in answers_1 - answers_2:
            filters.update(train_answers_1p_e.get((answer_12, query_test[4]), set()))
        ret[query_test] = filters
    return ret

def get_train_filter_c_inp(train_answers_1p_e, train_answers_inp_c, is_dict_train):
    """Compute, per 'inp' query, all training concepts that satisfy it.

    Entities answering ``(q[0], q[1])`` but not the negated branch
    ``(q[2], q[3])`` are expanded through relation ``q[4]`` and the reached
    entities are mapped to concepts via ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        train_answers_inp_c: dict whose keys are the 5-item 'inp' queries.
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of satisfying concepts.
    """
    ret = {}
    for query_test in train_answers_inp_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        # Set difference instead of intersecting with the complement of
        # answers_2: same result, no notebook-level universe set required.
        for answer_12 in answers_1 - answers_2:
            for answer in train_answers_1p_e.get((answer_12, query_test[4]), set()):
                filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret
    
def get_test_answers_inp_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` 'inp' test queries whose answer is an entity.

    ``answer_1`` is a held-out answer of ``query_1`` that is not a held-out
    answer of the negated branch ``query_2``; it is then followed through an
    entry ``query_3`` of ``h_2_rt[answer_1]`` (index built from the training
    answers) whose second item is the final answer.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to the answer entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        # Negated branch must not contain the intermediate answer.
        query_2 = random.choice(test_queries)
        if answer_1 in test_answers_1p_e[query_2]:
            continue
        try:
            query_3 = random.choice(list(h_2_rt[answer_1]))
        except (KeyError, IndexError):
            # No index entry (or an empty one) for this entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        # Reject degenerate samples in which two of the four entities coincide.
        if len({query_1[0], query_2[0], answer_1, query_3[1]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = query_3[1]
    return ret

def get_test_answers_inp_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` 'inp' test queries whose answer is a concept.

    Same sampling as ``get_test_answers_inp_e``, but the stored answer is a
    random concept of the final entity, looked up in ``is_dict``.

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        test_answers_1p_e: held-out 1p answers with the same layout.
        is_dict: mapping entity -> iterable of concepts.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])``
        to one concept of the entity ``query_3[1]``.

    Note:
        Sampling repeats until ``k`` distinct keys exist; it never terminates if
        fewer than ``k`` valid queries can be built.
    """
    ret = {}
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of twice per draw.
    test_queries = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(test_queries)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        # Negated branch must not contain the intermediate answer.
        query_2 = random.choice(test_queries)
        if answer_1 in test_answers_1p_e[query_2]:
            continue
        try:
            query_3 = random.choice(list(h_2_rt[answer_1]))
            # Reject degenerate samples in which two of the four entities coincide.
            if len({query_1[0], query_2[0], answer_1, query_3[1]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_3[1]]))
        except (KeyError, IndexError):
            # Missing/empty index entry or an entity without concepts: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_2[1], query_3[0])] = concept
    return ret

def get_test_filter_e_inp(train_answers_1p_e, test_answers_pi_e):
    """Collect, per 'inp' test query, the entity answers derivable from training data.

    Entities answering ``(q[0], q[1])`` but not the negated branch
    ``(q[2], q[3])`` in training are expanded through relation ``q[4]``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_pi_e: dict whose keys are the 5-item queries to filter
            (despite the parameter name, the notebook passes the 'inp' queries).

    Returns:
        dict mapping every query to its set of known entity answers.
    """
    ret = {}
    for query_test in test_answers_pi_e:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        # Set difference instead of intersecting with the complement of
        # answers_2: same result, no notebook-level universe set required.
        for answer_12 in answers_1 - answers_2:
            filters.update(train_answers_1p_e.get((answer_12, query_test[4]), set()))
        ret[query_test] = filters
    return ret

def get_test_filter_c_inp(train_answers_1p_e, test_answers_pi_c, is_dict_train):
    """Collect, per 'inp' test query, the concept answers derivable from training data.

    Entities answering ``(q[0], q[1])`` but not the negated branch
    ``(q[2], q[3])`` in training are expanded through relation ``q[4]`` and the
    reached entities are mapped to concepts via ``is_dict_train``.

    Args:
        train_answers_1p_e: training 1p answers (2-tuple key -> set of entities).
        test_answers_pi_c: dict whose keys are the 5-item queries to filter
            (despite the parameter name, the notebook passes the 'inp' queries).
        is_dict_train: mapping entity -> iterable of concepts (training split).

    Returns:
        dict mapping every query to its set of known concept answers.
    """
    ret = {}
    for query_test in test_answers_pi_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        answers_2 = train_answers_1p_e.get((query_test[2], query_test[3]), set())
        # Set difference instead of intersecting with the complement of
        # answers_2: same result, no notebook-level universe set required.
        for answer_12 in answers_1 - answers_2:
            for answer in train_answers_1p_e.get((answer_12, query_test[4]), set()):
                filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret

In [30]:
# Build the 'inp' split.
# Training concept answers must come from the training instance-of dict (as in
# the 2in/3in cells); sampling them from `is_dict_all` would leak held-out
# instance-of facts into the training answers.
train_answers_inp_e = get_train_answers_inp_e(train_answers_1p_e)
train_answers_inp_c = get_train_answers_inp_c(train_answers_1p_e, is_dict_train)
train_filter_answers_inp_e = get_train_filter_e_inp(train_answers_1p_e, train_answers_inp_e)
train_filter_answers_inp_c = get_train_filter_c_inp(train_answers_1p_e, train_answers_inp_c, is_dict_train)

# Test concept answers are drawn from the full instance-of dict; filters are
# computed from training data only.
test_answers_inp_e = get_test_answers_inp_e(train_answers_1p_e, test_answers_1p_e)
test_answers_inp_c = get_test_answers_inp_c(train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_inp_e = get_test_filter_e_inp(train_answers_1p_e, test_answers_inp_e)
test_filter_answers_inp_c = get_test_filter_c_inp(train_answers_1p_e, test_answers_inp_c, is_dict_train)

# Bundle everything and pickle it to <save_root>/inp.pkl.
ret_inp = {'train':{'e':train_answers_inp_e, 'c':train_answers_inp_c},
           'train_filter':{'e':train_filter_answers_inp_e, 'c':train_filter_answers_inp_c},
           'test': {'e': test_answers_inp_e, 'c': test_answers_inp_c},
           'test_filter': {'e': test_filter_answers_inp_e, 'c': test_filter_answers_inp_c}}
save_obj(ret_inp, save_root + 'inp.pkl')
print('Done inp')

Done inp


In [31]:
def get_train_answers_pni_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` 'pni' training queries whose answer is an entity.

    ``query_2`` is a random 1p query whose first item serves as the intermediate
    entity ``answer_1``; ``answer_2`` is an entity of the global universe
    ``all_e`` that is NOT an answer of ``query_2``. ``query_1`` is a 1p query
    reaching ``answer_1`` and ``query_3`` a 1p query reaching ``answer_2``
    (both taken from ``t_2_hr``).

    Args:
        train_answers_1p_e: training 1p answers keyed by 2-tuples (presumably
            ``(head, relation)``) with sets of answer entities as values.
        k: number of distinct queries to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[1], query_3[0], query_3[1])``
        to ``answer_2``.

    Note:
        Reads the notebook-level global ``all_e``. Sampling repeats until ``k``
        distinct keys exist; it never terminates if fewer than ``k`` valid
        queries can be built.
    """
    ret = {}
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # The key list is loop-invariant; build it once instead of once per draw.
    train_queries = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_2 = random.choice(train_queries)
        answers_2 = train_answers_1p_e.get(query_2, set())
        # Final answer: any entity that the negated hop does not reach.
        answer_2 = random.choice(list(all_e - answers_2))
        answer_1 = query_2[0]
        try:
            query_1 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(list(t_2_hr[answer_2]))
        except (KeyError, IndexError):
            # No index entry (or an empty one) for an entity: resample.
            # A bare ``except`` would also swallow KeyboardInterrupt here.
            continue
        # Reject degenerate samples in which two of the four entities coincide.
        if len({query_1[0], answer_1, answer_2, query_3[0]}) != 4:
            continue
        ret[(query_1[0], query_1[1], query_2[1], query_3[0], query_3[1])] = answer_2
    return ret

def get_train_answers_pni_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` training 'pni' queries, each with one concept answer.

    The query is drawn as follows: a 1p key ``query_2`` from
    ``train_answers_1p_e``, an entity ``answer_2`` from ``all_e`` outside that
    key's recorded answers, ``query_1`` from ``t_2_hr[query_2[0]]`` and
    ``query_3`` from ``t_2_hr[answer_2]``. The stored answer is one concept
    picked at random from ``is_dict[answer_2]``. Draws are discarded when the
    four entities involved are not pairwise distinct, or when an index/concept
    entry is missing or empty.

    Args:
        train_answers_1p_e: dict mapping 2-tuple 1p keys to sets of answer
            entities.
        is_dict: mapping from an entity to its collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[1], query_3[0],
        query_3[1])`` to the sampled concept.

    Note:
        Uses the notebook globals ``all_e`` and ``get_t_2_hr``. The loop does
        not terminate if fewer than ``k`` distinct keys can be produced.
    """
    ret = {}
    # Only the index returned by get_t_2_hr is consulted below.
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Materialise the key list once instead of on every draw.
    queries_1p = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_2 = random.choice(queries_1p)
        answers_2 = train_answers_1p_e.get(query_2, set())

        # Entities that are not recorded answers of query_2.
        candidates = list(all_e - answers_2)
        if not candidates:
            continue
        answer_2 = random.choice(candidates)
        answer_1 = query_2[0]
        try:
            query_1 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(list(t_2_hr[answer_2]))
            if len({query_1[0], answer_1, answer_2, query_3[0]}) != 4:
                continue
            concept = random.choice(list(is_dict[answer_2]))
        except (KeyError, IndexError):
            # Missing/empty index or concept entry: resample. Narrow on
            # purpose so KeyboardInterrupt can still stop the loop.
            continue
        ret[(query_1[0], query_1[1], query_2[1], query_3[0], query_3[1])] = concept
    return ret

def get_train_filter_e_pni(train_answers_1p_e, train_answers_pni_e):
    """Build the entity filter set for every training 'pni' query.

    For a query key ``q`` the filter is the union, over every intermediate
    answer ``a1`` of the 1p key ``(q[0], q[1])``, of the entities that are
    recorded answers of ``(q[3], q[4])``, belong to ``all_e``, and are NOT
    recorded answers of ``(a1, q[2])``.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        train_answers_pni_e: dict keyed by 5-element pni query tuples; only
            its keys are read.

    Returns:
        dict mapping each query key to its (possibly empty) set of entities.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in train_answers_pni_e:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        if answers_1:
            # Loop-invariant: the positive branch does not depend on a1.
            answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
            # (all_e - A2) & A3 == (A3 & all_e) - A2; this form never builds a
            # set the size of the whole entity universe.
            candidates = answers_3 & all_e
            for answer_1 in answers_1:
                answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
                filters |= candidates - answers_2
        ret[query_test] = filters
    return ret

def get_train_filter_c_pni(train_answers_1p_e, train_answers_pni_c, is_dict_train):
    """Build the concept filter set for every training 'pni' query.

    For a query key ``q``, first collect the entities that, for at least one
    intermediate answer ``a1`` of ``(q[0], q[1])``, are recorded answers of
    ``(q[3], q[4])``, belong to ``all_e``, and are NOT recorded answers of
    ``(a1, q[2])``. The filter is the union of the concepts that
    ``is_dict_train`` lists for those entities.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        train_answers_pni_c: dict keyed by 5-element pni query tuples; only
            its keys are read.
        is_dict_train: mapping from an entity to its collection of concepts;
            entities without an entry contribute nothing.

    Returns:
        dict mapping each query key to its (possibly empty) set of concepts.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in train_answers_pni_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        if answers_1:
            # Loop-invariant: the positive branch does not depend on a1.
            answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
            # (all_e - A2) & A3 == (A3 & all_e) - A2, without materialising a
            # universe-sized set per intermediate answer.
            candidates = answers_3 & all_e
            matched = set()
            for answer_1 in answers_1:
                answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
                matched |= candidates - answers_2
            # Look up each matching entity's concepts once.
            for answer in matched:
                filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret
    
    
def get_test_answers_pni_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` test 'pni' queries, each with one entity answer.

    Each iteration draws a 1p key ``query_2`` from ``test_answers_1p_e`` and an
    entity ``answer_2`` from ``all_e`` that is not among that key's recorded
    test answers. ``query_1`` and ``query_3`` are drawn from the index built
    on the TRAINING answers (``t_2_hr[query_2[0]]`` and ``t_2_hr[answer_2]``).
    Draws whose four entities are not pairwise distinct, or whose index entry
    is missing/empty, are discarded.

    Args:
        train_answers_1p_e: dict of training 1p answers, used to build the
            ``t_2_hr`` index.
        test_answers_1p_e: dict mapping 2-tuple 1p keys to sets of test answer
            entities.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[1], query_3[0],
        query_3[1])`` to the sampled ``answer_2``.

    Note:
        Uses the notebook globals ``all_e`` and ``get_t_2_hr``. The loop does
        not terminate if fewer than ``k`` distinct keys can be produced.
        NOTE(review): ``answer_2`` is only checked against the test answers of
        ``query_2``, not its training answers -- confirm this is intended.
    """
    ret = {}
    # Only the index returned by get_t_2_hr is consulted below.
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Materialise the key list once instead of on every draw.
    queries_1p = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_2 = random.choice(queries_1p)
        answers_2 = test_answers_1p_e.get(query_2, set())

        # Entities that are not recorded test answers of query_2.
        candidates = list(all_e - answers_2)
        if not candidates:
            continue
        answer_2 = random.choice(candidates)
        answer_1 = query_2[0]
        try:
            query_1 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(list(t_2_hr[answer_2]))
            if len({query_1[0], answer_1, answer_2, query_3[0]}) != 4:
                continue
        except (KeyError, IndexError):
            # Missing or empty index entry: resample. Narrow on purpose so
            # KeyboardInterrupt can still stop this open-ended loop.
            continue
        ret[(query_1[0], query_1[1], query_2[1], query_3[0], query_3[1])] = answer_2
    return ret

def get_test_answers_pni_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` test 'pni' queries, each with one concept answer.

    A 1p key ``query_2`` is drawn from ``test_answers_1p_e`` and an entity
    ``answer_2`` from ``all_e`` outside that key's recorded test answers.
    ``query_1`` and ``query_3`` come from the index built on the TRAINING
    answers. The stored answer is one concept picked at random from
    ``is_dict[answer_2]``. Draws are discarded when the four entities are not
    pairwise distinct or when an index/concept entry is missing or empty.

    Args:
        train_answers_1p_e: dict of training 1p answers, used to build the
            ``t_2_hr`` index.
        test_answers_1p_e: dict mapping 2-tuple 1p keys to sets of test answer
            entities.
        is_dict: mapping from an entity to its collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[1], query_3[0],
        query_3[1])`` to the sampled concept.

    Note:
        Uses the notebook globals ``all_e`` and ``get_t_2_hr``. The loop does
        not terminate if fewer than ``k`` distinct keys can be produced.
    """
    ret = {}
    # Only the index returned by get_t_2_hr is consulted below.
    t_2_hr = get_t_2_hr(train_answers_1p_e)
    # Materialise the key list once instead of on every draw.
    queries_1p = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_2 = random.choice(queries_1p)
        answers_2 = test_answers_1p_e.get(query_2, set())

        # Entities that are not recorded test answers of query_2.
        candidates = list(all_e - answers_2)
        if not candidates:
            continue
        answer_2 = random.choice(candidates)
        answer_1 = query_2[0]
        try:
            query_1 = random.choice(list(t_2_hr[answer_1]))
            query_3 = random.choice(list(t_2_hr[answer_2]))
            if len({query_1[0], answer_1, answer_2, query_3[0]}) != 4:
                continue
            concept = random.choice(list(is_dict[answer_2]))
        except (KeyError, IndexError):
            # Missing/empty index or concept entry: resample. Narrow on
            # purpose so KeyboardInterrupt can still stop the loop.
            continue
        ret[(query_1[0], query_1[1], query_2[1], query_3[0], query_3[1])] = concept
    return ret

def get_test_filter_e_pni(train_answers_1p_e, test_answers_pi_e):
    """Build the entity filter set for every test 'pni' query.

    For a query key ``q`` the filter is the union, over every intermediate
    answer ``a1`` that the TRAINING answers give for ``(q[0], q[1])``, of the
    entities that are training answers of ``(q[3], q[4])``, belong to
    ``all_e``, and are NOT training answers of ``(a1, q[2])``.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        test_answers_pi_e: dict keyed by 5-element pni query tuples (the
            parameter name says "pi", but the notebook passes the pni test
            queries); only its keys are read.

    Returns:
        dict mapping each query key to its (possibly empty) set of entities.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in test_answers_pi_e:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        if answers_1:
            # Loop-invariant: the positive branch does not depend on a1.
            answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
            # (all_e - A2) & A3 == (A3 & all_e) - A2; this form never builds a
            # set the size of the whole entity universe.
            candidates = answers_3 & all_e
            for answer_1 in answers_1:
                answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
                filters |= candidates - answers_2
        ret[query_test] = filters
    return ret

def get_test_filter_c_pni(train_answers_1p_e, test_answers_pi_c, is_dict_train):
    """Build the concept filter set for every test 'pni' query.

    For a query key ``q``, first collect the entities that, for at least one
    intermediate training answer ``a1`` of ``(q[0], q[1])``, are training
    answers of ``(q[3], q[4])``, belong to ``all_e``, and are NOT training
    answers of ``(a1, q[2])``. The filter is the union of the concepts that
    ``is_dict_train`` lists for those entities.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        test_answers_pi_c: dict keyed by 5-element pni query tuples (named
            "pi", but the notebook passes the pni test queries); only its keys
            are read.
        is_dict_train: mapping from an entity to its collection of concepts;
            entities without an entry contribute nothing.

    Returns:
        dict mapping each query key to its (possibly empty) set of concepts.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in test_answers_pi_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        if answers_1:
            # Loop-invariant: the positive branch does not depend on a1.
            answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
            # (all_e - A2) & A3 == (A3 & all_e) - A2, without materialising a
            # universe-sized set per intermediate answer.
            candidates = answers_3 & all_e
            matched = set()
            for answer_1 in answers_1:
                answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
                matched |= candidates - answers_2
            # Look up each matching entity's concepts once.
            for answer in matched:
                filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret

In [32]:
# ---- 'pni' queries: sample answers, derive filter sets, persist ----
# Training split: concept answers are drawn using is_dict_all, while the
# concept filters are built from is_dict_train.
train_answers_pni_e = get_train_answers_pni_e(train_answers_1p_e)
train_answers_pni_c = get_train_answers_pni_c(train_answers_1p_e, is_dict_all)
train_filter_answers_pni_e = get_train_filter_e_pni(
    train_answers_1p_e, train_answers_pni_e)
train_filter_answers_pni_c = get_train_filter_c_pni(
    train_answers_1p_e, train_answers_pni_c, is_dict_train)

# Test split: queries are seeded from the test 1p answers; filters still use
# the training answers.
test_answers_pni_e = get_test_answers_pni_e(train_answers_1p_e, test_answers_1p_e)
test_answers_pni_c = get_test_answers_pni_c(
    train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_pni_e = get_test_filter_e_pni(
    train_answers_1p_e, test_answers_pni_e)
test_filter_answers_pni_c = get_test_filter_c_pni(
    train_answers_1p_e, test_answers_pni_c, is_dict_train)

# Bundle everything under split -> {'e': entity-level, 'c': concept-level}.
ret_pni = {}
ret_pni['train'] = {'e': train_answers_pni_e, 'c': train_answers_pni_c}
ret_pni['train_filter'] = {'e': train_filter_answers_pni_e,
                           'c': train_filter_answers_pni_c}
ret_pni['test'] = {'e': test_answers_pni_e, 'c': test_answers_pni_c}
ret_pni['test_filter'] = {'e': test_filter_answers_pni_e,
                          'c': test_filter_answers_pni_c}

save_obj(ret_pni, save_root + 'pni.pkl')
print('Done pni')

Done pni


In [33]:
def get_test_answers_pin_e(train_answers_1p_e, test_answers_1p_e, k=K_test):
    """Randomly sample ``k`` test 'pin' queries, each with one entity answer.

    Each iteration draws a 1p key ``query_1`` from ``test_answers_1p_e`` and
    one of its answers ``answer_1``, then a pair ``query_2`` from
    ``h_2_rt[answer_1]`` (index built on the TRAINING answers) whose second
    element is the candidate answer. A second test key ``query_3`` supplies
    the negated branch: the draw is discarded if the candidate is among
    ``test_answers_1p_e[query_3]``, if the four entities involved are not
    pairwise distinct, or if an index entry is missing/empty.

    Args:
        train_answers_1p_e: dict of training 1p answers, used to build the
            ``h_2_rt`` index.
        test_answers_1p_e: dict mapping 2-tuple 1p keys to sets of test answer
            entities.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_3[0],
        query_3[1])`` to ``query_2[1]``.

    Note:
        Uses the notebook global ``get_h_2_rt``. The loop does not terminate
        if fewer than ``k`` distinct keys can be produced.
    """
    ret = {}
    # Only the index returned by get_h_2_rt is consulted below.
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # Materialise the key list once; it is sampled twice per iteration.
    queries_1p = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1p)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            query_3 = random.choice(queries_1p)
            # Direct membership test; no need to copy the answers into a list.
            if query_2[1] in test_answers_1p_e[query_3]:
                continue
            if len({query_1[0], answer_1, query_2[1], query_3[0]}) != 4:
                continue
        except (KeyError, IndexError):
            # Missing or empty index entry: resample. Narrow on purpose so
            # KeyboardInterrupt can still stop this open-ended loop.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0], query_3[1])] = query_2[1]
    return ret

def get_test_answers_pin_c(train_answers_1p_e, test_answers_1p_e, is_dict, k=K_test):
    """Randomly sample ``k`` test 'pin' queries, each with one concept answer.

    Sampling: a test 1p key ``query_1`` and one of its answers ``answer_1``,
    a pair ``query_2`` from ``h_2_rt[answer_1]`` (index built on the TRAINING
    answers) whose second element is the candidate entity, and a second test
    key ``query_3`` for the negated branch. The stored answer is one concept
    picked at random from ``is_dict[query_2[1]]``. A draw is discarded if the
    candidate is among ``test_answers_1p_e[query_3]``, if the four entities
    are not pairwise distinct, or if an index/concept entry is missing/empty.

    Args:
        train_answers_1p_e: dict of training 1p answers, used to build the
            ``h_2_rt`` index.
        test_answers_1p_e: dict mapping 2-tuple 1p keys to sets of test answer
            entities.
        is_dict: mapping from an entity to its collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_3[0],
        query_3[1])`` to the sampled concept.

    Note:
        Uses the notebook global ``get_h_2_rt``. The loop does not terminate
        if fewer than ``k`` distinct keys can be produced.
    """
    ret = {}
    # Only the index returned by get_h_2_rt is consulted below.
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # Materialise the key list once; it is sampled twice per iteration.
    queries_1p = list(test_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1p)
        answer_1 = random.choice(list(test_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            query_3 = random.choice(queries_1p)
            # Direct membership test; no need to copy the answers into a list.
            if query_2[1] in test_answers_1p_e[query_3]:
                continue
            if len({query_1[0], answer_1, query_2[1], query_3[0]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_2[1]]))
        except (KeyError, IndexError):
            # Missing/empty index or concept entry: resample. Narrow on
            # purpose so KeyboardInterrupt can still stop the loop.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0], query_3[1])] = concept
    return ret

def get_test_filter_e_pin(train_answers_1p_e, test_answers_pi_e):
    """Build the entity filter set for every test 'pin' query.

    For a query key ``q`` the filter is the union, over every intermediate
    answer ``a1`` that the TRAINING answers give for ``(q[0], q[1])``, of the
    entities that are training answers of ``(a1, q[2])``, belong to ``all_e``,
    and are NOT training answers of ``(q[3], q[4])``.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        test_answers_pi_e: dict keyed by 5-element pin query tuples (named
            "pi", but the notebook passes the pin test queries); only its keys
            are read.

    Returns:
        dict mapping each query key to its (possibly empty) set of entities.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in test_answers_pi_e:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        # Loop-invariant: the negated branch does not depend on a1.
        answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
        for answer_1 in answers_1:
            answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
            # A2 & (all_e - A3) == (A2 & all_e) - A3; this form never builds a
            # set the size of the whole entity universe.
            filters |= (answers_2 & all_e) - answers_3
        ret[query_test] = filters
    return ret

def get_test_filter_c_pin(train_answers_1p_e, test_answers_pi_c, is_dict_train):
    """Build the concept filter set for every test 'pin' query.

    For a query key ``q``, first collect the entities that, for at least one
    intermediate training answer ``a1`` of ``(q[0], q[1])``, are training
    answers of ``(a1, q[2])``, belong to ``all_e``, and are NOT training
    answers of ``(q[3], q[4])``. The filter is the union of the concepts that
    ``is_dict_train`` lists for those entities.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        test_answers_pi_c: dict keyed by 5-element pin query tuples (named
            "pi", but the notebook passes the pin test queries); only its keys
            are read.
        is_dict_train: mapping from an entity to its collection of concepts;
            entities without an entry contribute nothing.

    Returns:
        dict mapping each query key to its (possibly empty) set of concepts.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in test_answers_pi_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        # Loop-invariant: the negated branch does not depend on a1.
        answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
        matched = set()
        for answer_1 in answers_1:
            answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
            # A2 & (all_e - A3) == (A2 & all_e) - A3, without materialising a
            # universe-sized set per intermediate answer.
            matched |= (answers_2 & all_e) - answers_3
        # Look up each matching entity's concepts once.
        for answer in matched:
            filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret

def get_train_answers_pin_e(train_answers_1p_e, k=K_train):
    """Randomly sample ``k`` training 'pin' queries, each with one entity answer.

    Each iteration draws a 1p key ``query_1`` from ``train_answers_1p_e`` and
    one of its answers ``answer_1``, then a pair ``query_2`` from
    ``h_2_rt[answer_1]`` whose second element is the candidate answer. A
    second key ``query_3`` supplies the negated branch: the draw is discarded
    if the candidate is among ``train_answers_1p_e[query_3]``, if the four
    entities involved are not pairwise distinct, or if an index entry is
    missing/empty.

    Args:
        train_answers_1p_e: dict mapping 2-tuple 1p keys to sets of answer
            entities; also used to build the ``h_2_rt`` index.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_3[0],
        query_3[1])`` to ``query_2[1]``.

    Note:
        Uses the notebook global ``get_h_2_rt``. The loop does not terminate
        if fewer than ``k`` distinct keys can be produced.
    """
    ret = {}
    # Only the index returned by get_h_2_rt is consulted below.
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # Materialise the key list once; it is sampled twice per iteration.
    queries_1p = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1p)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            query_3 = random.choice(queries_1p)
            # Direct membership test; no need to copy the answers into a list.
            if query_2[1] in train_answers_1p_e[query_3]:
                continue
            if len({query_1[0], answer_1, query_2[1], query_3[0]}) != 4:
                continue
        except (KeyError, IndexError):
            # Missing or empty index entry: resample. Narrow on purpose so
            # KeyboardInterrupt can still stop this open-ended loop.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0], query_3[1])] = query_2[1]
    return ret

def get_train_answers_pin_c(train_answers_1p_e, is_dict, k=K_train):
    """Randomly sample ``k`` training 'pin' queries, each with one concept answer.

    Sampling: a 1p key ``query_1`` and one of its answers ``answer_1``, a pair
    ``query_2`` from ``h_2_rt[answer_1]`` whose second element is the
    candidate entity, and a second key ``query_3`` for the negated branch. The
    stored answer is one concept picked at random from
    ``is_dict[query_2[1]]``. A draw is discarded if the candidate is among
    ``train_answers_1p_e[query_3]``, if the four entities are not pairwise
    distinct, or if an index/concept entry is missing/empty.

    Args:
        train_answers_1p_e: dict mapping 2-tuple 1p keys to sets of answer
            entities; also used to build the ``h_2_rt`` index.
        is_dict: mapping from an entity to its collection of concepts.
        k: number of distinct query keys to collect.

    Returns:
        dict mapping ``(query_1[0], query_1[1], query_2[0], query_3[0],
        query_3[1])`` to the sampled concept.

    Note:
        Uses the notebook global ``get_h_2_rt``. The loop does not terminate
        if fewer than ``k`` distinct keys can be produced.
    """
    ret = {}
    # Only the index returned by get_h_2_rt is consulted below.
    h_2_rt = get_h_2_rt(train_answers_1p_e)
    # Materialise the key list once; it is sampled twice per iteration.
    queries_1p = list(train_answers_1p_e.keys())
    while len(ret) < k:
        query_1 = random.choice(queries_1p)
        answer_1 = random.choice(list(train_answers_1p_e[query_1]))
        try:
            query_2 = random.choice(list(h_2_rt[answer_1]))
            query_3 = random.choice(queries_1p)
            # Direct membership test; no need to copy the answers into a list.
            if query_2[1] in train_answers_1p_e[query_3]:
                continue
            if len({query_1[0], answer_1, query_2[1], query_3[0]}) != 4:
                continue
            concept = random.choice(list(is_dict[query_2[1]]))
        except (KeyError, IndexError):
            # Missing/empty index or concept entry: resample. Narrow on
            # purpose so KeyboardInterrupt can still stop the loop.
            continue
        ret[(query_1[0], query_1[1], query_2[0], query_3[0], query_3[1])] = concept
    return ret

def get_train_filter_e_pin(train_answers_1p_e, train_answers_pin_e):
    """Build the entity filter set for every training 'pin' query.

    For a query key ``q`` the filter is the union, over every intermediate
    answer ``a1`` of the 1p key ``(q[0], q[1])``, of the entities that are
    recorded answers of ``(a1, q[2])``, belong to ``all_e``, and are NOT
    recorded answers of ``(q[3], q[4])``.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        train_answers_pin_e: dict keyed by 5-element pin query tuples; only
            its keys are read.

    Returns:
        dict mapping each query key to its (possibly empty) set of entities.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in train_answers_pin_e:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        # Loop-invariant: the negated branch does not depend on a1.
        answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
        for answer_1 in answers_1:
            answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
            # A2 & (all_e - A3) == (A2 & all_e) - A3; this form never builds a
            # set the size of the whole entity universe.
            filters |= (answers_2 & all_e) - answers_3
        ret[query_test] = filters
    return ret

def get_train_filter_c_pin(train_answers_1p_e, train_answers_pin_c, is_dict_train):
    """Build the concept filter set for every training 'pin' query.

    For a query key ``q``, first collect the entities that, for at least one
    intermediate answer ``a1`` of ``(q[0], q[1])``, are recorded answers of
    ``(a1, q[2])``, belong to ``all_e``, and are NOT recorded answers of
    ``(q[3], q[4])``. The filter is the union of the concepts that
    ``is_dict_train`` lists for those entities.

    Args:
        train_answers_1p_e: dict mapping 1p keys to sets of answer entities.
        train_answers_pin_c: dict keyed by 5-element pin query tuples; only
            its keys are read.
        is_dict_train: mapping from an entity to its collection of concepts;
            entities without an entry contribute nothing.

    Returns:
        dict mapping each query key to its (possibly empty) set of concepts.

    Note:
        Reads the notebook global ``all_e``.
    """
    ret = {}
    for query_test in train_answers_pin_c:
        filters = set()
        answers_1 = train_answers_1p_e.get((query_test[0], query_test[1]), set())
        # Loop-invariant: the negated branch does not depend on a1.
        answers_3 = train_answers_1p_e.get((query_test[3], query_test[4]), set())
        matched = set()
        for answer_1 in answers_1:
            answers_2 = train_answers_1p_e.get((answer_1, query_test[2]), set())
            # A2 & (all_e - A3) == (A2 & all_e) - A3, without materialising a
            # universe-sized set per intermediate answer.
            matched |= (answers_2 & all_e) - answers_3
        # Look up each matching entity's concepts once.
        for answer in matched:
            filters.update(is_dict_train.get(answer, set()))
        ret[query_test] = filters
    return ret

In [34]:
# ---- 'pin' queries: sample answers, derive filter sets, persist ----
# Training split: concept answers are drawn using is_dict_all, while the
# concept filters are built from is_dict_train.
train_answers_pin_e = get_train_answers_pin_e(train_answers_1p_e)
train_answers_pin_c = get_train_answers_pin_c(train_answers_1p_e, is_dict_all)
train_filter_answers_pin_e = get_train_filter_e_pin(
    train_answers_1p_e, train_answers_pin_e)
train_filter_answers_pin_c = get_train_filter_c_pin(
    train_answers_1p_e, train_answers_pin_c, is_dict_train)

# Test split: queries are seeded from the test 1p answers; filters still use
# the training answers.
test_answers_pin_e = get_test_answers_pin_e(train_answers_1p_e, test_answers_1p_e)
test_answers_pin_c = get_test_answers_pin_c(
    train_answers_1p_e, test_answers_1p_e, is_dict_all)
test_filter_answers_pin_e = get_test_filter_e_pin(
    train_answers_1p_e, test_answers_pin_e)
test_filter_answers_pin_c = get_test_filter_c_pin(
    train_answers_1p_e, test_answers_pin_c, is_dict_train)

# Bundle everything under split -> {'e': entity-level, 'c': concept-level}.
ret_pin = {}
ret_pin['train'] = {'e': train_answers_pin_e, 'c': train_answers_pin_c}
ret_pin['train_filter'] = {'e': train_filter_answers_pin_e,
                           'c': train_filter_answers_pin_c}
ret_pin['test'] = {'e': test_answers_pin_e, 'c': test_answers_pin_c}
ret_pin['test_filter'] = {'e': test_filter_answers_pin_e,
                          'c': test_filter_answers_pin_c}

save_obj(ret_pin, save_root + 'pin.pkl')
print('Done pin')

Done pin
