In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
path_to_glove = '/content/drive/MyDrive/' + 'glove.840B.300d.txt'
l = [1,2,3,4,5,6,7]
n = 3
print([l[i:i + n] for i in range(0, len(l), n)])

[[1, 2, 3], [4, 5, 6], [7]]


In [1]:
import os
import random
import requests
import numpy as np

from gensim.models import KeyedVectors
import logging

from gensim import utils
import nltk

import networkx as nx
import matplotlib.pyplot as plt

import shutil
import smart_open
from sys import platform
import random
import tensorflow as tf
import json


This is just for creating the gensim file.

It will be saved into Google Drive.

After the glove_2.2M.txt has been written, save it to Drive to load later.

In [2]:
model = KeyedVectors.load_word2vec_format('data\\glove_2.2M.txt')

### Basic functions, for processing the data and building a graph



- get_words

- capitalize(word), low_case(word)

- infer_vector_from_word

- infer_vector_from_doc

- get_vectors_from_nodes_in_graph

- get_types_from_nodes_in_graph

- get_edge_name_with_signature

- get_node_name_with_signature

- add_triplets_to_graph_bw

- plot_graph

- get_chunks

- bin_data_into_buckets


In [3]:
_is_relevant = [.0, 1.]
_is_not_relevant = [1., 0.]

_logger = logging.getLogger(__name__)


def get_words(text):
    '''Use: tokenised = get_words(text)
  Pre: 
    text is a string
  Post: 
    words is a list of the words in the text''' 
    tokenizer = nltk.tokenize.TweetTokenizer()
    words = tokenizer.tokenize(utils.to_unicode(text))
    return words


def capitalize(word):
    return word[0].upper() + word[1:]


def low_case(word):
    return word[0].lower() + word[1:]


def infer_vector_from_word(model, word):
    '''Use: vector = infer_vector_from_word(model,word)
  Pre:
      model is models.keyedvectors from gensim (a mapping between keys and vectors)
      word is a key for model, such as a word from the vocabulary
  Post:
      vector is a gensim model vector representation, such as glove embedding, for word'''  
    vector = np.zeros(300)
    try:
        vector = model[word]
    except:
        try:
            vector = model[capitalize(word)]
        except:
            try:
                vector = model[low_case(word)]
            except:
                pass
    return vector


def infer_vector_from_doc(model, text):
    '''Use: vector = infer_vector_from_word(model,text)
    Pre:
        model is models.keyedvectors from gensim (a mapping between keys and vectors)
        text is a key for model, in this case a document (string of words)
    Post:
        vector is a gensim model vector representation, such as glove embedding, for text'''
    words = get_words(text)
    vector = np.zeros(300)
    for word in words:
        vector += infer_vector_from_word(model, word)
    norm = np.linalg.norm(vector)
    if norm > 0:
        vector /= norm
    return vector


def get_vectors_from_nodes_in_graph(g, model):
    '''Use: vectors = get_vectors_from_nodes_in_graph(graph, model)
    Pre:
        graph is a networkx graph
        model is models.keyedvectors from gensim (a mapping between keys and vectors)
    Post:
        vector is a numpy array of gensim model vector representations, such as glove embedding, for the text corresponding to each node in graph'''
    nodes = nx.nodes(g)
    vectors = []
    for node in nodes:
        text = node.replace('_', ' ')
        text = text.split('|')[0]
        vectors.append(infer_vector_from_doc(model, text))
    return np.array(vectors)


def get_types_from_nodes_in_graph(g):
    '''Use: vectors = get_types_from_nodes_in_graph(graph)
    Pre:
        graph is a networkx graph
    Post:
        vectors is a binary numpy array, with 1 if a node is a vertex and 0 if a node is an edge'''
    nodes = nx.nodes(g)
    vectors = []
    for node in nodes:
        texts = node.split('|')
        vector = np.zeros(3)
        if 'NODE' in texts:
            vector[0] = 1.
        if 'EDGE' in texts:
            vector[1] = 1.
        vectors.append(vector)
    return np.array(vectors)



In [4]:

def get_edge_name_with_signature(node_str):
    node_str = node_str.split('|')[0].lower()
    node_str += '|EDGE'
    return node_str


def get_node_name_with_signature(node_str):
    node_str = node_str.split('|')[0].lower()
    node_str += '|NODE'
    return node_str


def add_triplets_to_graph_bw(g, triplets):
    '''Use:  g = add_triplets_to_graph_bw(graph, triplets)
    Pre:
        graph is a networkx graph
        triplets is a list of triplets (node1, relation, node2)
    Post:
        g is graph with the entities and relations added to it'''
    for n1, r, n2 in triplets:
        clean_n1 = get_node_name_with_signature(n1)
        clean_n2 = get_node_name_with_signature(n2)
        clean_r = get_edge_name_with_signature(r)
        g.add_node(clean_n1)
        g.add_node(clean_n2)
        g.add_node(clean_r)
        g.add_edge(clean_n2, clean_r, **{'label': 'to_relation'})
        g.add_edge(clean_r, clean_n1, **{'label': 'to_node'})
    return g


def plot_graph(g):
    layout = nx.shell_layout(g)
    nx.draw_networkx(g, pos=layout)
    nx.draw_networkx_edge_labels(g, pos=layout)
    plt.show()


def get_chunks(l, n):
    '''Use: partitioned = get_chunks(l,n)
    Pre:
        l is a list
        n > 0
    Post:
        partitioned is a list of the elements of l grouped together in chunks of size n'''
    return [l[i:i + n] for i in range(0, len(l), n)]


def bin_data_into_buckets(data, batch_size):
    '''Use: buckets = bin_data_into_buckets(data, batch_size)
    Pre:
        data is a list with the training data from wikidata-disambig-train.json
        batch_size > 0
    Post:
        buckets is a list of the items in data, broken into batches'''
    buckets = []
    size_to_data_dict = {}
    for item in data:
        # mappings between the length of each item in data and the item added to size_to_data_dict
        seq_length = len(item['graph']['vectors'])
        question_length = len(item['question_vectors'])
        try:
            size_to_data_dict[(seq_length, question_length)].append(item)
        except:
            size_to_data_dict[(seq_length, question_length)] = [item]
    for key in size_to_data_dict.keys():
        # for each (seq_length, question_length), its corresponding item is broken into chunks of size batch_size, added to buckets
        data = size_to_data_dict[key]
        chunks = get_chunks(data, batch_size)
        for chunk in chunks:
            # chunks of size batch_size added to the bucket
            buckets.append(chunk)
    return buckets


### Functions for getting the graph ready

- get_bw_graph

- get_adjacency_matrices_and_vectors_given_triplets

- convert_text_into_vector_sequence

- get_item_mask_for_words

"With the topology of the Wikidata graph, the information of
each node is propagated onto the central item. Ideally, after
the graph convolutions, the vector at the position of the central
item summarizes the information in the graph."

In [5]:
def get_bw_graph(triplets):
    '''Use: graph = get_bw_graph(triplets)
    Pre:
        triplets is a list of [node1,relation,node2]
    Post:
        graph is a networkx directed graph representation of the triplets'''
    g_bw = nx.DiGraph()
    add_triplets_to_graph_bw(g_bw, triplets)
    return g_bw


def get_adjacency_matrices_and_vectors_given_triplets(triplets, central_item, model):
    '''Use: adj_vect_types = get_adjacency_matrices_and_vectors_given_triplets(triplets, central_item, model)
    Pre:
        triplets is a list of [node1,relation,node2]
        central_item is the central node in the graph represented by the triplets
        model is models.keyedvectors, such as word2vec: a mapping between keys (such as words) and a vector representation

    Post:
        adj_vect_types is a dict with:
            the adjacency matrix of the graph, the vector is glove embeddings for each node and its type (whether it is a vertex or edge)'''
    g_bw = get_bw_graph(triplets)

    vectors = get_vectors_from_nodes_in_graph(g_bw, model)
    node_types = get_types_from_nodes_in_graph(g_bw)
    nodelist = list(g_bw.nodes())
    try:
        central_node_index = nodelist.index(central_item + '|NODE')
        nodelist[central_node_index], nodelist[0] = nodelist[0], nodelist[central_node_index]
    except Exception as e:
        print('nodelist:', e)
        raise e
    A_bw = np.array(nx.to_numpy_matrix(g_bw, nodelist=nodelist))
    return {'A_bw': A_bw,
            'vectors': vectors,
            'types': node_types}




def convert_text_into_vector_sequence(model, text):
    '''Use: seq = convert_text_into_vector_sequence(model, text)
    Pre:
        model is a word2vec mapping
        text is a string
    Post:
        seq is a list of the embeddings of the words in text'''
    words = get_words(text)
    vectors = []
    for word in words:
        vectors.append(infer_vector_from_word(model, word))
    return vectors


def get_item_mask_for_words(text, item):
    '''Use: mask = get_item_mask_for_words(text, item)
    Pre:
        text is a string
        item is a string, name, possibly in text
    Post:
        mask is a binary list,  marks where in the text the item is (1 for the name, 0 everywhere else)
        "This mask acts as a "manually induced" attention of the item to disambiguate for"'''
    words = get_words(text)
    types = []
    words_in_item = get_words(item.lower())
    for word in words:
        types.append([1. if word.lower() in words_in_item else 0.] * 200)
    return types



def infer_vector_from_vector_nodes(vector_list):
    vector = np.zeros(300)
    for v in vector_list:
        vector += v
    norm = np.linalg.norm(vector)
    if norm > 0:
        vector /= norm
    return vector



### Functions for processing the json data file and generating a graph with the wikidata information

- translate_from_url

- create_text_item_graph_dict

- get_json_data

In [15]:
def translate_from_url(url):
    '''Use: item = translate_from_url(url)
    Pre:
        u is a string,  part of some wikidata url like the id
    Post:
        item is the data extracted from url (what comes after '/' and before '-')'''
    if '/' in url and '-' not in url:

        item = url.split('/')[-1]
#             print('from wikidata items_1: {}'.format(item))
    elif '/' in url and '-' in url:
        item = url.split('/')[-1].split('-')[0]
#             print('from wikidata items_2: {}'.format(item))
    else:
        item = url
#             print('from wikidata items_3: {}'.format(item))
    return item

def reverse_lookup(word):
    return reverse_dict[word.lower()]


def create_text_item_graph_dict(text, item, wikidata_id):
    '''Use: dict = create_text_item_graph_dict(text, item, wikidata_id)
    Pre:

    Post: 
      dict has the information from the graph in a dictionary form 
      '''

    text_item_graph_dict = {}
    text_item_graph_dict['text'] = text
    text_item_graph_dict['item'] = item
    text_item_graph_dict['wikidata_id'] = wikidata_id
    text_item_graph_dict['graph'] = get_graph_from_wikidata_id(wikidata_id, item)
    # text_item_graph_dict['item_vector'] = infer_vector_from_doc(_model, item)
    text_item_graph_dict['item_vector'] = infer_vector_from_vector_nodes(text_item_graph_dict['graph']['vectors'])
    text_item_graph_dict['question_vectors'] = convert_text_into_vector_sequence(model, text)
    text_item_graph_dict['question_mask'] = get_item_mask_for_words(text, item)
    return text_item_graph_dict



def get_json_data(json_data):
    '''Use: data, * = get_json_data(da)
    Pre: 
        da is a json file that has been loaded
    Post:
        
    '''
    data = []
    lost=[]
    count =1000
    for json_item in json_data[:count]:
       
        try:
            text = json_item['text']
            item = json_item['string']

            wikidata_id = json_item['correct_id']
            # print('before problem {}'.format(i))
            text_item_graph_dict = create_text_item_graph_dict(text, item, wikidata_id)
#             print('before problem if not with graph {}'.format(i))
            text_item_graph_dict['answer'] = _is_relevant
#             print('before problem if not with answer {}'.format(i))
            data.append(text_item_graph_dict)

            wikidata_id = json_item['wrong_id']
            text_item_graph_dict = create_text_item_graph_dict(text, item, wikidata_id)
            text_item_graph_dict['answer'] = _is_not_relevant
            
            

            data.append(text_item_graph_dict)
            
        except Exception as e:
#             print(str(e))
            lost.append(json_item)
    return data, lost




def get_graph_from_wikidata_id(wikidata_id, central_item):
    url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
    data = requests.get(url, params={'query': _query % wikidata_id,
                                     'format': 'json'}).json()
    triplets = []
    for item in data['results']['bindings']:
        try:
            from_item = translate_from_url(wikidata_id)
            relation = translate_from_url(item['rel']['value'])
            to_item = translate_from_url(item['item']['value'])
            triplets.append((from_item, relation, to_item))
        except:
            pass
        try:
            from_item = translate_from_url(item['item']['value'])
            relation = translate_from_url(item['rel2']['value'])
            to_item = translate_from_url(item['to_item']['value'])
            triplets.append((from_item, relation, to_item))
        except:
            pass
    triplets = sorted(list(set(triplets)))
    if not triplets:
        raise RuntimeError("This graph contains no suitable triplets.")
    return get_adjacency_matrices_and_vectors_given_triplets(triplets, central_item, model)

In [16]:
_query = '''
SELECT ?rel ?item ?rel2 ?to_item {
  wd:%s ?rel ?item
  OPTIONAL { ?item ?rel2 ?to_item }
  FILTER regex (str(?item), '^((?!statement).)*$') .
  FILTER regex (str(?item), '^((?!https).)*$') .
} LIMIT 1500
'''


In [None]:
#  don't run that
#  graph= get_graph_from_wikidata_id('Q534153','captain marvel')

In [None]:
# print(nx.info(graph))
# print(triplets[0])

In [None]:
# don't run that
# nodelist = list(graph.nodes())

In [None]:
# don't run that
# edges_list = graph.edges()
# pos = nx.spring_layout(graph)
# plt.figure(num=None, figsize=(20, 20), dpi=80)
# plt.axis('off')
# fig = plt.figure(1)

# nx.draw_networkx_nodes(graph, pos, cmap=plt.get_cmap('jet'),  node_size = 500)

# nx.draw_networkx_edges(graph, pos, edgelist=edges_list, edge_color='r', arrows=True)
# plt.savefig('captain_marvel_graph.jpg',bbox_inches="tight")
# plt.show()


### Functions for training the GCN using TensorFlow

In [17]:
def compute_new_adjacency_matrix(embedding_size, attention_size, memory_dim, A, H, question_vector, name):
    Wa = tf.Variable(tf.random.uniform([embedding_size + memory_dim, attention_size], -_rw, _rw),
                     name='Wa_' + name)
    ba = tf.Variable(tf.random.uniform([attention_size], -_rw, _rw), name='b0_fw' + name)
    #problem with tf.nn
    WHQ_projection = lambda x: tf.nn.relu(tf.matmul(tf.concat([x, question_vector], axis=1), Wa) + ba)
    WHQ = tf.map_fn(WHQ_projection, H)
    WHQ = tf.transpose(WHQ, perm=[1, 0, 2])
    #problem with tf.nn
    WHQ_squared_projection = lambda x: tf.nn.softmax(tf.matmul(x, tf.transpose(x, perm=[1, 0])))
    WHQ_squared = tf.map_fn(WHQ_squared_projection, WHQ)
    new_A = tf.multiply(A, WHQ_squared)
    return new_A


def GCN_layer_fw(embedding_size, hidden_layer1_size, memory_dim, hidden, Atilde_fw, question_vector, name):
    new_A = compute_new_adjacency_matrix(embedding_size, 250, memory_dim, Atilde_fw, hidden, question_vector, name)
    W0_fw = tf.Variable(tf.random.uniform([embedding_size, hidden_layer1_size], -_rw, _rw),
                        name='W0_fw' + name)
    b0_fw = tf.Variable(tf.random.uniform([hidden_layer1_size], -_rw, _rw), name='b0_fw' + name)
    left_X1_projection_fw = lambda x: tf.matmul(x, W0_fw) + b0_fw
    left_X1_fw = tf.map_fn(left_X1_projection_fw, hidden)
    left_X1_fw = tf.transpose(left_X1_fw, perm=[1, 0, 2], name='left_X1_fw' + name)
    #problem with tf.nn
    X1_fw = tf.nn.relu(tf.matmul(new_A, left_X1_fw))
    X1_fw = tf.transpose(X1_fw, perm=[1, 0, 2])
    return X1_fw

In [22]:
TINY = 1e-6
ONE = tf.constant(1.)
NAMESPACE = 'gcn_qa'
forbidden_weight = 1.
_weight_for_positive_matches = 1.
_rw = 1e-1
tf.compat.v1.disable_eager_execution()
class GCN_QA(object):
    _nodes_vocab_size = 300
    _question_vocab_size = 300
    _question_vector_size = 150
    _types_size = 3
    _mask_size = 200
    _types_proj_size = 5
    _word_proj_size = 50
    _word_proj_size_for_rnn = 50
    _word_proj_size_for_item = 50
    _internal_proj_size = 250
    _hidden_layer1_size = 250
    _hidden_layer2_size = 250
    _output_size = 2

    _memory_dim = 100
    _stack_dimension = 2

    def __init__(self, dropout=1.0):
        tf.compat.v1.reset_default_graph()
        with tf.compat.v1.variable_scope(NAMESPACE):
            config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
            self.sess = tf.compat.v1.Session(config=config)

            # Input variables
            self.node_X = tf.compat.v1.placeholder(tf.float32, shape=(None, None, self._nodes_vocab_size), name='node_X')
            self.types = tf.compat.v1.placeholder(tf.float32, shape=(None, None, self._types_size), name='types')
            self.Wt = tf.Variable(tf.random.uniform([self._types_size,
                                                     self._types_proj_size], -_rw, _rw))
            self.bt = tf.Variable(tf.random.uniform([self._types_proj_size], -_rw, _rw))
            #problem tf.nn
            self.types_projection = lambda x: tf.nn.relu(tf.matmul(x, self.Wt) + self.bt)
            self.types_internal = tf.map_fn(self.types_projection, self.types)
            self.question_vectors_fw = tf.compat.v1.placeholder(tf.float32, shape=(None, None, self._question_vocab_size),
                                                      name='question_vectors_inp_fw')
            self.question_vectors_bw = tf.compat.v1.placeholder(tf.float32, shape=(None, None, self._question_vocab_size),
                                                      name='question_vectors_inp_nw')
            self.question_mask = tf.compat.v1.placeholder(tf.float32, shape=(None, None, self._mask_size),
                                                name='question_mask')

            # The question is pre-processed by a bi-GRU
            self.Wq = tf.Variable(tf.random.uniform([self._question_vocab_size,
                                                     self._word_proj_size_for_rnn], -_rw, _rw))
            self.bq = tf.Variable(tf.random.uniform([self._word_proj_size_for_rnn], -_rw, _rw))
            self.internal_projection = lambda x: tf.nn.relu(tf.matmul(x, self.Wq) + self.bq)
            self.question_int_fw = tf.map_fn(self.internal_projection, self.question_vectors_fw)
            self.question_int_bw = tf.map_fn(self.internal_projection, self.question_vectors_bw)
            
            #problem rnn
            self.rnn_cell_fw = tf.compat.v1.nn.rnn_cell.MultiRNNCell([tf.compat.v1.nn.rnn_cell.GRUCell(self._memory_dim) for _ in range(self._stack_dimension)],
                                                state_is_tuple=True)
            self.rnn_cell_bw = tf.compat.v1.nn.rnn_cell.MultiRNNCell([tf.compat.v1.nn.rnn_cell.GRUCell(self._memory_dim) for _ in range(self._stack_dimension)],
                                                state_is_tuple=True)
            with tf.compat.v1.variable_scope('fw'):
                #problem dynamic rnn
                output_fw, state_fw = tf.compat.v1.nn.dynamic_rnn(self.rnn_cell_fw, self.question_int_fw, time_major=True,
                                                        dtype=tf.float32)
            with tf.compat.v1.variable_scope('bw'):
                #problem dynamen rnn
                output_bw, state_bw = tf.compat.v1.nn.dynamic_rnn(self.rnn_cell_bw, self.question_int_bw, time_major=True,
                                                        dtype=tf.float32)

            self.states = tf.concat(values=[output_fw, tf.reverse(output_bw, [0])], axis=2)
            self.question_vector_pre = tf.reduce_mean(tf.multiply(self.question_mask, self.states), axis=0)
            self.Wqa = tf.Variable(
                tf.random.uniform([2 * self._memory_dim, self._question_vector_size], -_rw, _rw),
                name='Wqa')
            self.bqa = tf.Variable(tf.random.uniform([self._question_vector_size], -_rw, _rw), name='bqa')
            #problem tf.nn
            self.question_vector = tf.nn.relu(tf.matmul(self.question_vector_pre, self.Wqa) + self.bqa)

            # Dense layer before gcn
            self.Wi = tf.Variable(tf.random.uniform([self._nodes_vocab_size,
                                                     self._word_proj_size], -_rw, _rw))
            self.bi = tf.Variable(tf.random.uniform([self._word_proj_size], -_rw, _rw))
            self.internal_projection2 = lambda x: tf.nn.relu(tf.matmul(x, self.Wi) + self.bi)
            self.word_embeddings = tf.map_fn(self.internal_projection2, self.node_X)

            self.inputs = tf.concat(values=[self.word_embeddings, self.types_internal], axis=2)
            self.Wp = tf.Variable(tf.random.uniform([self._word_proj_size + self._types_proj_size,
                                                     self._internal_proj_size], -_rw, _rw))
            self.bp = tf.Variable(tf.random.uniform([self._internal_proj_size], -_rw, _rw))
            self.enc_int_projection = lambda x: tf.nn.relu(tf.matmul(x, self.Wp) + self.bp)
            self.enc_int = tf.map_fn(self.enc_int_projection, self.inputs)

            # GCN part
            self.Atilde_fw = tf.nn.dropout(tf.compat.v1.placeholder(tf.float32, shape=(None, None, None), name="Atilde_fw"), 0.25)

            self.X1_fw = GCN_layer_fw(self._internal_proj_size,
                                      self._hidden_layer1_size,
                                      self._question_vector_size,
                                      self.enc_int,
                                      self.Atilde_fw,
                                      self.question_vector,
                                      '_1')
            #problem tf.nn
            self.X1_fw_dropout = tf.nn.dropout(self.X1_fw, dropout)

            self.X2_fw = GCN_layer_fw(self._hidden_layer1_size,
                                      self._hidden_layer1_size,
                                      self._question_vector_size,
                                      self.X1_fw_dropout,
                                      self.Atilde_fw,
                                      self.question_vector,
                                      '_2')
            self.X2_fw_dropout = tf.nn.dropout(self.X2_fw, dropout)

            self.X3_fw = GCN_layer_fw(self._hidden_layer1_size,
                                      self._hidden_layer1_size,
                                      self._question_vector_size,
                                      self.X2_fw_dropout,
                                      self.Atilde_fw,
                                      self.question_vector,
                                      '_3')
            self.X3_fw_dropout = tf.nn.dropout(self.X3_fw, dropout)

            self.X4_fw = GCN_layer_fw(self._hidden_layer1_size,
                                      self._hidden_layer1_size,
                                      self._question_vector_size,
                                      self.X3_fw_dropout,
                                      self.Atilde_fw,
                                      self.question_vector,
                                      '_4')
            #tf.nn
            self.X4_fw_dropout = tf.nn.dropout(self.X4_fw, dropout)
            self.first_node = self.X4_fw_dropout[0]
            self.concatenated = tf.concat(values=[self.question_vector, self.first_node], axis=1)

            # Final feedforward layers
            self.Ws1 = tf.Variable(
                tf.random.uniform([self._question_vector_size
                                   + self._hidden_layer1_size,
                                   self._hidden_layer2_size], -_rw, _rw),
                name='Ws1')
            self.bs1 = tf.Variable(tf.random.uniform([self._hidden_layer2_size], -_rw, _rw), name='bs1')
            #problem tf.nn
            self.first_hidden = tf.nn.relu(tf.matmul(self.concatenated, self.Ws1) + self.bs1)
            self.first_hidden_dropout = tf.nn.dropout(self.first_hidden, dropout)

            self.Wf = tf.Variable(
                tf.random.uniform([self._hidden_layer2_size, self._output_size], -_rw,
                                  _rw),
                name='Wf')
            self.bf = tf.Variable(tf.random.uniform([self._output_size], -_rw, _rw), name='bf')
            #problem tf.nn
            self.outputs = tf.nn.softmax(tf.matmul(self.first_hidden_dropout, self.Wf) + self.bf)

            # Loss function and training
            self.y_ = tf.compat.v1.placeholder(tf.float32, shape=(None, self._output_size), name='y_')
            self.outputs2 = tf.squeeze(self.outputs)
            self.y2_ = tf.squeeze(self.y_)
            self.one = tf.ones_like(self.outputs)
            self.tiny = self.one * TINY
            self.cross_entropy = (tf.reduce_mean(
                -tf.reduce_sum(self.y_ * tf.math.log(self.outputs + self.tiny) * _weight_for_positive_matches
                               + (self.one - self.y_) * tf.math.log(
                    self.one - self.outputs + self.tiny))
            ))

        # Clipping the gradient
        optimizer = tf.compat.v1.train.AdamOptimizer(1e-4)
        gvs = optimizer.compute_gradients(self.cross_entropy)
        capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs if var.name.find(NAMESPACE) != -1]
        self.train_step = optimizer.apply_gradients(capped_gvs)
        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Adding the summaries
        tf.summary.scalar('cross_entropy', self.cross_entropy)
        self.merged = tf.compat.v1.summary.merge_all()
        self.train_writer = tf.compat.v1.summary.FileWriter('./train', self.sess.graph)

    def _add_identity(self, A):
        num_nodes = A.shape[0]
        identity = np.identity(num_nodes)
        return identity + A

    def __train(self, A_fw, node_X, types, item_vector, question_vectors, question_mask, y):
        item_vector = np.array(item_vector)
        Atilde_fw = np.array([self._add_identity(item) for item in A_fw])

        node_X = np.array(node_X)
        node_X = np.transpose(node_X, (1, 0, 2))

        types = np.array(types)
        types = np.transpose(types, (1, 0, 2))

        question_vectors = np.array(question_vectors)
        question_vectors_fw = np.transpose(question_vectors, (1, 0, 2))
        question_vectors_bw = question_vectors_fw[::-1, :, :]

        question_mask = np.array(question_mask)
        question_mask = np.transpose(question_mask, (1, 0, 2))

        y = np.array(y)

        feed_dict = {}
        feed_dict.update({self.node_X: node_X})
        feed_dict.update({self.types: types})
        feed_dict.update({self.question_vectors_fw: question_vectors_fw})
        feed_dict.update({self.question_vectors_bw: question_vectors_bw})
        feed_dict.update({self.question_mask: question_mask})
        feed_dict.update({self.Atilde_fw: Atilde_fw})
        feed_dict.update({self.y_: y})

        loss, _, summary, outputs2, y2 = self.sess.run(
            [self.cross_entropy, self.train_step, self.merged, self.outputs2, self.y2_], feed_dict)
        return loss, summary

    def train(self, data, epochs=20):
        for epoch in range(epochs):
            loss, _ = self.__train([data[i][0] for i in range(len(data))],
                                   [data[i][1] for i in range(len(data))],
                                   [data[i][2] for i in range(len(data))],
                                   [data[i][3] for i in range(len(data))],
                                   [data[i][4] for i in range(len(data))],
                                   [data[i][5] for i in range(len(data))],
                                   [data[i][6] for i in range(len(data))])
            print(loss)
            sys.stdout.flush()

    def __predict(self, A_fw, node_X, types, item_vector, question_vectors, question_mask):
        item_vector = np.array(item_vector)
        Atilde_fw = np.array([self._add_identity(item) for item in A_fw])

        node_X = np.array(node_X)
        node_X = np.transpose(node_X, (1, 0, 2))

        types = np.array(types)
        types = np.transpose(types, (1, 0, 2))

        question_vectors = np.array(question_vectors)
        question_vectors_fw = np.transpose(question_vectors, (1, 0, 2))
        question_vectors_bw = question_vectors_fw[::-1, :, :]

        question_mask = np.array(question_mask)
        question_mask = np.transpose(question_mask, (1, 0, 2))

        feed_dict = {}
        feed_dict.update({self.node_X: node_X})
        feed_dict.update({self.types: types})
        feed_dict.update({self.question_vectors_fw: question_vectors_fw})
        feed_dict.update({self.question_vectors_bw: question_vectors_bw})
        feed_dict.update({self.question_mask: question_mask})
        feed_dict.update({self.Atilde_fw: Atilde_fw})

        y_batch = self.sess.run([self.outputs2], feed_dict)
        return y_batch

    def __standardize_item(self, item):
        if item[0] < item[1]:
            return [0., 1.]
        return [1., 0.]

    def predict(self, A_fw, node_X, types, item_vector, question_vectors, question_mask):
        output = self.__predict([A_fw], [node_X], [types], [item_vector], [question_vectors], [question_mask])
        return self.__standardize_item(output[0])

    # Loading and saving functions

    def save(self, filename):
        saver =  tf.compat.v1.train.Saver()
        saver.save(self.sess, filename)

    def load_tensorflow(self, filename):
        saver =  tf.compat.v1.train.Saver([v for v in tf.global_variables() if NAMESPACE in v.name])
        saver.restore(self.sess, filename)

    @classmethod
    def load(self, filename, dropout=1.0):
        model = GCN_QA(dropout)
        model.load_tensorflow(filename)
        return model

### The function for training and saving the model

In [23]:
def train(data, model, saving_dir, name_prefix, epochs=20, bucket_size=10, trace_every=1):
    import random
    import sys

    buckets = bin_data_into_buckets(data, bucket_size)
    for i in range(epochs):
        random_buckets = sorted(buckets, key=lambda x: random.random())
        sys.stderr.write('--------- Epoch ' + str(i) + ' ---------\n')
        for bucket in random_buckets:
            graph_bucket = []
            try:
                for item in bucket:
                    node_vectors = item['graph']['vectors']
                    types = item['graph']['types']
                    A_bw = item['graph']['A_bw']
                    y = item['answer']
                    item_vector = item['item_vector']
                    question_vectors = item['question_vectors']
                    question_mask = item['question_mask']
                    graph_bucket.append((A_bw, node_vectors, types, item_vector, question_vectors, question_mask, y))
                if len(graph_bucket) > 0:
                    model.train(graph_bucket, 1)
            except Exception as e:
                print('Exception caught during training: ' + str(e))
        if i % trace_every == 0:
            save_filename = saving_dir + name_prefix + '-' + str(i) + '.tf'
            sys.stderr.write('Saving into ' + save_filename + '\n')
            model.save(save_filename)

In [30]:
import pickle

In [20]:
_bucket_size = 10
_minimum_trace = 10
with open(os.path.join('../../dataset/wikidata-disambig-train.json')) as f:
    json_data = json.load(f)
data, lost = get_json_data(json_data)

print(len(data))
print(len(lost))


nodelist: 'nutrition|NODE' is not in list
nodelist: 'spanish|NODE' is not in list
nodelist: 'south|NODE' is not in list
682
933


In [32]:
pickle.dump(data, open('../../dataset/data.p', 'wb'))
pickle.dump(lost, open('../../dataset/lost.p','wb'))


MemoryError: 

In [24]:
print('Loading model: ')
_saving_dir = '../data/'
nn_model = GCN_QA(dropout=0.25)
train(data,
      nn_model,
      _saving_dir,
      name_prefix='qa',
      epochs=60,
      bucket_size=10,
      trace_every=1,
      )



Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception caught during training: Fetch argument None has invalid type <class 'NoneType'>
Exception

KeyboardInterrupt: 

In [None]:
blah