# NMT seq2seq English to Cypher

## Import packages

In [12]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time
import datetime

Print TensorFlow version

In [13]:
print(tf.__version__)
!pip install -q -U --user tb-nightly

2.0.0-dev20190513


In [14]:
# Paths to the parallel English-question and Cypher-query text files (one sentence per line)
ENGLISH_TXT_PATH = 'data/questions/english.txt'
CYPHER_TXT_PATH = 'data/questions/cypher.txt'

In [15]:
# Converts unicode file to ascii

def unicode_to_ascii(s):
    """Return `s` with combining marks removed.

    The string is NFD-decomposed and every code point whose Unicode category
    is 'Mn' (non-spacing mark) is dropped. Characters that have no
    decomposition are left untouched, so the result is not guaranteed to be
    pure ASCII.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

def preprocess_sentence_english(w):
    """Normalise an English question and wrap it in <start>/<end> tokens.

    Steps: lowercase, trim, strip accents via `unicode_to_ascii`, put spaces
    around ``? . ! , ¿``, collapse runs of spaces/double quotes, trim again.
    """
    text = unicode_to_ascii(w.lower().strip())

    # Surround each of ? . ! , ¿ with spaces so they become separate tokens.
    text = re.sub(r"([?.!,¿])", r" \1 ", text)
    # Collapse any run of spaces and/or double quotes into one space.
    text = re.sub(r'[" "]+', " ", text)

    # NOTE(review): this pattern does not filter characters as the name of the
    # step suggests. The character class ends at the first `]`, so the regex
    # only matches "<other char>:->]..." sequences and is effectively a no-op
    # (digits, quotes, underscores all survive). Kept byte-for-byte because
    # the fitted vocabulary depends on the current output.
    text = re.sub(r"[^a-zA-Z?.!,¿{}[]():->]+", " ", text)

    # Sentinel tokens tell the model where a sequence starts and stops.
    return '<start> ' + text.strip() + ' <end>'

def preprocess_sentence_cypher(w):
    """Normalise a Cypher query and wrap it in <start>/<end> tokens.

    Steps: lowercase, trim, strip accents via `unicode_to_ascii`, put spaces
    around ``? ; ! , ¿``, collapse runs of spaces/double quotes, trim again.
    Note that ``.`` is NOT split off here, so `a.name` stays one token.
    """
    query = unicode_to_ascii(w.lower().strip())

    # Surround each of ? ; ! , ¿ with spaces so they become separate tokens.
    query = re.sub(r"([?;!,¿])", r" \1 ", query)
    # Collapse any run of spaces and/or double quotes into one space.
    query = re.sub(r'[" "]+', " ", query)

    # NOTE(review): this pattern does not filter characters. The character
    # class ends at the first `]`, so the regex only matches
    # "<other char>:->]..." sequences and is effectively a no-op. That is why
    # quotes and underscores needed by Cypher survive. Kept byte-for-byte;
    # "correcting" the class would strip them and break the queries.
    query = re.sub(r"[^a-zA-Z?.!,¿{}[]():->]+", " ", query)

    # Sentinel tokens tell the model where a sequence starts and stops.
    return '<start> ' + query.strip() + ' <end>'
    

In [16]:
cypher_sentence = u"MATCH (c:crop {name:'Schima wallichii'})-[:HAS]->(description:description) RETURN description.life_form;"

print(preprocess_sentence_cypher(cypher_sentence).encode('utf-8'))

b"<start> match (c:crop {name:'schima wallichii'})-[:has]->(description:description) return description.life_form ; <end>"


In [17]:
# NOTE(review): this cell downloads the Spanish-English archive 'spa-eng.zip'.
# Neither `path_to_zip` nor `path_to_file` is referenced by any other cell in
# the visible notebook (the model is built from ENGLISH_TXT_PATH /
# CYPHER_TXT_PATH) -- presumably a leftover; confirm before removing.
path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
    extract=True)

# Location of the extracted text file next to the downloaded archive.
path_to_file = os.path.dirname(path_to_zip)+"/spa-eng/spa.txt"

In [18]:

# Clean each english and cypher sentence
# Returns two lists of preprocessed sentences, in the order (cypher, english)

def create_dataset(ENGLISH_TXT_PATH, CYPHER_TXT_PATH):
    """Load and preprocess the parallel English / Cypher corpora.

    Args:
        ENGLISH_TXT_PATH: path to a UTF-8 text file, one English question per line.
        CYPHER_TXT_PATH: path to a UTF-8 text file, one Cypher query per line.

    Returns:
        A tuple ``(cypher, english)`` of lists of preprocessed sentences.
        Note the order: Cypher first, English second.

    Blank (whitespace-only) lines are skipped in each file independently, so
    the two files must have their blank lines at matching positions (e.g. a
    trailing empty line in both) for the pairs to stay aligned.
    """
    english = _read_preprocessed(ENGLISH_TXT_PATH, preprocess_sentence_english)
    cypher = _read_preprocessed(CYPHER_TXT_PATH, preprocess_sentence_cypher)
    return cypher, english


def _read_preprocessed(path, preprocess):
    """Return `preprocess(line)` for every non-blank line of the file at `path`."""
    import io  # local import keeps this helper self-contained

    sentences = []
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with io.open(path, encoding='utf-8') as infile:
        for line in infile:
            # Lines keep their trailing newline, so a plain truthiness check
            # never filters anything; test the stripped content instead.
            if line.strip():
                sentences.append(preprocess(line))
    return sentences

In [19]:
cypher, english = create_dataset(ENGLISH_TXT_PATH, CYPHER_TXT_PATH)

In [20]:
print(english[-2])
print(cypher[-2])

<start> describe the habit of madagascar almond . <end>
<start> match (a:crop_alias {name: 'madagascar almond'})-[:is_alias_of]->(c:crop) match (c)-[:has]->(description:description) return a.name , c.name , description.habit ; <end>


In [21]:
print(len(english))
print(len(cypher))

470640
470640


In [22]:
def max_length(tensor):
    """Return the length of the longest sequence contained in `tensor`."""
    return max(map(len, tensor))

In [23]:
def tokenize(lang):
    """Fit a Keras tokenizer on `lang` and return (padded id matrix, tokenizer).

    `filters=''` disables the tokenizer's character filtering so punctuation
    and Cypher syntax are kept inside the tokens. Sequences are padded at the
    end (`padding='post'`).
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
    tokenizer.fit_on_texts(lang)
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(lang), padding='post')
    return padded, tokenizer

In [24]:
def load_dataset(ENGLISH_TXT_PATH, CYPHER_TXT_PATH):
    """Build tokenised input (English) and target (Cypher) tensors.

    Returns:
        (input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer)
    """
    # create_dataset returns the target language (Cypher) first.
    cypher_sentences, english_sentences = create_dataset(ENGLISH_TXT_PATH, CYPHER_TXT_PATH)

    english_encoded = tokenize(english_sentences)
    cypher_encoded = tokenize(cypher_sentences)

    return english_encoded[0], cypher_encoded[0], english_encoded[1], cypher_encoded[1]

## Load dataset

In [25]:
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(ENGLISH_TXT_PATH, CYPHER_TXT_PATH)

# Calculate the maximum sequence length of the target and input tensors
max_length_targ, max_length_inp = max_length(target_tensor), max_length(input_tensor)

# Creating training and validation sets using an 80-20 split.
# A fixed seed makes the split identical across kernel restarts, so results
# obtained on the validation set are comparable from run to run.
SPLIT_SEED = 42
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
    input_tensor, target_tensor, test_size=0.2, random_state=SPLIT_SEED)


In [26]:
# Show length
len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val)

(376512, 376512, 94128, 94128)

In [27]:
def convert(lang, tensor):
    """Print "<id> ----> <word>" for every non-zero id in `tensor`.

    `lang` must expose an `index_word` mapping from id to word; ids equal to
    0 are skipped.
    """
    for token_id in tensor:
        if token_id == 0:
            continue
        word = lang.index_word[token_id]
        print("%d ----> %s" % (token_id, word))

In [28]:
print ("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print ()
print ("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])

Input Language; index to word mapping
1 ----> <start>
17 ----> give
9 ----> me
13 ----> a
34 ----> comprehensive
12 ----> description
3 ----> about
69 ----> common
1046 ----> horehound
2 ----> <end>

Target Language; index to word mapping
4 ----> <start>
2 ----> match
6 ----> (a:crop_alias
7 ----> {name:
39 ----> 'common
1554 ----> horehound'})-[:is_alias_of]->(c:crop)
2 ----> match
11 ----> (c)-[:has]->(ecology)
2 ----> match
15 ----> (c)-[:has]->(description:description)
2 ----> match
13 ----> (ecology)-[:grows_in]->(optimal)
2 ----> match
12 ----> (ecology)-[:grows_in]->(absolute)
2 ----> match
19 ----> (optimal)-[:consist_of]->(soil_ph_optimal:soil_ph_optimal)
2 ----> match
17 ----> (absolute)-[:consist_of]->(temperature:temperature_required_optimal)
8 ----> return
9 ----> a.name
1 ----> ,
10 ----> c.name
1 ----> ,
20 ----> description.habit
1 ----> ,
18 ----> description.life_form
1 ----> ,
21 ----> description.physiology
1 ----> ,
23 ----> soil_ph_optimal.max
1 ----> ,
22 ----> s

In [29]:
# Shuffle buffer covers the whole training set.
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
# Number of full batches per epoch (integer division drops the remainder).
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 256
# Width of the encoder/decoder GRUs and of the attention layer built from it.
units = 1024
# +1 on top of the tokenizer vocabulary -- presumably to leave room for the
# padding id 0, which `word_index` does not contain (TODO confirm).
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

# (input, target) pairs, shuffled, then grouped into fixed-size batches;
# drop_remainder=True discards a final batch smaller than BATCH_SIZE.
dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [30]:
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 21]), TensorShape([64, 42]))

In [31]:
class Encoder(tf.keras.Model):
    """Embedding followed by a single GRU that encodes the input sentence."""

    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        """Create the embedding and GRU layers.

        Args:
            vocab_size: number of rows of the embedding table.
            embedding_dim: size of each embedding vector.
            enc_units: number of GRU units.
            batch_sz: batch size used by `initialize_hidden_state`.
        """
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences + return_state: the GRU yields both the per-step
        # outputs and its final state.
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        """Embed `x`, run the GRU from `hidden`, return (output, state).

        In the sample run of this notebook the output is
        (batch size, sequence length, units) and the state (batch size, units).
        """
        x = self.embedding(x)
        output, state = self.gru(x, initial_state = hidden)
        return output, state

    def initialize_hidden_state(self):
        """Return an all-zero state of shape (batch_sz, enc_units)."""
        return tf.zeros((self.batch_sz, self.enc_units))

In [32]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

Encoder output shape: (batch size, sequence length, units) (64, 21, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)


In [33]:
class BahdanauAttention(tf.keras.Model):
    """Additive attention: score = V(tanh(W1(values) + W2(query)))."""

    def __init__(self, units):
        """Create the three dense layers; `units` is the width of W1 and W2."""
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        """Return (context_vector, attention_weights) for `query` over `values`."""
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, 1)
        # the tanh argument is (batch_size, max_length, units); self.V is a
        # Dense(1) layer, which reduces the last axis to size 1
        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        # softmax over axis 1 normalises the weights across the sequence positions
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [34]:
attention_layer = BahdanauAttention(10)
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Attention result shape: (batch size, units) (64, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (64, 21, 1)


In [35]:
class Decoder(tf.keras.Model):
    """Single-step decoder: attention + embedding + GRU + dense projection."""

    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        """Create the embedding, GRU, output projection and attention layers.

        Args:
            vocab_size: size of the embedding table and of the output logits.
            embedding_dim: size of each embedding vector.
            dec_units: number of GRU units (also the attention layer width).
            batch_sz: stored on the instance; not used by `call`.
        """
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)

        # used for attention
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        """Run one decoding step.

        Args:
            x: ids of the previous target token, one per batch row.
            hidden: state used as the attention query.
            enc_output: encoder outputs that are attended over.

        Returns:
            (logits over the vocabulary, new GRU state, attention weights).
        """
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(hidden, enc_output)

        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the GRU
        # NOTE(review): `hidden` is only used as the attention query above; the
        # GRU is called without `initial_state`, so the previous decoder state
        # is not fed into the recurrence. Confirm this is intended -- passing
        # initial_state=hidden would change the behaviour of saved checkpoints.
        output, state = self.gru(x)

        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))

        # output shape == (batch_size, vocab)
        x = self.fc(output)

        return x, state, attention_weights

In [36]:
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

sample_decoder_output, _, _ = decoder(tf.random.uniform((64, 1)),
                                      sample_hidden, sample_output)

print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) (64, 12278)


In [37]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    """Masked cross-entropy for one decoder step.

    Positions where `real` equals 0 contribute zero loss. The mean is taken
    over ALL positions (masked ones included), exactly as `tf.reduce_mean`
    does on the masked tensor.
    """
    per_token = loss_object(real, pred)

    is_padding = tf.math.equal(real, 0)
    keep = tf.cast(tf.math.logical_not(is_padding), dtype=per_token.dtype)

    return tf.reduce_mean(per_token * keep)

In [38]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [39]:
@tf.function
def train_step(inp, targ, enc_hidden):
    """Run one teacher-forced training step on a batch and apply gradients.

    Args:
        inp: batch of input id sequences.
        targ: batch of target id sequences.
        enc_hidden: initial encoder state.

    Returns:
        The summed step loss divided by the target sequence length.
    """
    loss = 0

    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)

        # The decoder starts from the encoder's final state.
        dec_hidden = enc_hidden

        # First decoder input: the <start> id repeated for every batch row.
        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

        # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]):
            # passing enc_output to the decoder
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

            loss += loss_function(targ[:, t], predictions)

            # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)

    # Reported loss: the sum covers targ.shape[1] - 1 steps (t starts at 1)
    # but is divided by the full targ.shape[1].
    batch_loss = (loss / int(targ.shape[1]))

    variables = encoder.trainable_variables + decoder.trainable_variables

    # Gradients are taken of the un-normalised summed loss, not batch_loss.
    gradients = tape.gradient(loss, variables)

    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [29]:
EPOCHS = 10

for epoch in range(EPOCHS):
    start = time.time()
    # The same zero state is passed to every train_step call of this epoch;
    # train_step does not return an updated encoder state.
    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss
        
#         tf.summary.scalar('batch_loss', batch_loss.numpy(), step=epoch)


        # Progress line every 100 batches.
        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                     batch,
                                                     batch_loss.numpy()))
    # saving (checkpoint) the model after every epoch
    # (`% 1 == 0` is always true; raise the modulus to save less often)
    if (epoch + 1) % 1 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 6.0176
Epoch 1 Batch 100 Loss 1.1830
Epoch 1 Batch 200 Loss 0.8816
Epoch 1 Batch 300 Loss 0.6476
Epoch 1 Batch 400 Loss 0.4990
Epoch 1 Batch 500 Loss 0.4205
Epoch 1 Batch 600 Loss 0.3549
Epoch 1 Batch 700 Loss 0.3588
Epoch 1 Batch 800 Loss 0.4750
Epoch 1 Batch 900 Loss 0.3923
Epoch 1 Batch 1000 Loss 0.3332
Epoch 1 Batch 1100 Loss 0.3218
Epoch 1 Batch 1200 Loss 0.3481
Epoch 1 Batch 1300 Loss 0.3041
Epoch 1 Batch 1400 Loss 0.3299
Epoch 1 Batch 1500 Loss 0.2966
Epoch 1 Batch 1600 Loss 0.3020
Epoch 1 Batch 1700 Loss 0.3081
Epoch 1 Batch 1800 Loss 0.3143
Epoch 1 Batch 1900 Loss 0.3153
Epoch 1 Batch 2000 Loss 0.3032
Epoch 1 Batch 2100 Loss 0.3081
Epoch 1 Batch 2200 Loss 0.2861
Epoch 1 Batch 2300 Loss 0.2908
Epoch 1 Batch 2400 Loss 0.2678
Epoch 1 Batch 2500 Loss 0.2730
Epoch 1 Batch 2600 Loss 0.2559
Epoch 1 Batch 2700 Loss 0.2756
Epoch 1 Batch 2800 Loss 0.2874
Epoch 1 Batch 2900 Loss 0.2632
Epoch 1 Batch 3000 Loss 0.2668
Epoch 1 Batch 3100 Loss 0.2591
Epoch 1 Batch 3200 L

Epoch 5 Batch 2200 Loss 0.0000
Epoch 5 Batch 2300 Loss 0.0000
Epoch 5 Batch 2400 Loss 0.0000
Epoch 5 Batch 2500 Loss 0.0000
Epoch 5 Batch 2600 Loss 0.0000
Epoch 5 Batch 2700 Loss 0.0000
Epoch 5 Batch 2800 Loss 0.0000
Epoch 5 Batch 2900 Loss 0.0000
Epoch 5 Batch 3000 Loss 0.0000
Epoch 5 Batch 3100 Loss 0.0000
Epoch 5 Batch 3200 Loss 0.0000
Epoch 5 Batch 3300 Loss 0.0000
Epoch 5 Batch 3400 Loss 0.0000
Epoch 5 Batch 3500 Loss 0.0000
Epoch 5 Batch 3600 Loss 0.0000
Epoch 5 Batch 3700 Loss 0.0000
Epoch 5 Batch 3800 Loss 0.0000
Epoch 5 Batch 3900 Loss 0.0000
Epoch 5 Batch 4000 Loss 0.0135
Epoch 5 Batch 4100 Loss 0.0013
Epoch 5 Batch 4200 Loss 0.0010
Epoch 5 Batch 4300 Loss 0.0002
Epoch 5 Batch 4400 Loss 0.0008
Epoch 5 Batch 4500 Loss 0.0008
Epoch 5 Batch 4600 Loss 0.0006
Epoch 5 Batch 4700 Loss 0.0002
Epoch 5 Batch 4800 Loss 0.0006
Epoch 5 Batch 4900 Loss 0.0001
Epoch 5 Batch 5000 Loss 0.0001
Epoch 5 Batch 5100 Loss 0.0001
Epoch 5 Batch 5200 Loss 0.0001
Epoch 5 Batch 5300 Loss 0.0003
Epoch 5 

Epoch 9 Batch 4400 Loss 0.0000
Epoch 9 Batch 4500 Loss 0.0000
Epoch 9 Batch 4600 Loss 0.0000
Epoch 9 Batch 4700 Loss 0.0000
Epoch 9 Batch 4800 Loss 0.0000
Epoch 9 Batch 4900 Loss 0.0000
Epoch 9 Batch 5000 Loss 0.0000
Epoch 9 Batch 5100 Loss 0.0000
Epoch 9 Batch 5200 Loss 0.0000
Epoch 9 Batch 5300 Loss 0.0000
Epoch 9 Batch 5400 Loss 0.0000
Epoch 9 Batch 5500 Loss 0.0000
Epoch 9 Batch 5600 Loss 0.0000
Epoch 9 Batch 5700 Loss 0.0000
Epoch 9 Batch 5800 Loss 0.0000
Epoch 9 Loss 0.0000
Time taken for 1 epoch 1359.4686961174011 sec

Epoch 10 Batch 0 Loss 0.0000
Epoch 10 Batch 100 Loss 0.0000
Epoch 10 Batch 200 Loss 0.0000
Epoch 10 Batch 300 Loss 0.0000
Epoch 10 Batch 400 Loss 0.0000
Epoch 10 Batch 500 Loss 0.0000
Epoch 10 Batch 600 Loss 0.0000
Epoch 10 Batch 700 Loss 0.0000
Epoch 10 Batch 800 Loss 0.0000
Epoch 10 Batch 900 Loss 0.0313
Epoch 10 Batch 1000 Loss 0.0035
Epoch 10 Batch 1100 Loss 0.0009
Epoch 10 Batch 1200 Loss 0.0019
Epoch 10 Batch 1300 Loss 0.0002
Epoch 10 Batch 1400 Loss 0.0000


In [40]:
def evaluate(sentence):
    """Greedily decode a Cypher query for one English `sentence`.

    Returns:
        (result, sentence, attention_plot) where `result` is the predicted
        tokens joined by spaces (with a trailing space), `sentence` is the
        preprocessed input, and `attention_plot` holds one row of attention
        weights per decoding step.
    """
    attention_plot = np.zeros((max_length_targ, max_length_inp))

    sentence = preprocess_sentence_english(sentence)

    # Direct lookup: a word that is missing from `inp_lang.word_index` is not
    # handled here (presumably a KeyError for a plain dict -- TODO confirm).
    inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                           maxlen=max_length_inp,
                                                           padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''

    # Zero initial encoder state for a batch of one sentence.
    hidden = [tf.zeros((1, units))]
    enc_out, enc_hidden = encoder(inputs, hidden)

    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

    # Decode at most max_length_targ tokens.
    for t in range(max_length_targ):
        predictions, dec_hidden, attention_weights = decoder(dec_input,
                                                             dec_hidden,
                                                             enc_out)

        # storing the attention weights to plot later on
        attention_weights = tf.reshape(attention_weights, (-1, ))
        attention_plot[t] = attention_weights.numpy()

        # Greedy choice: the id with the highest logit.
        predicted_id = tf.argmax(predictions[0]).numpy()

        result += targ_lang.index_word[predicted_id] + ' '

        # Stop as soon as <end> is produced; it is already part of `result`.
        if targ_lang.index_word[predicted_id] == '<end>':
            return result, sentence, attention_plot

        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    return result, sentence, attention_plot

In [41]:
# function for plotting the attention weights
def plot_attention(attention, sentence, predicted_sentence):
    """Show the attention matrix as a heat map with token labels.

    Args:
        attention: 2-D array; rows are predicted tokens, columns input tokens.
        sentence: sequence of input tokens used as x-axis labels.
        predicted_sentence: sequence of predicted tokens used as y-axis labels.
    """
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='viridis')

    fontdict = {'fontsize': 14}

    # Pin exactly one tick per token before labelling. With the default
    # locator the ticks are not guaranteed to fall on every cell, so labels
    # assigned in order drift away from the cells they describe.
    ax.set_xticks(range(len(sentence)))
    ax.set_yticks(range(len(predicted_sentence)))
    ax.set_xticklabels(list(sentence), fontdict=fontdict, rotation=90)
    ax.set_yticklabels(list(predicted_sentence), fontdict=fontdict)

    plt.show()

In [42]:
from py2neo import Graph

# Credentials must not be committed with the notebook: read the Neo4j password
# from the environment instead of hard-coding it. The previously embedded
# password should be considered leaked and rotated.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
if not NEO4J_PASSWORD:
    raise RuntimeError(
        "Set the NEO4J_PASSWORD environment variable before connecting to Neo4j.")
graph = Graph(password=NEO4J_PASSWORD)

In [43]:
def translate(sentence):
    """Translate an English question to Cypher, run it, and print the result.

    The sentence is decoded with `evaluate`, the <start>/<end> sentinel tokens
    are removed, and the remaining text is executed against `graph`; the
    result table is printed.
    """
    result, _, _ = evaluate(sentence)

    # Remove BOTH sentinel tokens. Chaining matters: restarting the second
    # replace from `result` would throw away the first replacement.
    clean_sentence = result.replace('<start>', '').replace('<end>', '').strip()

    # NOTE(security): the query text is produced by the model and executed
    # verbatim. Run this against a read-only user/database if the input
    # sentence can come from an untrusted source.
    print(graph.run(clean_sentence).to_table())

In [44]:
# # restoring the latest checkpoint in checkpoint_dir
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f29c2675fd0>

In [46]:
translate(u'What is the scientific name of mango?')

 a.name | c.name           
--------|------------------
 mango  | mangifera indica 



In [36]:
print(english[1050])
print(cypher[1050])

<start> what is the minimum temperature to grow muli ? <end>
<start> match (a:crop_alias {name: 'muli'})-[:is_alias_of]->(c:crop) match (c)-[:has]->(ecology) match (ecology)-[:grows_in]->(absolute) match (absolute)-[:consist_of]->(temperature:temperature_required_optimal) return a.name , c.name , temperature.min ; <end>


In [37]:
graph.run("match (c:crop {name:'hippophae rhamnoides'})-[:has]->(ecology) match (ecology)-[:grows_in]->(optimal) match (optimal)-[:consist_of]->(temperature:temperature_required_optimal) return temperature.min , temperature.max ;").to_table()

temperature.min,temperature.max
8.0,20.0


In [38]:
print(english[15000])
print(cypher[15000])

<start> describe the life form of dakhar . <end>
<start> match (a:crop_alias {name: 'dakhar'})-[:is_alias_of]->(c:crop) match (c)-[:has]->(description:description) return a.name , c.name , description.life_form ; <end>
