# RDFS NN

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.layers import Input, Dense, Dropout, Bidirectional, GRU, RepeatVector, TimeDistributed
from keras.models import Model

Using TensorFlow backend.


In [2]:
import tensorflow as tf

# Ask TensorFlow for the GPU device name once and reuse the answer for both
# the test and the message (the name is falsy when no GPU device is reported).
gpu_device_name = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    print("Please install GPU version of TF")

Default GPU Device:/device:GPU:0


In [4]:
# Column labels for the three fields of every triple.
col_names = ["subject", "predicate", "object"]

# The file has no header row; fields are separated by single spaces.
df = pd.read_csv(
    "./data/jupyter/all_lubm.nt",
    sep=' ',
    names=col_names,
)

# Remove any trailing '.' characters left on the object field.
df['object'] = df['object'].map(lambda value: value.rstrip('.'))

In [5]:
# Summarise the loaded triples column by column.
df.describe()

Unnamed: 0,subject,predicate,object
count,100573,100573,100573
unique,17189,18,13948
top,<http://www.Department0.University0.edu/FullPr...,<http://swat.cse.lehigh.edu/onto/univ-bench.ow...,xxx-xxx-xxxx
freq,14,21489,8330


In [7]:
# Distinct values of each triple component, in order of first appearance.
subjects, predicates, objects = (
    df[column].unique() for column in ("subject", "predicate", "object")
)

In [8]:
# Start the encoded frame as a copy so assignments to it do not touch `df`.
df_encoded = df.copy()

In [9]:
# Lookup tables: each distinct string -> its position in the matching array
# of unique values (0-based).
subject_to_id = dict(zip(subjects, range(len(subjects))))
predicate_to_id = dict(zip(predicates, range(len(predicates))))
object_to_id = dict(zip(objects, range(len(objects))))

In [10]:
# Overwrite each column of the copy with the integer ids looked up from the
# corresponding string column of `df`.
df_encoded["subject"] = df["subject"].map(subject_to_id)
df_encoded["predicate"] = df["predicate"].map(predicate_to_id)
df_encoded["object"] = df["object"].map(object_to_id)

In [11]:
# Display the integer-encoded triples.
df_encoded

Unnamed: 0,subject,predicate,object
0,0,0,0
1,0,1,1
2,0,1,2
3,0,2,3
4,0,3,4
5,0,4,5
6,0,5,6
7,1,3,7
8,1,6,8
9,1,5,9


In [23]:
import pandas as pd
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from sklearn.model_selection import train_test_split

def create_dataframe_translation_model_LSTM(df, latent_dim=256, batch_size=64, epochs=50,
                                            test_size=0.2, random_state=42):
    """Build and train a character-level LSTM encoder-decoder on the rows of ``df``.

    For every row the ``subject`` and ``predicate`` strings are joined with a
    single space to form the encoder input, and the ``object`` string is the
    sequence the decoder learns to produce. Both sides are one-hot encoded per
    character and zero-padded to the longest sequence.

    The decoder is trained with teacher forcing: at step ``t`` it receives
    character ``t`` of the object and is asked to predict character ``t + 1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide string columns ``subject``, ``predicate`` and ``object``.
    latent_dim : int, default 256
        Number of units in the encoder LSTM and in the decoder LSTM.
    batch_size : int, default 64
        Batch size passed to ``model.fit``.
    epochs : int, default 50
        Number of epochs passed to ``model.fit``.
    test_size : float, default 0.2
        Validation fraction passed to ``train_test_split``.
    random_state : int, default 42
        Seed passed to ``train_test_split``.

    Returns
    -------
    The fitted Keras ``Model`` whose inputs are ``[encoder_input, decoder_input]``.

    Raises
    ------
    ValueError
        If ``df`` has no rows.

    Notes
    -----
    NOTE(review): no start/end-of-sequence markers are added to the object
    strings, so the first character of an object is never a prediction target.
    Confirm whether that matters for how the model is used downstream.
    """
    if len(df) == 0:
        raise ValueError("df must contain at least one row")

    input_texts = df['subject'].str.cat(df['predicate'], sep=' ')
    target_texts = df['object']

    # Character vocabularies (sorted so the index assignment is deterministic).
    input_characters = sorted(set(' '.join(input_texts)))
    target_characters = sorted(set(' '.join(target_texts)))
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)

    input_token_index = {char: i for i, char in enumerate(input_characters)}
    target_token_index = {char: i for i, char in enumerate(target_characters)}

    # Dense one-hot tensors of shape (rows, longest sequence, vocabulary size);
    # positions past the end of a shorter sequence stay all-zero.
    max_encoder_seq_length = max(len(txt) for txt in input_texts)
    max_decoder_seq_length = max(len(txt) for txt in target_texts)

    encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32')
    decoder_sequence_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            decoder_sequence_data[i, t, target_token_index[char]] = 1.

    # Split encoder inputs and decoder sequences with the same row permutation.
    enc_train, enc_val, dec_train, dec_val = train_test_split(
        encoder_input_data, decoder_sequence_data,
        test_size=test_size, random_state=random_state)

    # Encoder: only its final hidden/cell states are kept, to seed the decoder.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = LSTM(latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    # Decoder: emits a distribution over target characters at every step.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    # Teacher forcing: the decoder sees steps [0, T-1) and must predict steps
    # [1, T), i.e. the same sequence shifted one character ahead. Passing the
    # unshifted sequence as the target would only teach the decoder to copy
    # its own input.
    model.fit(
        [enc_train, dec_train[:, :-1, :]], dec_train[:, 1:, :],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=([enc_val, dec_val[:, :-1, :]], dec_val[:, 1:, :]))

    return model

In [26]:
import pandas as pd
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Dropout, GRU, Bidirectional, RepeatVector, TimeDistributed
from sklearn.model_selection import train_test_split

def create_dataframe_translation_model_GRU(df, latent_dim=256, batch_size=128, epochs=1,
                                           test_size=0.2, random_state=42):
    """Build and train a character-level GRU encoder-decoder on the rows of ``df``.

    For every row the ``subject`` and ``predicate`` strings are joined with a
    single space to form the encoder input, and the ``object`` string is the
    sequence the decoder learns to produce. Both sides are one-hot encoded per
    character and zero-padded to the longest sequence.

    The encoder is a bidirectional GRU whose output initialises a decoder GRU
    of width ``2 * latent_dim``. The decoder is trained with teacher forcing:
    at step ``t`` it receives character ``t`` of the object and is asked to
    predict character ``t + 1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide string columns ``subject``, ``predicate`` and ``object``.
    latent_dim : int, default 256
        Units of the GRU wrapped by the bidirectional encoder; the decoder
        GRU uses twice this value.
    batch_size : int, default 128
        Batch size passed to ``model.fit``.
    epochs : int, default 1
        Number of epochs passed to ``model.fit``.
    test_size : float, default 0.2
        Validation fraction passed to ``train_test_split``.
    random_state : int, default 42
        Seed passed to ``train_test_split``.

    Returns
    -------
    The fitted Keras ``Model`` whose inputs are ``[encoder_input, decoder_input]``.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    if len(df) == 0:
        raise ValueError("df must contain at least one row")

    # Vectorised "subject predicate" concatenation (no per-row Python call).
    input_texts = df['subject'].str.cat(df['predicate'], sep=' ')
    target_texts = df['object']

    # Character vocabularies (sorted so the index assignment is deterministic).
    input_characters = sorted(set(' '.join(input_texts)))
    target_characters = sorted(set(' '.join(target_texts)))
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)

    input_token_index = {char: i for i, char in enumerate(input_characters)}
    target_token_index = {char: i for i, char in enumerate(target_characters)}

    # Dense one-hot tensors of shape (rows, longest sequence, vocabulary size);
    # positions past the end of a shorter sequence stay all-zero. Only one
    # decoder-side tensor is allocated: the shifted targets are taken from it
    # as a slice at fit time.
    max_encoder_seq_length = max(len(txt) for txt in input_texts)
    max_decoder_seq_length = max(len(txt) for txt in target_texts)

    encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32')
    decoder_sequence_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            decoder_sequence_data[i, t, target_token_index[char]] = 1.

    # Split encoder inputs and decoder sequences with the same row permutation.
    enc_train, enc_val, dec_train, dec_val = train_test_split(
        encoder_input_data, decoder_sequence_data,
        test_size=test_size, random_state=random_state)

    # Encoder: the bidirectional wrapper's output seeds the decoder state,
    # which is why the decoder GRU below is twice as wide.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = Bidirectional(GRU(latent_dim, name="gru_sequence_encoder"), name='bidirectional')
    encoder_outputs = encoder(encoder_inputs)

    # Decoder: emits a distribution over target characters at every step.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    decoder = GRU(latent_dim * 2, return_sequences=True, name='sequence_decoder')
    decoder_outputs = decoder(decoder_inputs, initial_state=encoder_outputs)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    # Teacher forcing: the decoder sees steps [0, T-1) and must predict steps
    # [1, T), i.e. the same sequence shifted one character ahead.
    model.fit(
        [enc_train, dec_train[:, :-1, :]], dec_train[:, 1:, :],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=([enc_val, dec_val[:, :-1, :]], dec_val[:, 1:, :]))

    return model

In [27]:
# Train the GRU encoder-decoder on `df` and keep the returned model.
model = create_dataframe_translation_model_GRU(df)

Train on 80458 samples, validate on 20115 samples
Epoch 1/1


In [39]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

In [40]:
# Input and output dimensions of the model.
# NOTE(review): neither value is referenced by any cell visible in this
# notebook -- confirm they are still needed.
input_dim = 100
output_dim = 50

# Training hyperparameters: recurrent state size, mini-batch size, epoch count.
latent_dim=256
batch_size=64
epochs=1

# Number of hidden units in the model.
# NOTE(review): same value as `latent_dim` above -- presumably the same
# quantity under a second name; confirm and consolidate.
hidden_units = 256

In [41]:
# Initialize the GraphTranslationModel object
# model = GraphTranslationModel(df_train, latent_dim=latent_dim, batch_size=batch_size, epochs=epochs)

TypeError: Tensors in list passed to 'values' of 'Pack' Op have types [int32, <NOT CONVERTIBLE TO TENSOR>, int32] that don't all match.

In [None]:
# NOTE(review): the only assignment to `model` visible in this notebook is the
# return value of create_dataframe_translation_model_GRU(df); the
# GraphTranslationModel construction is commented out. Confirm that `model`
# actually exposes `.translate` before running this cell.
translations = model.translate(df_test)