In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, d_in, d_out):
        super(MLP, self).__init__()
        self.d_in = d_in

        self.linear1 = nn.Linear(d_in, 1024)
        self.linear2 = nn.Linear(1024, 256)
        self.linear3 = nn.Linear(256,64)
        self.linear4 = nn.Linear(64,32)
        self.linear5 = nn.Linear(32, d_out)

    def forward(self, X):
        X = X.view(-1, self.d_in)
        X = self.linear1(X)
        X = self.linear2(X)
        X = self.linear3(X)
        X = self.linear4(X)
        return F.relu(self.linear5(X))

# input numpy arrays: 	in_vectors_train, out_vectors_train

In [2]:
import json
from config import Environment as env, VCSLAM

model = VCSLAM()
with open(model.PARSED_MODELS_PATH, 'r') as file:
    models = json.load(file)

print('# of models', len(models))



# of models 101


In [3]:
from preprocess.steiner_tree import get_mapped_attributes, get_anchored_target_nodes

index = 1
mod = models[index]
mapped_nodes = get_mapped_attributes(index=index,model=model)
anchored_nodes = get_anchored_target_nodes(mod,mapped_nodes)
anchored_nodes

{'http://tmdtkg#agent': ['http://tmdtkg#ticket', 'http://tmdtkg#business'],
 'http://tmdtkg#geoposition': ['http://tmdtkg#latitude',
  'http://tmdtkg#longitude']}

In [4]:
from util.parse import generate_dictionaries, generate_id_dict, encode_triples
from math import floor
import random

test_triples = random.sample(triples, floor(len(triples) / 10))

classes, predicates = generate_dictionaries(triples)
classes_mapping = generate_id_dict(classes)
predicates_mapping = generate_id_dict(predicates)
train_triples = triples


print("training triples size", len(train_triples))

train_data = encode_triples(train_triples, classes_mapping, predicates_mapping)
valid_data = encode_triples(test_triples, classes_mapping, predicates_mapping)

if env.LOG_LEVEL >= 10:
    print("Train data size: ", train_data.shape)
    print("Valid data size: ", valid_data.shape)

in_vectors_train = []
out_vectors_train = []

2022-05-02 08:51:59,301 [INFO] - PRE 10 triples
(('http://schema.org/Offer', 'http://schema.org/name', 'http://www.w3.org/2001/XMLSchema#string'), 18330)
(('http://schema.org/Offer', 'http://schema.org/price', 'http://www.w3.org/2001/XMLSchema#string'), 16230)
(('http://schema.org/CreativeWork', 'http://schema.org/name', 'http://www.w3.org/2001/XMLSchema#string'), 12255)
(('http://schema.org/Offer', 'http://schema.org/availableAtOrFrom', 'http://schema.org/Place'), 11253)
(('http://schema.org/Offer', 'http://schema.org/description', 'http://www.w3.org/2001/XMLSchema#string'), 11190)
(('http://schema.org/CreativeWork', 'http://schema.org/copyrightYear', 'http://www.w3.org/2001/XMLSchema#string'), 10845)
(('http://schema.org/Offer', 'http://schema.org/mainEntityOfPage', 'http://schema.org/CreativeWork'), 10353)
(('http://schema.org/Offer', 'http://schema.org/seller', 'http://schema.dig.isi.edu/ontology/PersonOrOrganization'), 8616)
(('http://schema.org/Offer', 'http://schema.dig.isi.edu/

training triples size 148886
Train data size:  (148886, 3)
Valid data size:  (14888, 3)


In [5]:

for s,p,o in train_data:
    subject_v = torch.zeros(len(classes_mapping))
    subject_v[s] = 1
    object_v = torch.zeros(len(classes_mapping))
    object_v[o] = 1
    combined_v = torch.cat([subject_v,object_v])
    in_vectors_train.append(combined_v)
    relation_v = torch.zeros(len(classes_mapping))
    relation_v[p] = 1
    out_vectors_train.append(relation_v)


D_in = in_vectors_train[0].shape[0]
D_out = out_vectors_train[0].shape[0]

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

Using cpu device


In [7]:
def model_loss_ce(m, data, dev):
  losses = []
  for s,p,o in data:
      subject_v = torch.zeros(len(classes_mapping))
      subject_v[s] = 1
      object_v = torch.zeros(len(classes_mapping))
      object_v[o] = 1
      combined_v = torch.cat([subject_v,object_v])

      out = m(combined_v.to(dev))
      out = out.detach().cpu()
      loss = nn.CrossEntropyLoss()(out.view(1,-1), torch.LongTensor([p]))
      losses.append(loss.detach().numpy().flat[0])

  return np.mean(losses)

In [8]:
import time

model = MLP(D_in, D_out).to(torch.device(device))

for epoch in range(0, 1):
    loss = -100
    start = time.time()
    for i in range(0, int(len(in_vectors_train))):
        input_vector = in_vectors_train[i]
        output_vector = out_vectors_train[i]

        optimizer.zero_grad()
        output = model(input_vector)
        loss = loss_function(output.view(1,-1), output_vector.nonzero().view(1))
        loss.backward()
        optimizer.step()
    print(epoch, model_loss_ce(model,valid_data,device), time.time() - start)

0 3.838979 2300.1545646190643
