## Import Libraries

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [None]:
!pip install SPARQLWrapper

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [90]:
import os
import json
import ast
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from SPARQLWrapper import SPARQLWrapper, JSON

from transformers import AutoTokenizer, AutoModel
from transformers import DataCollatorWithPadding

import torch
import torch.nn.functional as F
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader, Subset
from torch.nn.utils.rnn import pad_sequence

# from torchsummary import summary

In [91]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

cpu


## Download Model

In [92]:
tokenizer_class = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
# model_class = AutoModel.from_pretrained("indolem/indobert-base-uncased")

# Data collator is used for padding in batch
data_collator = DataCollatorWithPadding(tokenizer=tokenizer_class)

In [93]:
class BERT_Lexidqa2_class(nn.Module):
    def __init__(self, bert, input_size = 768, output_size = 18):
        super().__init__()

        self.bert = bert

        self.dropout = nn.Dropout(0.1)

        self.relu = nn.ReLU()

        self.linear1 = nn.Linear(input_size, 512)
        self.linear2 = nn.Linear(512, output_size)

        self.id2label = {0: "T1", 1: "T2", 2: "T3", 3: "T4",
              4: "T5", 5: "T6", 6: "T7", 7: "T8", 8: "T9",
              9: "T10", 10: "T11", 11: "T12", 12: "T13",
              13: "T14", 14: "T15", 15: "T16", 16: "T17", 17: "T18"}

    def forward(self, x):
        x, _ = self.bert(x, return_dict=False)

        # Only take the first column, Pooled Output, The Context
        x = x[:, 0, :]
        x = x.view(-1, 768)

        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.linear2(x)

        return x

    def predict(input):
      output = self.forward(torch.tensor(input['input_ids']).view(1, -1))

      id_max = torch.argmax(output)
      return self.id2label[id_max.item()]

In [94]:
save_path = 'model/class/'
model_class_path = os.path.join(save_path, "indobert_class_tesaurus"+".pth")

bert_class = torch.load(model_class_path)

In [95]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer_ner = AutoTokenizer.from_pretrained("kiipliwooke/KIPBERT")
bert_ner = AutoModelForTokenClassification.from_pretrained("kiipliwooke/KIPBERT")

# Define Utils

## Dataset

In [96]:
class Lexidqa2Dataset(Dataset):
  """LexID QA2 Dataset"""

  def __init__(self, dataframe, tokenizer_class, tokenizer_ner):
    """
      dataframe (Pandas DataFrame): Dataset in DataFrame format
      tokenizer (Tokenizer) : Sentence to Vector
    """
    self.qa_frame = dataframe
    self.tokenizer_class = tokenizer_class
    self.tokenizer_ner = tokenizer_ner

    self.label2id = {"T1": 0, "T2": 1, "T3": 2, "T4": 3,
                "T5": 4, "T6": 5, "T7": 6, "T8": 7, "T9": 8,
                "T10": 9, "T11": 10, "T12": 11, "T13": 12,
                "T14": 13, "T15": 14, "T16": 15, "T17": 16, "T18": 17}
    self.id2label = {0: "T1", 1: "T2", 2: "T3", 3: "T4",
                4: "T5", 5: "T6", 6: "T7", 7: "T8", 8: "T9",
                9: "T10", 10: "T11", 11: "T12", 12: "T13",
                13: "T14", 14: "T15", 15: "T16", 16: "T17", 17: "T18"}

  def __len__(self):
    return len(self.qa_frame)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
      idx = idx.tolist()

    question = self.qa_frame.loc[idx, "question"]
    question_tokenized_class = tokenizer_class(question, truncation=True)
    question_input_ids_class = torch.tensor(question_tokenized_class['input_ids'])
    question_tokenized_ner = tokenizer_ner(question, truncation=True)
    question_input_ids_ner = torch.tensor(question_tokenized_ner['input_ids'])

    answer = self.qa_frame.loc[idx, "answer"]
    template_label = self.qa_frame.loc[idx, "template_index"]

    template_id = self.one_hot(template_label, self.label2id)

    sample = (question,
              question_input_ids_class,
              question_input_ids_ner,
              answer,
              template_label,
              template_id,
              torch.tensor(idx))

    return sample

  def one_hot(self, label, label2id):
    one_hot_label = torch.zeros(len(label2id))
    one_hot_label[label2id[label]] = 1

    return one_hot_label

## BERT Classification

### BERT + Full Connected Layer + Softmax

In [97]:
class BERT_Lexidqa2_class(nn.Module):
    def __init__(self, bert, input_size = 768, output_size = 18):
        super().__init__()

        self.bert = bert

        self.dropout = nn.Dropout(0.1)

        self.relu = nn.ReLU()

        self.linear1 = nn.Linear(input_size, 512)
        self.linear2 = nn.Linear(512, output_size)

    def forward(self, x):
        x, _ = self.bert(x, return_dict=False)

        # Only take the first column, Pooled Output, The Context
        x = x[:, 0, :]
        x = x.view(-1, 768)

        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.linear2(x)

        return x

### Load Model - Example

In [None]:
# save_path = 'model/class/'
# model_class_path = os.path.join(save_path, "indobert_class_tesaurus"+".pth")

# bert_class = torch.load(model_class_path, map_location=torch.device(DEVICE))

# bert_class = model_class

In [98]:
bert_class.to(DEVICE)

BERT_Lexidqa2_class(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31923, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affin

## BERT NER

### Entity Extraction

In [99]:
# prediction in the form: list of number
def extract_entity(prediction, tokens):
  sequence_length = len(tokens)

  current_index = -1
  sentence = ''
  ner = []
  entities = []
  after_punctuation = False

  for index, (entity_token, entity_word) in enumerate(zip(prediction, tokens)):
    if entity_word in ['[CLS]', '[SEP]', '[PAD]']:
      ner.append(-100)
      continue
    if entity_token == 6: # 6 is B-LAW
      ner.append(1)
      if sentence != '':
        entities.append(sentence)
        sentence = ''
      current_index = index
      sentence = entity_word
    elif entity_token == 25:  # 25 is I-LAW
      ner.append(1)
      if index == current_index + 1:
        current_index = index

        if entity_word in ('-', '/') :
          sentence += entity_word
          after_punctuation = True
        elif entity_word == ',':
          sentence += entity_word
        elif after_punctuation:
          sentence += entity_word
          after_punctuation = False
        elif entity_word.startswith("##"):
          sentence += entity_word[2:]
        else:
          sentence += ' ' + entity_word
    else:
      ner.append(0)

  if sentence != '':
    entities.append(sentence)
    sentence = ''

  if len(ner) < sequence_length:
    ner.append([0 for i in range(sequence_length - len(ner))])

  return entities, ner

## SPARQL Query

In [100]:
# T1: Siapa yang menetapkan legal_title?
def template_1(legal_document):
  return '''
  SELECT DISTINCT ?answer
  WHERE {
      <''' + legal_document + '''> lexid-s:hasEnactionOfficial ?Official .
      ?Official rdfs:label ?answer .
  }
  '''

# T2: Kapan legal_title diundangkan?
def template_2(legal_document):
  return '''
  SELECT DISTINCT ?answer
  WHERE
  {
    <''' + legal_document + '''> lexid-s:hasPromulgationDate ?answer .
  }
  '''

# T3: Kapan legal_title ditetapkan?
def template_3(legal_document):
  return '''
  SELECT distinct ?answer
  WHERE
  {
    <''' + legal_document + '''> lexid-s:hasEnactionDate ?answer .
  }
  '''

# T4: Apa pertimbangan dalam membuat legal_title?
def template_4(legal_document):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct ?value
    WHERE
    {
      <''' + legal_document + '''> lexid-s:considers ?value .
    }
    ORDER BY ?value
  }
  '''

# T5: Apa saja dasar hukum dalam membuat legal_title?
def template_5(legal_document):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct ?value
    WHERE
    {
      <''' + legal_document + '''> lexid-s:hasLegalBasis ?LegalBasis .
      ?LegalBasis rdfs:label ?value .
    }
    ORDER BY ?LegalDocumentLabel ?value
  }
  '''

# T6: Apa saja peraturan yang berelasi dengan legal_title?
def template_6(legal_document):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct (coalesce(?label, ?ans) as ?value)
    WHERE
    {
      <''' + legal_document + '''> lexid-s:hasLegalBasis | lexid-s:implements | lexid-s:amends | lexid-s:repeals ?ans ;
                   rdfs:label ?LegalDocumentLabel .
      ?ans a lexid-s:LegalDocument .
      OPTIONAL {
          ?ans rdfs:label ?label .
      }
    }
    ORDER BY ?LegalDocumentLabel ?value
  }
  '''

# T7: Peraturan manakah yang diubah oleh legal_title?
def template_7(legal_document):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct (coalesce(?label , ?ans) as ?value)
    WHERE {
        <''' + legal_document + '''> a lexid-s:LegalDocument ;
                      rdfs:label ?LegalDocumentLabel ;
                      lexid-s:amends ?ans .
        ?ans a lexid-s:LegalDocument .
        OPTIONAL {
            ?ans rdfs:label ?label .
        }
    }
    ORDER BY ?value
  }
  '''

# T8: Peraturan manakah yang dicabut oleh legal_title?
def template_8(legal_document):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct (coalesce(?label , ?ans) as ?value)
    WHERE {
        <''' + legal_document + '''> a lexid-s:LegalDocument ;
                      rdfs:label ?LegalDocumentLabel ;
                      lexid-s:repeals ?ans .
        ?ans a lexid-s:LegalDocument .
        OPTIONAL {
            ?ans rdfs:label ?label .
        }
    }
    ORDER BY ?value
  }
  '''

# T9: Apa saja Bab yang dibahas dalam legal_title?
def template_9(legal_document):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE {
      SELECT distinct (concat(?contentLabel, ": ", ?contentName) as ?value)
      WHERE {
          <''' + legal_document + '''> a lexid-s:LegalDocument ;
              rdfs:label ?LegalDocumentLabel ;
              lexid-s:hasContent/lexid-s:hasPart* [
                  a lexid-s:Chapter ;
                  rdfs:label ?contentLabel ;
                  lexid-s:name ?contentName ;
              ] .
      }
      ORDER BY ?value
  }
  '''

# T10: Berapa jumlah Pasal yang diatur dalam legal_title?
def template_10(legal_document):
  return '''
  SELECT (count(*) as ?answer)
  WHERE {
      <''' + legal_document + '''> a lexid-s:LegalDocument ;
                    rdfs:label ?LegalDocumentLabel ;
                    lexid-s:hasContent/lexid-s:hasPart*/a lexid-s:Article .
  }
  '''

# T11: Bagaimana bunyi pasal_num dalam legal_title?
def template_11(legal_document, pasal_num):
  return '''
  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE {
      SELECT distinct (concat(coalesce(?sectionName, ""), " ", ?ans) as ?value)
      WHERE {
          <''' + legal_document + '''> a lexid-s:LegalDocument ;
                        rdfs:label ?LegalDocumentLabel ;
                        lexid-s:hasContent/lexid-s:hasPart* ?article .
          ?article a lexid-s:Article ;
                  rdfs:label "''' + pasal_num + '''"^^xsd:string .
          {
              {
                  ?article lexid-s:hasPart [
                      a lexid-s:Section ;
                      lexid-s:name ?sectionName ;
                      dct:description ?ans ;
                  ] .
              }
              UNION
              {
                  ?article dct:description ?ans .
              }
          }
      }
      ORDER BY ?value
  }
  '''

# T12: Bagaimana bunyi pasal_num ayat_num dalam legal_title?
def template_12(legal_document, pasal_num, ayat_num):
  return '''
  SELECT distinct ?answer
  WHERE {
      <''' + legal_document + '''> a lexid-s:LegalDocument ;
      rdfs:label ?LegalDocumentLabel ;
      lexid-s:hasContent/lexid-s:hasPart* [
          a lexid-s:Article ;
          rdfs:label \"''' + pasal_num + '''\"^^xsd:string ;
          lexid-s:hasPart [
              a lexid-s:Section ;
              rdfs:label \"''' + ayat_num + '''\"^^xsd:string ;
              dct:description ?answer ;
          ] ;
      ] .
  }
  '''

# T13: Bagaimana perubahan bunyi pasal_num (ayat_num) dalam legal_title?
def template_13(legal_document, pasal_num):
  return '''
  SELECT distinct (concat('Tahun ', STR(?year), ' Nomor ', ?number, ': ', ?GroupConcat) as ?answer)
  WHERE
  {
      SELECT distinct ?year ?number (lcase(group_concat(distinct ?ValueWithPasalNum; separator = ";")) as ?GroupConcat)
      WHERE
      {
        {
              SELECT distinct ?year ?number (?larticle as ?pasal_num) (COALESCE(?lsection, "") as ?ayat_num)
              (lcase(IF(BOUND(?lsection),
                          concat(?larticle, " ", coalesce(?lsection, "")),
                          ?larticle))
                      as ?lcontent)
              (concat(?lcontent, ': ', ?value) as ?ValueWithPasalNum)
            WHERE
              {
                  {
                      SELECT distinct ?article ?year ?number
                      WHERE
                      {
                          <''' + legal_document + '''> a lexid-s:LegalDocument;
                              rdfs:label ?LegalDocumentLabel ;
                              lexid-s:hasContent/lexid-s:hasPart* ?parent ;
                              lexid-s:amendedBy [
                                lexid-s:hasContent/lexid-s:modifies ?modification;
                                lexid-s:regulationYear ?year ;
                                lexid-s:regulationNumber ?number ;
                              ] .
                          ?modification lexid-s:hasModificationTarget ?parent ;
                                      lexid-s:hasModificationContent [ lexid-s:hasPart* ?article ;] .
                          ?article a lexid-s:Article ;
                      }
                  }

              BIND("''' + pasal_num + '''" as ?larticle)
              ?article rdfs:label ?larticle .
              {
                {
                  ?article lexid-s:hasPart [
                    a lexid-s:Section ;
                    rdfs:label ?lsection ;
                    dct:description ?value ;
                  ] .
                }
                UNION
                {
                  ?article dct:description ?value .
                }
              }
          }
          ORDER BY ?year ?number ?pasal_num ?ayat_num ?value
        }
      }
      GROUP BY ?year ?number
      ORDER BY desc(?year) desc(?number) ?GroupConcat
  }
  '''

# T14: Apa saja pasal yang dihapus dalam legal_title?
def template_14(legal_document):
  return '''
  SELECT distinct (group_concat(distinct ?value; separator = ";") as ?answer)
  WHERE {
    <''' + legal_document + '''> a lexid-s:LegalDocument;
      rdfs:label ?LegalDocumentLabel;
      lexid-s:amendedBy/lexid-s:hasContent/lexid-s:hasPart*/lexid-s:deletes ?deleted .
      ?deleted a lexid-s:Article ;
        rdfs:label ?value .
  }
  ORDER BY ?value
  '''

# T15: Apa saja pasal yang ditambahkan dalam legal_title?
def template_15(legal_document):
  return '''
  SELECT distinct (group_concat(distinct ?value; separator = ";") as ?answer)
  WHERE {
    <''' + legal_document + '''> a lexid-s:LegalDocument;
      rdfs:label ?LegalDocumentLabel;
      lexid-s:amendedBy/lexid-s:hasContent/lexid-s:hasPart*/lexid-s:adds/lexid-s:hasAdditionContent ?content .
      ?content a lexid-s:Article;
            rdfs:label ?value .
  }
  ORDER BY ?value
  '''

# T16: Apakah legal_title mengalami amendemen?
def template_16(legal_document):
  return '''
  SELECT (IF(count(distinct ?LegalDocument) > 0, "Ya, sudah diamendemen", "Tidak, tidak diamendemen") as ?answer)
  WHERE {
      ?LegalDocument a lexid-s:LegalDocument ;
        lexid-s:amends <''' + legal_document + '''> .
      <''' + legal_document + '''> a lexid-s:LegalDocument ;
        rdfs:label ?LegalDocumentLabel .
  }
  '''

# T17: Apakah legal_title masih berlaku?
def template_17(legal_document):
  return '''
  SELECT (IF(count(distinct ?LegalDocument) > 0, "Tidak berlaku lagi", "Ya, masih berlaku") as ?answer)
  WHERE {
      ?LegalDocument a lexid-s:LegalDocument ;
        lexid-s:repeals <''' + legal_document + '''> .
      <''' + legal_document + '''> a lexid-s:LegalDocument ;
        rdfs:label ?LegalDocumentLabel .
  }
  '''

# T18: Apa saja pasal yang diubah dalam legal_title?
def template_18(legal_document):
  return '''
  SELECT distinct (group_concat(distinct ?value; separator = ";") as ?answer)
  WHERE {
    <''' + legal_document + '''> a lexid-s:LegalDocument;
      rdfs:label ?LegalDocumentLabel;
      lexid-s:amendedBy/lexid-s:hasContent/lexid-s:hasPart*/lexid-s:modifies/lexid-s:hasModificationContent ?content .
      ?content a lexid-s:Article;
            rdfs:label ?value .
  }
  ORDER BY ?value
  '''

## Knowledge Graph

In [101]:
# sparql = SPARQLWrapper(os.environ.get('URL', 'http://localhost:8100/blazegraph/sparql'))
sparql = SPARQLWrapper(os.environ.get('URL', 'http://localhost:7302/repositories/lexid'))
sparql.setReturnFormat(JSON)
# locale.setlocale(locale.LC_ALL, "id_ID.UTF-8")
prefix = '''
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX wd: <https://www.wikidata.org/wiki/>
PREFIX lexid-s: <https://w3id.org/lex-id/schema/>
PREFIX lexid: <https://w3id.org/lex-id/data/>
'''

In [102]:
def query_graph(query):
  sparql.setQuery(prefix + query)

  informations_arr = []
  try:
    result = sparql.queryAndConvert()
    for ans in result['results']['bindings']:
      chunk = {
        'solution' : ans['answer']['value']
      }
      informations_arr.append(chunk)
    # if not answer:
    #     return "Jawaban tidak ditemukan"
    # return answer if question_type not in [2, 3] else datetime.strptime(answer, "%Y-%m-%d").strftime("%d %B %Y")

  except:
    print("error")

  return informations_arr

## Entity Linking

In [103]:
def get_entity_from_label(legal_label):
  query = '''
  SELECT DISTINCT ?LegalDocument
  WHERE {
      ?LegalDocument a lexid-s:LegalDocument ;
                    rdfs:label ?LegalLabel .

      FILTER CONTAINS(LCASE(?LegalLabel), LCASE("''' + legal_label + '''"))
  }
  '''

  sparql.setQuery(prefix + query)

  informations_arr = []
  try:
    result = sparql.queryAndConvert()
    for ans in result['results']['bindings']:
      chunk = {
        'legal_document' : ans['LegalDocument']['value']
      }
      informations_arr.append(chunk)
    # if not answer:
    #     return "Jawaban tidak ditemukan"
    # return answer if question_type not in [2, 3] else datetime.strptime(answer, "%Y-%m-%d").strftime("%d %B %Y")

  except:
    print("error")

  if len(informations_arr) > 0:
    return informations_arr[0]['legal_document']
  else:
    return ''

# Inference

In [104]:
id2label = {0: "T1", 1: "T2", 2: "T3", 3: "T4",
                4: "T5", 5: "T6", 6: "T7", 7: "T8", 8: "T9",
                9: "T10", 10: "T11", 11: "T12", 12: "T13",
                13: "T14", 14: "T15", 15: "T16", 16: "T17", 17: "T18"}

def predict_classification(input):
  input = tokenizer_class(input)
  output = bert_class(torch.tensor(input['input_ids']).view(1, -1))

  id_max = torch.argmax(output)
  return id2label[id_max.item()]

In [105]:
def predict_ner(input):
  input = tokenizer_ner(input)
  logits = bert_ner(torch.tensor(input['input_ids']).view(1, -1)).logits
  pred = torch.argmax(logits, dim=2)
  print(pred)

  print(input.tokens())
  entity, ner = extract_entity(pred, input.tokens())
  print(entity)
  print(ner)

  return entity

In [106]:
def predict_query(template_label, entity):
  if template_label == "T1":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_1(legal_document)
  elif template_label == "T2":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_2(legal_document)
  elif template_label == "T3":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_3(legal_document)
  elif template_label == "T4":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_4(legal_document)
  elif template_label == "T5":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_5(legal_document)
  elif template_label == "T6":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_6(legal_document)
  elif template_label == "T7":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_7(legal_document)
  elif template_label == "T8":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_8(legal_document)
  elif template_label == "T9":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_9(legal_document)
  elif template_label == "T10":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_10(legal_document)
  elif template_label == "T11":
    legal = ''
    pasal = ''
    for i in range(len(entity)):
      if 'pasal' in entity[i]:
        sentences = entity[i].split(" ")
        for j in range(sentences.index("pasal") + 1, len(sentences)):
          if sentences[j].isnumeric():
            pasal = sentences[j]
            del entity[i]
            break
      if pasal != "":
        break

    if len(entity) == 1:
      legal_label = entity[0]
    else:
      max_sentence = 0
      max_num = 0
      for i in range(len(entity)):
        sentences = entity[i].split(" ")
        if len(sentences) > max_num:
          max_sentence = entity[i]
          max_num = len(sentences)

      legal_label = max_sentence
    pasal = 'Pasal ' + pasal
    legal_document = get_entity_from_label(legal_label)
    return template_11(legal_document, pasal)
  elif template_label == "T12":
    legal = ''
    pasal = ''
    ayat = ''
    for i in range(len(entity)):
      if 'pasal' in entity[i] and 'ayat' in entity[i]:
        sentences = entity[i].split(" ")
        pasal_index = sentences.index("pasal")
        ayat_index = sentences.index("ayat")
        if pasal_index < ayat_index:
          for j in range(sentences.index("pasal") + 1, len(sentences)):
            if sentences[j].isnumeric():
              pasal = sentences[j]
              break
        else:
          for j in range(sentences.index("ayat") + 1, len(sentences)):
            if sentences[j].isnumeric():
              ayat = sentences[j]
              break

        if pasal != '':
          for j in range(sentences.index("ayat") + 1, len(sentences)):
            if sentences[j].isnumeric():
              ayat = sentences[j]
              break
        elif ayat != '':
          for j in range(sentences.index("pasal") + 1, len(sentences)):
            if sentences[j].isnumeric():
              pasal = sentences[j]
              break

        if pasal != '' and ayat != '':
          del entity[i]
      elif pasal == '' and 'pasal' in entity[i]:
        sentences = entity[i].split(" ")
        for j in range(sentences.index("pasal") + 1, len(sentences)):
          if sentences[j].isnumeric():
            pasal = sentences[j]
            del entity[i]
            break
      elif ayat == '' and 'ayat' in entity[i]:
        sentences = entity[i].split(" ")
        for j in range(sentences.index("ayat") + 1, len(sentences)):
          if sentences[j].isnumeric():
            pasal = sentences[j]
            del entity[i]
            break
      if pasal != "":
        break

    if len(entity) == 1:
      legal_label = entity[0]
    else:
      max_sentence = 0
      max_num = 0
      for i in range(len(entity)):
        sentences = entity[i].split(" ")
        if len(sentences) > max_num:
          max_sentence = entity[i]
          max_num = len(sentences)

      legal_label = max_sentence
    pasal = 'Pasal ' + pasal
    ayat = 'ayat ' + ayat
    legal_document = get_entity_from_label(legal_label)
    return template_12(legal_document, pasal, ayat)
  elif template_label == "T13":
    legal = ''
    pasal = ''
    for i in range(len(entity)):
      if 'pasal' in entity[i]:
        sentences = entity[i].split(" ")
        for j in range(sentences.index("pasal") + 1, len(sentences)):
          if sentences[j].isnumeric():
            pasal = sentences[j]
            del entity[i]
            break
      if pasal != "":
        break

    if len(entity) == 1:
      legal_label = entity[0]
    else:
      max_sentence = 0
      max_num = 0
      for i in range(len(entity)):
        sentences = entity[i].split(" ")
        if len(sentences) > max_num:
          max_sentence = entity[i]
          max_num = len(sentences)

      legal_label = max_sentence
    pasal = 'Pasal ' + pasal
    legal_document = get_entity_from_label(legal_label)
    return template_13(legal_document, pasal)
  elif template_label == "T14":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_14(legal_document)
  elif template_label == "T15":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_15(legal_document)
  elif template_label == "T16":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_16(legal_document)
  elif template_label == "T17":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_17(legal_document)
  elif template_label == "T18":
    legal_label = entity[0]
    legal_document = get_entity_from_label(legal_label)
    return template_18(legal_document)

In [107]:
import time

def predict(input):
  start = time.time()
  template_label = predict_classification(input)
  entity = predict_ner(input)
  print(template_label)
  print(entity)

  query = predict_query(template_label, entity)
  print(query)
  end = time.time()
  print("Time " + str(end-start))

  return query_graph(query)

# Run

In [108]:
input = tokenizer_class("Pasal apa yang diubah dalam dokumen?")
print(input)
bert_class.to(DEVICE)
output = bert_class(torch.tensor(input['input_ids']).to(DEVICE).view(1, -1))
output[0]

{'input_ids': [3, 3459, 2064, 1497, 6997, 1558, 4764, 35, 4], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}


tensor([ 2.7027e-01, -1.3302e-01, -3.8437e-01, -9.6040e-01, -2.3418e-01,
        -2.8237e-01,  3.1069e-04, -7.2000e-01, -1.7688e-01, -1.7205e-01,
        -1.7414e-01, -2.9747e-01,  4.6242e-01, -6.0761e-01,  3.7117e-01,
        -7.8936e-01, -5.5256e-01,  7.0025e+00], grad_fn=<SelectBackward0>)

In [89]:
predict_ner("Apakah Peraturan Menteri Keuangan Republik Indonesia Nomor 218 Tahun 2014 ini pernah diameliorasi")

tensor([[38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38, 38, 38]])
['[CLS]', 'apakah', 'peraturan', 'menteri', 'keuangan', 'republik', 'indonesia', 'nomor', '21', '##8', 'tahun', '2014', 'ini', 'pernah', 'diam', '##eli', '##orasi', '[SEP]']
Start Here
tensor(38) [CLS]
tensor(38) apakah
tensor(6) peraturan
tensor(25) menteri
tensor(25) keuangan
tensor(25) republik
tensor(25) indonesia
tensor(25) nomor
tensor(25) 21
tensor(25) ##8
##8
tensor(25) tahun
tensor(25) 2014
tensor(38) ini
tensor(38) pernah
tensor(38) diam
tensor(38) ##eli
tensor(38) ##orasi
tensor(38) [SEP]
['peraturan menteri keuangan republik indonesia nomor 218 tahun 2014']
[-100, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, -100]


['peraturan menteri keuangan republik indonesia nomor 218 tahun 2014']

In [56]:
predict("Apakah Peraturan Menteri Keuangan Republik Indonesia Nomor 218 Tahun 2014 ini pernah diameliorasi")

['[CLS]', 'apakah', 'peraturan', 'menteri', 'keuangan', 'republik', 'indonesia', 'nomor', '21', '##8', 'tahun', '2014', 'ini', 'pernah', 'diam', '##eli', '##orasi', '[SEP]']
[]
[-100, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
T16
[]


IndexError: list index out of range

## T1

In [None]:
predict("Orang yang memberlakukan keabsahan Peraturan Menteri Riset Teknologi Dan Pendidikan Tinggi Republik Indonesia Nomor 70 Tahun 2016 yang valid")

tensor([38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
        38, 38, 38])
T1
['peraturan menteri riset teknologi dan pendidikan tinggi republik indonesia nomor 70 tahun 2016']

  SELECT DISTINCT ?answer
  WHERE {
      <https://w3id.org/lex-id/data/Permen_Ristekdikti_2016_70> lexid-s:hasEnactionOfficial ?Official .
      ?Official rdfs:label ?answer .
  }
  
Time 15.178528308868408


[{'solution': 'Mohamad Nasir'}]

## T2

In [None]:
predict("Kapan Peraturan Pemerintah Republik Indonesia Nomor 28 Tahun 2017 tersebut diundangkan")

tensor([38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38])
T2
['peraturan pemerintah republik indonesia nomor 28 tahun 2017']

  SELECT DISTINCT ?answer
  WHERE
  {
    <https://w3id.org/lex-id/data/PP_2017_28> lexid-s:hasPromulgationDate ?answer .
  }
  
Time 9.626583099365234


[{'solution': '2017-07-24'}]

## T3

In [None]:
predict("Tanggal berapa berlangsungnya pelolosannya Peraturan Menteri Pendayagunaan Aparatur Negara Dan Reformasi Birokrasi Republik Indonesia Nomor 10 Tahun 2018")

tensor([38, 38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
        25, 25, 25, 38])
T3
['peraturan menteri pendayagunaan aparatur negara dan reformasi birokrasi republik indonesia nomor 10 tahun 2018']

  SELECT distinct ?answer
  WHERE
  {
    <https://w3id.org/lex-id/data/Permen_PANRB_2018_10> lexid-s:hasEnactionDate ?answer .
  }
  
Time 15.110788106918335


[{'solution': '2018-01-26'}]

## T4

In [None]:
predict("Peraturan Menteri Kehutanan Republik Indonesia Nomor 59 Tahun 2011 disusun karena pertimbangan kunci")

tensor([38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38, 38])
T4
['peraturan menteri kehutanan republik indonesia nomor 59 tahun 2011']

  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct ?value
    WHERE
    {
      <https://w3id.org/lex-id/data/Permen_Hut_2011_59> lexid-s:considers ?value .
    }
    ORDER BY ?value
  }
  
Time 10.871532917022705


[{'solution': 'bahwa berdasarkan Pasal 42 ayat (8) Peraturan Pemerintah Nomor 6 Tahun 2007 tentang Tata Hutan dan Penyusunan Rencana Pengelolaan Hutan serta Pemanfaatan Hutan, sebagaimana telah diubah dengan Peraturan Pemerintah Nomor 3 Tahun 2008, telah ditetapkan Peraturan Menteri Kehutanan Nomor P.13/Menhut-II/2009 tentang Hutan Tanaman Hasil Rehabilitasi sebagaimana telah diubah dengan Peraturan Menteri Kehutanan Nomor P.15/Menhut-II/2010;bahwa berdasarkan evaluasi pelaksanaan serta dalam rangka mencegah deforestasi dan kerusakan hutan, Peraturan Menteri sebagaimana huruf a perlu disesuaikan;bahwa berdasarkan pertimbangan sebagaimana dimaksud pada huruf a dan huruf b, perlu menetapkan Peraturan Menteri Lingkungan Hidup dan Kehutanan tentang Perubahan Atas Peraturan Menteri Kehutanan Nomor P.59/Menhut-II/2011 tentang Hutan Tanaman Hasil Rehabilitasi;bahwa dalam rangka melaksanakan ketentuan Pasal 42 ayat (8) Peraturan Pemerintah Nomor 6 Tahun 2007 tentang Tata Hutan dan Penyusunan R

## T5

In [None]:
predict("Apa norma-norma hukum yang mendukung saat pembangunan dokumen Peraturan Pemerintah Republik Indonesia Nomor 16 Tahun 1996 tersebut")

tensor([38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25,
        25, 38, 38])
T5
['peraturan pemerintah republik indonesia nomor 16 tahun 1996']

  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct ?value
    WHERE
    {
      <https://w3id.org/lex-id/data/PP_1996_16> lexid-s:hasLegalBasis ?LegalBasis .
      ?LegalBasis rdfs:label ?value .
    }
    ORDER BY ?LegalDocumentLabel ?value
  }
  
Time 13.70339322090149


[{'solution': 'Peraturan Pemerintah Republik Indonesia Nomor 12 Tahun 1969;Peraturan Pemerintah Republik Indonesia Nomor 3 Tahun 1983;Undang-Undang Republik Indonesia Nomor 9 Tahun 1969'}]

## T6

In [None]:
predict("Apa peraturan hukum yang bersinggungan dengan Peraturan Walikota Cirebon Nomor 14 Tahun 2019 tersebut")

tensor([38, 38, 38, 38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 38, 38])
T6
['peraturan walikota cirebon nomor 14 tahun 2019']

  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct (coalesce(?label, ?ans) as ?value)
    WHERE
    {
      <https://w3id.org/lex-id/data/Perwali_Cirebon_2019_14> lexid-s:hasLegalBasis | lexid-s:implements | lexid-s:amends | lexid-s:repeals ?ans ;
                   rdfs:label ?LegalDocumentLabel .
      ?ans a lexid-s:LegalDocument .
      OPTIONAL {
          ?ans rdfs:label ?label .
      }
    }
    ORDER BY ?LegalDocumentLabel ?value
  }
  
Time 11.528024435043335


[{'solution': 'Peraturan Daerah Kota Cirebon Nomor 6 Tahun 2016;Peraturan Daerah Kota Cirebon Nomor 7 Tahun 2016;Peraturan Daerah Kota Cirebon Nomor 9 Tahun 2016;Peraturan Menteri Dalam Negeri Republik Indonesia Nomor 130 Tahun 2018;Peraturan Pemerintah Republik Indonesia Nomor 12 Tahun 2017;Peraturan Pemerintah Republik Indonesia Nomor 17 Tahun 2018;Peraturan Pemerintah Republik Indonesia Nomor 18 Tahun 2016;Peraturan Walikota Cirebon Nomor 36 Tahun 2011;Peraturan Walikota Cirebon Nomor 67 Tahun 2016;Undang-Undang Republik Indonesia Nomor 16 Tahun 1950;Undang-Undang Republik Indonesia Nomor 23 Tahun 2014;Undang-Undang Republik Indonesia Nomor 30 Tahun 2014;Undang-Undang Republik Indonesia Nomor 33 Tahun 2004'}]

## T7

In [None]:
predict("Peraturan Daerah Kabupaten Rokan Hilir Nomor 7 Tahun 2013 ini mengganti hukum yang mana")

tensor([38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38, 38, 38])
T7
['peraturan daerah kabupaten rokan hilir nomor 7 tahun 2013']

  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE
  {
    SELECT distinct (coalesce(?label , ?ans) as ?value)
    WHERE {
        <https://w3id.org/lex-id/data/Perkab_Rokan_Hilir_2013_7> a lexid-s:LegalDocument ;
                      rdfs:label ?LegalDocumentLabel ;
                      lexid-s:amends ?ans .
        ?ans a lexid-s:LegalDocument .
        OPTIONAL {
            ?ans rdfs:label ?label .
        }
    }
    ORDER BY ?value
  }
  
Time 10.455859661102295


[{'solution': 'Peraturan Daerah Kabupaten Rokan Hilir Nomor 9 Tahun 2009'}]

## T8

In [None]:
predict("Undang-Undang Republik Indonesia Nomor 6 Tahun 2014 ini menghilangkan peraturan hukum apa")

tensor([38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38, 38, 38])
T8
['undang - undang republik indonesia nomor 6 tahun 2014']


IndexError: list index out of range

## T9

In [None]:
predict("Peraturan Menteri Dalam Negeri Republik Indonesia Nomor 33 Tahun 2012 menguraikan bagian-bagian apa")

tensor([38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38, 38, 38])
T9
['peraturan menteri dalam negeri republik indonesia nomor 33 tahun 2012']

  SELECT distinct (concat(?contentLabel, ": ", ?contentName) as ?answer)
  WHERE {
      <https://w3id.org/lex-id/data/Permen_Dagri_2012_33> a lexid-s:LegalDocument ;
          rdfs:label ?LegalDocumentLabel ;
          lexid-s:hasContent/lexid-s:hasPart* [
              a lexid-s:Chapter ;
              rdfs:label ?contentLabel ;
              lexid-s:name ?contentName ;
          ] .
  }
  ORDER BY ?answer
  
Time 11.23725414276123


[{'solution': 'BAB I : KETENTUAN UMUM: KETENTUAN UMUM'},
 {'solution': 'BAB II : RUANG LINGKUP ORKEMAS: RUANG LINGKUP ORKEMAS'},
 {'solution': 'BAB III : TAHAPAN PENDAFTARAN Pasal 5 Pendaftaran orkemas dilakukan oleh pengurus melalui tahapan: a. pengajuan permohonan; b. penelitian dokumen persyaratan; c. penelitian lapangan; dan d. penerbitan SKT.: TAHAPAN PENDAFTARAN Pasal 5 Pendaftaran orkemas dilakukan oleh pengurus melalui tahapan: a. pengajuan permohonan; b. penelitian dokumen persyaratan; c. penelitian lapangan; dan d. penerbitan SKT.'},
 {'solution': 'BAB IV : ISI DAN MASA BERLAKU SKT: ISI DAN MASA BERLAKU SKT'},
 {'solution': 'BAB IX : PEMBINAAN DAN PENGAWASAN: PEMBINAAN DAN PENGAWASAN'},
 {'solution': 'BAB V : PERPANJANGAN, PERUBAHAN, PEMBEKUAN, ATAU PENCABUTAN SKT: PERPANJANGAN, PERUBAHAN, PEMBEKUAN, ATAU PENCABUTAN SKT'},
 {'solution': 'BAB VI : TIM FASILITASI ORKEMAS: TIM FASILITASI ORKEMAS'},
 {'solution': 'BAB VII : PENGEMBANGAN DATABASE ORKEMAS: PENGEMBANGAN DATABASE ORK

## T10

In [None]:
predict("Total pasal yang tercantum di Peraturan Badan Pengawas Tenaga Nuklir Republik Indonesia Nomor 3 Tahun 2019 tersebut")

tensor([38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 38,
        38])
T10
['peraturan badan pengawas tenaga nuklir republik indonesia nomor 3 tahun 2019']

  SELECT (count(*) as ?answer)
  WHERE {
      <https://w3id.org/lex-id/data/Peraturan_Bapeten_2019_3> a lexid-s:LegalDocument ;
                    rdfs:label ?LegalDocumentLabel ;
                    lexid-s:hasContent/lexid-s:hasPart*/a lexid-s:Article .
  }
  
Time 14.6880624294281


[{'solution': '18'}]

## T11

In [None]:
predict("Pasal 7 di dalam Peraturan Badan Ekonomi Kreatif Republik Indonesia Nomor 1 Tahun 2015 berbunyi")

tensor([38,  6, 25, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38])
T11
['pasal 7', 'peraturan badan ekonomi kreatif republik indonesia nomor 1 tahun 2015']

  SELECT distinct (group_concat(?value; separator = ";") as ?answer)
  WHERE {
      SELECT distinct (concat(coalesce(?sectionName, ""), " ", ?ans) as ?value)
      WHERE {
          <https://w3id.org/lex-id/data/Peraturan_Bekraf_2015_1> a lexid-s:LegalDocument ;
                        rdfs:label ?LegalDocumentLabel ;
                        lexid-s:hasContent/lexid-s:hasPart* ?article .
          ?article a lexid-s:Article ;
                  rdfs:label "Pasal 7"^^xsd:string .
          {
              {
                  ?article lexid-s:hasPart [
                      a lexid-s:Section ;
                      lexid-s:name ?sectionName ;
                      dct:description ?ans ;
                  ] .
              }
              UNION
              {
                  ?article dct:description ?ans .
              }

[{'solution': '(1) Wakil Kepala berada di bawah dan bertanggung jawab kepada Kepala.;(2) Wakil Kepala mempunyai tugas membantu Kepala dalam memimpin\r\npelaksanaan tugas dan fungsi Badan Ekonomi Kreatif.'}]

## T12

In [None]:
predict("Apa bunyi yang dinyatakan oleh Pasal 53 pada ayat 6 di Undang-Undang Republik Indonesia Nomor 30 Tahun 2014 tersebut")

tensor([38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 38,  6, 25, 25, 25, 25, 25,
        25, 25, 25, 38, 38])
T12
['pasal 53 pada ayat 6', 'undang - undang republik indonesia nomor 30 tahun 2014']


IndexError: list index out of range

## T13

In [None]:
predict("Gimana yang dikoreksi pada Pasal 30 di Peraturan Menteri Desa, Pembangunan Daerah Tertinggal, Dan Transmigrasi Republik Indonesia Nomor 9 Tahun 2015 ini setelah adanya penambahan")

tensor([38, 38, 38, 38, 38, 38,  6, 25, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25,
        25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38, 38])
T13
['pasal 30', 'peraturan menteri desa , pembangunan daerah tertinggal , dan transmigrasi republik indonesia nomor 9 tahun 2015']


IndexError: list index out of range

## T14

In [None]:
predict("Pasal-pasal apa yang dianulir dalam Peraturan Menteri Agraria Dan Tata Ruang/ Kepala Badan Pertanahan Nasional Republik Indonesia Nomor 33 Tahun 2016 ini")

tensor([38, 38, 38, 38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25,
        25, 25, 25, 25, 25, 25, 25, 25, 38, 38])
T14
['peraturan menteri agraria dan tata ruang / kepala badan pertanahan nasional republik indonesia nomor 33 tahun 2016']


IndexError: list index out of range

## T15

In [None]:
predict("Pasal-pasal mana saja yang mengalami penyisipan dalam Peraturan Menteri Perindustrian Republik Indonesia Nomor 147 Tahun 2009 ini")

tensor([38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,  6, 25, 25, 25, 25, 25, 25,
        25, 25, 38, 38])
T15
['peraturan menteri perindustrian republik indonesia nomor 147 tahun 2009']

  SELECT distinct (group_concat(distinct ?value; separator = ";") as ?answer)
  WHERE {
    <https://w3id.org/lex-id/data/Permen_Perin_2009_147> a lexid-s:LegalDocument;
      rdfs:label ?LegalDocumentLabel;
      lexid-s:amendedBy/lexid-s:hasContent/lexid-s:hasPart*/lexid-s:adds/lexid-s:hasAdditionContent ?content .
      ?content a lexid-s:Article;
            rdfs:label ?value .
  }
  ORDER BY ?value
  
Time 14.23629903793335


[{'solution': 'Pasal 6A'}]

## T16

In [None]:
predict("Sudahkah Peraturan Pemerintah Republik Indonesia Nomor 36 Tahun 2007 ini dimodifikasi")

tensor([38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38])
T16
['peraturan pemerintah republik indonesia nomor 36 tahun 2007']

  SELECT (IF(count(distinct ?LegalDocument) > 0, "Ya, sudah diamendemen", "Tidak, tidak diamendemen") as ?answer)
  WHERE {
      ?LegalDocument a lexid-s:LegalDocument ;
        lexid-s:amends <https://w3id.org/lex-id/data/PP_2007_36> .
      <https://w3id.org/lex-id/data/PP_2007_36> a lexid-s:LegalDocument ;
        rdfs:label ?LegalDocumentLabel .
  }
  
Time 4.266550302505493


[{'solution': 'Ya, sudah diamendemen'}]

## T17

In [None]:
predict("Masihkah Peraturan Menteri Pertahanan Republik Indonesia Nomor 09 Tahun 2014 itu masih ditegakkan")

tensor([38, 38, 38,  6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38, 38, 38])
T17
['peraturan menteri pertahanan republik indonesia nomor 09 tahun 2014']

  SELECT (IF(count(distinct ?LegalDocument) > 0, "Tidak berlaku lagi", "Ya, masih berlaku") as ?answer)
  WHERE {
      ?LegalDocument a lexid-s:LegalDocument ;
        lexid-s:repeals <https://w3id.org/lex-id/data/Permen_Han_2014_09> .
      <https://w3id.org/lex-id/data/Permen_Han_2014_09> a lexid-s:LegalDocument ;
        rdfs:label ?LegalDocumentLabel .
  }
  
Time 4.909379720687866


[{'solution': 'Tidak berlaku lagi'}]

## T18

In [None]:
predict("Peraturan Menteri Energi Dan Sumber Daya Mineral Republik Indonesia Nomor 28 Tahun 2009 diganti ketentuan yang mana")

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)

# Testing

## Load Testing Data

In [109]:
test_df = pd.read_csv('dataset/test_tesaurus.csv')
display(test_df.head(5))

Unnamed: 0,question,answer,ner,legal_document,query,template_index,pasal_num,ayat_num
0,Apakah Peraturan Lembaga Kebijakan Pengadaan B...,Tidak berlaku lagi,"[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, ...",https://w3id.org/lex-id/data/Peraturan_LKPP_20...,\n SELECT (IF(count(distinct ?LegalDocument) ...,T17,,
1,Peraturan Menteri Kehutanan Republik Indonesia...,45,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]",https://w3id.org/lex-id/data/Permen_Hut_2010_45,\n SELECT (count(*) as ?answer)\n WHERE {\n ...,T10,,
2,Peraturan Menteri Perdagangan Republik Indones...,30,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]",https://w3id.org/lex-id/data/Permen_Dag_2017_84,\n SELECT (count(*) as ?answer)\n WHERE {\n ...,T10,,
3,Sudahkah Peraturan Menteri Keuangan Republik I...,"Ya, sudah diamendemen","[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]",https://w3id.org/lex-id/data/Permen_Keu_2012_190,\n SELECT (IF(count(distinct ?LegalDocument) ...,T16,,
4,Pejabat yang meratifikasi keabsahan hukum dari...,Budi Karya Sumadi,"[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",https://w3id.org/lex-id/data/Permen_Hub_2017_119,\n SELECT DISTINCT ?answer\n WHERE {\n ...,T1,,


In [None]:
qa_dataset_test = Lexidqa2Dataset(test_df, tokenizer_class, tokenizer_ner)

In [None]:
def collate_fn_class(data):
    """
       data: is a list of tuples with (example, label, length)
             where 'example' is a tensor of arbitrary shape
             and label/length are scalars
    """

    _, input_ids_class, attention_mask_class, input_ids_ner, attention_mask_ner, _, _, _, indexes = zip(*data)
    features_class = pad_sequence(input_ids_class, batch_first=True)
    masks_class = pad_sequence(attention_mask_class, batch_first=True)
    features_ner = pad_sequence(input_ids_ner, batch_first=True)
    masks_ner = pad_sequence(attention_mask_ner, batch_first=True)
    indexes_holder = torch.stack(indexes)
    return features_class, masks_class, features_ner, masks_ner, indexes_holder

In [110]:
BATCH_SIZE = 64
torch.manual_seed(12345)
test_instances = DataLoader(qa_dataset_test, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn_class)

## Test

### Remote Machine - To run BERT

In [None]:
torch.manual_seed(12345)

tr_loss = 0

bert_class.to(DEVICE)
bert_class.eval()
bert_ner.to(DEVICE)
bert_ner.eval()

label2id = {"T1": 0, "T2": 1, "T3": 2, "T4": 3,
            "T5": 4, "T6": 5, "T7": 6, "T8": 7, "T9": 8,
            "T10": 9, "T11": 10, "T12": 11, "T13": 12,
            "T14": 13, "T15": 14, "T16": 15, "T17": 16, "T18": 17}
id2label = {0: "T1", 1: "T2", 2: "T3", 3: "T4",
                4: "T5", 5: "T6", 6: "T7", 7: "T8", 8: "T9",
                9: "T10", 10: "T11", 11: "T12", 12: "T13",
                13: "T14", 14: "T15", 15: "T16", 16: "T17", 17: "T18"}

nb_classes = 18

true_pred = [0 for i in range(nb_classes)]
false_pred = [0 for i in range(nb_classes)]

remote_recap = []

true_recaps = []
false_recaps = []

conf_matrix = torch.zeros(nb_classes, nb_classes)
# Untuk setiap data di training data
for data in test_instances:

  with torch.no_grad():
    # Mendapatkan output dari model
    input_class, mask_class, input_ner, mask_ner, indexes = data
    input_class = input_class.to(DEVICE)
    input_ner = input_ner.to(DEVICE)

    pred_class = bert_class(input_class)
    _, template_ids = torch.max(pred_class, 1)

    logits_ner = bert_ner(input_ner).logits
    pred_ner = torch.argmax(logits_ner, dim=2)

    for t_id, p, i, index in zip(template_ids.to('cpu'), pred_ner.to('cpu'), input_ner.to('cpu'), indexes):
      # entity, ner = extract_entity(p, tokenizer_ner.convert_ids_to_tokens(i))
      # print(entity)
      # answer_pred = ''
      # if len(entity) > 0:
      #   query = predict_query(id2label[t_id.item()], entity)
      #   # print(query)
      #   answer_pred = query_graph(query)
      # else:
      #   answer_pred = ''
      # if len(answer_pred) > 0:
      #   answer_pred = answer_pred[0]['solution']
      # else:
      #   answer_pred = ''

      remote_recap.append((index.item(), t_id.item(), p.tolist(), tokenizer_ner.convert_ids_to_tokens(i)))

      # gold = qa_dataset_test[index]
      # question = gold[0]
      # answer_gold = gold[3]
      # template_label_gold = gold[4]

      # if answer_pred == answer_gold:
      #   true_pred[label2id[template_label_gold]] += 1
      #   true_recaps.append((question, template_label_gold, id2label[t_id.item()], entity))
      # else:
      #   false_pred[label2id[template_label_gold]] += 1
      #   false_recaps.append((question, template_label_gold, id2label[t_id.item()], entity))


# acc = [true_pred[i] / (true_pred[i] + false_pred[i]) for i in range(nb_classes)]

# display_labels = ['T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9', 'T10', 'T11', 'T12', 'T13', 'T14', 'T15', 'T16', 'T17', 'T18']

remote_recap_df = pd.DataFrame(data=remote_recap, columns=['index', 'template_id', 'ner', 'tokens'])
remote_recap_df.to_csv('dataset/remote_recap_app.csv')

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


### Local Machine - To run SPARQL

In [111]:
remote_recap_df = pd.read_csv('dataset/remote_recap_app.csv')
remote_recap_df

Unnamed: 0.1,Unnamed: 0,index,template_id,ner,tokens
0,0,0,16,"[38, 38, 6, 25, 25, 25, 25, 25, 25, 25, 25, 25...","['[CLS]', 'apakah', 'peraturan', 'lembaga', 'k..."
1,1,1,9,"[38, 6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38...","['[CLS]', 'peraturan', 'menteri', 'kehutanan',..."
2,2,2,9,"[38, 6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38...","['[CLS]', 'peraturan', 'menteri', 'perdagangan..."
3,3,3,15,"[38, 38, 38, 6, 25, 25, 25, 25, 25, 25, 25, 25...","['[CLS]', 'sudah', '##kah', 'peraturan', 'ment..."
4,4,4,0,"[38, 38, 38, 38, 38, 38, 38, 6, 25, 25, 25, 25...","['[CLS]', 'pejabat', 'yang', 'meratifikasi', '..."
...,...,...,...,...,...
8313,8313,8313,14,"[38, 6, 25, 25, 25, 25, 25, 25, 25, 25, 38, 38...","['[CLS]', 'peraturan', 'menteri', 'keuangan', ..."
8314,8314,8314,2,"[38, 38, 6, 25, 25, 25, 25, 25, 25, 25, 25, 38...","['[CLS]', 'kapan', 'peraturan', 'menteri', 'ke..."
8315,8315,8315,15,"[38, 38, 38, 6, 25, 25, 25, 25, 25, 25, 25, 25...","['[CLS]', 'sudah', '##kah', 'peraturan', 'ment..."
8316,8316,8316,6,"[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 6, 25...","['[CLS]', 'sebutkan', 'ketentuan', 'hukum', 'y..."


In [112]:
label2id = {"T1": 0, "T2": 1, "T3": 2, "T4": 3,
            "T5": 4, "T6": 5, "T7": 6, "T8": 7, "T9": 8,
            "T10": 9, "T11": 10, "T12": 11, "T13": 12,
            "T14": 13, "T15": 14, "T16": 15, "T17": 16, "T18": 17}
id2label = {0: "T1", 1: "T2", 2: "T3", 3: "T4",
                4: "T5", 5: "T6", 6: "T7", 7: "T8", 8: "T9",
                9: "T10", 10: "T11", 11: "T12", 12: "T13",
                13: "T14", 14: "T15", 15: "T16", 16: "T17", 17: "T18"}

nb_classes = 18

true_pred = [0 for i in range(nb_classes)]
false_pred = [0 for i in range(nb_classes)]

true_recaps = []
false_recaps = []

# Note: ast.asliteratl is useful to change list in the form of string (from the df) to actual list
# Note: It cannot change int (like t_id and index) to int. It only changes string, maybe.
for t_id, p, i, index in zip(remote_recap_df['template_id'], remote_recap_df['ner'], remote_recap_df['tokens'], remote_recap_df['index']):
  entity, ner = extract_entity(ast.literal_eval(p), ast.literal_eval(i))
  answer_pred = ''
  query = ''
  if len(entity) > 0:
    query = predict_query(id2label[t_id], entity)
    answer_pred = query_graph(query)
  else:
    answer_pred = ''
  if len(answer_pred) > 0:
    answer_pred = answer_pred[0]['solution']
  else:
    answer_pred = ''

  gold = qa_dataset_test[index]
  question = gold[0]
  answer_gold = gold[3]
  template_label_gold = gold[4]

  if answer_pred == answer_gold:
    true_pred[label2id[template_label_gold]] += 1
    true_recaps.append((question, template_label_gold, id2label[t_id], entity, query, answer_pred, answer_gold))
  else:
    false_pred[label2id[template_label_gold]] += 1
    false_recaps.append((question, template_label_gold, id2label[t_id], entity, query, answer_pred, answer_gold))

# acc is amount data, acc value
acc = [((true_pred[i] + false_pred[i]), true_pred[i] / (true_pred[i] + false_pred[i])) for i in range(nb_classes)]

# Q is for Ninggar's and Handi's way of collecting Template by its bigger term
# acc_q is amount data, acc_q value
acc_q = []
upper_fraction = 0
lower_fraction = 0

# Q1: T1, T2, T3, T4
q1 = [0, 1, 2, 3]
for i in q1:
  upper_fraction += true_pred[i]
  lower_fraction += true_pred[i] + false_pred[i]
acc_q.append((lower_fraction, upper_fraction / lower_fraction))

# Q2: T5, T6, T7, T8
q2 = [4, 5, 6, 7]
for i in q2:
  upper_fraction += true_pred[i]
  lower_fraction += true_pred[i] + false_pred[i]
acc_q.append((lower_fraction, upper_fraction / lower_fraction))

# Q3: T9, T10
q3 = [8, 9]
for i in q3:
  upper_fraction += true_pred[i]
  lower_fraction += true_pred[i] + false_pred[i]
acc_q.append((lower_fraction, upper_fraction / lower_fraction))

# Q4: T11, T12
q4 = [10, 11]
for i in q4:
  upper_fraction += true_pred[i]
  lower_fraction += true_pred[i] + false_pred[i]
acc_q.append((lower_fraction, upper_fraction / lower_fraction))

# Q5: T13, T14, T15, T16, T17, T18
q5 = [12, 13, 14, 15, 16, 17]
for i in q5:
  upper_fraction += true_pred[i]
  lower_fraction += true_pred[i] + false_pred[i]
acc_q.append((lower_fraction, upper_fraction / lower_fraction))

display_labels = ['T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9', 'T10', 'T11', 'T12', 'T13', 'T14', 'T15', 'T16', 'T17', 'T18']

disp_df = pd.DataFrame(data=acc,
                        index=display_labels, columns=['Amount of Data', 'Accuracy'])
display(disp_df)

display_labels_q = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']

disp_df_q = pd.DataFrame(data=acc_q,
                        index=display_labels_q, columns=['Amount of Data', 'Accuracy'])
display(disp_df_q)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error
error


Unnamed: 0,Amount of Data,Accuracy
T1,500,0.952
T2,500,0.988
T3,500,0.994
T4,500,0.964
T5,500,0.99
T6,500,0.99
T7,500,0.992
T8,500,0.992
T9,500,0.956
T10,500,1.0


Unnamed: 0,Amount of Data,Accuracy
Q1,2000,0.9745
Q2,4000,0.98275
Q3,5000,0.9818
Q4,6000,0.946833
Q5,8318,0.949988


In [113]:
df_true_recaps = pd.DataFrame(true_recaps, columns=['question', 'true_label', 'pred_label', 'entity_ner', 'query', 'predicted_answer', 'gold_answer'])

save_path = 'dataset/'
if not os.path.exists(save_path):
  os.makedirs(save_path)
df_true_recaps.to_csv('dataset/lexid_qa2_app_true_recaps.csv', index=False)

In [114]:
df_false_recaps = pd.DataFrame(false_recaps, columns=['question', 'true_label', 'pred_label', 'entity_ner', 'query', 'predicted_answer', 'gold_answer'])

save_path = 'dataset/'
if not os.path.exists(save_path):
  os.makedirs(save_path)
df_false_recaps.to_csv('dataset/lexid_qa2_app_false_recaps.csv', index=False)