In [163]:

import os
from importlib import reload
from pathlib import Path
from typing import Optional, List, Mapping, Any

from dotenv import load_dotenv
import torch
from langchain.embeddings import HuggingFaceEmbeddings 
from llama_index import LangchainEmbedding, PromptHelper
from llama_index import GPTSimpleVectorIndex
from llama_index import LLMPredictor, ServiceContext
from rdflib import Graph

import aikg.utils.llm as akllm
import aikg.utils.rdf as akrdf
# Re-import the local helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
for _module in (akrdf, akllm):
    reload(_module)

# load_dotenv() stays the final expression, so its return value
# (True in the recorded run) is what the cell displays.
load_dotenv()

True

In [55]:

# Sample natural-language question; it is passed to index.query() in the last cell.
question = "What is the tallest Pokemon?"

# Step-by-step prompt with a `{question}` placeholder.
# NOTE(review): presumably filled in with str.format(); it is not used by any
# of the visible cells -- confirm whether it is still needed.
prompt_template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)



In [56]:
# Local download targets: the ontology is parsed later as N-Triples ('nt'),
# the instance dump as N-Quads ('nquads').
ontology = "/tmp/ontology.nt"
instances = "/tmp/instances.nq"

In [57]:
!curl https://www.pokemonkg.org/ontology/ontology.nt -o $ontology
!curl https://www.pokemonkg.org/download/dump/poke-a.nq.gz -o - | gzip -dc  > $instances

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  108k  100  108k    0     0   385k      0 --:--:-- --:--:-- --:--:--  384k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1599k  100 1599k    0     0  3685k      0 --:--:-- --:--:-- --:--:-- 3692k


In [58]:
!head $instances

<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/dcat#Dataset> <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Entity> <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://purl.org/dc/terms/accrualPeriodicity> <http://purl.org/linked-data/sdmx/2009/code#freq-A> <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://purl.org/dc/terms/description> "This dataset provides meta information about early version of official Pokémon artwork in Japan of Pokémon in the national Pokédex."@en <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan

In [126]:
reload(akrdf)
# The import cell at the top only brings in `Graph` from rdflib, so
# ConjunctiveGraph is imported here; without it this cell raises NameError
# on a fresh kernel.
from rdflib import ConjunctiveGraph

# From quads to triples (dropping named graphs context)
instance_quads = ConjunctiveGraph()
instance_quads.parse(instances, format='nquads')

# One graph per subject (to generate documents): every context of the parsed
# dump is split by akrdf.split_by_subject and the pieces are flattened into
# a single list.
instance_graphs = [
    subject_graph
    for context in instance_quads.contexts()
    for subject_graph in akrdf.split_by_subject(context)
]

# The ontology is kept as a separate graph; it is merged into each instance
# graph when the documents are built.
schema_graph = Graph().parse(ontology, format='nt')

In [173]:
reload(akllm)

# Prompt-size budget handed to the PromptHelper.
max_input_size = 2048   # maximum input size
num_output = 256        # number of output tokens
max_chunk_overlap = 20  # maximum chunk overlap
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

# Our own LLM (from the local akllm module) and a HuggingFace embedding model
# built with its default constructor arguments.
llm_predictor = LLMPredictor(llm=akllm.CustomLLM())
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())

# Bundle the three components so the index can receive them as one argument.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model, prompt_helper=prompt_helper, llm_predictor=llm_predictor
)


Loading checkpoint shards: 100%|██████████| 39/39 [03:37<00:00,  5.59s/it]


In [169]:
reload(akrdf)
import joblib

# Turn each instance graph into a document. The schema graph is merged into
# every instance graph first, to provide human readable context.
# NOTE(review): TOKENIZERS_PARALLELISM is presumably switched off to keep the
# HuggingFace tokenizers quiet under the joblib workers -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
loader = akrdf.CustomRDFReader()
# Only the first 50 instance graphs are processed.
doc_graphs = (graph | schema_graph for graph in instance_graphs[:50])
runner = joblib.Parallel(n_jobs=12)
documents = runner(joblib.delayed(loader.load_data)(graph) for graph in doc_graphs)
# Discard documents with empty text before building the vector index.
documents = list(filter(lambda doc: doc.text, documents))
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.33it/s]
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 486 tokens


In [170]:
# Show only a few documents; dumping the whole list bloats the saved notebook.
documents[:3]

[Document(text='<Hitmonlee> <in egg group> <Human-like>\n<Hitmonlee> <may have hidden ability> <unburden>\n<Hitmonlee> <has colour> <Brown>\n<Hitmonlee> <has Type> <Fighting>\n<Hitmonlee> <may have ability> <limber>\n<Hitmonlee> <may have ability> <reckless>\n<Hitmonlee> <has shape> <Humanoid>', doc_id='1874788c-fe9c-4750-917a-67d3f763a636', embedding=None, doc_hash='b094cf3176eb8f9d5bbf59ab1949cb97e0043fb98fa0c0305fe9524762e0cba3', extra_info=None),
 Document(text='<Scrafty> <in egg group> <Field>\n<Scrafty> <in egg group> <Dragon>\n<Scrafty> <may have hidden ability> <intimidate>\n<Scrafty> <has colour> <Red>\n<Scrafty> <has Type> <Dark>\n<Scrafty> <has Type> <Fighting>\n<Scrafty> <may have ability> <moxie>\n<Scrafty> <may have ability> <shed-skin>\n<Scrafty> <has shape> <Upright>', doc_id='6a8ce074-9e22-4fe1-b916-0d5d53cd76e6', embedding=None, doc_hash='a24650c61c097f7c0bd39550d3781a66d7ebcc042181c3854c953568a2c19944', extra_info=None),
 Document(text='<Spectral Thief> <has maximum 

In [136]:
# List the triples of the first per-subject graph.
# `document_graphs` is not defined by any cell of this notebook (leftover from an
# earlier kernel session), so the cell fails on a fresh kernel.
# NOTE(review): instance_graphs is assumed to be the intended list -- confirm.
list(instance_graphs[0])

[(rdflib.term.URIRef('https://pokemonkg.org/instance/pokemon/hitmonlee'),
  rdflib.term.URIRef('https://pokemonkg.org/ontology#mayHaveAbility'),
  rdflib.term.URIRef('https://pokemonkg.org/instance/ability/reckless')),
 (rdflib.term.URIRef('https://pokemonkg.org/instance/pokemon/hitmonlee'),
  rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
  rdflib.term.Literal("Hitmonlee (Japanese: サワムラー Sawamular) is a Fighting-type Pokémon introduced in Generation I.\nIt evolves from Tyrogue starting at level 20 when Tyrogue's Attack is higher than its Defense. It is one of Tyrogue's final forms, the others being Hitmonchan and Hitmontop.", lang='en')),
 (rdflib.term.URIRef('https://pokemonkg.org/instance/pokemon/hitmonlee'),
  rdflib.term.URIRef('https://pokemonkg.org/ontology#isAbleToApply'),
  rdflib.term.URIRef('https://pokemonkg.org/instance/move/feint')),
 (rdflib.term.URIRef('https://pokemonkg.org/instance/pokemon/hitmonlee'),
  rdflib.term.URIRef('https://pokemonkg.org/o

In [171]:

# Query and print response
# Ask the vector index the sample question and print the answer as plain text.
# NOTE(review): in the recorded run this cell ended with
# `NameError: name 'num_output' is not defined`. This cell never references
# num_output, so the failing lookup is presumably in code reached from
# index.query (e.g. the custom LLM in akllm) -- confirm and fix at the source.
response = index.query(question)
print(response)

Batches: 100%|██████████| 1/1 [00:00<00:00, 35.12it/s]


NameError: name 'num_output' is not defined