In [1]:

import os

from dotenv import load_dotenv
import torch
from langchain.llms.base import LLM
from langchain.embeddings import HuggingFaceEmbeddings 
from llama_index import LangchainEmbedding, PromptHelper
from llama_index import download_loader, GPTSimpleVectorIndex
from llama_index import LLMPredictor, ServiceContext
from transformers import pipeline
from typing import Optional, List, Mapping, Any
# Read key=value pairs from a local .env file (if one exists) into the process environment.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Notebook-wide settings: the Hugging Face model id, the demo question, and the
# prompt skeleton containing a single {question} placeholder.
model_id = "chainyo/alpaca-lora-7b"
question = "What is the tallest Pokemon?"
prompt_template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)



In [3]:
# Local scratch files that the download cell writes and later cells read.
instances = '/tmp/instances.nq'  # decompressed instance dump
ontology = '/tmp/ontology.nt'    # ontology file

In [4]:
!curl https://www.pokemonkg.org/ontology/ontology.nt -o $ontology
!curl https://www.pokemonkg.org/download/dump/poke-a.nq.gz -o - | gzip -dc > $instances

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  108k  100  108k    0     0   383k      0 --:--:-- --:--:-- --:--:--  384k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1599k  100 1599k    0     0  1442k      0  0:00:01  0:00:01 --:--:-- 1443k


In [5]:
# Sanity check: show the first lines of the decompressed instance dump.
!head $instances

<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/dcat#Dataset> <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Entity> <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://purl.org/dc/terms/accrualPeriodicity> <http://purl.org/linked-data/sdmx/2009/code#freq-A> <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan> <http://purl.org/dc/terms/description> "This dataset provides meta information about early version of official Pokémon artwork in Japan of Pokémon in the national Pokédex."@en <https://pokemonkg.org/dataset/artwork/sugimori-early-japan> .
<https://pokemonkg.org/dataset/artwork/sugimori-early-japan

In [6]:

class CustomLLM(LLM):
    """LangChain ``LLM`` wrapper around a local Hugging Face text-generation pipeline.

    The pipeline is created once, when this class body executes, from the
    notebook-level ``model_id`` and with ``torch.bfloat16`` passed as the
    model's ``torch_dtype``.
    """

    model_name = model_id
    pipeline = pipeline("text-generation", model=model_name, model_kwargs={"torch_dtype":torch.bfloat16})

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text handed to the pipeline.
            stop: Optional stop sequences. When given, the completion is cut
                just before the earliest occurrence of any of them.

        Returns:
            The generated text with the leading ``len(prompt)`` characters
            removed, truncated at a stop sequence if one was found.
        """
        # NOTE: num_output is a notebook-level global assigned in a later cell;
        # it must be defined before this method is first called.
        prompt_length = len(prompt)
        response = self.pipeline(prompt, max_new_tokens=num_output)[0]["generated_text"]

        # only return newly generated tokens
        completion = response[prompt_length:]

        # Honour the caller's stop sequences instead of silently ignoring them.
        if stop:
            cut_points = [
                pos
                for pos in (completion.find(seq) for seq in stop if seq)
                if pos != -1
            ]
            if cut_points:
                completion = completion[: min(cut_points)]

        return completion

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this LLM (here: the model name)."""
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        """Return the type tag for this LLM implementation."""
        return "custom"

Loading checkpoint shards: 100%|██████████| 39/39 [00:37<00:00,  1.03it/s]


In [7]:


# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 256
# set maximum chunk overlap
max_chunk_overlap = 20

prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

# define our LLM and a local Hugging Face embedding model
llm_predictor = LLMPredictor(llm=CustomLLM())
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())

# embed_model must be passed explicitly: it was previously constructed but never
# handed to the service context, so the index would not have used it
# (NOTE(review): the fallback is presumably the library's default remote
# embedding model -- confirm for the installed llama_index version).
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    prompt_helper=prompt_helper,
)


INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
Downloading (…)a8e1d/.gitattributes: 100%|██████████| 1.18k/1.18k [00:00<00:00, 432kB/s]
Downloading (…)_Pooling/config.json: 100%|██████████| 190/190 [00:00<00:00, 65.8kB/s]
Downloading (…)b20bca8e1d/README.md: 100%|██████████| 10.6k/10.6k [00:00<00:00, 3.95MB/s]
Downloading (…)0bca8e1d/config.json: 100%|██████████| 571/571 [00:00<00:00, 179kB/s]
Downloading (…)ce_transformers.json: 100%|██████████| 116/116 [00:00<00:00, 46.4kB/s]
Downloading (…)e1d/data_config.json: 100%|██████████| 39.3k/39.3k [00:00<00:00, 449kB/s]
Downloading pytorch_model.bin:  81%|████████▏ | 357M/438M [00:02<00:00, 152MB/s] 

OSError: [Errno 28] No space left on device

Downloading pytorch_model.bin:  84%|████████▍ | 367M/438M [00:14<00:00, 152MB/s]

In [None]:

# Load your data: fetch the RDF loader, parse the instance dump into documents,
# then build a vector index over them using the service context configured earlier.
RDFReader = download_loader("RDFReader")
document = RDFReader().load_data(file=instances)
index = GPTSimpleVectorIndex.from_documents(
    document,
    service_context=service_context,
)


In [42]:

# Query the index with the question configured at the top of the notebook
# (the previous literal "What is the" was a truncated prefix of it) and print the answer.
response = index.query(question)
print(response)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens


In [4]:

import re
from langchain import PromptTemplate

# Collect every {name} placeholder found in the template text and declare
# those names as the prompt's input variables.
variables = re.compile(r"{(\w+)}").findall(prompt_template)
prompt = PromptTemplate(input_variables=variables, template=prompt_template)


In [10]:
from langchain import HuggingFacePipeline, LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

import dotenv
# Load the .env file again (the first cell already calls load_dotenv()); the return value is what the cell displays.
dotenv.load_dotenv()

True

In [11]:

# Load the tokenizer published under the same model id as the model itself.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [12]:

# Load the causal-LM checkpoint for model_id.
# NOTE(review): no torch_dtype is given here, whereas CustomLLM's pipeline requests
# torch.bfloat16 for the same model id; this is also a second load of that checkpoint.
# Consider matching the dtype or reusing one instance to limit memory use -- confirm intent.
model = AutoModelForCausalLM.from_pretrained(model_id)

Loading checkpoint shards: 100%|██████████| 39/39 [00:38<00:00,  1.00it/s]


In [13]:

# Text-generation pipeline over the already-loaded model and tokenizer,
# limited to 128 newly generated tokens per call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
)

In [14]:

# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can be used as the chain's llm.
llm = HuggingFacePipeline(pipeline=pipe)

In [15]:

# Combine the prompt template and the LLM into a single runnable chain.
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [17]:

# Run the chain on an ad-hoc question and print the generated answer
# (presumably the string fills the template's single {question} placeholder).
print(llm_chain.run("Give me 5 water pokemons?"))

 First, you need to define what a "water pokemon" is. Is it a pokemon that is part of the Water type? Is it a pokemon that has a water-related move? Is it a pokemon that has a water-related ability?

Once you have defined what a "water pokemon" is, you can then ask the question: "Give me 5 water pokemons?"

The answer to this question is:

1. Magikarp
2. Gyarados
3. Poliwag
4. Poliwrath
5. Squirt
