# LangChain Playground

## Initialize the model and use a prompt template

In [1]:
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import GPT4All

import config

In [3]:
# The `{action}` placeholder is the template's only input variable.
template = "Tell me the DnD rule for the follow: {action}"
prompt = PromptTemplate(
    input_variables=['action'],
    template=template,
)

In [4]:
# Attach a stdout streaming handler to the local GPT4All model.
# `config.gpt_4all_groovy` is presumably the path to the model file -- verify in config.py.
callbacks = [StreamingStdOutCallbackHandler()]
llm = GPT4All(
    model=config.gpt_4all_groovy,
    backend='gptj',
    callbacks=callbacks,
)

Found model file.


In [5]:
# Chain that pairs the GPT4All model with the DnD rule prompt.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [6]:
# Run the chain for one topic; the bare expression displays the returned text.
llm_chain.run("Spellcasting")

, Magic Weapon and Divine Intervention.
, Magic Weapon and Divine Intervention.

', Magic Weapon and Divine Intervention.'

## Using indexes for information retrieval (IR)

### Convert text pages to embeddings and store in vectorstore

In [7]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

In [8]:
# Embedding model shared by index construction and index loading below.
embeddings = HuggingFaceInstructEmbeddings(query_instruction="Represent the query for retrieval: ")

load INSTRUCTOR_Transformer
max_seq_length  512


In [22]:
from langchain.document_loaders import PyPDFLoader
import os

# Only parse the PDF when no saved FAISS index exists yet; the indexing cell
# applies the same check before it reads `documents`.
if not os.path.exists('phb_faiss_index'):
    # The PDF location can be overridden with the PHB_PDF_PATH environment
    # variable so the notebook is not tied to one machine; the default is the
    # location that was previously hard-coded here.
    phb_pdf_path = os.environ.get(
        'PHB_PDF_PATH',
        "C:\\Users\\phili\\Documents\\DND_5E_PHB.pdf",
    )
    loader = PyPDFLoader(phb_pdf_path)
    documents = loader.load()

In [23]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Reuse the saved index when it is on disk; otherwise build it from the
# loaded PDF pages and save it for the next run.
if os.path.exists('phb_faiss_index'):
    faiss_index = FAISS.load_local('phb_faiss_index', embeddings)
else:
    print('Re-Indexing Player Handbook')

    # Split the loaded documents into overlapping chunks before embedding them.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=32,
        chunk_size=500,
    )
    pages = text_splitter.split_documents(documents)

    faiss_index = FAISS.from_documents(pages, embeddings)
    faiss_index.save_local('phb_faiss_index')

Re-Indexing Player Handbook


## Combine Language model and Index Store to get a Document Question Answering Chain

In [26]:
from langchain.chains.question_answering import load_qa_chain

# Question-answering chain over supplied documents, using the 'stuff' chain type.
llm_qa_chain = load_qa_chain(chain_type='stuff', llm=llm)

def get_answer_for_question(question: str):
    """Answer `question` using passages retrieved from the FAISS index.

    Runs a similarity search on `faiss_index` for the question, then passes
    the retrieved documents together with the question to `llm_qa_chain`.

    Args:
        question: The natural-language question to answer.

    Returns:
        Whatever `llm_qa_chain.run` returns for these inputs.
    """
    chain_inputs = {
        'input_documents': faiss_index.similarity_search(question),
        'question': question,
    }
    return llm_qa_chain.run(chain_inputs)

In [27]:
# Example query against the indexed handbook; the bare expression displays the answer.
get_answer_for_question("What is a Rogue in Dungeons and Dragons?")

 A rogue (short for "rogue hunter") character has skills not only focused on combat but also those related to stealth, deception, charm or trickery like pickpocketing, burglaring etc. The archetype reflects the preferred techniques of individual players such as lockpicking, climbing walls and traps among other things while having a broad expertise in many areas which is rare for few characters that can match it up with
 A rogue (short for "rogue hunter") character has skills not only focused on combat but also those related to stealth, deception, charm or trickery like pickpocketing, burglaring etc. The archetype reflects the preferred techniques of individual players such as lockpicking, climbing walls and traps among other things while having a broad expertise in many areas which is rare for few characters that can match it up with

' A rogue (short for "rogue hunter") character has skills not only focused on combat but also those related to stealth, deception, charm or trickery like pickpocketing, burglaring etc. The archetype reflects the preferred techniques of individual players such as lockpicking, climbing walls and traps among other things while having a broad expertise in many areas which is rare for few characters that can match it up with'

# (Semi-)open models based on GPT-NeoX

### OpenAssistant 12B parameter Pythia (LAION)
Not sure whether this can run on a local machine, since it requires ~16 GB of VRAM for
inference in 8-bit mode.

In [28]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Loading is left disabled: the recorded run of this cell was interrupted
# (KeyboardInterrupt) while downloading the first of three weight shards (10.0G).
#tokenizer = AutoTokenizer.from_pretrained("OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5")
#model = AutoModelForCausalLM.from_pretrained("OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5")

Downloading (…)okenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/303 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/47.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/10.0G [00:00<?, ?B/s]

KeyboardInterrupt: 

### Stable LM (Stability AI)

In [None]:
import os
# Export the Hugging Face Hub token from the local `config` module to the
# process environment instead of hard-coding it in the notebook.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = config.hugging_face_hub_token

In [None]:
from transformers import BitsAndBytesConfig
from langchain import HuggingFacePipeline
import torch

# 8-bit quantization settings for loading the model weights.
quantization_config = BitsAndBytesConfig(llm_int8_threshold=6.0, load_in_8bit=True)

repo_id = "stabilityai/stablelm-tuned-alpha-3b"

# Model-loading options are passed inside `model_kwargs`, which
# `from_model_id` forwards to the underlying `from_pretrained` call. Passed as
# top-level keyword arguments they do not reach the model loader.
# NOTE(review): confirm against the installed langchain version.
# 8-bit loading is requested once, through `quantization_config` (which sets
# `load_in_8bit=True`), rather than repeating the flag alongside it.
llm = HuggingFacePipeline.from_model_id(
    model_id=repo_id,
    task="text-generation",
    model_kwargs={
        "temperature": 0,
        "max_length": 64,
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
        "quantization_config": quantization_config,
        "device_map": "auto",
    },
)

In [None]:
# Quick smoke test of the StableLM pipeline with a one-variable prompt.
prompt = PromptTemplate(
    input_variables=['food'],
    template="Country of origin of this food: {food}",
)
llm_chain = LLMChain(llm=llm, prompt=prompt)
print(llm_chain.run("Schnitzel"))