In [8]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
import torch
from instruct_pipeline import InstructionTextGenerationPipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter,  CharacterTextSplitter, SentenceTransformersTokenTextSplitter
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import os

In [2]:
# Report whether PyTorch can see a CUDA device in this environment.
torch.cuda.is_available()

False

## Import Data

### Load PDF

In [9]:
# Load the rulebook PDF into a list of LangChain documents.
loader = PyPDFLoader(file_path="robinson_crusoe_rulebook.pdf")
data = loader.load()

In [10]:
# Note: PyPDFLoader has already split the PDF by page, so no manual
# per-page splitting is needed before counting the documents.
page_count = len(data)
print(f"{page_count} document(s) in your data")

40 document(s) in your data


### Chunk Text

In [11]:
# Split the loaded pages into chunks of at most 1000 units (as measured by the
# splitter's length function) with no overlap between neighbouring chunks.
#
# Token-based alternative that was tried and left disabled:
#   CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap=20)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)

texts = text_splitter.split_documents(data)

In [12]:
# Number of chunks produced by the text splitter.
chunk_count = len(texts)
print(f"{chunk_count} documents")

176 documents


### Create Embeddings

In [13]:
# Open-source sentence-transformers model, tried here as a replacement for
# the OpenAI embeddings.
embedding_model_name = "all-MiniLM-L6-v2"
embedding_function = SentenceTransformerEmbeddings(model_name=embedding_model_name)

### Load Embedding Vectors into Chroma DB

In [14]:
# Embed every chunk with `embedding_function` and index the result in Chroma.
db = Chroma.from_documents(
    documents=texts,
    embedding=embedding_function,
)

#### Test Similarity Search

In [9]:
# Sanity-check retrieval with a sample question.
query = "What does the dog do?"
docs = db.similarity_search(query=query)

In [10]:
# Show the text of the highest-ranked chunk (first search result).
top_chunk = docs[0]
print(top_chunk.page_content)

The Dog is represented by his card and 1 purple Action pawn. In a solo game, place his card next to the board and the Action pawn on it. He is used like a neutral Action pawn in all respects. He can be used every round for either the Hunt or Explore actions only. Dog’s Action pawn need not be assigned to any Action if the players do not wish it. He cannot die.
VARIANTS
EASIER GAME
If players think a scenario is too hard for them, they can make it easier by:


In [14]:
# Wrap the already-loaded model and tokenizer in a Hugging Face
# "text2text-generation" pipeline, then expose it to LangChain as an LLM.
#
# NOTE(review): `model` and `tokenizer` are not defined in any cell visible in
# this notebook; they must be created by an earlier cell before this one
# runs -- TODO restore/confirm that cell so Restart & Run All works.
#
# The result is bound to `text2text_pipeline` rather than `pipeline`, so the
# imported `transformers.pipeline` factory is not overwritten. With the old
# `pipeline = pipeline(...)` form, re-running this cell failed because the
# name `pipeline` then referred to the pipeline object, not the factory.
text2text_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=128,  # generation length limit forwarded to the pipeline
)

local_llm = HuggingFacePipeline(pipeline=text2text_pipeline)

In [18]:
# Prompt for the QA chain: `{context}` is filled with the retrieved chunks and
# `{question}` with the user's query. The model is told to admit when it does
# not know instead of inventing an answer.
prompt_template = """ Use the following pieces of context and your knowledge of Robinson Crusoe: Adventures on the Cursed Island to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer the question: """

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [19]:
# Retrieve the chunks most similar to the question that will be put to the LLM.
query = "How does combat work?"
docs = db.similarity_search(query=query)

In [20]:
# "stuff" QA chain over the local Hugging Face LLM, using the custom PROMPT.
chain = load_qa_chain(
    llm=local_llm,
    chain_type="stuff",
    prompt=PROMPT,
)

In [22]:
# Answer the question from the retrieved chunks using the local model.
local_answer = chain.run(input_documents=docs, question=query)
print(local_answer)

The combat procedure is identical to combat caused by other situations. Once combat commences, the Action is complete. enemy’s strength.


In [22]:
# Read the OpenAI key from the environment (None when the variable is unset).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [23]:
# Same "stuff" QA chain and prompt, now backed by OpenAI for comparison.
# Note: this rebinds `chain`, replacing the chain built on the local model.
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0.07,
)
chain = load_qa_chain(llm=llm, chain_type="stuff", prompt=PROMPT)

In [25]:
# Answer the same question from the same retrieved chunks using OpenAI.
openai_answer = chain.run(input_documents=docs, question=query)
print(openai_answer)


Combat is resolved by following the steps printed left to right on the card. The character owning the topmost Action pawn resolves the Action, and then the topmost Beast card is drawn from the Hunting deck and combat commences. During combat, the group can fight a beast to collect food and/or fur, and the resolving character will take wounds if the Weapon level is not high enough. The Threat Effect is prevented by resolving the card.
