In [1]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
import torch
from instruct_pipeline import InstructionTextGenerationPipeline
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM, AutoModel, AutoConfig
from ctransformers import AutoModelForCausalLM
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, SentenceTransformersTokenTextSplitter
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI, CTransformers
import re
import os

In [None]:
torch.cuda.is_available()

## Import Data

### Load PDF

In [4]:
loader = PyPDFLoader("robinson_crusoe_rulebook.pdf")
data_raw = loader.load()
print (f'{len(data_raw)} document(s) in your data')

40 document(s) in your data


### Format Chunks

In [5]:
data = data_raw

for text in data:
    text.page_content = text.page_content.replace('•', ' ')
    text.page_content = re.compile(r'\,\s{2,10}\n').sub(', ',text.page_content)
    text.page_content = re.compile(r'\:\s{2,10}\n').sub(': ',text.page_content)
    text.page_content = re.compile(r'\.\n').sub('. ',text.page_content)
    text.page_content = re.compile(r'\,\n').sub(', ',text.page_content)
    text.page_content = re.compile(r'\:\n').sub(': ',text.page_content)
    text.page_content = text.page_content.replace(' . ',' ')
    text.page_content = re.compile(r'[0-9][0-9][0-9]+').sub('',text.page_content)
    
text_splitter = RecursiveCharacterTextSplitter(separators = ["\n\n", "\n","."],chunk_size=750, chunk_overlap=10)

texts = text_splitter.split_documents(data)

for text in texts:
    text.page_content = re.compile(r'\n').sub('',text.page_content)
    text.page_content = re.compile(r'\s+').sub(' ',text.page_content)

In [6]:
print (f'{len(data)} document(s) in your data')

40 document(s) in your data


### Chunk Text

In [6]:
text_splitter = RecursiveCharacterTextSplitter(separators = ["\n\n", "\n","."],chunk_size=750, chunk_overlap=10)
texts = text_splitter.split_documents(data)
for text in texts:
    text.page_content = re.compile(r'\n').sub('',text.page_content)
    text.page_content = re.compile(r'\s+').sub(' ',text.page_content)

In [8]:
texts[30].page_content

'8I. EVENT PHASE You must work together to reach the scenario goal within the set number of rounds. Often, this requires building special Items or exploring specific locations (see the Scenario sheets and the appendix on pages 28-32). You must cooperate to overcome the obstacles; you only win as a group. If you do not manage to reach the scenario goal within the time limit, or if one of the characters dies, all players lose the game. A Game round comprises 6 Phases: 1. EVENT PHASEAn Event card is revealed and its Event Effect applied. The card is then placed in the right-hand Threat space, moving the other cards, which can trigger Threat Effects. Sometimes multiple cards may need to be drawn and resolved. 2. MORALE PHASE'

In [7]:
texts[60]

Document(page_content='If this token is on an Action space, its effect triggers during the die roll of the next Action of this type (see page 16). If a success is rolled on the Success die, it must be rerolled, and then the token is discarded. The new result of the die is the one that is applied. If the die does not initially show a success, the token remains in place and applies to the next Action of this type (even in the same round). GAME FLOW', metadata={'source': 'robinson_crusoe_rulebook.pdf', 'page': 11})

### Create Embeddings

In [None]:
embedding_function = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")

### Load Embedding Vectors into Chroma DB

In [8]:
try:
    db._collection.name
except:
    print('Vector Store is Empty')
else:
    db._client.delete_collection(db._collection.name)
finally:
    db = Chroma.from_documents(texts, embedding_function)

Vector Store is Empty


#### Test Similarity Search

In [12]:
query = "How is the Dog played?"
docs = db.similarity_search(query)
docs[3].page_content

'The Hunt Action provides you with food and possibly fur, but the resolving character is at risk of suffering wounds if the Weapon level is not high enough. This Action can be resolved once per Beast card in the Hunting deck, as each Beast can only be hunted once. Once the Hunting deck is empty, this Action can no longer be taken, but as long as enough cards are available, the Action can be resolved multiple times per round. Each Hunt Action requires exactly 2 Action pawns. For each Hunt Action, the Action pawns are placed in the Action space stacked on top of each other, with the character owning the topmost Action pawn resolving the Action. When the Action is resolved, the topmost Beast card is drawn from the Hunting'

In [None]:
local_llm_2 = CTransformers(model='TheBloke/Llama-2-13B-chat-GGML', 
                            model_file='llama-2-13b-chat.ggmlv3.q4_K_M.bin', 
                            config={"temperature" : .1})

In [9]:
prompt_template = """ Use the following pieces of context to answer the question at the end.

{context}

Question: {question}
Answer: """

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [10]:
query = "How is the Dog played?"
docs = db.similarity_search(query)

In [10]:
print(docs[0:3])

[Document(page_content='The Dog is represented by his card and 1 purple Action pawn. In a solo game, place his card next to the board and the Action pawn on it. He is used like a neutral Action pawn in all respects. He can be used every round for either the Hunt or Explore actions only. Dog’s Action pawn need not be assigned to any Action if the players do not wish it. He cannot die. VARIANTSEASIER GAMEIf players think a scenario is too hard for them, they can make it easier by: Adding the Dog. This is especially recommended for 3 players. Adding Friday Drawing more Starting Equipment Using fewer Event cards with the book symbol and more Event cards with the adventure symbol when creating the deck (step 15 of the setup). For example, using 4', metadata={'page': 26, 'source': 'robinson_crusoe_rulebook.pdf'}), Document(page_content='2 PLAYERSIf players randomly draw characters, it is recommended they should select from the carpenter, cook and explorer. The special abilities of the soldie

### Stuff Method

In [11]:
chain = load_qa_chain(local_llm_2, chain_type="stuff", prompt=PROMPT)

In [None]:
stuff_result = chain.run(input_documents=docs{0:1}, question=query)

In [83]:
print(stuff_result)

 The Dog is played like a neutral Action pawn that can be used every round for either the Hunt or Explore actions only. It cannot die and is placed next to the board with one purple Action pawn on it.


### Map-Reduce Method

In [73]:
question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. 
Return any relevant text.
{context}
Question: {question}
Relevant text, if any:"""
QUESTION_PROMPT = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question"]
)

combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer.

QUESTION: {question}
=========
{summaries}
=========
Answer:"""
COMBINE_PROMPT = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)


In [74]:
chain = load_qa_chain(local_llm_2, chain_type="map_reduce", question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT)

In [75]:
map_reduce_result = chain.run(input_documents=docs, question=query)

In [76]:
print(map_reduce_result)

 In a solo game, the Dog can be used as a neutral Action pawn like any other, and it is recommended to use it with caution due to the risk of suffering wounds if the Weapon level is not high enough when using the Hunt Action.


In [27]:
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [67]:
llm = OpenAI(temperature=.1, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)

In [68]:
GPT_result = chain.run(input_documents=docs, question=query)

In [69]:
print(GPT_result)

 The Dog is used like a neutral Action pawn in all respects. He can be used every round for either the Hunt or Explore actions only. Dog’s Action pawn need not be assigned to any Action if the players do not wish it. He cannot die.
