# Creating a Board Game Instructions CoPilot with LLMs

## Packages

In [None]:
import warnings
warnings.filterwarnings("ignore")
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
import torch
#from instruct_pipeline import InstructionTextGenerationPipeline LlamaForCausalLM, LlamaTokenizer,
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM, AutoModel, AutoConfig, AutoModelForCausalLM, LlamaTokenizer, LlamaForCausalLM, BitsAndBytesConfig
#from ctransformers import AutoModelForCausalLM
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, PyPDFDirectoryLoader, BSHTMLLoader, TextLoader, DirectoryLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, SentenceTransformersTokenTextSplitter
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import LLMChain, TransformChain, SimpleSequentialChain, SequentialChain, RetrievalQA
from langchain.llms import CTransformers
import re
import os
import gc

## Import Context Data

### Load PDF

In [None]:
# Load the rulebook PDF into a list of documents and report how many were read.
loader = PyPDFLoader("robinson_crusoe_rulebook.pdf")
data_raw = loader.load()
loaded_total = len(data_raw)
print(f'{loaded_total} document(s) in your data')

### Format Chunks

In [None]:
# Line-break repairs applied, in this order, to every raw page: a newline that
# directly follows ',', ':' or '.' (optionally after 2-10 whitespace chars for
# ',' and ':') is treated as a soft wrap and replaced by a single space.
_SOFT_BREAK_RULES = (
    (re.compile(r'\,\s{2,10}\n'), ', '),
    (re.compile(r'\:\s{2,10}\n'), ': '),
    (re.compile(r'\.\n'), '. '),
    (re.compile(r'\,\n'), ', '),
    (re.compile(r'\:\n'), ': '),
)
# Runs of three or more digits are dropped from the page text.
_LONG_DIGIT_RUN = re.compile(r'[0-9][0-9][0-9]+')
_WHITESPACE_RUN = re.compile(r'\s+')


def _clean_page_text(page_text: str) -> str:
    """Return *page_text* with bullets, soft line breaks and long digit runs removed."""
    cleaned = page_text.replace('•', ' ')
    for pattern, replacement in _SOFT_BREAK_RULES:
        cleaned = pattern.sub(replacement, cleaned)
    cleaned = cleaned.replace(' . ', ' ')
    return _LONG_DIGIT_RUN.sub('', cleaned)


def _flatten_whitespace(chunk_text: str) -> str:
    """Collapse every whitespace run (newlines included) into one space.

    Newlines are turned into spaces rather than deleted, so words that sat on
    adjacent lines are not glued together.
    """
    return _WHITESPACE_RUN.sub(' ', chunk_text)


# NOTE: `data` is the same list object as `data_raw`, so the cleaning below
# also changes the documents reachable through `data_raw`.
data = data_raw

for page in data:
    page.page_content = _clean_page_text(page.page_content)

# Split on paragraph breaks first, then line breaks, then sentence ends.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "."],
    chunk_size=400,
    chunk_overlap=10,
)

texts = text_splitter.split_documents(data)

for chunk in texts:
    chunk.page_content = _flatten_whitespace(chunk.page_content)

In [None]:
# Report how many chunks the splitter produced.
chunk_total = len(texts)
print(f'{chunk_total} document(s) in your data')

## Encode and Store Context

### Choose an Embedding Function

In [None]:
# Two sentence-transformer checkpoints, both configured for the CPU device.
embedding_function1 = SentenceTransformerEmbeddings(
    model_name='llm_models/torch/sentence_transformers/sentence-transformers_all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

embedding_function2 = SentenceTransformerEmbeddings(
    model_name="llm_models/torch/sentence_transformers/sentence-transformers_multi-qa-MiniLM-L6-cos-v1",
    model_kwargs={'device': 'cpu'},
)

### Load Embedding Vectors into FAISS

In [None]:
# Index the chunks with the first embedding function.
db1 = FAISS.from_documents(documents=texts, embedding=embedding_function1)

In [None]:
# Index the same chunks with the second embedding function.
db2 = FAISS.from_documents(documents=texts, embedding=embedding_function2)

### Test Similarity Search

In [None]:
# Sample rules question used to try out the similarity search on both indexes.
query = "When can a player use their character's abilities?"

In [None]:
# Retrieve the chunks closest to the query from the first index and show the top hit.
docs = db1.similarity_search(query=query)
docs[0]

In [None]:
# Same query against the second index, for comparison with the first.
docs = db2.similarity_search(query=query)
docs[0]

## Instantiate LLM

In [None]:
# Path handed to the tokenizer and model `from_pretrained` loaders.
mpath = 'llm_models/Llama-2-7b-chat-hf'

In [None]:
#config = AutoConfig.from_pretrained(mpath)
# Per-device memory caps forwarded to the loader via `max_memory`
# (keys are presumably device indices — confirm against the target machine).
max_memory_mapping = {0: "2GB", 1: "12GB"}
tokenizer = LlamaTokenizer.from_pretrained(mpath)
# Request 8-bit weights through an explicit BitsAndBytesConfig; passing a bare
# `load_in_8bit=True` keyword to `from_pretrained` is the deprecated spelling.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = LlamaForCausalLM.from_pretrained(
    mpath,
    quantization_config=quantization_config,
    device_map='auto',
    max_memory=max_memory_mapping,
)

In [None]:
# Display how many CUDA devices PyTorch reports.
torch.cuda.device_count()

In [None]:
# Text-generation pipeline around the already-loaded model and tokenizer.
# `temperature` and `top_p` only influence sampling-based decoding, so sampling
# is enabled explicitly instead of relying on the checkpoint's generation config.
lp = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    do_sample=True,
    temperature=0.3,
    top_p=0.6,
    repetition_penalty=1.1,
    # NOTE(review): the model object is already loaded with device_map='auto';
    # this argument is presumably redundant here — confirm before removing.
    device_map='auto',
)

In [None]:
# Wrap the transformers pipeline in the LangChain LLM object that the QA chains are built with.
llm = HuggingFacePipeline(pipeline=lp)

## Create LLM Pipeline Interface

### Create a Prompt Template and Chain: 'Stuff Method'

In [None]:
# Prompt for the 'stuff' chain: the retrieved text fills {context} and the
# user's question fills {question}.
prompt_template = """Act as a helpful and knowledgeable assistant. Use the following pieces of context and your knowledge of board games to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer: """

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
# QA chain of type "stuff", using the custom prompt.
stuff_chain = load_qa_chain(
    llm,
    chain_type="stuff",
    prompt=PROMPT,
)

### Create a Prompt Template and Chain: 'Refine Method'

In [None]:
# Prompt for the 'refine' chain; same wording as the stuff prompt, but the
# context placeholder is {context_str}.
# NOTE: this rebinds `prompt_template`, replacing the stuff-method template text.
prompt_template = """Act as a helpful and knowledgeable assistant. Use the following pieces of context and your knowledge of board games to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context_str}

Question: {question}
Answer: """

refine_PROMPT = PromptTemplate(
    input_variables=["context_str", "question"],
    template=prompt_template,
)

In [None]:
# QA chain of type "refine". Only the question prompt is customised (no
# refine_prompt is passed), and the refine steps are requested in the output.
refine_chain = load_qa_chain(
    llm,
    chain_type="refine",
    question_prompt=refine_PROMPT,
    return_refine_steps=True,
)

### Create a Prompt Template and Chain: 'Map-Rerank Method'

In [None]:
# QA chain of type "map_rerank"; no custom prompt is passed, and the
# intermediate steps are requested in the output.
rank_chain = load_qa_chain(
    llm,
    chain_type="map_rerank",
    return_intermediate_steps=True,
)

### Create a Prompt Template and Chain: 'Map-Reduce Method'

In [None]:
# Question prompt for the map-reduce chain: asks for the text relevant to the question.
question_prompt_template = """Act as a helpful and knowledgeable assistant. Use the following pieces of context and your knowledge of board games to see if any of the text is relevant to answer the question. 
Return any relevant text.
{context}
Question: {question}
Relevant text, if any:"""
QUESTION_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=question_prompt_template,
)

# Combine prompt for the map-reduce chain: builds the final answer from {summaries}.
combine_prompt_template = """Act as a helpful and knowledgeable assistant. Use the following extracted parts of the instructions and your knowledge of board games to create a final answer.

QUESTION: {question}
=========
{summaries}
=========
Answer:"""
COMBINE_PROMPT = PromptTemplate(
    input_variables=["summaries", "question"],
    template=combine_prompt_template,
)


In [None]:
# QA chain of type "map_reduce", using the custom question and combine prompts.
mr_chain = load_qa_chain(
    llm,
    chain_type="map_reduce",
    question_prompt=QUESTION_PROMPT,
    combine_prompt=COMBINE_PROMPT,
)

## Run LLM with Document Query

In [None]:
# Question used for retrieval and passed to each QA chain in the cells that follow.
query = "When can a player use their character's abilities?"

In [None]:
# Retrieve the chunks for the question from the first index; these are the
# input documents given to the chains. Show the top hit.
docs = db1.similarity_search(query=query)
docs[0]

### Stuff Method Result

In [None]:
# Run the stuff chain on the retrieved chunks, returning only the outputs.
stuff_result = stuff_chain(
    {"input_documents": docs, "question": query},
    return_only_outputs=True,
)

In [None]:
# Display the stuff-chain output.
stuff_result

### Refine Method Result

In [None]:
# Run the refine chain on the retrieved chunks, returning only the outputs.
refine_result = refine_chain(
    {"input_documents": docs, "question": query},
    return_only_outputs=True,
)

In [None]:
# Print the refine-chain output.
print(refine_result)

### Map-Rerank Method Result

In [None]:
# Run the map-rerank chain on the retrieved chunks, returning only the outputs.
rank_result = rank_chain(
    {"input_documents": docs, "question": query},
    return_only_outputs=True,
)

In [None]:
# Print the map-rerank chain output.
print(rank_result)

### Map-Reduce Method Result

In [None]:
# map_reduce_result = mr_chain({"input_documents": docs, "question": query})
# mr_chain = RetrievalQA.from_chain_type(llm,retriever=db1.as_retriever(),
#                                        chain_type="map_reduce")
# result = mr_chain({"query": query})

In [None]:
# print(map_reduce_result)