In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI, VectorDBQA
from langchain.document_loaders import DirectoryLoader
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
import os
import nltk
# import config
import logging


In [19]:
PERSIST_DIR = "vectorstore"  # replace with the directory where you want to store the vectorstore
LOGS_FILE = "logs/log.log"  # replace with the path where you want to store the log file
FILE ="doc/RULE.pdf" # replace with the path where you have your documents
FILE_DIR = "doc/"
local_prompt_template = """我是一位醫院排班助理機器人。
根據以下排班規則回答問題。我們有ABCDE五個班次，每個班次都需要符合這些規定。如果問題指出需要調整規則，則提供基於用戶需求的修訂版。如果找不到答案，建議其他可能的方案。

問題：{question}

當前排班規則：
{context}

請回答問題，並在需要時提供幫助。
"""

k = 4  # number of chunks to consider when generating answer

In [3]:
# Initialize logging with the specified configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # logging.FileHandler(config.LOGS_FILE),
        logging.FileHandler(LOGS_FILE),
        logging.StreamHandler(),
    ],
)
LOGGER = logging.getLogger(__name__)

In [6]:
# Load documents from the specified directory using a DirectoryLoader object
# loader = DirectoryLoader(config.FILE_DIR, glob='*.pdf')
loader = DirectoryLoader(FILE_DIR, glob='*.pdf')
documents = loader.load()

# split the text to chuncks of of size 1000
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# Split the documents into chunks of size 1000 using a CharacterTextSplitter object
texts = text_splitter.split_documents(documents)

# Create a vector store from the chunks using an OpenAIEmbeddings object and a Chroma object
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
docsearch = Chroma.from_documents(texts, embeddings)

2023-12-01 12:01:40,249 - INFO - Processing entire page OCR with tesseract...
2023-12-01 12:01:41,670 - INFO - Processing entire page OCR with tesseract...
2023-12-01 12:01:44,190 - INFO - Processing entire page OCR with tesseract...
2023-12-01 12:01:46,345 - INFO - Processing entire page OCR with tesseract...
2023-12-01 12:01:48,231 - INFO - Processing entire page OCR with tesseract...
2023-12-01 12:01:55,749 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [11]:
# Define a function named 'answer' that takes a string prompt and an optional directory path
# for persisting data. The function returns a string that represents the answer to the prompt.
def answer(prompt: str, persist_directory: str = PERSIST_DIR) -> str:
    
    # Log a message indicating that the function has started
    LOGGER.info(f"Start answering based on prompt: {prompt}.")
    
    # Create a prompt template using a template from the config module and input variables
    # representing the context and question.
    prompt_template = PromptTemplate(template=local_prompt_template, input_variables=["context", "question"])
    
    # Load a QA chain using an OpenAI object, a chain type, and a prompt template.
    doc_chain = load_qa_chain(
        llm=OpenAI(
            openai_api_key = OPENAI_API_KEY,
            model_name="text-davinci-003",
            temperature=0,
            max_tokens=300,
        ),
        chain_type="stuff",
        prompt=prompt_template,
    )
    
    # Log a message indicating the number of chunks to be considered when answering the user's query.
    LOGGER.info(f"The top {k} chunks are considered to answer the user's query.")
    
    # Create a VectorDBQA object using a vector store, a QA chain, and a number of chunks to consider.
    qa = VectorDBQA(vectorstore=docsearch, combine_documents_chain=doc_chain, k=k)
    
    # Call the VectorDBQA object to generate an answer to the prompt.
    result = qa({"query": prompt})
    answer = result["result"]
    
    # Log a message indicating the answer that was generated
    LOGGER.info(f"The returned answer is: {answer}")
    
    # Log a message indicating that the function has finished and return the answer.
    LOGGER.info(f"Answering module over.")
    return answer

In [18]:
answer('幫我條列出醫護排班的注意要點')

2023-12-01 12:25:52,046 - INFO - Start answering based on prompt: 幫我條列出醫護排班的注意要點.
2023-12-01 12:25:52,142 - INFO - The top 4 chunks are considered to answer the user's query.
2023-12-01 12:25:52,483 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2023-12-01 12:25:59,239 - INFO - HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"
2023-12-01 12:25:59,240 - INFO - The returned answer is: 
醫護排班的注意要點：

1. 確保排班時間表符合法規要求，並確保每個醫護人員都有足夠的休息時間。

2. 確保每個醫護人員都有足夠的時間來完成任務，並確保每個醫護人員都有足夠的時間來完成任務。

3. 確保每個醫護人員都有足夠的時間來完成任務，並確保每個醫護人員都有足夠的時間來完成
2023-12-01 12:25:59,241 - INFO - Answering module over.


'\n醫護排班的注意要點：\n\n1. 確保排班時間表符合法規要求，並確保每個醫護人員都有足夠的休息時間。\n\n2. 確保每個醫護人員都有足夠的時間來完成任務，並確保每個醫護人員都有足夠的時間來完成任務。\n\n3. 確保每個醫護人員都有足夠的時間來完成任務，並確保每個醫護人員都有足夠的時間來完成'

In [20]:
answer('假設我五個班別都最少人值班，這樣我最少需要多少人同時值班')

2023-12-01 12:29:32,340 - INFO - Start answering based on prompt: 假設我五個班別都最少人值班，這樣我最少需要多少人同時值班.
2023-12-01 12:29:32,433 - INFO - The top 4 chunks are considered to answer the user's query.
2023-12-01 12:29:34,464 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2023-12-01 12:29:39,264 - INFO - HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"
2023-12-01 12:29:39,270 - INFO - The returned answer is: 
根據您提供的排班規則，您最少需要10名員工同時值班，分別為：A班2名、B班2名、C班2名、D班2名和E班2名。如果您需要更多的員工值班，可以將每個班次的人數增加，以滿足您的需求。
2023-12-01 12:29:39,271 - INFO - Answering module over.


'\n根據您提供的排班規則，您最少需要10名員工同時值班，分別為：A班2名、B班2名、C班2名、D班2名和E班2名。如果您需要更多的員工值班，可以將每個班次的人數增加，以滿足您的需求。'

In [21]:
answer('有一個人很重要，我想要讓他連續工作16個小時，這樣我要如何安排')

2023-12-01 12:30:51,630 - INFO - Start answering based on prompt: 有一個人很重要，我想要讓他連續工作16個小時，這樣我要如何安排.
2023-12-01 12:30:51,722 - INFO - The top 4 chunks are considered to answer the user's query.
2023-12-01 12:30:52,101 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2023-12-01 12:30:57,429 - INFO - HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"
2023-12-01 12:30:57,436 - INFO - The returned answer is: 
根據您的要求，我建議您可以將ABCDE五個班次安排為：A班：8小時，B班：4小時，C班：2小時，D班：2小時，E班：0小時。這樣，該員工就可以連續工作16小時，而不會有任何時間上的間斷。
2023-12-01 12:30:57,437 - INFO - Answering module over.


'\n根據您的要求，我建議您可以將ABCDE五個班次安排為：A班：8小時，B班：4小時，C班：2小時，D班：2小時，E班：0小時。這樣，該員工就可以連續工作16小時，而不會有任何時間上的間斷。'

In [22]:
answer('我可以把這個人放在同一個班別裡面連續工作16小時嗎')

2023-12-01 12:31:53,050 - INFO - Start answering based on prompt: 我可以把這個人放在同一個班別裡面連續工作16小時嗎.
2023-12-01 12:31:53,145 - INFO - The top 4 chunks are considered to answer the user's query.
2023-12-01 12:31:53,543 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2023-12-01 12:31:59,635 - INFO - HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"
2023-12-01 12:31:59,640 - INFO - The returned answer is: 
不，根據當前的排班規則，不可以把這個人放在同一個班別裡面連續工作16小時。每個班次的時間限制為8小時，因此無法滿足您的需求。建議您可以考慮調整排班規則，例如增加班次或延長每個班次的時間限制，以滿足您的需求。
2023-12-01 12:31:59,642 - INFO - Answering module over.


'\n不，根據當前的排班規則，不可以把這個人放在同一個班別裡面連續工作16小時。每個班次的時間限制為8小時，因此無法滿足您的需求。建議您可以考慮調整排班規則，例如增加班次或延長每個班次的時間限制，以滿足您的需求。'