In [1]:
!pip install langchain openai langchain-community faiss-cpu -q

In [2]:
import getpass
import os

# Ask for the key only when it is not already configured, so re-running this
# cell (or running with the variable exported beforehand) does not prompt again.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

 ········


### 1. Load and Split Document

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter 

# Tunables for loading and chunking, grouped so they are easy to adjust.
PDF_PATH = "nihonkokukenpou.pdf"
CHUNK_SIZE = 100   # upper bound on chunk length passed to the splitter
CHUNK_OVERLAP = 0  # no overlap between neighbouring chunks

loader = PyPDFLoader(PDF_PATH)
text_splitter = RecursiveCharacterTextSplitter(
    # Candidate split points, listed from coarsest (blank line) to finest
    # (empty string).
    # NOTE(review): "!" and "?" are ASCII here; Japanese text normally uses the
    # full-width forms - confirm which was intended.
    separators=["\n\n", "\n", "。", "!", "?", "、", " ", ""],
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Load first, then split explicitly: `BaseLoader.load_and_split` is documented
# by LangChain as deprecated, and this two-step form is its equivalent.
docs = text_splitter.split_documents(loader.load())

# Fail early with a readable message instead of an opaque error when the
# vector store is built from an empty list (e.g. a PDF with no text layer).
if not docs:
    raise ValueError(f"No text chunks were extracted from {PDF_PATH!r}")


### 2. Embed Document

In [5]:
from langchain_openai import OpenAIEmbeddings

# Name the embedding model explicitly instead of relying on the library
# default, so an index built today stays comparable after a library upgrade.
# "text-embedding-ada-002" is the langchain-openai default at time of writing.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

### 3. Vectorstore

In [6]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and build an in-memory FAISS index over the vectors.
vector_store = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

### 4. Retrieve

In [7]:
# Expose the index through the retriever interface using plain similarity
# search (the default search type, spelled out here for readability).
retriever = vector_store.as_retriever(search_type="similarity")

### 5. Question and Answer

In [35]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import SystemMessage, HumanMessage
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser



# Prompt template: the model must answer in {language} and rely only on the
# text placed in {context}; {input} carries the user's question.
qa_template = """Answer the following question in the {language} language based only on the provided context:
<context>
{context}
</context>
Question: {input}"""
prompt = ChatPromptTemplate.from_template(qa_template)

# Chat model used to generate the final answer.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Chain that formats the documents into the prompt and calls the model.
combine_docs_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG pipeline: retrieve documents for the question, then answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [36]:
# Inputs for one run of the chain: the answer language and the question text.
query = {
    "language": "japanese",
    "input": "Can Japan possess Nuclear weapons",
}
response = rag_chain.invoke(query)

# The chain returns a mapping; the generated text is under the "answer" key.
print(response["answer"])

日本は核兵器を保有することはできません。憲法第九条に基づき、戦争と武力の行使を放棄しており、戦力を保持しないことが定められています。


In [None]:
# Scratch cell for the next question: fill in `question` before running.
question = ""

if question.strip():
    response = rag_chain.invoke({"language": "japanese", "input": question})
    print(response["answer"])
else:
    # A blank question gives the retriever nothing to search for, so skip the
    # API round-trip rather than sending an empty query.
    print("Set `question` to a non-empty string before running this cell.")