In [None]:
%pip install -q -U langchain==0.0.325
%pip install -q -U python-dotenv

In [None]:
import dotenv

# Load variables from a .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
# NOTE(review): presumably this supplies the AWS credentials/region used by the
# Bedrock cells below -- confirm against the actual .env file.
dotenv.load_dotenv(override=True)

## Loader

In [None]:
# Install the `wikipedia` package before the Wikipedia loader/tool cells run.
# NOTE(review): unpinned, unlike langchain above -- consider pinning a version.
%pip install -q -U wikipedia

In [None]:
from langchain.document_loaders import WikipediaLoader

# Query the Japanese-language Wikipedia (lang="ja") and keep at most two
# documents for the search term.
loader = WikipediaLoader(query="進撃の巨人", lang="ja", load_max_docs=2)

# `documents` is the raw input for the splitting/embedding cells below.
documents = loader.load()


In [None]:
documents[0].page_content[:400]  # preview the first 400 characters of the first loaded Document

## Vector stores

In [None]:
# Install the Qdrant client used by the vector-store cells below.
# NOTE(review): unpinned, unlike langchain above -- consider pinning a version.
%pip install -q -U qdrant-client


In [None]:
from langchain.vectorstores.qdrant import Qdrant
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import BedrockEmbeddings

# Split the loaded Wikipedia documents into chunks (target size 200, no overlap).
# NOTE(review): CharacterTextSplitter splits on a single separator, so chunks of
# text that lack that separator may exceed chunk_size -- confirm the chunk sizes
# are acceptable for this Japanese text.
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embedding model with default settings; AWS configuration is taken from the
# environment (presumably populated by the dotenv cell -- confirm).
embeddings = BedrockEmbeddings()

# Embed the chunks and persist them to an on-disk Qdrant collection.
# force_recreate=True makes this cell idempotent: the collection lives on disk,
# so without it every "Restart & Run All" would append the same chunks again
# and similarity search would return duplicated results.
db = Qdrant.from_documents(
    documents=docs,
    embedding=embeddings,
    path="./shingeki_qdrant",
    collection_name="shingeki",
    force_recreate=True,
)


In [None]:
# Close the Qdrant client held by the vector store. The next section opens a new
# QdrantClient on the same "./shingeki_qdrant" path.
# NOTE(review): presumably required because the local storage folder can only be
# held by one client at a time -- confirm.
db.client.close()

### diskから取得

In [None]:
from langchain.vectorstores import Qdrant
from langchain.embeddings import BedrockEmbeddings
from qdrant_client import QdrantClient

# The embedding model is recreated so this cell does not depend on the
# indexing cell having run in the same kernel session.
embeddings = BedrockEmbeddings()

# Attach to the collection that was persisted under ./shingeki_qdrant instead of
# re-embedding the documents.
db = Qdrant(
    collection_name="shingeki",
    embeddings=embeddings,
    client=QdrantClient(path="./shingeki_qdrant"),
)


In [None]:
# Sanity-check retrieval: search the vector store and print the top hit's text.
# NOTE: this rebinds `docs`, which earlier held the split chunks.
query = "エレンの年齢は？"
docs = db.similarity_search(query=query)
print(docs[0].page_content)


## RetrievalQA

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock

# Bedrock-hosted LLM; model_kwargs are forwarded as model parameters.
llm = Bedrock(
    model_id='anthropic.claude-instant-v1',
    model_kwargs={'max_tokens_to_sample': 20000},
)

# Wire the LLM and the vector-store retriever into a RetrievalQA chain
# using the "stuff" chain type.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())


In [None]:
# Display the prompt object used by the LLM chain inside the document-combining step.
qa.combine_documents_chain.llm_chain.prompt

In [None]:
# Ask one question through the RetrievalQA chain; the returned value is shown as the cell output.
qa.invoke(input='エレンが育った町の名前は？')

## ToolとしてWikipediaを使う

In [None]:
from langchain.tools import WikipediaQueryRun
from langchain.utilities.wikipedia import WikipediaAPIWrapper

# Wikipedia exposed as a LangChain tool, backed by an API wrapper configured
# for the Japanese-language edition (lang="ja").
wikipedia = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(lang="ja"),
)


In [None]:
# Call the Wikipedia tool directly, passing a free-form question as its input string.
wikipedia.run("進撃の巨人に関する質問です。三種類の壁の名前は？")


## ConversationalRetrievalChain

In [None]:
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import LLMChain
from langchain.llms.bedrock import Bedrock
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory


# Alternative model, currently disabled:
# llm = Bedrock(model_id='anthropic.claude-v2', model_kwargs={'max_tokens_to_sample': 20000})
llm = Bedrock(model_id='anthropic.claude-instant-v1', model_kwargs={'max_tokens_to_sample': 20000})

# Build the conversational chain with its memory supplied at construction time.
# Passing `memory=` to from_llm (instead of assigning `qa.memory` afterwards)
# lets the chain's constructor validate the memory object and avoids a window
# in which the chain exists without memory.
# memory_key="chat_history" is the key under which the memory stores the
# conversation; return_messages=True keeps the history as message objects.
# NOTE: this rebinds `qa`, which earlier held the RetrievalQA chain.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type='stuff',
    retriever=db.as_retriever(),
    memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
    verbose=True,
)

In [None]:
# Display the memory object attached to the conversational chain.
qa.memory

In [None]:
# First turn of the conversation. The question is passed under the chain's
# "question" input key, and only the answer text is displayed.
result = qa.invoke({"question": 'エレンが育った町の名前は？'})
result["answer"]


In [None]:
# Second turn of the conversation on the same chain (and therefore the same
# memory object). Only the answer text is displayed.
result = qa.invoke({"question": 'エレンの幼馴染の名前は？'})
result["answer"]