In [21]:
from langchain_community.llms import Ollama
from langchain import PromptTemplate

model = Ollama(model='gemma2:2b', stop=["<|eot_id|>"]) # Added the stop token

In [22]:
text = input()
user_prompt = f"{text}"

system_prompt = """Think and write your step-by-step reasoning before responding. 
Please write one word in English that corresponds to the following user's question, fully spelled out and without abbreviations. 
For example, respond to 'RAG' with 'Retrieval-Augmented Generation' and to 'CNN' with 'Convolutional Neural Network'.
"""

In [23]:
template = """
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    {system_prompt}
    <|eot_id|>
    <|start_header_id|>user<|end_header_id|>
    {user_prompt}
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
"""

prompt = PromptTemplate(
    input_variables=['system_prompt', 'user_prompt'],
    template=template
)

In [24]:
key_word = model(prompt.format(system_prompt=system_prompt, user_prompt=user_prompt))

In [25]:
key_word.strip()

'Donald'

In [26]:
f"https://en.wikipedia.org/wiki/{key_word.strip()}"

'https://en.wikipedia.org/wiki/Donald'

In [27]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = WebBaseLoader(f"https://en.wikipedia.org/wiki/{key_word.strip()}")
data = loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) 
all_splits = text_splitter.split_documents(data)

In [28]:
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
# docs = vectorstore.similarity_search("")

In [29]:
user_prompt = f"{user_prompt} on detail."

system_prompt = """
Please write all conversations in Korean(한국어).
Think and write your step-by-step reasoning before responding.
Write the article title using # in Markdown syntax.
The character limit is 200 characters.
"""

In [30]:
from langchain.chains import RetrievalQA

qachain = RetrievalQA.from_chain_type(model, retriever=vectorstore.as_retriever())

response = qachain(prompt.format(system_prompt=system_prompt, user_prompt=user_prompt))

In [31]:
response

{'query': '\n    <|begin_of_text|>\n    <|start_header_id|>system<|end_header_id|>\n    \nPlease write all conversations in Korean(한국어).\nThink and write your step-by-step reasoning before responding.\nWrite the article title using # in Markdown syntax.\nThe character limit is 200 characters.\n\n    <|eot_id|>\n    <|start_header_id|>user<|end_header_id|>\n    트럼프에 대해서 설명해줘 by korean.\n    <|eot_id|>\n    <|start_header_id|>assistant<|end_header_id|>\n',
 'result': '## #Donald  (Don) \n\nLet\'s break down why "Donald" is so prominent, but we need to start with context!\n\n\n* **What\'s the question?** We don\'t have a specific question about Donald. So let me offer some points to consider in answering your questions.\n   \n    * Do you want to know about how often this name appears? \n    * Are you curious about why it is used so commonly, or what its cultural significance is?\n\nPlease provide more information on the question you\'d like to answer! 😄  \n\n\n\n'}

In [32]:
print(response['result'])

## #Donald  (Don) 

Let's break down why "Donald" is so prominent, but we need to start with context!


* **What's the question?** We don't have a specific question about Donald. So let me offer some points to consider in answering your questions.
   
    * Do you want to know about how often this name appears? 
    * Are you curious about why it is used so commonly, or what its cultural significance is?

Please provide more information on the question you'd like to answer! 😄  




