In [4]:
from langchain_community.llms import Ollama
from langchain import PromptTemplate
import wikipedia
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

In [None]:
def extract_keyword(model, user_prompt):
    """Extract keyword from user prompt using LLM model"""

    keyword_extract_system_prompt = """
Think and write your step-by-step reasoning before responding.
Please write only the fully spelled-out form of the acronym in English that corresponds to the following user's question, without abbreviations or additional text.
If you don't know how to respond, just say false.
"""
    
    template = """
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
{system_prompt}
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{user_prompt}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
    
    prompt = PromptTemplate(input_variables=['system_prompt', 'user_prompt'], template=template)
    keyword = model(prompt.format(system_prompt=keyword_extract_system_prompt, user_prompt=user_prompt)).strip()
    
    return keyword


def get_wikipedia_content(keyword):
    """Fetch content from Wikipedia based on the keyword"""
    try:
        search_results = wikipedia.search(keyword)
        if not search_results:
            return None
        page_content = wikipedia.page(search_results[0]).content
        return page_content
    except Exception as e:
        print(f"Error fetching Wikipedia content: {e}")
        return None

In [6]:
def generate_response(model, user_prompt, content=None):
    """Generate response using GPT model with optional document content"""

    system_prompt = """
Please write all conversations in Korean(한국어).
Think and write your step-by-step reasoning before responding.
Write the article title using ## in Markdown syntax.
"""
    
    template = """
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
{system_prompt}
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{user_prompt}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
    
    prompt = PromptTemplate(input_variables=['system_prompt', 'user_prompt'], template=template)
    if content:
        # Split and embed content if provided
        doc = Document(page_content=content)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        all_splits = text_splitter.split_documents([doc])
        embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
        
        # Use RAG for response generation
        qachain = RetrievalQA.from_chain_type(model, retriever=vectorstore.as_retriever())
        response = qachain(prompt.format(system_prompt=system_prompt, user_prompt=user_prompt))
        return response['result']
    else:
        # Generate response without additional document
        response = model(prompt.format(system_prompt=system_prompt, user_prompt=user_prompt)).strip()
        return response

In [7]:
def main():
    model = Ollama(model='gemma2:2b', stop=["<|eot_id|>"])
    
    user_input = input("질문을 입력하세요: ")
    keyword = extract_keyword(model, user_input)

    if keyword == "false":
        print("키워드를 찾을 수 없습니다. 검색 없이 응답을 생성합니다.")
        response = generate_response(model, user_input)
    else:
        content = get_wikipedia_content(keyword)
        if content:
            print(f"{keyword}에 대한 Wikipedia 문서를 찾았습니다.")
            response = generate_response(model, user_input, content=content)
        else:
            print("문서를 찾을 수 없습니다. 검색 없이 응답을 생성합니다.")
            response = generate_response(model, user_input)
    
    print("응답:", response)


if __name__ == "__main__":
    main()



  lis = BeautifulSoup(html).find_all('li')


Error fetching Wikipedia content: "Python" may refer to: 
Pythonidae
Python (genus)
Python (mythology)
Python (programming language)
CMU Common Lisp
PERQ 3
Python of Aenus
Python (painter)
Python of Byzantium
Python of Catana
Python Anghelo
Python (Efteling)
Python (Busch Gardens Tampa Bay)
Python (Coney Island, Cincinnati, Ohio)
Python (automobile maker)
Python (Ford prototype)
Python (missile)
Python (nuclear primary)
Colt Python
Python (codename)
Python (film)
Monty Python
Python (Monty) Pictures
Timon of Phlius
Pithon
Pyton
문서를 찾을 수 없습니다. 검색 없이 응답을 생성합니다.
응답: ## Python 설명

Python은 **versatile**하고 **easy to learn**의 프로그래밍 언어입니다. 🐍  대규모 데이터셋을 분석하기, 웹 애플리케이션 개발하기, 머신러닝 알고리즘 구현 등 다양한 분야에서 사용됩니다. 특히 Python이 **반복적인 작업**이나 **데이터 처리**에 효과적이기 때문에 많은 프로그래밍자가 선택합니다. 

### Python의 장점

* **편리한 설치 및 활용:** Python은 간단하게 다운로드하고 설치하여 사용할 수 있습니다.  
* **활발한 커뮤니티 & 풍부한 도구:** Python은 대규모 커뮤니티와 다양한 라이브러리를 지원하는 강력한 지원망을 제공합니다. 

**Python의 활용 예시:**

* **데이터 분석:** 데이터 시각화, 통계 및 머신러닝 알고리즘을 사용하여 데이터를 분석하고 이해할