### 문제 4-1 : OpenAI에서 Ollama Qwen3로 RAG 시스템 변경하기


In [6]:
'''
기존 코드
'''

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os


print(" === base 작업 === ")
load_dotenv()

# 문서 로드
loader = PyPDFLoader("./data/콘텐츠분쟁해결_사례.pdf")

# 문자열 분할 
pythontext_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,       
    chunk_overlap=300,      
    separators=[
        "\n【사건개요】",   
        "\n【쟁점사항】",   
        "\n【처리경위】",  
        "\n【처리결과】",    
        "\n■", "\n\n", "\n", ".", " ", ""
    ]
)
chunks = pythontext_splitter.split_documents(loader.load())

# 임베드 ai 설정
pythonembeddings = OpenAIEmbeddings(
    model="text-embedding-3-large", 
    dimensions=1536                  
)

# 벡터 설정
vectorstore = FAISS.from_documents(chunks, pythonembeddings)
python_retriever = vectorstore.as_retriever(
    search_type="similarity",        
    search_kwargs={"k": 5}         
)

# ai 설정
pythonllm = ChatOpenAI(
    model="gpt-4o",                
    temperature=0.2,               
    max_tokens=2000                 
)
print(" === base 작업 완료 === ")


 === base 작업 === 
 === base 작업 완료 === 


In [11]:
''' 변경된 부분'''
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

model = "qwen3:8b"
url = "http://localhost:11434"
embed_model= "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# 임베딩 설정
pythonembeddings = HuggingFaceEmbeddings(
    model_name = embed_model,
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

# llm 설정
ollama_llm = ChatOllama(
    model=model,
    temperature=0.7,
    num_predict=1000
)

# vector 설정
vectorstore = FAISS.from_documents(chunks, pythonembeddings)
python_retriever = vectorstore.as_retriever(
    search_type="similarity",        
    search_kwargs={"k": 5}         
)

# Template 설정
bluej_prompt_template = """당신은 BlueJ 프로그래밍 환경 전문가입니다.
아래 문서 내용을 바탕으로 정확하고 친절한 답변을 제공해주세요.

문서 내용:
{context}

질문: {question}

답변 규칙:
1. 문서 내용만을 근거로 답변하세요
2. 단계별로 설명하세요  
3. 구체적인 메뉴명, 버튼명을 포함하세요
4. 문서에 없는 정보는 "문서에서 찾을 수 없습니다"라고 하세요

답변:"""

bluej_prompt = PromptTemplate(
    template=bluej_prompt_template,
    input_variables=["context", "question"]
)


# 체인 설정
qa_chain = RetrievalQA.from_chain_type(
    llm=ollama_llm,
    chain_type="stuff",
    retriever=python_retriever,
    chain_type_kwargs={"prompt": bluej_prompt},
    return_source_documents=True
)
test_questions = [
    "BlueJ에서 객체를 생성하는 방법은 무엇인가요?",
    "컴파일 오류가 발생했을 때 어떻게 확인할 수 있나요?"
]

for question in test_questions:
    print(f"\n\n질문  : {question}")
    response = qa_chain.invoke({"query":question})
    print(response["result"])




질문  : BlueJ에서 객체를 생성하는 방법은 무엇인가요?
<think>
Okay, the user is asking about how to create an object in BlueJ. Let me check the provided documents first. The documents are about content dispute resolution cases, specifically in the context of games and other web-related issues. There's a case study from the Korea Consumer Agency about an online game where a user's account was hacked, and another case about account suspension due to item trading. 

Wait, the user's question is about BlueJ programming environment. The documents provided don't mention anything related to BlueJ, object creation, or programming concepts. All the content is about legal cases and dispute resolutions in online games. 

Since the documents don't have information about BlueJ or object-oriented programming, I need to inform the user that the answer isn't available in the provided texts. The answer rules state that if the information isn't in the documents, I should say "문서에서 찾을 수 없습니다". 

So, the correct response h