In [1]:

# imports
import os
import sys
import types
import json

# figure size/format
# Settings read by the setup code further down in this cell: the fig_* values
# feed the matplotlib / pandas blocks, plotly_connected the plotly block,
# is_dashboard the dashboard block and interactivity the IPython shell block.
fig_width = 7
fig_height = 5
fig_format = 'retina'
fig_dpi = 96
interactivity = ''
is_shiny = False
is_dashboard = False
plotly_connected = True

# matplotlib defaults / format
try:
  import matplotlib.pyplot as plt
  plt.rcParams['figure.figsize'] = (fig_width, fig_height)
  plt.rcParams['figure.dpi'] = fig_dpi
  plt.rcParams['savefig.dpi'] = fig_dpi
  try:
    # Preferred location; the IPython.display re-export is deprecated.
    from matplotlib_inline.backend_inline import set_matplotlib_formats
  except ImportError:
    # Older environments without matplotlib_inline still expose it here.
    from IPython.display import set_matplotlib_formats
  set_matplotlib_formats(fig_format)
except Exception:
  # Best effort: any failure (e.g. matplotlib not importable) is ignored.
  pass

# plotly: choose the notebook renderer (connected or not) and give every
# registered template the same tight margins
try:
  import plotly.io as pio
  pio.renderers.default = "notebook_connected" if plotly_connected else "notebook"
  for template in pio.templates.keys():
    pio.templates[template].layout.margin = {"t": 30, "r": 0, "b": 0, "l": 0}
except Exception:
  # Best effort: any failure (e.g. plotly not importable) is ignored.
  pass

# disable itables paging for dashboards
if is_dashboard:
  try:
    from itables import options
    options.dom = 'fiBrtlp'
    options.maxBytes = 1024 * 1024
    options.language = dict(info = "Showing _TOTAL_ entries")
    options.classes = "display nowrap compact"
    options.paging = False
    options.searching = True
    options.ordering = True
    options.info = True
    options.lengthChange = False
    options.autoWidth = False
    options.responsive = True
    options.keys = True
    options.buttons = []
  except Exception:
    # Best effort: any failure (e.g. itables not importable) is ignored.
    pass

  try:
    import altair as alt
    # Dashboards default to container-sized vega visualizations so that
    # charts flow reasonably.
    theme_sentinel = '_quarto-dashboard-internal'
    def make_theme(name):
        """Wrap the registered theme `name` so dashboard defaults are filled in."""
        wrapped_theme = alt.themes._plugins[name]
        def patch_theme(*args, **kwargs):
            theme = wrapped_theme()

            # Only fill in values the wrapped theme left unset.
            theme.setdefault('height', 'container')
            theme.setdefault('width', 'container')
            config = theme.setdefault('config', dict())

            # Default font sizes
            title_font_size = 15
            header_font_size = 13
            axis_font_size = 12
            legend_font_size = 12
            mark_font_size = 12
            tooltip = False

            # (config section, keys to default, font size) in application order
            section_font_sizes = (
              ('axis', ('labelFontSize', 'titleFontSize'), axis_font_size),
              ('legend', ('labelFontSize', 'titleFontSize'), legend_font_size),
              ('header', ('labelFontSize', 'titleFontSize'), header_font_size),
              ('title', ('fontSize',), title_font_size),
              ('mark', ('fontSize',), mark_font_size),
            )
            for section_name, size_keys, font_size in section_font_sizes:
              section = config.setdefault(section_name, dict())
              for size_key in size_keys:
                section.setdefault(size_key, font_size)

            # Mark tooltips (disabled while `tooltip` is False)
            mark = config['mark']
            if tooltip and 'tooltip' not in mark:
              mark['tooltip'] = dict(content="encoding")

            return theme

        return patch_theme

    # We can only do this once per session
    if theme_sentinel not in alt.themes.names():
      for name in alt.themes.names():
        alt.themes.register(name, make_theme(name))

      # register a sentinel theme so we only do this once
      alt.themes.register(theme_sentinel, make_theme('default'))
      alt.themes.enable('default')

  except Exception:
    # Best effort: any failure (e.g. altair not importable) is ignored.
    pass

# enable pandas latex repr when targeting pdfs
try:
  import pandas as pd
  # fig_format is assigned near the top of this setup cell.
  if fig_format == 'pdf':
    pd.set_option('display.latex.repr', True)
except Exception:
  # Best effort: any failure here (e.g. pandas not importable) is ignored.
  pass

# interactivity
# `interactivity` is an empty string above, so this branch only runs when a
# non-empty mode string has been filled in.
if interactivity:
  from IPython.core.interactiveshell import InteractiveShell
  InteractiveShell.ast_node_interactivity = interactivity

# NOTE: the kernel_deps code is repeated in the cleanup.py file
# (we can't easily share this code b/c of the way it is run).
# If you edit this code also edit the same code in cleanup.py!

# output kernel dependencies
# Builds a {source file path: modification time} mapping for every loaded
# module that has an existing file on disk, then prints it as JSON.
kernel_deps = dict()
for module in list(sys.modules.values()):
  # Some modules play games with sys.modules (e.g. email/__init__.py
  # in the standard library), and occasionally this can cause strange
  # failures in getattr.  Just ignore anything that's not an ordinary
  # module.
  if not isinstance(module, types.ModuleType):
    continue
  path = getattr(module, "__file__", None)
  # Modules without a file path are skipped.
  if not path:
    continue
  # Drop the trailing "c"/"o" so compiled files map to the ".py" path.
  if path.endswith(".pyc") or path.endswith(".pyo"):
    path = path[:-1]
  if not os.path.exists(path):
    continue
  kernel_deps[path] = os.stat(path).st_mtime
print(json.dumps(kernel_deps))

# set run_path if requested
# The condition is a non-empty string literal, so it is always true and the
# working directory is always changed to this absolute path.
if r'C:\Users\kmkim\Desktop\projects\blog\docs\blog\posts\Agent\13-Cloud-RAG':
  os.chdir(r'C:\Users\kmkim\Desktop\projects\blog\docs\blog\posts\Agent\13-Cloud-RAG')

# reset state
%reset

# shiny
# The literal False is used here (instead of a flag variable) because this code
# runs after the %reset and must not leave a variable behind in global scope.
if False:
  try:
    import htmltools as _htmltools
    import ast as _ast

    _htmltools.html_dependency_render_mode = "json"

    # This decorator will be added to all function definitions
    def _display_if_has_repr_html(x):
      """Display `x` as HTML when it has a `_repr_html_` attribute; return `x`."""
      try:
        # IPython 7.14 preferred import
        from IPython.display import display, HTML
      except:
        from IPython.core.display import display, HTML

      if hasattr(x, '_repr_html_'):
        display(HTML(x._repr_html_()))
      return x

    # Ideally the ast_transformers.append call below would be undone whenever
    # an error occurs in this block. It is left in place for now: it only
    # matters if the user switches from shiny to not-shiny mode (and even
    # then it likely won't matter).
    import builtins
    builtins._display_if_has_repr_html = _display_if_has_repr_html

    class _FunctionDefReprHtml(_ast.NodeTransformer):
      """Prepends the `_display_if_has_repr_html` decorator to function definitions."""

      def _prepend_decorator(self, node):
        decorator = _ast.Name(id="_display_if_has_repr_html", ctx=_ast.Load())
        node.decorator_list.insert(0, decorator)
        return node

      # Sync and async function definitions are handled the same way.
      visit_FunctionDef = _prepend_decorator
      visit_AsyncFunctionDef = _prepend_decorator

    ip = get_ipython()
    ip.ast_transformers.append(_FunctionDefReprHtml())

  except:
    pass

def ojs_define(**kwargs):
  """Emit the keyword arguments as an ``ojs-define`` script tag.

  Each keyword becomes one ``{"name": ..., "value": ...}`` entry under the
  ``contents`` key of a JSON payload, which is displayed as HTML inside
  ``<script type="ojs-define">`` with ``ojs_define=True`` display metadata.

  pandas Series and DataFrames (including subclasses) are converted to a dict
  mapping each column label to the list of that column's values; every other
  value is passed to ``json.dumps`` unchanged, so it must be JSON-serializable.
  """
  import json
  try:
    # IPython 7.14 preferred import
    from IPython.display import display, HTML
  except ImportError:
    from IPython.core.display import display, HTML

  # do some minor magic for convenience when handling pandas
  # dataframes
  def convert(v):
    try:
      import pandas as pd
    except ModuleNotFoundError: # don't do the magic when pandas is not available
      return v
    # isinstance (not an exact type check) so DataFrame/Series subclasses are
    # converted too instead of reaching json.dumps as raw objects.
    if isinstance(v, pd.Series):
      v = pd.DataFrame(v)
    if isinstance(v, pd.DataFrame):
      # Transposing first makes the 'split' index hold the column labels and
      # each data row hold one column's values.
      j = json.loads(v.T.to_json(orient='split'))
      return dict(zip(j["index"], j["data"]))
    return v

  payload = dict(contents=[dict(name=key, value=convert(value)) for (key, value) in kwargs.items()])
  display(HTML('<script type="ojs-define">' + json.dumps(payload) + '</script>'), metadata=dict(ojs_define = True))
globals()["ojs_define"] = ojs_define


In [2]:
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
import os

# Called before the os.getenv lookups below (python-dotenv; presumably loads a
# local .env file into the environment — confirm for your setup).
load_dotenv()

# Embeddings
# The deployment name is hard-coded; version, endpoint and key come from the
# environment. os.getenv returns None for any variable that is not set.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-3-small",
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY")
)

# Vector Store
# Queries are embedded with embeddings.embed_query.
vector_store = AzureSearch(
    azure_search_endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"),
    azure_search_key=os.getenv("AZURE_SEARCH_API_KEY"),
    index_name=os.getenv("AZURE_SEARCH_INDEX_NAME"),
    embedding_function=embeddings.embed_query
)

# Retriever
# search_kwargs k=3 (presumably the number of documents returned per query).
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# LLM
# Uses the same API version / endpoint / key variables as the embeddings
# client; temperature is fixed at 0.
llm = AzureChatOpenAI(
    azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    temperature=0
)

# Prompt
# Template with two placeholders: {context} and {question}.
prompt = ChatPromptTemplate.from_template(
    """다음 컨텍스트를 참고하여 질문에 답변하세요.

컨텍스트:
{context}

질문: {question}

답변:"""
)

# Chain
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line."""
    return "\n\n".join(doc.page_content for doc in docs)

# The incoming question is sent to the retriever (whose documents are joined by
# format_docs) for "context" and passed through unchanged for "question"; the
# resulting dict goes through the prompt, the LLM and the string output parser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run
answer = rag_chain.invoke("Azure AI Search란 무엇인가?")
print(answer)

In [3]:
from typing import TypedDict, List
from langchain_core.documents import Document

class RAGState(TypedDict):
    """State dictionary used as the schema of the RAG graph."""
    question: str  # user question
    context: List[Document]  # retrieved documents
    answer: str  # generated answer
    retrieval_success: bool  # whether retrieval returned any documents

In [4]:
def retrieve(state: RAGState) -> RAGState:
    """Document retrieval node.

    Runs the module-level ``retriever`` on ``state["question"]`` and returns a
    copy of the state with ``context`` set to the retrieved documents and
    ``retrieval_success`` set to whether at least one document came back.
    """
    found_docs = retriever.invoke(state["question"])
    return {
        **state,
        "context": found_docs,
        "retrieval_success": len(found_docs) > 0,
    }

def generate(state: RAGState) -> RAGState:
    """Answer generation node.

    Fills the module-level ``prompt`` with the question and the retrieved
    documents' text (joined by blank lines), invokes ``llm`` on it and returns
    a copy of the state with ``answer`` set to the response content.
    """
    question = state["question"]
    documents = state["context"]

    # Format the context and fill in the prompt
    prompt_value = prompt.invoke({
        "context": "\n\n".join(doc.page_content for doc in documents),
        "question": question,
    })

    # Generate the answer
    reply = llm.invoke(prompt_value)
    return {**state, "answer": reply.content}

def no_context_fallback(state: RAGState) -> RAGState:
    """Default-answer node used when no context is available.

    Returns a copy of the state whose ``answer`` is a fixed message; the
    input state is not modified.
    """
    fallback_state = dict(state)
    fallback_state["answer"] = "관련 정보를 찾을 수 없습니다. 질문을 다시 입력해주세요."
    return fallback_state

In [5]:
from langgraph.graph import StateGraph, END

# Create the graph (RAGState is the state schema)
workflow = StateGraph(RAGState)

# Add the nodes
workflow.add_node("retrieve", retrieve)
workflow.add_node("generate", generate)
workflow.add_node("fallback", no_context_fallback)

# Set the entry point
workflow.set_entry_point("retrieve")

# Conditional edge (based on whether retrieval succeeded)
def should_generate(state: RAGState) -> str:
    """Return "generate" when ``retrieval_success`` is truthy, else "fallback"."""
    return "generate" if state["retrieval_success"] else "fallback"

# After "retrieve", should_generate picks the next node; the mapping translates
# its return value into a node name.
workflow.add_conditional_edges(
    "retrieve",
    should_generate,
    {
        "generate": "generate",
        "fallback": "fallback"
    }
)

# Terminal edges: both branches finish the graph
workflow.add_edge("generate", END)
workflow.add_edge("fallback", END)

# Compile
app = workflow.compile()

In [6]:
# Run the graph; only "question" is supplied, the nodes fill in the rest
result = app.invoke({
    "question": "Azure AI Search란 무엇인가?"
})

print(f"질문: {result['question']}")
print(f"검색 성공: {result['retrieval_success']}")
print(f"답변: {result['answer']}")

In [7]:
from langchain_core.messages import BaseMessage

class ConversationState(TypedDict):
    """Conversation state: the RAG fields plus the chat history."""
    question: str  # current user question
    chat_history: List[BaseMessage]  # conversation history
    context: List[Document]  # retrieved documents
    answer: str  # generated answer
    retrieval_success: bool  # whether retrieval returned any documents

In [8]:
from langchain_core.messages import HumanMessage, AIMessage

def retrieve_with_history(state: ConversationState) -> ConversationState:
    """Retrieval node that takes the chat history into account.

    The last three history messages are rendered as "User: ..." / "AI: ..."
    lines and, when there are any, prepended to the question to form the
    search query. Returns a copy of the state with ``context`` and
    ``retrieval_success`` set from the retriever result.
    """
    question = state["question"]
    recent_messages = state.get("chat_history", [])[-3:]  # latest 3 only

    # Render the recent history as text
    transcript_lines = []
    for message in recent_messages:
        speaker = 'User' if isinstance(message, HumanMessage) else 'AI'
        transcript_lines.append(f"{speaker}: {message.content}")
    history_text = "\n".join(transcript_lines)

    # Rebuild the query with the history included (plain question otherwise)
    search_query = (
        f"대화 히스토리:\n{history_text}\n\n현재 질문: {question}"
        if history_text
        else question
    )

    # Search
    found_docs = retriever.invoke(search_query)
    return {
        **state,
        "context": found_docs,
        "retrieval_success": len(found_docs) > 0,
    }

def generate_with_history(state: ConversationState) -> ConversationState:
    """Answer generation node that takes the chat history into account.

    Builds a prompt from the last three history messages ("없음" when there
    are none), the retrieved documents' text and the question, invokes
    ``llm``, and returns a copy of the state with ``answer`` set and
    ``chat_history`` extended by the new question/answer pair.
    """
    question = state["question"]
    documents = state["context"]
    chat_history = state.get("chat_history", [])

    # Format the context
    context_text = "\n\n".join(doc.page_content for doc in documents)

    # Prompt that includes the history
    history_prompt = ChatPromptTemplate.from_template(
        """이전 대화:
{history}

컨텍스트:
{context}

질문: {question}

답변:"""
    )

    transcript_lines = []
    for message in chat_history[-3:]:
        speaker = "User" if isinstance(message, HumanMessage) else "AI"
        transcript_lines.append(f"{speaker}: {message.content}")
    history_text = "\n".join(transcript_lines)

    prompt_value = history_prompt.invoke({
        "history": history_text or "없음",
        "context": context_text,
        "question": question,
    })
    reply = llm.invoke(prompt_value)

    # Append this turn to the history (builds a new list)
    this_turn = [
        HumanMessage(content=question),
        AIMessage(content=reply.content),
    ]
    return {
        **state,
        "answer": reply.content,
        "chat_history": chat_history + this_turn,
    }

In [9]:
# Conversational workflow: same shape as the basic graph, but with the
# history-aware retrieve / generate nodes
conversation_workflow = StateGraph(ConversationState)

conversation_workflow.add_node("retrieve", retrieve_with_history)
conversation_workflow.add_node("generate", generate_with_history)
conversation_workflow.add_node("fallback", no_context_fallback)

conversation_workflow.set_entry_point("retrieve")

conversation_workflow.add_conditional_edges(
    "retrieve",
    should_generate,
    {
        "generate": "generate",
        "fallback": "fallback"
    }
)

conversation_workflow.add_edge("generate", END)
conversation_workflow.add_edge("fallback", END)

conversation_app = conversation_workflow.compile()

# Run a multi-turn conversation
state = {
    "question": "Azure AI Search란 무엇인가?",
    "chat_history": []
}

result1 = conversation_app.invoke(state)
print(f"답변 1: {result1['answer']}\n")

# Follow-up question: carries over the history returned by the first turn
state2 = {
    "question": "그것의 주요 기능은?",
    "chat_history": result1["chat_history"]
}

result2 = conversation_app.invoke(state2)
print(f"답변 2: {result2['answer']}")

In [10]:
class RetryState(TypedDict):
    """RAG state that supports retrying retrieval."""
    question: str  # user question
    context: List[Document]  # retrieved documents
    answer: str  # generated answer
    retrieval_success: bool  # whether retrieval returned any documents
    retry_count: int  # retry counter
    max_retries: int  # upper bound compared against retry_count

In [11]:
def retrieve_with_retry(state: RetryState) -> RetryState:
    """Retrieval node that supports retries.

    Uses the plain question while ``retry_count`` is absent or not positive;
    otherwise appends a fixed expansion hint to the question. Returns a copy
    of the state with ``context`` and ``retrieval_success`` set, and
    ``retry_count`` increased by one (on every call, including the first).
    """
    question = state["question"]
    previous_count = state.get("retry_count", 0)

    # Expand the query on retries
    query = (
        f"{question} (관련 정보, 설명, 개요 포함)"
        if previous_count > 0
        else question
    )

    found_docs = retriever.invoke(query)
    return {
        **state,
        "context": found_docs,
        "retrieval_success": len(found_docs) > 0,
        "retry_count": previous_count + 1,
    }

def should_retry(state: RetryState) -> str:
    """Decide the next step after a retrieval attempt.

    Returns "generate" when retrieval succeeded, "retry" while
    ``retry_count`` is strictly below ``max_retries`` (default 2), and
    "fallback" otherwise.

    NOTE(review): retrieve_with_retry increments ``retry_count`` on every
    call, including the first, so ``max_retries`` effectively bounds the
    total number of retrieval attempts.
    """
    if state["retrieval_success"]:
        return "generate"
    if state["retry_count"] < state.get("max_retries", 2):
        return "retry"
    return "fallback"

In [12]:
# Retry workflow: "retrieve" can route back to itself until should_retry
# returns "generate" or "fallback"
retry_workflow = StateGraph(RetryState)

retry_workflow.add_node("retrieve", retrieve_with_retry)
retry_workflow.add_node("generate", generate)
retry_workflow.add_node("fallback", no_context_fallback)

retry_workflow.set_entry_point("retrieve")

retry_workflow.add_conditional_edges(
    "retrieve",
    should_retry,
    {
        "generate": "generate",
        "retry": "retrieve",  # loop back
        "fallback": "fallback"
    }
)

retry_workflow.add_edge("generate", END)
retry_workflow.add_edge("fallback", END)

retry_app = retry_workflow.compile()

# Run
result = retry_app.invoke({
    "question": "희귀한 주제 검색",
    "max_retries": 2
})

# retry_count is written by retrieve_with_retry on every retrieval call
print(f"재시도 횟수: {result['retry_count']}")
print(f"답변: {result['answer']}")

In [13]:
class QualityState(TypedDict):
    """State for the answer-quality checking workflow."""
    question: str  # user question
    context: List[Document]  # retrieved documents
    answer: str  # generated answer
    retrieval_success: bool  # whether retrieval returned any documents
    answer_quality: str  # "good", "bad", "unknown"

In [14]:
def _normalize_quality(raw_label: str) -> str:
    """Map raw model output onto one of "good" / "bad" / "unknown".

    Surrounding whitespace, quotes and common punctuation are removed and the
    text is lower-cased; anything that is still not one of the three labels
    becomes "unknown".
    """
    cleaned = raw_label.strip().lower().strip("\"'`.,:;!? \t\r\n")
    return cleaned if cleaned in ("good", "bad", "unknown") else "unknown"


def evaluate_answer(state: QualityState) -> QualityState:
    """Answer-quality evaluation node.

    Asks ``llm`` to grade ``state["answer"]`` against ``state["question"]``
    and returns a copy of the state with ``answer_quality`` set to "good",
    "bad" or "unknown". The model's reply is normalized first, so variants
    such as a quoted or capitalized label are still recognized and any other
    reply is recorded as "unknown".
    """
    answer = state["answer"]
    question = state["question"]

    # Evaluation prompt
    eval_prompt = ChatPromptTemplate.from_template(
        """다음 질문과 답변의 품질을 평가하세요.

질문: {question}
답변: {answer}

답변이 질문에 적절히 대답하고 있나요?
- "good": 적절한 답변
- "bad": 부적절하거나 관련 없는 답변
- "unknown": 판단 불가

평가 결과 (good/bad/unknown만 출력):"""
    )

    messages = eval_prompt.invoke({
        "question": question,
        "answer": answer
    })

    evaluation = _normalize_quality(llm.invoke(messages).content)

    return {
        **state,
        "answer_quality": evaluation
    }

def regenerate_answer(state: QualityState) -> QualityState:
    """Answer regeneration node.

    Re-asks ``llm`` with a more detailed prompt built from the question and
    the retrieved documents' text, and returns a copy of the state with
    ``answer`` replaced by the new response content.
    """
    question = state["question"]
    documents = state["context"]

    # More detailed prompt
    detailed_prompt = ChatPromptTemplate.from_template(
        """다음 컨텍스트를 참고하여 질문에 상세히 답변하세요.
반드시 컨텍스트의 정보를 활용하고, 구체적으로 설명하세요.

컨텍스트:
{context}

질문: {question}

상세 답변:"""
    )

    prompt_value = detailed_prompt.invoke({
        "context": "\n\n".join(doc.page_content for doc in documents),
        "question": question,
    })
    reply = llm.invoke(prompt_value)

    return {**state, "answer": reply.content}

def route_by_quality(state: QualityState) -> str:
    """Route on ``answer_quality``: "regenerate" for "bad", "end" for anything else."""
    return "regenerate" if state["answer_quality"] == "bad" else "end"

In [15]:
# Quality-checked workflow: retrieve -> generate -> evaluate, with an optional
# regenerate step when the evaluation says "bad"
quality_workflow = StateGraph(QualityState)

quality_workflow.add_node("retrieve", retrieve)
quality_workflow.add_node("generate", generate)
quality_workflow.add_node("evaluate", evaluate_answer)
quality_workflow.add_node("regenerate", regenerate_answer)
quality_workflow.add_node("fallback", no_context_fallback)

quality_workflow.set_entry_point("retrieve")

quality_workflow.add_conditional_edges(
    "retrieve",
    should_generate,
    {
        "generate": "generate",
        "fallback": "fallback"
    }
)

# Every generated answer is evaluated
quality_workflow.add_edge("generate", "evaluate")

# route_by_quality returns "end" or "regenerate"
quality_workflow.add_conditional_edges(
    "evaluate",
    route_by_quality,
    {
        "end": END,
        "regenerate": "regenerate"
    }
)

# A regenerated answer is not evaluated again; the fallback path skips
# "evaluate" entirely
quality_workflow.add_edge("regenerate", END)
quality_workflow.add_edge("fallback", END)

quality_app = quality_workflow.compile()

# Run
result = quality_app.invoke({
    "question": "Azure AI Search의 장점은?"
})

# The fallback path ends without visiting "evaluate", so answer_quality may be
# missing from the result; read it with a default instead of indexing.
print(f"답변 품질: {result.get('answer_quality', 'unknown')}")
print(f"최종 답변: {result['answer']}")

In [16]:
from IPython.display import Image, display

# Visualize the graph: show the Mermaid PNG, or print the ASCII drawing if
# producing or displaying the image fails
try:
    display(Image(app.get_graph().draw_mermaid_png()))
except Exception:
    print(app.get_graph().draw_ascii())

In [17]:
# Stream the run to inspect the intermediate steps; each event yielded by
# app.stream is printed, followed by a separator line
for event in app.stream({
    "question": "Azure AI Search란?"
}):
    print(f"이벤트: {event}")
    print("---")