In [12]:
import os
import dotenv # pip install python-dotenv
from dotenv import load_dotenv, find_dotenv

# .env 파일에서 환경 변수 불러오기
load_dotenv(r'C:\Users\user\Desktop\LangChain\Daily\all.env') 

# 환경 변수에 API 키 설정
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')

# Langsmith Tracing 설정
os.environ['LANGCHAIN_TRACING_V2'] = os.getenv('LANGCHAIN_TRACING_V2')
os.environ['LANGCHAIN_ENDPOINT'] = os.getenv('LANGCHAIN_ENDPOINT')
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

# Fire Crawl API 설정
os.environ['FIRE_API_KEY'] = os.getenv('FIRE_API_KEY')


# 1. Loading Documents

In [19]:
from typing import List
# import document 
from langchain_community.document_loaders import FireCrawlLoader # pip install langchain-community
from langchain_core.documents import Document
# from document import Document 

class DocumentLoader:
    def __init__(self, api_key: str):
        self.api_key = api_key

    def get_docs(self, url: str) -> List[Document]:
        """
        Retrieves documents from the specified URL using the FireCrawlLoader.

        Args:
            url (str): The URL to crawl for documents.

        Returns:
            List[Document]: A list of Document objects containing the retrieved content.
        """
        loader = FireCrawlLoader(
            api_key=self.api_key, url=url, mode="crawl"
        )

        raw_docs = loader.load()
        docs = [Document(page_content=doc.page_content, metadata=doc.metadata) for doc in raw_docs]

        return docs

# 2. Creating Vector Store and Retriever

In [20]:
from typing import List, Optional
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

def create_vector_store(docs, store_path: Optional[str] = None) -> FAISS:
    """
    Creates a FAISS vector store from a list of documents.

    Args:
        docs (List[Document]): A list of Document objects containing the content to be stored.
        store_path (Optional[str]): The path to store the vector store locally. If None, the vector store will not be stored.

    Returns:
        FAISS: The FAISS vector store containing the documents.
    """
    # Creating text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )

    texts = text_splitter.split_documents(docs)

    # Embedding object
    embedding_model = OpenAIEmbeddings()

    # Create the FAISS vector store
    store = FAISS.from_documents(texts, embedding_model)

    # Save the vector store locally if a path is provided
    if store_path:
        store.save_local(store_path)

    return store


# create vector store
store = create_vector_store(saved_docs)

# creating retriever
retriever = store.as_retriever()

NameError: name 'saved_docs' is not defined

# 3. Creating a retrieval chain for response generation

In [48]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

def create_generate_chain(llm):
    """
    코드 관련 질문에 답변하기 위한 생성 체인을 만듭니다.

    Args:
        llm (LLM): 응답 생성에 사용할 언어 모델.

    Returns:
        컨텍스트와 질문을 입력받아 문자열 응답을 반환하는 호출 가능한 함수.
    """
    generate_template = """
    You are a helpful code assistant named Speckly. The user provides you with a code-related question whose content is represented by the following context parts (delimited by <context></context>).
    Use these to answer the question at the end.
    The files deal with Speckle Developer Documentation. You can assume that the user is either a civil engineer, architect, or a software developer.
    If you don't know the answer, just say that you don't know. Do NOT try to make up an answer.
    If the question is not related to the context, politely respond that you only answer questions related to the context.
    Provide as detailed an answer as possible and generate the code in Python (default) unless specifically mentioned by the user in the question.

    <context>
    {context}
    </context>

    <question>
    {input}
    </question>
    """

    generate_prompt = PromptTemplate(template=generate_template, input_variables=["context", "input"])

    # 생성 체인 만들기
    generate_chain = generate_prompt | llm | StrOutputParser()

    return generate_chain

# 생성 체인 만들기
generate_chain = create_generate_chain(llm)


# 4. Retrieval Grader

In [37]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_openai import ChatOpenAI

class GraderUtils:
    def __init__(self, model):
        self.model = model

    def create_retrieval_grader(self):
        """
        Retrieves documents and evaluates their relevance to the user's question.
        """
        grade_prompt = PromptTemplate(
            template="""
            system
            You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
            Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
            Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
            
            user
            Here is the retrieved document: \n\n {document} \n\n
            Here is the user question: {input} \n
            
            assistant
            """,
            input_variables=["document", "input"],
        )

        retriever_grader = grade_prompt | self.model | JsonOutputParser()

        return retriever_grader

    def create_hallucination_grader(self):
        """
        Assesses whether an answer generated by the LLM is grounded in / supported by a set of facts.
        """
        hallucination_prompt = PromptTemplate(
            template="""system
            You are a grader assessing whether an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' score to indicate whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
            
            user
            Here are the facts:
            \n ------- \n
            {documents}
            \n ------- \n
            Here is the answer: {generation}
            
            assistant""",
            input_variables=["generation", "documents"],
        )

        hallucination_grader = hallucination_prompt | self.model | JsonOutputParser()

        return hallucination_grader

    def create_code_evaluator(self):
        """
        Evaluates whether the generated code is correct and relevant to the given question.
        """
        eval_template = PromptTemplate(
            template="""system
            You are a code evaluator assessing whether the generated code is correct and relevant to the given question.
            Provide a JSON response with the following keys:

            'score': A binary score 'yes' or 'no' indicating whether the code is correct and relevant.
            'feedback': A brief explanation of your evaluation, including any issues or improvements needed.

            user
            Here is the generated code:
            \n ------- \n
            {generation}
            \n ------- \n
            Here is the question: {input}
            \n ------- \n
            Here are the relevant documents: {documents}
            assistant""",
            input_variables=["generation", "input", "documents"],
        )

        code_evaluator = eval_template | self.model | JsonOutputParser()

        return code_evaluator

    def create_question_rewriter(self):
        """
        Rewrites a given question to improve its clarity and relevance.
        """
        re_write_prompt = PromptTemplate(
            template="""
            system
            You are a question rewriter. Your task is to rewrite the given question to make it clearer and more relevant to the context.

            user
            Here is the original question: {input}

            assistant""",
            input_variables=["input"]
        )

        question_rewriter = re_write_prompt | self.model | StrOutputParser()

        return question_rewriter


# Example usage:

# Initialize the LLM model
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Create an instance of the GraderUtils class
grader = GraderUtils(llm)

# Get the retrieval grader
retrieval_grader = grader.create_retrieval_grader()

# Get the hallucination grader
hallucination_grader = grader.create_hallucination_grader()

# Get the code evaluator
code_evaluator = grader.create_code_evaluator()

# Get the question rewriter
question_rewriter = grader.create_question_rewriter()


# Test Code

In [31]:
import sys
sys.path.append('C:\\Users\\user\\Desktop\\LangChain\\Daily\\perfect_trio')

In [32]:
import os
from dotenv import load_dotenv, find_dotenv
import pickle

# # Step 1: 환경 변수 로드 및 문서 크롤링
# load_dotenv(find_dotenv())
# from document_loader import DocumentLoader  # 이전에 정의한 DocumentLoader 클래스를 import

# Speckle 개발자 문서 크롤링
loader = DocumentLoader(api_key=os.getenv('FIRE_API_KEY'))
speckle_docs = loader.get_docs("https://speckle.guide/dev/server-graphql-api.html#advanced-queries")
print("Step 1: 문서 크롤링 완료")

# 문서를 로컬에 저장
with open("cralwed_docs\speckle_docs.pkl", "wb") as f:
    pickle.dump(speckle_docs, f)
print("Step 1: 문서 로컬에 저장 완료")




  with open("cralwed_docs\speckle_docs.pkl", "wb") as f:


Step 1: 문서 크롤링 완료
Step 1: 문서 로컬에 저장 완료


In [69]:
speckle_docs



In [35]:
# Step 3: 벡터 스토어 생성


store = create_vector_store(speckle_docs)
retriever = store.as_retriever()


In [38]:

# Step 5: Grader 테스트

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o", temperature=0)
grader_utils = GraderUtils(llm)

# 모든 Grader 테스트
retrieval_grader = grader_utils.create_retrieval_grader()
document = "France is a country in Europe. Paris is the capital of France."
question = "What is the capital of France?"

# 수정된 호출 방법
score = retrieval_grader.invoke({"document": document, "input": question})
print("Retrieval Grader Test:", score)

hallucination_grader = grader_utils.create_hallucination_grader()
answer = "The capital of France is Paris."
facts = ["France is a country in Europe.", "Paris is the capital of France."]
hallucination_score = hallucination_grader.invoke({"generation": answer, "documents": facts})
print("Hallucination Grader Test:", hallucination_score)

code_evaluator = grader_utils.create_code_evaluator()
code = "def greet(name): return f'Hello, {name}!'"
code_question = "Write a function to greet someone by name."
documents = ["A function should take a name as input and return a greeting message."]
evaluation_result = code_evaluator.invoke({"generation": code, "input": code_question, "documents": documents})
print("Code Evaluator Test:", evaluation_result)

question_rewriter = grader_utils.create_question_rewriter()
original_question = "how to use speckle's python sdk?"

# 수정된 호출 방법
rewritten_question = question_rewriter.invoke({"input": original_question})
print("Question Rewriter Test:", rewritten_question)

Retrieval Grader Test: {'score': 'yes'}
Hallucination Grader Test: {'score': 'yes'}
Code Evaluator Test: {'score': 'yes', 'feedback': "The generated code is correct and relevant to the given question. The function 'greet' takes a name as input and returns a greeting message, which aligns with the requirements specified in the relevant documents."}
Question Rewriter Test: How do I use Speckle's Python SDK?


# 5. Creating the Graph

In [39]:
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """
    그래프의 상태를 나타냅니다.

    Attributes:
        input: 질문 또는 입력
        generation: LLM으로부터 생성된 응답
        documents: 관련 문서 리스트
    """
    input: str
    generation: str
    documents: str


In [40]:
class GraphNodes:
    def __init__(self, llm, retriever, retrieval_grader, hallucination_grader, code_evaluator, question_rewriter):
        self.llm = llm
        self.retriever = retriever
        self.retrieval_grader = retrieval_grader
        self.hallucination_grader = hallucination_grader
        self.code_evaluator = code_evaluator
        self.question_rewriter = question_rewriter
        self.generate_chain = create_generate_chain(llm)

    def retrieve(self, state):
        print('---RETRIEVE---')
        question = state['input']
        documents = self.retriever.retrieve(question)
        return {'documents': documents ,'input': question}
    
    def generate(self, state):
        print("---GENERATE---")
        question = state["input"]
        documents = state["documents"]
        generation = self.generate_chain.invoke({"context": documents, "input": question})
        return {"documents": documents, "input": question, "generation": generation}

    def grade_documents(self, state):
        print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
        question = state["input"]
        documents = state["documents"]
        filtered_docs = []

        for d in documents:
            score = self.retrieval_grader.invoke({"input": question, "document": d.page_content})
            if score["score"] == "yes":
                print("---GRADE: DOCUMENT RELEVANT---")
                filtered_docs.append(d)
            else:
                print("---GRADE: DOCUMENT IR-RELEVANT---")
                continue

        return {"documents": filtered_docs, "input": question}

    def transform_query(self, state):
        print("---TRANSFORM QUERY---")
        question = state["input"]
        better_question = self.question_rewriter.invoke({"input": question})
        return {"documents": state["documents"], "input": better_question}


In [41]:
class EdgeGraph:
    def __init__(self, hallucination_grader, code_evaluator):
        self.hallucination_grader = hallucination_grader
        self.code_evaluator = code_evaluator

    def decide_to_generate(self, state):
        print("---ASSESS GRADED DOCUMENTS---")
        filtered_documents = state["documents"]

        if not filtered_documents:
            print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---")
            return "transform_query"
        else:
            print("---DECISION: GENERATE---")
            return "generate"

    def grade_generation_v_documents_and_question(self, state):
        print("---CHECK HALLUCINATIONS---")
        documents = state["documents"]
        generation = state["generation"]

        score = self.hallucination_grader.invoke({"documents": documents, "generation": generation})
        if score["score"] == "yes":
            print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
            score = self.code_evaluator.invoke({"input": state["input"], "generation": generation, "documents": documents})
            if score["score"] == "yes":
                print("---DECISION: GENERATION ADDRESSES QUESTION---")
                return "useful"
            else:
                print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
                return "not useful"
        else:
            print("---DECISION: GENERATIONS ARE HALLUCINATED, RE-TRY---")
            return "not supported"


In [51]:
from langgraph.graph import StateGraph



# 그래프 초기화
workflow = StateGraph(GraphState)

# GraphNodes와 EdgeGraph 인스턴스 생성
graph_nodes = GraphNodes(llm, retriever, retrieval_grader, hallucination_grader, code_evaluator, question_rewriter)
edge_graph = EdgeGraph(hallucination_grader, code_evaluator)

# 노드 정의
workflow.add_node("retrieve", graph_nodes.retrieve)
workflow.add_node("grade_documents", graph_nodes.grade_documents)
workflow.add_node("generate", graph_nodes.generate)
workflow.add_node("transform_query", graph_nodes.transform_query)

# 그래프 빌드
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    edge_graph.decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
workflow.add_edge("transform_query", "retrieve")
workflow.add_conditional_edges(
    "generate",
    edge_graph.grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "transform_query",
    },
)

# 그래프 컴파일
chain = workflow.compile()


# 6. Starting a Server using FastAPI

In [54]:
from fastapi import FastAPI, HTTPException

app = FastAPI(
    title="Speckle Server",
    version="1.0",
    description="An API server to answer questions regarding the Speckle Developer Docs"
)

In [55]:
@app.get("/")
async def redirect_root_to_docs():
    return RedirectResponse("/docs")

In [70]:
from pydantic.v1 import BaseModel, Field

class Input(BaseModel):
    input: str

class Output(BaseModel):
    output: dict

In [71]:
import langserve 
from langserve import add_routes
from pydantic.v1 import BaseModel, Field

add_routes(
    app,
    chain.with_types(input_type=Input, output_type=Output),
    path="/speckle_chat_3",
)

In [72]:
# FastAPI 서버 실행 예시 (Jupyter Notebook에서 실행 가능)
import uvicorn

# 기존의 if __name__ == "__main__": 블록 안에 있던 코드를 직접 실행
uvicorn.run(app, host="localhost", port=8000)


RuntimeError: asyncio.run() cannot be called from a running event loop

# 7. Streamlit/Gradio UI