### 개요 
1. FastAPI의 특징 및 주요 기능 간단 설명
2. LLM 서버 구성 시 주의사항
3. LangServe 소개

참고: 
- [RAG in Production - LangChain & FastAPI](https://www.youtube.com/watch?v=Arf7UwWjGyc&t=2s)
- [FastAPI](https://fastapi.tiangolo.com/)
- [\[루닥스 블로그\] 연습만이 살길이다\:티스토리](https://rudaks.tistory.com/entry/langchain-Langchain%EA%B3%BC-FastAPI%EB%A5%BC-%EC%82%AC%EC%9A%A9%ED%95%98%EC%97%AC-OpenAI-%EB%AA%A8%EB%8D%B8-%ED%98%B8%EC%B6%9C%ED%95%98%EA%B8%B0-invoke-ainvoke-stream-astream?category=1154278)

## 2. FastAPI로 LLM 호출 서빙

### 테스트 편의를 위하여 `get`만 사용하며, 필요 인자는 쿼리 매개변수로 받습니다.

In [12]:
from fastapi import FastAPI
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from fastapi.responses import StreamingResponse

from dotenv import load_dotenv

# Load settings from a .env file; presumably this supplies the OpenAI API
# key used by ChatOpenAI below — confirm.
load_dotenv()
# Create the FastAPI application instance.

app = FastAPI()
# Chat model shared by all endpoints defined on this app.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Pipe the model output through StrOutputParser; the handlers below treat
# the chain's results/chunks as strings.
chain = llm | StrOutputParser()

@app.get("/invoke")
def sync_chat(message: str):
    """Run the chain synchronously for ``message`` and return its output.

    Args:
        message: Prompt text, received as the ``message`` query parameter.

    Returns:
        The value produced by ``chain.invoke(message)``.
    """
    return chain.invoke(message)

@app.get("/ainvoke")
async def async_chat(message: str):
    """Run the chain asynchronously for ``message`` and return its output.

    Args:
        message: Prompt text, received as the ``message`` query parameter.

    Returns:
        The awaited result of ``chain.ainvoke(message)``.
    """
    return await chain.ainvoke(message)

@app.get("/stream")
def sync_stream_chat(message: str):
    """Stream the chain's output for ``message`` as Server-Sent Events.

    Every non-empty chunk is sent as its own SSE event so the body matches
    the declared ``text/event-stream`` media type. Previously only the error
    path was framed as an event while normal chunks were written raw.

    Args:
        message: Prompt text, received as the ``message`` query parameter.

    Returns:
        A ``StreamingResponse`` whose body is a sequence of SSE events, one
        per chunk yielded by ``chain.stream(message)``. If iteration raises,
        the exception text is sent as a final event.
    """
    def sse_frame(payload):
        # An SSE event is one or more "data:" lines terminated by a blank
        # line. Emit one "data:" line per text line so that newlines inside
        # a chunk survive the framing.
        body = "".join(f"data: {line}\n" for line in str(payload).split("\n"))
        return f"{body}\n"

    def event_stream():
        try:
            for chunk in chain.stream(message):
                if chunk:  # skip empty chunks
                    yield sse_frame(chunk)
        except Exception as e:
            # Report the failure to the client as a final event.
            yield sse_frame(e)

    return StreamingResponse(event_stream(), media_type="text/event-stream")

@app.get("/astream")
async def async_stream_chat(message: str):
    """Asynchronously stream the chain's output as Server-Sent Events.

    Every non-empty chunk is sent as its own SSE event so the body matches
    the declared ``text/event-stream`` media type. Previously only the error
    path was framed as an event while normal chunks were written raw.

    Args:
        message: Prompt text, received as the ``message`` query parameter.

    Returns:
        A ``StreamingResponse`` whose body is a sequence of SSE events, one
        per chunk yielded by ``chain.astream(message)``. If iteration raises,
        the exception text is sent as a final event.
    """
    def sse_frame(payload):
        # An SSE event is one or more "data:" lines terminated by a blank
        # line. Emit one "data:" line per text line so that newlines inside
        # a chunk survive the framing.
        body = "".join(f"data: {line}\n" for line in str(payload).split("\n"))
        return f"{body}\n"

    async def event_stream():
        try:
            async for chunk in chain.astream(message):
                if chunk:  # skip empty chunks
                    yield sse_frame(chunk)
        except Exception as e:
            # Report the failure to the client as a final event.
            yield sse_frame(e)

    return StreamingResponse(event_stream(), media_type="text/event-stream")

아래 코드로 FastAPI 서버를 실행한 뒤, 브라우저에서 URL로 접속하여 테스트해 보세요.

ex: `http://127.0.0.1:8000/invoke?message=구글의 설립연도를 알려줘`

In [None]:
import uvicorn
import nest_asyncio

# Patch asyncio before starting the server; presumably needed because the
# notebook kernel already runs an event loop — confirm.
nest_asyncio.apply()
# Serve `app`; the example URLs in this notebook use http://127.0.0.1:8000.
uvicorn.run(app)

## 3. FastAPI에서 VectorDB 호출
간단한 RAG 예제를 추가합니다.

In [13]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import time
import asyncio

from dotenv import load_dotenv
# Directory handed to Chroma as its persist_directory.
DB_PATH = "./chroma_db"

# Load settings from a .env file; presumably this supplies the OpenAI API
# key used by the embedding and chat models below — confirm.
load_dotenv()

# NOTE: this binds `app` to a new FastAPI instance; routes registered on a
# previously created instance are not carried over.
app = FastAPI()

# Vector store for the "FastApiServing" collection, embedded with
# OpenAIEmbeddings.
chroma = Chroma(
    collection_name="FastApiServing", 
    persist_directory=DB_PATH,
    embedding_function=OpenAIEmbeddings(),
)

# Retriever over the store; "k": 4 is passed as a search kwarg (presumably
# the number of documents returned per query — confirm).
retriever = chroma.as_retriever(
    search_kwargs={
        "k": 4,
    }
)

# Prompt with two placeholders: {context} and {question}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# RAG pipeline: the chain input goes to the retriever to fill {context} and
# is passed through unchanged as {question}; the filled prompt is then sent
# to the model and its output run through StrOutputParser.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

@app.get("/invoke")
def sync_chat(message: str):
    """Answer ``message`` by running the retrieval chain synchronously.

    Args:
        message: Question text, received as the ``message`` query parameter.

    Returns:
        The value produced by ``chain.invoke(message)``.
    """
    return chain.invoke(message)

@app.get("/add-content")
async def add_content(content: str, author: str):
    """Store ``content`` in the Chroma collection using blocking calls.

    Args:
        content: Text to add to the vector store.
        author: Value recorded under the ``"source"`` metadata key.

    Returns:
        A dict with a ``"message"`` key echoing the added content.
    """
    # NOTE(review): time.sleep() blocks inside an async handler. This looks
    # intentional, as a blocking counterpart to /async-add-content — confirm.
    time.sleep(3)
    source_tag = {"source": author}
    chroma.add_texts([content], [source_tag])
    return {"message": f"add-content: {content}"}

@app.get('/async-add-content')
async def async_add_content(content: str, author: str):
    """Store ``content`` in the Chroma collection using awaitable calls.

    Args:
        content: Text to add to the vector store.
        author: Value recorded under the ``"source"`` metadata key.

    Returns:
        A dict with a ``"message"`` key echoing the added content.
    """
    # Awaitable 3-second delay, mirroring the time.sleep(3) in /add-content.
    await asyncio.sleep(3)
    source_tag = {"source": author}
    await chroma.aadd_texts([content], [source_tag])
    return {"message": f"async-add-content: {content}"}

# TODO: endpoints planned for later
# get by ids()
# aget by ids()

# delete
# adelete

ex: `http://127.0.0.1:8000/async-add-content?content=Hello World&author=donghak`

In [None]:
import uvicorn
import nest_asyncio

# Patch asyncio before starting the server; presumably needed because the
# notebook kernel already runs an event loop — confirm.
nest_asyncio.apply()
# Serve `app`; the example URLs in this notebook use http://127.0.0.1:8000.
uvicorn.run(app)

## 4. FastAPI 서빙시 비동기 중요성

In [14]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import time
import asyncio

from dotenv import load_dotenv
# Directory handed to Chroma as its persist_directory.
DB_PATH = "./chroma_db"

# Load settings from a .env file; presumably this supplies the OpenAI API
# key used by the embedding and chat models below — confirm.
load_dotenv()

# NOTE: this binds `app` to a new FastAPI instance; routes registered on a
# previously created instance are not carried over.
app = FastAPI()

# Vector store for the "FastApiServing" collection, embedded with
# OpenAIEmbeddings.
chroma = Chroma(
    collection_name="FastApiServing", 
    persist_directory=DB_PATH,
    embedding_function=OpenAIEmbeddings(),
)

# Retriever over the store; "k": 4 is passed as a search kwarg (presumably
# the number of documents returned per query — confirm).
retriever = chroma.as_retriever(
    search_kwargs={
        "k": 4,
    }
)

# Prompt with two placeholders: {context} and {question}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# RAG pipeline: the chain input goes to the retriever to fill {context} and
# is passed through unchanged as {question}; the filled prompt is then sent
# to the model and its output run through StrOutputParser.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

@app.get("/invoke")
def sync_chat(message: str):
    """Answer ``message`` by running the retrieval chain synchronously.

    Args:
        message: Question text, received as the ``message`` query parameter.

    Returns:
        The value produced by ``chain.invoke(message)``.
    """
    return chain.invoke(message)

@app.get("/add-content")
async def add_content(content: str, author: str):
    """Simulate a blocking write: sleep for 3 seconds, then echo ``content``.

    Args:
        content: Text echoed back in the response message.
        author: Accepted but unused here; the vector-store write
            (``chroma.add_texts([content], [{"source": author}])``) is
            disabled in this variant.

    Returns:
        A dict with a ``"message"`` key echoing the content.
    """
    # NOTE(review): time.sleep() blocks inside an async handler. This looks
    # intentional, as a blocking counterpart to /async-add-content — confirm.
    time.sleep(3)
    reply = {"message": f"add-content: {content}"}
    return reply

@app.get('/async-add-content')
async def async_add_content(content: str, author: str):
    """Simulate a non-blocking write: await a 3-second sleep, then echo.

    Args:
        content: Text echoed back in the response message.
        author: Accepted but unused here; the vector-store write
            (``await chroma.aadd_texts([content], [{"source": author}])``)
            is disabled in this variant.

    Returns:
        A dict with a ``"message"`` key echoing the content.
    """
    # Awaitable 3-second delay, mirroring the time.sleep(3) in /add-content.
    await asyncio.sleep(3)
    reply = {"message": f"async-add-content: {content}"}
    return reply


비동기 비교 테스트 코드

In [None]:
import asyncio

import httpx
import time


async def make_request(endpoint):
    """Send one GET request to the local server and print the outcome.

    Args:
        endpoint: Path plus optional query string, appended to
            ``http://127.0.0.1:8000/``.

    On HTTP 200 the decoded JSON body is printed; for any other status the
    status code and raw response text are printed instead.
    """
    target = f"http://127.0.0.1:8000/{endpoint}"

    async with httpx.AsyncClient(timeout=20) as session:
        reply = await session.get(target)
        if reply.status_code != 200:
            print(f"Error from {endpoint}: {reply.status_code}")
            print("Response content:", reply.text)
            return
        print(f"Response from {endpoint}: {reply.json()}")


async def main(endpoint):
    """Fire a write request and an ``invoke`` request concurrently.

    Args:
        endpoint: Name of the write endpoint to call; the fixed content and
            author query parameters are appended to it.

    Both requests are scheduled together with ``asyncio.gather`` and this
    coroutine returns once both have finished.
    """
    write_call = make_request(
        f"{endpoint}?content=my name is donghak&author=donghak"
    )
    query_call = make_request("invoke?message=내이름을 하나만 말해봐")
    await asyncio.gather(write_call, query_call)


def _timed_rounds(endpoint, rounds=3):
    """Return the seconds taken to run ``main(endpoint)`` ``rounds`` times.

    Uses ``time.perf_counter()``, which is intended for measuring intervals
    and is not affected by system clock adjustments.
    """
    started = time.perf_counter()
    for _ in range(rounds):
        asyncio.run(main(endpoint=endpoint))
    return time.perf_counter() - started


if __name__ == "__main__":
    # Time the handler that sleeps with time.sleep() first, then the one
    # that awaits asyncio.sleep(); print both totals afterwards.
    sync_elapsed = _timed_rounds("add-content")
    async_elapsed = _timed_rounds("async-add-content")

    print(f"Sync time: {sync_elapsed}")
    print(f"Async time: {async_elapsed}")
