In [1]:
#Installation
!pip install langchain

Collecting langchain
  Downloading langchain-0.1.16-py3-none-any.whl.metadata (13 kB)
Collecting PyYAML>=5.3 (from langchain)
  Using cached PyYAML-6.0.1-cp312-cp312-win_amd64.whl.metadata (2.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.29-cp312-cp312-win_amd64.whl.metadata (9.8 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.9.5-cp312-cp312-win_amd64.whl.metadata (7.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl.metadata (25 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-community<0.1,>=0.0.32 (from langchain)
  Downloading langchain_community-0.0.33-py3-none-any.whl.metadata (8.5 kB)
Collecting langchain-core<0.2.0,>=0.1.42 (from langchain)
  Downloading langchain_core-0.1.44-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<0.1,>=0.0.

# LLM Chain

In [2]:
!pip install langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.1.3-py3-none-any.whl.metadata (2.5 kB)
Collecting openai<2.0.0,>=1.10.0 (from langchain-openai)
  Downloading openai-1.21.2-py3-none-any.whl.metadata (21 kB)
Collecting tiktoken<1,>=0.5.2 (from langchain-openai)
  Downloading tiktoken-0.6.0-cp312-cp312-win_amd64.whl.metadata (6.8 kB)
Collecting anyio<5,>=3.5.0 (from openai<2.0.0,>=1.10.0->langchain-openai)
  Using cached anyio-4.3.0-py3-none-any.whl.metadata (4.6 kB)
Collecting distro<2,>=1.7.0 (from openai<2.0.0,>=1.10.0->langchain-openai)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai<2.0.0,>=1.10.0->langchain-openai)
  Using cached httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting tqdm>4 (from openai<2.0.0,>=1.10.0->langchain-openai)
  Using cached tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
Collecting regex>=2022.1.18 (from tiktoken<1,>=0.5.2->langchain-openai)
  Downloading regex-2024.4.16-cp312-cp312-wi

In [4]:
# Optional: provide the OpenAI API key through the environment instead of
# hardcoding it in the notebook (presumably ChatOpenAI() reads OPENAI_API_KEY
# when no api_key is passed - confirm against the langchain-openai docs).
# import os
# OPENAI_API_KEY=os.environ['OPENAI_API_KEY']

# NOTE(review): a `!` command runs in a separate subshell, so `!export ...`
# would not set the variable for this kernel; `%env OPENAI_API_KEY=...` or
# assigning to os.environ would.
# !export OPENAI_API_KEY="..."

In [1]:
# Initialize the model

# OpenAI (API)
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()  # ChatOpenAI(api_key="...")

# # Local (using Ollama)
# # Download Ollama for your OS from https://ollama.com/download
# # Run `ollama run llama2` in a terminal (installs/runs llama2)
# from langchain_community.llms import Ollama
# llm = Ollama(model="llama2")

In [2]:
# Baseline: send the question straight to the model, with no prompt template
# and no retrieved context.
llm.invoke("how can langsmith help with testing?")

AIMessage(content='Langsmith can help with testing in several ways:\n\n1. Automated testing: Langsmith can be used to write scripts and automate the testing process, making it faster and more efficient.\n\n2. Test case generation: Langsmith can be used to generate test cases based on specifications and requirements, ensuring thorough coverage of the application.\n\n3. Test data generation: Langsmith can help in generating test data for different scenarios, making it easier to test the application under various conditions.\n\n4. Integration testing: Langsmith can be used to test the integration of different components of the application, ensuring that they work together seamlessly.\n\n5. Performance testing: Langsmith can be used to conduct performance testing to evaluate the speed, responsiveness, and stability of the application under different load conditions.\n\nOverall, Langsmith can help streamline the testing process, improve test coverage, and ensure the quality of the software 

In [9]:
from langchain_core.prompts import ChatPromptTemplate

# Use a prompt template to steer the model toward the desired response.
# Prompt templates turn the user's raw input into a better input for the LLM:
# here a fixed system message precedes the user's text, which fills {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are world class technical documentation writer."),
    ("user", "{input}")
])

In [10]:
# Combine the prompt and the LLM into a simple LLM chain
chain = prompt | llm 

In [11]:
# The "input" key fills the {input} placeholder of the prompt template.
chain.invoke({"input": "how can langsmith help with testing?"})

AIMessage(content='Langsmith is a powerful tool that can greatly assist with testing in various ways. Here are some ways in which Langsmith can help with testing:\n\n1. **Test data generation**: Langsmith can be used to generate realistic and diverse test data quickly and easily. This can help in creating a comprehensive set of test cases to validate the functionality and performance of the system under test.\n\n2. **Test case automation**: Langsmith can automate the generation of test cases based on predefined rules and constraints. This can save time and effort in creating and maintaining test cases for regression testing or new feature testing.\n\n3. **Test environment setup**: Langsmith can help in setting up test environments by automatically generating configuration files, database schemas, and other resources needed for testing. This can streamline the process of preparing test environments and reduce the chances of errors in configuration.\n\n4. **Test coverage analysis**: Lang

In [12]:
from langchain_core.output_parsers import StrOutputParser

# Add a simple parser that converts the output chat message into a string
output_parser = StrOutputParser()

In [13]:
# Rebuild the chain with the parser appended as the last step
# (this rebinds `chain`, replacing the two-step chain defined earlier).
chain = prompt | llm | output_parser

In [14]:
# Same question as before, now through the chain that ends in output_parser.
chain.invoke({"input": "how can langsmith help with testing?"})

'Langsmith can be a valuable tool for testing in several ways:\n\n1. Automated testing: Langsmith can be used to automate testing processes, such as running test scripts and generating test reports. This can help ensure that the software is functioning as expected and that any bugs or issues are identified and addressed promptly.\n\n2. Test data generation: Langsmith can help generate realistic and diverse test data for various scenarios, making it easier to test the software under different conditions and edge cases.\n\n3. Performance testing: Langsmith can be used to simulate high traffic loads and test the performance of the software under stress. This can help identify any bottlenecks or performance issues that need to be addressed.\n\n4. Integration testing: Langsmith can be used to test the integration of different components or systems, ensuring that they work together seamlessly and without any compatibility issues.\n\nOverall, Langsmith can streamline the testing process, impr

# Retrieval Chain

Retrieval은 LLM에 직접 넣기에는 데이터가 너무 많을 때 유용합니다. Retriever를 사용해서 가장 관련있는 부분만 가져와서 전달할 수 있습니다.

## Documents, Embeddings, VectorStore

In [None]:
!pip install beautifulsoup4

In [2]:
from langchain_community.document_loaders import WebBaseLoader

# Load the data to use as reference material
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")

# Fetches the page over the network; `docs` is what gets split and indexed later.
docs = loader.load()

In [3]:
# Embedding model
# Model used to embed the documents so they can be stored in a vector store

# OpenAI (API)
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

# # Local (using Ollama)
# from langchain_community.embeddings import OllamaEmbeddings
# embeddings = OllamaEmbeddings()

In [3]:
# 단순한 local vectorstore FAISS(Facebook AI Similarity Search)
# https://python.langchain.com/docs/integrations/vectorstores/faiss/
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp312-cp312-win_amd64.whl.metadata (3.8 kB)
Downloading faiss_cpu-1.8.0-cp312-cp312-win_amd64.whl (14.5 MB)
   ---------------------------------------- 0.0/14.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/14.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/14.5 MB 325.1 kB/s eta 0:00:45
   ---------------------------------------- 0.1/14.5 MB 751.6 kB/s eta 0:00:20
    --------------------------------------- 0.3/14.5 MB 2.1 MB/s eta 0:00:07
   - -------------------------------------- 0.7/14.5 MB 3.3 MB/s eta 0:00:05
   --- ------------------------------------ 1.1/14.5 MB 4.3 MB/s eta 0:00:04
   ---- ----------------------------------- 1.5/14.5 MB 5.1 MB/s eta 0:00:03
   ----- ---------------------------------- 2.0/14.5 MB 5.8 MB/s eta 0:00:03
   ------- -------------------------------- 2.6/14.5 MB 6.6 MB/s eta 0:00:02
   ------- -------------------------------- 2.9/14.5 MB 6.8 MB/s eta 0:00:02
   -

In [4]:
# Build the index
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter


# # RecursiveCharacterTextSplitter tries to split the text on each character of a
# # given list, in order, until the chunks are small enough.
# # The default list is ["\n\n", "\n", " ", ""]: it splits recursively by paragraph -> sentence -> word.
# text_splitter = RecursiveCharacterTextSplitter(
#     # Use a very small chunk size. This value is for illustration only.
#     chunk_size=250,
#     # Number of characters that overlap between consecutive chunks.
#     chunk_overlap=50,
#     # Function used to measure the length of a string.
#     length_function=len,
#     # Whether the separators are treated as regular expressions.
#     is_separator_regex=False,
# )


# Split the loaded pages into chunks using the splitter's default settings.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

# Fail fast with a readable message when there is nothing to index (e.g. the page
# could not be fetched or came back empty) instead of failing inside the index build.
if not documents:
    raise ValueError(
        "No document chunks to index: check that the loader returned non-empty content."
    )

# Vector store built from the document chunks and their embeddings
vector = FAISS.from_documents(documents, embeddings)

## Retrieval Chain

입력된 질문을 받아 관련된 문서를 찾고, 원래 질문과 함께 해당 문서를 LLM에 전달합니다.<br/>그런 다음 LLM에 원래 질문에 대한 답변을 생성하도록 요청합니다.

In [6]:
from langchain.chains.combine_documents import create_stuff_documents_chain
# Imported here as well so this cell does not depend on the earlier LLM Chain
# section having been run in the current kernel session.
from langchain_core.prompts import ChatPromptTemplate

# Prompt that restricts the answer to the supplied context.
# {context} receives the documents and {input} the user's question.
# Note: this rebinds `prompt`, replacing the template from the LLM Chain section.
prompt = ChatPromptTemplate.from_template(
"""Answer the following question based only on the provided context:
<context>
{context}
</context>

Question: {input}
"""
)

# Chain that passes a list of documents to the model
document_chain = create_stuff_documents_chain(llm, prompt)

In [7]:
# The chain can also be run by passing the documents in directly
from langchain_core.documents import Document

# "context" takes a list of Document objects; here a single hand-written one
# stands in for retrieved documents.
document_chain.invoke({
    "input": "how can langsmith help with testing?",
    "context": [Document(page_content="langsmith can let you visualize test results")]
})

'Langsmith can help by allowing users to visualize test results.'

In [8]:
from langchain.chains import create_retrieval_chain

# Use a retriever to select the most relevant documents
retriever = vector.as_retriever()

# Chain that takes the question and the retrieved documents and generates an answer
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [9]:
response = retrieval_chain.invoke({"input": "how can langsmith help with testing?"})
# question -> retriever: search for related documents -> pass the question and the related documents to the model -> generate the answer
# The generated text is stored under the "answer" key of the response.
print(response["answer"])

# LangSmith offers several features that can help with testing:...

LangSmith can help with testing by allowing developers to create datasets, run tests on LLM applications using these datasets, upload test cases in bulk, create test cases on the fly, export test cases from application traces, and run custom evaluations to score test results. Additionally, LangSmith provides a comparison view to track and diagnose regressions in test scores across multiple revisions of an application, a playground environment for rapid iteration and experimentation, and the ability to add runs as examples to datasets to expand test coverage on real-world scenarios.
