In [1]:
!pip show langgraph

Name: langgraph
Version: 0.6.2
Summary: Building stateful, multi-actor applications with LLMs
Home-page: 
Author: 
Author-email: 
License-Expression: MIT
Location: /Users/jessicahong/.pyenv/versions/3.11.11/lib/python3.11/site-packages
Requires: langchain-core, langgraph-checkpoint, langgraph-prebuilt, langgraph-sdk, pydantic, xxhash
Required-by: 


In [2]:
import langgraph
print(dir(langgraph))

['__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']


In [3]:
"""[사용자 입력]
     ↓
[LangGraph로 제어 흐름 구성]
     ↓
[1. Toolformer 스타일로 도구 선택]
     ↓
[2. Tavily로 Web + Wikipedia 검색 (or custom retriever)]
     ↓
[3. CRAG로 요약 + QA 생성 (예: DistilBART + FLAN-T5)]
     ↓
[4. LangChain으로 전체 파이프라인 연결]
     ↓
[5. sklearn/evaluation을 통해 결과 평가 및 튜닝]
     ↓
[최종 응답 반환]
"""

'[사용자 입력]\n     ↓\n[LangGraph로 제어 흐름 구성]\n     ↓\n[1. Toolformer 스타일로 도구 선택]\n     ↓\n[2. Tavily로 Web + Wikipedia 검색 (or custom retriever)]\n     ↓\n[3. CRAG로 요약 + QA 생성 (예: DistilBART + FLAN-T5)]\n     ↓\n[4. LangChain으로 전체 파이프라인 연결]\n     ↓\n[5. sklearn/evaluation을 통해 결과 평가 및 튜닝]\n     ↓\n[최종 응답 반환]\n'

In [4]:
import wikipedia
from transformers import pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
wikipedia.set_lang("en")

In [6]:
# 2. 모델 준비
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-small", tokenizer="google/flan-t5-small")

Device set to use mps:0
Device set to use mps:0


In [7]:
def fetch_wiki_content(query):
    try:
        search_results = wikipedia.search(query)
        if not search_results:
            return "No pages found for this query."
        
        # search_results 출력해서 확인
        print("Search results:", search_results)
        
        # search_results 중 첫 번째부터 차례로 시도해보기
        for title in search_results:
            try:
                page = wikipedia.page(title, auto_suggest=False)
                return page.content
            except wikipedia.PageError:
                continue
            except wikipedia.DisambiguationError:
                continue
        
        return "Page not found. Try another query."
    
    except Exception as e:
        return f"Error occurred: {e}"

result = fetch_wiki_content("Donald Trump")

if isinstance(result, str) and len(result) > 500:
    print(result[:500] + "\n... [중략] ...")
else:
    print(result)


Search results: ['Donald Trump', 'Donald Trump Jr.', 'Donald Trump sexual misconduct allegations', 'Trump family', 'Second cabinet of Donald Trump', 'Donald Trump and golf', 'Age and health concerns about Donald Trump', 'Donald Trump and religion', 'False or misleading statements by Donald Trump', 'Trump–Musk feud']
Donald John Trump (born June 14, 1946) is an American politician, media personality, and businessman who is the 47th president of the United States. A member of the Republican Party, he served as the 45th president from 2017 to 2021.
Born into a wealthy family in New York City, Trump graduated from the University of Pennsylvania in 1968 with a bachelor's degree in economics. He became the president of his family's real estate business in 1971, renamed it the Trump Organization, and began acquiri
... [중략] ...


In [8]:
def summarize_long_text(text, chunk_size=1000, chunk_overlap=100):
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_text(text)
    
    summaries = []
    for chunk in chunks:
        input_len = len(chunk.split())
        max_len = max(min(input_len // 3, 150), 10)
        min_len = min(40, max_len - 1)
        if min_len < 10:
            min_len = 10
        
        summary = summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
        summaries.append(summary)
    
    combined_summary = " ".join(summaries)
    
    combined_len = len(combined_summary.split())
    final_max_len = max(min(combined_len // 3, 200), 50)
    final_min_len = min(40, final_max_len - 1)
    if final_min_len < 10:
        final_min_len = 10
    
    final_summary = summarizer(combined_summary, max_length=final_max_len, min_length=final_min_len, do_sample=False)[0]['summary_text']
    return final_summary


In [9]:
def crag_summarize_and_qa(wiki_content, question):
    summary = summarize_long_text(wiki_content)
    
    # question과 context를 하나의 문자열로 합침 (FLAN-T5 스타일)
    input_text = f"question: {question} context: {summary}"
    
    answer = qa_pipeline(input_text)[0]['generated_text']
    return summary, answer


In [10]:
def run_all_in_one(query):
    wiki_text = fetch_wiki_content(query)
    
    if wiki_text.startswith("Page not found") or wiki_text.startswith("No pages found") or wiki_text.startswith("Query ambiguous"):
        print("Error fetching wiki content:", wiki_text)
        return
    
    question = f"What is {query}?"
    summary, answer = crag_summarize_and_qa(wiki_text, question)
    
    print("Summary:\n", summary)
    print("\nAnswer:\n", answer)

# 실행 예시
run_all_in_one("Natural language processing")


Search results: ['Natural language processing', 'Natural language', 'Natural-language user interface', 'Quantum natural language processing', 'History of natural language processing', 'Outline of natural language processing', 'Natural Language Toolkit', 'Semantic decomposition (natural language processing)', 'Natural language understanding', 'Empirical Methods in Natural Language Processing']


Your max_length is set to 10, but your input_length is only 7. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=3)
Token indices sequence length is longer than the specified maximum sequence length for this model (1443 > 1024). Running this sequence through the model will result in indexing errors


Summary:
  The study of NLP, a subfield of computer science, is generally associated with artificial intelligence . NLP is related to information retrieval, knowledge representation, computational linguistics, and more broadly with linguistics . The Georgetown experiment in 1954 involved fully automatic translation of more than sixty Russian sentences into English . Since 2015, the statistical approach has been replaced by the neural networks approach .

Answer:
 natural language processing


In [11]:
run_all_in_one("Who is Noam Chomsky")

Search results: ['Noam Chomsky bibliography and filmography', 'Political positions of Noam Chomsky', 'Noam Chomsky', 'Is the Man Who Is Tall Happy?', 'Carol Chomsky', 'Decoding Chomsky', 'William Chomsky', 'List of honorary degrees awarded to Noam Chomsky', 'Manufacturing Consent', 'The Withdrawal']


Your max_length is set to 10, but your input_length is only 7. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=3)


Summary:
  This is a list of writings published by the American writer Noam Chomsky . Chomsky's full b  Chomsky wrote in preparation for his PhD thesis, including hand-written notes made in preparation of the 1975 book, is available as a 149 MiB, 919 page  Roman Jakobson (Ph.D.) — (Jun 1955). Transformational Analysis . University of Pennsylvania . Chomsky has written numerous books about language .

Answer:
 Chomsky's full b Chomsky wrote in preparation for his PhD thesis, including hand-written notes made in preparation of the 1975 book, is available as a 149 MiB, 919 page Roman Jakobson (Ph.D.) — (Jun 1955). Transformational Analysis . University of Pennsylvania


In [12]:
#Langchain Agent

In [13]:
from langchain.tools import Tool

# 기존에 있던 fetch_wiki_content 함수
def tool_fetch_wiki_content(query: str) -> str:
    return fetch_wiki_content(query)

# 기존에 있던 crag_summarize_and_qa 함수 (요약 + QA)
def tool_crag_summarize_and_qa(wiki_content: str, question: str) -> dict:
    summary, answer = crag_summarize_and_qa(wiki_content, question)
    return {"summary": summary, "answer": answer}

# Tool로 래핑
wiki_tool = Tool(
    name="Wikipedia Search",
    func=tool_fetch_wiki_content,
    description="Given a topic string, searches Wikipedia and returns content."
)

crag_tool = Tool(
    name="CRAG Summarizer and QA",
    func=tool_crag_summarize_and_qa,
    description="Given wiki content and a question, returns summarized content and answer."
)


In [26]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import ChatOpenAI
from langchain.utilities import WikipediaAPIWrapper


In [27]:
os.environ["OPENAI_API_KEY"] ="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"

llm = ChatOpenAI(temperature=0)

# 이제 llm 쓸 준비 완료


In [29]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import ChatOpenAI
from langchain.utilities import WikipediaAPIWrapper

llm = ChatOpenAI(temperature=0)

wiki = WikipediaAPIWrapper()
tools = [
    Tool(
        name="Wikipedia",
        func=wiki.run,
        description="Useful for answering questions about general topics."
    )
]

agent = initialize_agent(
    tools,
    llm,
    agent="chat-zero-shot-react-description",  # 여기 바뀜
    verbose=True,
)

response = agent.run("Tell me about Natural Language Processing.")
print(response)


  response = agent.run("Tell me about Natural Language Processing.")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I should use Wikipedia to gather information about Natural Language Processing.

Action:
```
{
  "action": "Wikipedia",
  "action_input": "Natural Language Processing"
}
```[0m
Observation: [36;1m[1;3mPage: Natural language processing
Summary: Natural language processing (NLP) is the processing of natural language information by a computer. The study of NLP, a subfield of computer science, is generally associated with artificial intelligence. NLP is related to information retrieval, knowledge representation, computational linguistics, and more broadly with linguistics.
Major processing tasks in an NLP system include: speech recognition, text classification, natural language understanding, and natural language generation.



Page: Natural language
Summary: A natural language or ordinary language is a language that occurs organically in a human community by a process of use, repetition, and change and in forms such 