In [2]:
from bs4 import SoupStrainer
from langchain import hub 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [1]:
# Load environment variables
import os 
from dotenv import load_dotenv

# Later cells read their API credentials through os.getenv, so this has to run first.
load_dotenv()

True

In [9]:
# Load the source document: fetch the blog post and keep only the elements
# whose CSS class is "post-header" or "post-content".
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": SoupStrainer(class_=("post-header", "post-content")),
    },
)
docs = loader.load()

# Show the loaded Document list in full.
print(docs)

[Document(page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final re

In [12]:
# Text split: cut the loaded documents into chunks of up to 1000 characters,
# with 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Report the number of chunks, followed by the chunks themselves.
print(len(splits), splits)

66 [Document(page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\

In [14]:
# Embed: build a Chroma vector store from the chunks using OpenAI embeddings.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Expose the store through the retriever interface used by the chain.
retriever = vectorstore.as_retriever()

In [15]:
# Prompt: pull the "rlm/rag-prompt" template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# LLM: gpt-3.5-turbo with temperature 0.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [17]:
# Post-processing
def format_docs(docs):
    """Concatenate the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents joined in order, separated by a blank line ("\\n\\n").
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# Chain: the leading dict builds the two prompt inputs from the incoming question.
#   "context"  -> the retriever's results, flattened to text by format_docs
#   "question" -> RunnablePassthrough()
# The prompt output is then sent to the LLM and parsed by StrOutputParser.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [18]:
# Question: run the RAG chain on a sample query; the returned answer is
# displayed as the cell output.
rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable components. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'

In [7]:
# Inspect the HTML that remains after filtering the blog post down to the
# "post-header" and "post-content" elements.
import requests 
from bs4 import BeautifulSoup, SoupStrainer

url = "https://lilianweng.github.io/posts/2023-06-23-agent/"

# Only elements carrying one of these CSS classes are parsed.
parse_only = SoupStrainer(class_=["post-header","post-content"])

# A timeout keeps the cell from hanging forever on an unresponsive server.
html = requests.get(url, timeout=10)
# Fail loudly on an HTTP error. Previously a non-200 status skipped the
# assignment below, so the final print either raised NameError or silently
# showed a stale `soup` left over from another cell.
html.raise_for_status()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and bs4 does not warn about guessing).
soup = BeautifulSoup(html.text, "html.parser", parse_only=parse_only)

print(soup.prettify())

<!DOCTYPE html>
<header class="post-header">
 <h1 class="post-title">
  LLM Powered Autonomous Agents
 </h1>
 <div class="post-meta">
  Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng
 </div>
</header>
<div class="post-content">
 <p>
  Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as
  <a href="https://github.com/Significant-Gravitas/Auto-GPT">
   AutoGPT
  </a>
  ,
  <a href="https://github.com/AntonOsika/gpt-engineer">
   GPT-Engineer
  </a>
  and
  <a href="https://github.com/yoheinakajima/babyagi">
   BabyAGI
  </a>
  , serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
 </p>
 <h1 id="agent-system-overview">
  Agent System Overview
  <a aria-hidden="true" class="anchor" hidden="" href="#agent-system-overview">
   #
  </a>
 </h1>
 <p>
  In 

In [20]:
# Inspect the HTML that remains after filtering a Naver news article down to
# its headline and article-body elements.
import requests 
from bs4 import BeautifulSoup, SoupStrainer

url = "https://n.news.naver.com/mnews/article/421/0007578326"

# Only elements whose class attribute matches one of these values are parsed.
parse_only = SoupStrainer(class_=["media_end_head_headline","go_trans _article_content"])

# A timeout keeps the cell from hanging forever on an unresponsive server.
html = requests.get(url, timeout=10)
# Fail loudly on an HTTP error. Previously a non-200 status skipped the
# assignment below, so the final print either raised NameError or silently
# showed a stale `soup` left over from another cell.
html.raise_for_status()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and bs4 does not warn about guessing).
soup = BeautifulSoup(html.text, "html.parser", parse_only=parse_only)

print(soup.prettify())

<!DOCTYPE html>
<h2 class="media_end_head_headline" id="title_area">
 <span>
  "AI 학습·공익서비스 개발" 빅데이터…가명처리로 안전하게 사용
 </span>
</h2>
<article class="go_trans _article_content" id="dic_area">
 <strong class="media_end_summary">
  전세계 데이터 75% '개인정보'…"규제 막혀 버려져선 안돼"
  <br/>
  데이터 '마스킹·대체' 가명정보…"기관간 결합해 시너지"
 </strong>
 <span class="end_photo_org">
  <div class="nbd_im_w _LAZY_LOADING_WRAP">
   <div class="nbd_a _LAZY_LOADING_ERROR_HIDE" id="img_a1">
    <img alt="가명 정보끼리 결합하면 데이터에 근거한 공공서비스 개발도 가능하다. 복지 사각지대 위험군을 추적하는 데 쓰일 수 있다.(한국인터넷진흥원 제공)" class="_LAZY_LOADING _LAZY_LOADING_INIT_HIDE" data-src="https://imgnews.pstatic.net/image/421/2024/06/03/0007578326_001_20240603110035335.jpg?type=w647" id="img1" style="display: none;"/>
   </div>
  </div>
  <em class="img_desc">
   가명 정보끼리 결합하면 데이터에 근거한 공공서비스 개발도 가능하다. 복지 사각지대 위험군을 추적하는 데 쓰일 수 있다.(한국인터넷진흥원 제공)
  </em>
 </span>
 <br/>
 <br/>
 (서울=뉴스1) 윤주영 기자 = 인공지능(AI)을 고도화하는 학습 데이터엔 개인 민감정보가 들어있어 자칫 개인정보보호법을 침해할 소지가 있다. 정부는 이를 막으면서도 기술 고도화 토대를 마련하고자 개

In [62]:
# Query the Naver news search API for a keyword and print the raw JSON response.
import os 
from dotenv import load_dotenv
from urllib.parse import urlencode
from urllib.request import Request, urlopen

load_dotenv()

# Credentials are read from the environment (.env) only -- never paste literal
# keys into the notebook. Any key that was ever committed in a notebook should
# be treated as leaked and rotated.
naver_client_id = os.getenv("NAVER_CLIENT_ID")
naver_secret_key = os.getenv("NAVER_SECRET_KEY")
if not naver_client_id or not naver_secret_key:
    # Without this check a missing variable surfaces as an obscure error from
    # deep inside urllib when the None header value is sent.
    raise RuntimeError(
        "NAVER_CLIENT_ID and NAVER_SECRET_KEY must be set in the environment (e.g. in .env)"
    )

keyword = "의대생 살인사건"
base_url = "https://openapi.naver.com/v1/search/news?"
params = {
    "query": keyword,
    "display": 10,
    "sort": "sim"
}

url = base_url + urlencode(params)
headers = {
    "X-Naver-Client-Id": naver_client_id,
    "X-Naver-Client-Secret": naver_secret_key
}

request = Request(url, headers=headers)
# The context manager closes the connection even if reading or decoding fails;
# the timeout keeps the cell from hanging on an unresponsive server.
with urlopen(request, timeout=10) as response:
    print(response.read().decode("utf-8"))

{
	"lastBuildDate":"Mon, 03 Jun 2024 17:35:23 +0900",
	"total":721,
	"start":1,
	"display":10,
	"items":[
		{
			"title":"'<b>의대생 살인<\/b>' 한달도 안돼 또…'헤어지자' 한마디에 <b>살인<\/b>자 돌변 왜?",
			"originallink":"https:\/\/www.news1.kr\/articles\/5435018",
			"link":"https:\/\/n.news.naver.com\/mnews\/article\/421\/0007577585?sid=102",
			"description":"지난달 2일 발생한 '<b>의대생 살인<\/b>' <b>사건<\/b>이 발생한 지 한 달도 되지 않아 발생한 '교제 <b>살인<\/b>' <b>사건<\/b>이다. 경찰은 박 씨가 이별을 통보받자 보복 목적으로 범행했을 가능성에 무게를 두고 수사하고 있다. 박 씨는 우발적인... ",
			"pubDate":"Mon, 03 Jun 2024 06:30:00 +0900"
		},
		{
			"title":"또 교제<b>살인<\/b>… “사람 죽였다” 30대男 자수 후 숨져",
			"originallink":"https:\/\/www.donga.com\/news\/Society\/article\/all\/20240602\/125236250\/1",
			"link":"https:\/\/n.news.naver.com\/mnews\/article\/020\/0003568235?sid=102",
			"description":"최근 ‘<b>의대생 살인 사건<\/b>’ 등 교제하던 이성을 살해하는 <b>사건<\/b>이 이어지며 ‘교제폭력’ 대책을 마련해야 한다는 목소리가 높아진다. 2일 경남 창녕경찰서에 따르면 지난달 28일 낮 12시경 “내가 사람을 죽였다.... ",
			"pubDate":"Sun, 02 Jun 2024 17:39:00 +0900"
		},
		{


In [80]:
import json 

def _search_naver_news(keyword, display=10, sort="sim", timeout=10):
    """Search Naver news through the Naver Open API and return the result items.

    Args:
        keyword: Search query string.
        display: Number of results to request. Defaults to 10, the value that
            was previously hard-coded.
        sort: Sort order sent to the API. Defaults to "sim", the value that was
            previously hard-coded.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list stored under the "items" key of the decoded JSON response.

    Raises:
        RuntimeError: If NAVER_CLIENT_ID or NAVER_SECRET_KEY is not set in the
            environment.
        urllib.error.URLError: If the request fails (HTTPError for an HTTP
            error status).
    """
    client_id = os.getenv("NAVER_CLIENT_ID")
    client_secret = os.getenv("NAVER_SECRET_KEY")
    if not client_id or not client_secret:
        # A None header value would otherwise fail with an obscure error from
        # inside urllib; report the real cause instead.
        raise RuntimeError(
            "NAVER_CLIENT_ID and NAVER_SECRET_KEY must be set in the environment (e.g. in .env)"
        )

    base_url = "https://openapi.naver.com/v1/search/news?"
    params = urlencode({
        "query": keyword,
        "display": display,
        "sort": sort
    })

    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret
    }

    request = Request(base_url + params, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(request, timeout=timeout) as response:
        payload = response.read().decode("utf-8")
    json_data = json.loads(payload)

    return json_data["items"]

# Smoke-test the helper with a sample keyword; the bare name on the last line
# displays the returned items as the cell output.
test = _search_naver_news("의대생 살인사건")
test
    

[{'title': "'<b>의대생 살인</b>' 한달도 안돼 또…'헤어지자' 한마디에 <b>살인</b>자 돌변 왜?",
  'originallink': 'https://www.news1.kr/articles/5435018',
  'link': 'https://n.news.naver.com/mnews/article/421/0007577585?sid=102',
  'description': "지난달 2일 발생한 '<b>의대생 살인</b>' <b>사건</b>이 발생한 지 한 달도 되지 않아 발생한 '교제 <b>살인</b>' <b>사건</b>이다. 경찰은 박 씨가 이별을 통보받자 보복 목적으로 범행했을 가능성에 무게를 두고 수사하고 있다. 박 씨는 우발적인... ",
  'pubDate': 'Mon, 03 Jun 2024 06:30:00 +0900'},
 {'title': '또 교제<b>살인</b>… “사람 죽였다” 30대男 자수 후 숨져',
  'originallink': 'https://www.donga.com/news/Society/article/all/20240602/125236250/1',
  'link': 'https://n.news.naver.com/mnews/article/020/0003568235?sid=102',
  'description': '최근 ‘<b>의대생 살인 사건</b>’ 등 교제하던 이성을 살해하는 <b>사건</b>이 이어지며 ‘교제폭력’ 대책을 마련해야 한다는 목소리가 높아진다. 2일 경남 창녕경찰서에 따르면 지난달 28일 낮 12시경 “내가 사람을 죽였다.... ',
  'pubDate': 'Sun, 02 Jun 2024 17:39:00 +0900'},
 {'title': '[만물상] 60대 데이트 <b>살인</b>',
  'originallink': 'https://www.chosun.com/opinion/manmulsang/2024/06/02/VDNW7HM6BFCNHGTN46FULOUY4Q/?utm_source=naver&utm