# LLM que faz resumo de páginas web

### Criando classe para website

In [3]:
from bs4 import BeautifulSoup
import requests

class Website:
    url: str
    title: str
    content: str

    def __init__(self, url):
        self.url = url
        response = requests.get(self.url)
        soup = BeautifulSoup(response.content, "html.parser")
        self.title = soup.title.string if soup.title else "Sem título"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.content = soup.body.get_text(separator="\n", strip=True)

In [5]:
webcontent = Website(url="https://www.nba.com/")

In [6]:
webcontent.title

'The official site of the NBA for the latest NBA Scores, Stats & News. | NBA.com'

In [8]:
print(webcontent.content)

Navigation Toggle
Home
Tickets
NBA 2K26 Summer League Schedule
2025 Playoffs Schedule
Key Dates
Featured
NBA TV
Home
Top Stories
Summer League
Draft
Playoffs
Features
Events
Key Dates
Transactions
Future Starts Now
History
More
Stats Home
Dunk Score
Players
Teams
Leaders
Stats 101
Cume Stats
Lineups Tool
Media Central Game Stats
Draft
Quick Links
Contact Us
Atlantic
Boston Celtics
Brooklyn Nets
New York Knicks
Philadelphia 76ers
Toronto Raptors
Central
Chicago Bulls
Cleveland Cavaliers
Detroit Pistons
Indiana Pacers
Milwaukee Bucks
Southeast
Atlanta Hawks
Charlotte Hornets
Miami Heat
Orlando Magic
Washington Wizards
Northwest
Denver Nuggets
Minnesota Timberwolves
Oklahoma City Thunder
Portland Trail Blazers
Utah Jazz
Pacific
Golden State Warriors
LA Clippers
Los Angeles Lakers
Phoenix Suns
Sacramento Kings
Southwest
Dallas Mavericks
Houston Rockets
Memphis Grizzlies
New Orleans Pelicans
San Antonio Spurs
Players Home
2024-25 Audio Pronunciation Guide
Player Stats
Starting Lineups
Free 

### Instanciando LLM

In [9]:
import os
import openai
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())

groq_api_key = os.getenv("GROQ_API_KEY")

client_groq = openai.OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_api_key
)

### Criando prompts

In [None]:
def get_messages(webcontent):
    system_prompt = """
    Você é o melhor em resumir conteúdos oriundos da internet.
    Você receberá o conteúdo de um website e você deverá resumir esse conteúdo de maneira objetiva e perfeita.
    Ignore conteúdos que parecem ser de navegação  pelo site. Devolva a resposta em markdown
    """

    user_prompt = """
    ** Conteúdo do website **

    {content_website}
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "role": user_prompt.format(content_website=webcontent)}
    ]

    return messages

In [18]:
def get_summary(url):
    webcontent = Website(url=url)
    messages = get_messages(webcontent=webcontent.content)
    response = client_groq.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages
    )

    return response.choices[0].message.content


In [19]:
summary = get_summary(url="https://www.nba.com/")

BadRequestError: Error code: 400 - {'error': {'message': "'messages.0' : input does not contain the discriminator property 'role'", 'type': 'invalid_request_error'}}

In [None]:
from IPython.display import Markdown, display

display(Markdown(summary))