# 🚀 Brochure Creation Using LLMs

## 🌠Importing libraries

In [36]:
from openai import OpenAI
import os, json
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from anthropic import Anthropic

In [37]:
# Load variables from the .env file before reading the API keys from the environment.
load_dotenv()
openai_key = os.getenv('OPENAI_API_KEY')
anthropic_key = os.getenv("ANTHROPIC_API_KEY")

# Sanity-check the OpenAI key: it must be present and start with the
# 'sk-proj-' prefix. Only a warning is printed; execution continues.
if not openai_key or not openai_key.startswith('sk-proj-'):
    print("Problem found with api key, please check the key in env file")

# Chat model used for every OpenAI call in this notebook.
MODEL = 'gpt-4o-mini'
# Both clients are built without explicit arguments
# (presumably they read their keys from the environment -- confirm against the SDK docs).
openai = OpenAI()
claude = Anthropic()

## Extracting Info from 🕸️site using Class

In [38]:
class Website:
    """Download a web page and keep its title, visible text and links.

    Attributes set by ``__init__``:
        url:   the address that was requested.
        body:  the raw response content.
        title: the page title, or "Title Not Found" when none is available.
        text:  the text of ``<body>`` with script/style/img/input tags removed,
               or "" when the document has no ``<body>``.
        links: every non-empty ``href`` found on ``<a>`` tags, as written in
               the page (they may be relative).
    """

    def __init__(self, url, timeout=10):
        """Fetch ``url`` and parse it.

        Args:
            url: address of the page to download.
            timeout: seconds passed to ``requests.get``; without it a request
                to an unresponsive host can block forever.

        Raises:
            requests.RequestException: propagated from ``requests.get``.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # ``soup.title.string`` can be None (e.g. an empty <title>), which
        # would otherwise render as the text "None" in the prompt.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "Title Not Found"
        if soup.body:
            # Drop elements that add no readable content before extracting text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and page text formatted as a prompt section."""
        return f"Webpage Title:\n {self.title} \n Webpage Contents: \n {self.text}\n\n"

In [39]:
# response = requests.get('https://edwarddonner.com/')
# soup = BeautifulSoup(response.content, 'html.parser')
# for ir in soup.body(["script", "style", "img", "input"]):
#     ir.decompose()
# soup.body

In [40]:
# Sample page used by the cells below; constructing Website downloads and
# parses it immediately.
url = 'https://cnn.com/'
obj = Website(url)
# obj.links

## 📞 Creating API Call prompts for Links Segregation

In [41]:
# System prompt for the link-selection call. It ends with a JSON example of
# the expected reply; the example must itself be valid JSON because the model
# is asked to imitate it.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You have to decide which of the links are most relevant to include in a brochure for a company, \
such as links to About page, or company page. You should respond in JSON as this example :"
link_system_prompt += """
{
    "links": [
        {"type": "games page", "url": "https://edition.cnn.com/games"},
        {"type": "profiles page", "url": "https://edition.cnn.com/profiles"}
    ]
}
"""

def get_user_prompt(website):
    """Build the user message that lists a page's links for the model.

    Args:
        website: object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by the links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "repsond with the full https URL,"
        "    Do not include Terms of Service, Privacy, email links. \n"
    )
    links_heading = "Links (some mught be relevant links) : \n"
    link_lines = "\n".join(website.links)
    return intro + instructions + links_heading + link_lines

In [42]:
# print(get_user_prompt(obj))

## Getting Only the relevant Links 🔗

In [43]:
def get_links(url):
    """Ask the chat model which links on ``url`` are relevant for a brochure.

    The page is downloaded, its links are sent to the model together with the
    link-selection system prompt, and the JSON reply is parsed.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        The model's reply decoded with ``json.loads``.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        # Request a JSON object so the reply can be handed to json.loads.
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [44]:
# Try the link selection on the sample URL; the parsed reply is shown as the cell output.
get_links(url)

{'links': [{'type': 'home page', 'url': 'https://edition.cnn.com'},
  {'type': 'about page', 'url': 'https://edition.cnn.com/about'},
  {'type': 'profiles page', 'url': 'https://edition.cnn.com/profiles'},
  {'type': 'leadership page',
   'url': 'https://edition.cnn.com/profiles/cnn-leadership'},
  {'type': 'careers page', 'url': 'https://careers.wbd.com/cnnjobs'},
  {'type': 'social media page', 'url': 'https://www.linkedin.com/company/cnn'},
  {'type': 'social media page', 'url': 'https://facebook.com/CNN'},
  {'type': 'social media page', 'url': 'https://twitter.com/CNN'},
  {'type': 'social media page', 'url': 'https://instagram.com/CNN'},
  {'type': 'social media page', 'url': 'https://www.tiktok.com/@cnn?lang=en'}]}

In [46]:
def get_details(url):
    """Collect the text of a page and of every link the model judged relevant.

    Args:
        url: address of the landing page.

    Returns:
        One string: "Page: \\n" plus the landing page contents, followed by a
        "<type>" heading and the contents of each relevant linked page.
        Links that have no URL or that cannot be fetched are skipped.
    """
    sections = ["Page: \n", Website(url).get_contents()]
    links = get_links(url)              ## Getting only the relevant links
    print("Found Links: ", links)
    # The reply comes from a model, so the "links" key is not guaranteed.
    for link in links.get("links") or []:
        link_url = link.get("url")
        if not link_url:
            continue
        try:
            page = Website(link_url)
        except requests.RequestException as err:
            # One unreachable or malformed link should not abort the brochure.
            print(f"Skipping {link_url}: {err}")
            continue
        sections.append(f"\n\n {link.get('type', 'page')}\n")
        sections.append(page.get_contents())
    return "".join(sections)

# print(get_details("https://anthropic.com"))

In [47]:
# System prompt for the brochure-writing calls (used by both the OpenAI and
# the Claude code paths below). The wording is kept exactly as it was.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown."
    "Include details of company culture, customers and careers/jobs if you have the information."
)


def get_brochure_user_prompt(web_obj):
    """Build the user message for the brochure request.

    Args:
        web_obj: object exposing ``title`` and ``url`` for the company page.

    Returns:
        The introduction, the instruction line and the gathered page details,
        cut to at most 20000 characters.
    """
    intro = f"You are looking at at a company called : {web_obj.title} \n"
    guidance = "Here are the details of it's page and other relevant pages: use this information to create a detailed but short brochure of the company\n"
    combined = "".join((intro, guidance, get_details(web_obj.url)))
    # Truncate so the prompt does not exceed a limit, mostly to reduce costs.
    return combined[:20000]
    

In [48]:
from IPython.display import Markdown, display, update_display
def create_brochure(url):
    """Generate a brochure for ``url`` and render it as Markdown in the notebook.

    Nothing is returned; the model's reply is only displayed.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(Website(url))},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

## Et Voila! 🪄

In [None]:
# Build and display a brochure for the Anthropic site (non-streaming path).
create_brochure('https://www.anthropic.com/')

In [None]:
def stream_brochure_gpt(url):
    """Stream a brochure for ``url`` from the OpenAI chat model.

    Yields:
        The brochure text accumulated so far, once per received chunk, so
        each yielded value replaces the previous one.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(Website(url))},
    ]
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        stream=True,
    )
    so_far = ""
    for event in stream:
        piece = event.choices[0].delta.content
        # Chunks without text contribute nothing but still trigger a yield.
        if piece:
            so_far += piece
        yield so_far
# ---------------------------- 
    # To respond within Cell - 
    # response = ""
    # display_handle = display(Markdown(""), display_id=True)
    # for chunk in stream:
    #     response += chunk.choices[0].delta.content or ''
    #     #response = response.replace("```","").replace("markdown", "")
    #     update_display(Markdown(response), display_id=display_handle.display_id)

In [None]:
def stream_brochure_claude(url):
    """Stream a brochure for ``url`` from the Claude model.

    Yields:
        The brochure text accumulated so far, once per streamed text fragment.
    """
    conversation = [
        {"role": "user", "content": get_brochure_user_prompt(Website(url))},
    ]
    stream_manager = claude.messages.stream(
        model="claude-sonnet-4-20250514",
        system=system_prompt,
        messages=conversation,
        max_tokens=900,
    )
    accumulated = ""
    with stream_manager as stream:
        for fragment in stream.text_stream:
            if fragment:
                accumulated += fragment
            yield accumulated

In [None]:
# Calling a generator function only creates the generator object; it has to be
# iterated for the request to run. Consume it and render the final text.
brochure = ""
for brochure in stream_brochure_claude('https://www.anthropic.com/'):
    pass
display(Markdown(brochure))

## Creating UI 🧑‍💻

In [None]:
import gradio as gr

In [None]:
def stream_model(url, model):
    """Stream a brochure for ``url`` using the selected model.

    Args:
        url: address of the company website.
        model: "GPT" or "Claude"; any other value (including None) yields a
            single "Model Not Found" message.

    Yields:
        Progressively longer brochure text from the chosen backend, or the
        single fallback message.
    """
    if model == "GPT":
        yield from stream_brochure_gpt(url)
    elif model == "Claude":
        yield from stream_brochure_claude(url)
    else:
        # Yield the message whole: iterating the string itself would emit one
        # character at a time and leave only the last one on screen.
        yield "Model Not Found"

In [None]:
# Gradio front end: a URL box and a model selector feed stream_model, whose
# streamed output is rendered as Markdown. Flagging is switched off.
url_box = gr.Textbox(label="URL:")
model_picker = gr.Dropdown(["GPT", "Claude"], label="Select model")
response_panel = gr.Markdown(label="Response:")
view = gr.Interface(
    fn=stream_model,
    inputs=[url_box, model_picker],
    outputs=[response_panel],
    allow_flagging="never",
)
view.launch()
