# Importing requirements

In [22]:
import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from dotenv import load_dotenv
import json
from openai import OpenAI

# Loading API key
Make sure a `.env` file containing `OPENAI_API_KEY` exists before loading the API key. If you do not have a key, use an open-source alternative such as Ollama.

In [23]:
# Load variables from the .env file; override=True lets values from the file
# replace any that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Only report whether a key is present; never print the key itself.
print("API_Key found" if api_key else "API key not found")

API_Key found


In [24]:
# Chat model name passed to every completion request in this notebook.
model = "gpt-4o-mini"
# Client is created without arguments; presumably it picks up OPENAI_API_KEY
# from the environment loaded earlier — TODO confirm.
openai = OpenAI()

# Checking whether the OpenAI API works with our API key

In [25]:
# Single-turn conversation used as a quick smoke test of the API connection.
message = [
    {
        "role": "user",
        "content": " Brief me about the business applications of the Generative AI in 2 lines",
    }
]

In [26]:
# Send the one-message test conversation to the chat completions endpoint.
answer = openai.chat.completions.create(
    model = model,
    messages = message)

In [27]:
# Render the text of the first returned choice as Markdown.
display(Markdown(answer.choices[0].message.content))

Generative AI can streamline content creation by producing high-quality text, images, and multimedia, enhancing marketing and branding efforts. Additionally, it can improve product design and innovation through automated prototyping and simulation, fostering more efficient R&D processes.

# Creating a Website class for scraping

In [28]:
# Browser-like request headers, defined to prevent getting blocked.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


class Website:
    """Snapshot of one web page: its title, its text and the hrefs of its links.

    Attributes:
        url: The URL that was requested.
        title: Text of the <title> tag, or "Title not found" when the page has
            no usable title.
        content: Page text joined with newlines, after script/style/img/input
            tags have been removed from the body.
        links: Non-empty ``href`` values of every <a> tag, exactly as written
            in the page.
    """

    def __init__(self, url, timeout=30):
        """Download and parse ``url``.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a stalled server would block the notebook forever.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        self.url = url

        resp = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(resp.content, 'html.parser')

        # .string is None when <title> is empty or wraps nested tags, so fall
        # back to the placeholder in that case as well as when the tag is absent.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "Title not found"

        # Documents without a <body> (fragments, some error pages) give
        # soup.body == None; there is nothing to strip in that case.
        if soup.body is not None:
            for unwanted in soup.body(["script", "style", "img", "input"]):
                unwanted.decompose()

        self.content = soup.get_text("\n", strip=True)

        hrefs = [anchor.get('href') for anchor in soup.find_all("a")]
        # Drop anchors that have no href (or an empty one).
        self.links = [href for href in hrefs if href]

    def get_content(self):
        """Return the title and text of the page formatted for use in a prompt."""
        return f"Web page title: {self.title} \n\n Web page contents: {self.content}\n\n"
        

In [29]:
# Scrape a sample page and inspect the hrefs collected from its <a> tags.
webscrap = Website("https://edwarddonner.com/")
print(webscrap.links)

['https://edwarddonner.com/', 'https://edwarddonner.com/connect-four/', 'https://edwarddonner.com/outsmart/', 'https://edwarddonner.com/about-me-and-about-nebula/', 'https://edwarddonner.com/posts/', 'https://edwarddonner.com/', 'https://news.ycombinator.com', 'https://nebula.io/?utm_source=ed&utm_medium=referral', 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html', 'https://patents.google.com/patent/US20210049536A1/', 'https://www.linkedin.com/in/eddonner/', 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/', 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/', 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/', 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/', 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/', 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/', 'https://edwarddonne

# Calling gpt-4o-mini to read the links and return the useful ones in structured JSON format

In [30]:
# System prompt for the link-selection request. The example must be valid JSON
# and must use the "useful_links" key, because the code that consumes the reply
# reads the selected links from that key.
system_prompt = """ You are provided with a list of links of a web page.\n
You have to findout the useful links from the given list to keep in them in creating brochure of  website like
about, careers , jobs, etc..\n
You have to give reply in structured json format as this example.\n
{
    "useful_links": [
        {"type": "about page", "url": "https://url/about/"},
        {"type": "career page", "url": "https://url/careers/"}
    ]
}"""


In [31]:
def get_links_prompt(webscrap):
    """Build the user prompt that asks the model to pick brochure-worthy links.

    Args:
        webscrap: Object exposing ``url`` (the page address) and ``links``
            (an iterable of link strings found on that page).

    Returns:
        The instruction text followed by the links, one per line.
    """
    pieces = [
        f" Here is the list of links on the website: {webscrap.url}.\n",
        " Please find the useful links from the below to include in brochure of the website, respomd with full url",
        " Donot include emails and other links.\n Links: \n ",
        "\n".join(webscrap.links),
    ]
    return "".join(pieces)


In [32]:
# Preview the user prompt built from the sample page's links.
print(get_links_prompt(webscrap))

 Here is the list of links on the website: https://edwarddonner.com/.
 Please find the useful links from the below to include in brochure of the website, respomd with full url Donot include emails and other links.
 Links: 
 https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/
https://edwarddonner.com/2024/12/21/llm-re

In [33]:
def get_use_links(url):
    """Ask the model which links found on ``url`` are useful for a brochure.

    The page is scraped, its links are sent to the chat model together with
    the link-selection system prompt, and the JSON reply is parsed.

    Args:
        url: Address of the page whose links should be evaluated.

    Returns:
        The model's JSON reply decoded into Python objects.
    """
    page = Website(url)
    chat_messages = [
        {"role" : "system", "content" : system_prompt},
        {"role" : "user", "content" : get_links_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=model,
        messages=chat_messages,
        # Request a JSON object so the reply can be handed to json.loads.
        response_format={"type": "json_object"},
    )
    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)
    

In [34]:
# Try the link selection on a real site; the parsed reply is the cell's output.
get_use_links("https://www.deeplearning.ai/")

{'useful_links': [{'type': 'about page',
   'url': 'https://www.deeplearning.ai/about/'},
  {'type': 'career page', 'url': 'https://www.deeplearning.ai/careers/'},
  {'type': 'courses page', 'url': 'https://www.deeplearning.ai/courses/'},
  {'type': 'blog page', 'url': 'https://www.deeplearning.ai/blog/'},
  {'type': 'community page', 'url': 'https://www.deeplearning.ai/community/'},
  {'type': 'events page', 'url': 'https://www.deeplearning.ai/events/'},
  {'type': 'resources page', 'url': 'https://www.deeplearning.ai/resources/'}]}

# Let's call GPT to create a brochure

In [35]:
def get_all_details(url):
    """Collect the text of the landing page and of every selected sub-page.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string holding the landing page content followed by one section
        per useful link, each introduced by the link's type.
    """
    result = " Landing Page:\n"
    result += Website(url).get_content()
    links = get_use_links(url)

    # The reply is model-generated JSON: the key or individual fields may be
    # missing, so read them defensively instead of raising KeyError.
    for link in links.get('useful_links', []):
        if not isinstance(link, dict) or not link.get('url'):
            continue
        try:
            page = Website(link['url'])
        except requests.RequestException:
            # One unreachable sub-page should not abort the whole brochure.
            continue
        result += f" \n\n {link.get('type', 'page')}: \n"
        result += page.get_content()
    return result

In [36]:
# System prompt for the brochure request. Adjacent string literals are joined
# inside parentheses; the first piece ends with a space so the words
# "creates" and "a" do not run together.
sys_prompt = (
    "You are an assistant that analyses the content of several pages of the company website and creates "
    "a short brochure about the company for a prospective customers. Respond in markdown. With a brochure name with website name. The brochure should be funny and exciting"
)

In [37]:
def get_user_prompt(company_name, url, max_chars=20_000):
    """Build the user prompt for the brochure request.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.
        max_chars: Maximum length of the returned prompt; longer prompts are
            cut off at this many characters.

    Returns:
        The instruction text followed by the scraped page contents, truncated
        to ``max_chars`` characters.
    """
    # Terminate the first sentence so the company name does not run into
    # the following "Here are ..." sentence.
    user_prompt = f"you are looking after a company called {company_name}.\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages: to create a short brochure based on the below information\n"
    user_prompt += get_all_details(url)
    return user_prompt[:max_chars]

In [38]:
def create_brochure(company_name, url):
    """Stream a brochure for the company, yielding the text accumulated so far.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Yields:
        After each streamed chunk, the full brochure text received up to that
        point.
    """
    chat_messages = [
        {"role":"system" , "content": sys_prompt},
        {"role": "user", "content": get_user_prompt(company_name, url)},
    ]
    stream = openai.chat.completions.create(
        model=model,
        messages=chat_messages,
        stream=True,
    )

    brochure_so_far = ""
    for chunk in stream:
        piece = chunk.choices[0].delta.content
        # Chunks with no text contribute nothing, but still trigger a yield.
        if piece:
            brochure_so_far += piece
        yield brochure_so_far
    



# Integrating Gradio

In [39]:
import gradio as gr

In [40]:
# Two text inputs feed create_brochure; its streamed output is shown as Markdown.
company_box = gr.Textbox(label="Company name:")
url_box = gr.Textbox(label="Landing page URL including http:// or https://")
brochure_panel = gr.Markdown(label="Brochure:")

view = gr.Interface(
    fn=create_brochure,
    inputs=[company_box, url_box],
    outputs=[brochure_panel],
    flagging_mode="never",
)
view.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


