In [99]:
# Import the libraries
import json
import os
from urllib.parse import urljoin

import anthropic
import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [100]:
# Set up the environment: load variables from a local .env file and read both API keys.
# Note: the key variables are not passed to the clients created below.
load_dotenv(override=True)
api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')

# Define the OpenAI model name and create one client per provider
MODEL = 'gpt-4o-mini'
openai = OpenAI()
claude = anthropic.Anthropic()

In [221]:
# A class to represent a Webpage

# Request headers sent with every page fetch; some websites refuse requests
# that do not carry a browser-like User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes:
        url: The address that was requested.
        body: The raw response content returned by `requests`.
        title: The page title, or "No title found" when the page has no usable <title>.
        text: Text of the <body> with script/style/img/input elements removed,
            one fragment per line; "" when the page has no <body>.
        links: Every non-empty `href` found on an <a> tag, exactly as written in
            the page (relative links are NOT resolved).
    """

    def __init__(self, url, timeout=30):
        """
        Fetch `url` and parse its HTML.

        Args:
            url: Address of the page to scrape.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: Propagated from `requests.get` (connection
                failure, timeout, invalid URL, ...).
        """
        self.url = url
        # Without a timeout a single unresponsive server blocks the notebook forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = self._title_from(soup)
        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    @staticmethod
    def _title_from(soup):
        """Return the <title> text, falling back when the tag is missing or has no single string."""
        title_tag = soup.title
        # `.string` is None for an empty <title> or one with nested markup; never
        # let that None leak into prompts as the literal text "None".
        if title_tag is not None and title_tag.string:
            return title_tag.string
        return "No title found"

    def get_contents(self):
        """Return the title and body text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
# Example usage: scrape the BBC home page (this issues a live HTTP request when the cell runs)
link = 'https://www.bbc.com'
Web = Website(url=link)
# Uncomment any of these to inspect the scraped page:
# print(Web.title)
# print(Web.text)
# print(Web.get_contents())
        
        

In [222]:
# Define a system prompt for link_filtering.
# The JSON example must itself be valid JSON (the model imitates it), and each
# sentence needs a trailing space so the concatenated prompt reads correctly.
system_prompt_link_filtering = (
    "You are an intelligent assistant. You will be given a list of URLs from a specific website. "
    "Your task is to select the links that are most useful for creating a brochure that represents the website's purpose, offerings, and key content. "
    "Focus on links that highlight the company's services, products, mission, contact information, or any visually rich or informative sections. "
)
system_prompt_link_filtering += "You should respond in JSON as in this example:"
system_prompt_link_filtering += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}

"""

# Define a user-prompt for link filtering
def user_prompt_link_filtering(link):
    """
    Scrape `link` and build the user prompt asking the model to pick brochure-worthy links.

    The prompt includes the site URL, and every href is resolved against it, so
    the model can return full https URLs without guessing the domain.

    Args:
        link: URL of the company's landing page.

    Returns:
        The prompt text, listing the page title, the site URL and its links.

    Raises:
        requests.RequestException: If the landing page cannot be fetched.
    """
    Web_class = Website(link)

    def to_absolute(href):
        # Resolve "/about"-style hrefs; keep the raw value if it cannot be parsed.
        try:
            return urljoin(Web_class.url, href)
        except ValueError:
            return href

    absolute_links = [to_absolute(href) for href in Web_class.links]

    prompt = (
        f"We are creating a brochure for the company titled: '{Web_class.title}' (website: {Web_class.url}).\n\n"
        "Your task is to select the most relevant and helpful links from the list below to assist in designing the brochure.\n"
        "Focus on pages that showcase the company's mission, services, products, key information, and visually rich or promotional content.\n\n"
        "respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\n"
        f"Here are the available links:\n{absolute_links}\n\n"
        "Please choose the links that would be most helpful for this purpose."
    )

    return prompt


In [223]:
def message_link_filtering(link):
    """
    Build the chat messages for the link-filtering request.

    Args:
        link: URL of the landing page whose links should be filtered.

    Returns:
        A two-item list: the system message, then the user message built for `link`.
    """
    system_message = {'role': 'system', 'content': system_prompt_link_filtering}
    user_message = {'role': 'user', 'content': user_prompt_link_filtering(link)}
    return [system_message, user_message]

# Example usage: 
#link = 'https://www.bbc.com'
#message_link_filtering(link)

In [224]:
def filtered_links(link):
    """
    Ask the model which links of `link` are relevant for a brochure.

    Args:
        link: URL of the landing page.

    Returns:
        The model's JSON reply decoded with `json.loads`.
    """
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=message_link_filtering(link),
        # JSON mode, so the reply can be decoded directly below.
        response_format={'type': 'json_object'},
    )
    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)

#links = filtered_links(link)
#links 

In [225]:
def get_all_contents(url):
    """
    Collect the text of the landing page and of every link the model selected.

    The link list comes from a language model, so it is treated as untrusted:
    entries without a URL are ignored, relative URLs are resolved against `url`,
    and a page that cannot be fetched is skipped with a message instead of
    aborting the whole brochure.

    Args:
        url: URL of the company's landing page.

    Returns:
        One string: "Landing page:" followed by its contents, then one section
        per successfully fetched link, headed by the link's type.

    Raises:
        requests.RequestException: If the landing page itself cannot be fetched.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    for link in filtered_links(url).get("links", []):
        href = link.get("url")
        if not href:
            continue
        page_url = urljoin(url, href)
        try:
            page = Website(page_url)
        except requests.RequestException as error:
            # Covers timeouts, connection errors and non-HTTP schemes such as mailto:.
            print(f"Skipping {page_url}: {error}")
            continue
        sections.append(f"\n\n{link.get('type', 'page')}\n")
        sections.append(page.get_contents())
    return "".join(sections)

In [226]:
# System prompt for the brochure writer. Built from adjacent string literals so
# each sentence keeps its separating space (a backslash continuation inside one
# literal had glued "Markdown." to "Include").
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in Markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."


In [227]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user prompt for the brochure request.

    Args:
        company_name: Name of the company, as it should appear in the prompt.
        url: URL of the company's landing page; its content and that of the
            selected sub-pages is appended to the prompt.
        max_chars: Maximum length of the returned prompt in characters; longer
            prompts are cut off at this point. Pass None to disable truncation.

    Returns:
        The prompt text, at most `max_chars` characters long.
    """
    user_prompt = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    )
    user_prompt += get_all_contents(url)
    # Slicing with None keeps the whole string, so None naturally means "no limit".
    return user_prompt[:max_chars]

#get_brochure_user_prompt('bbc_news',link)

In [228]:
def create_brochure(company_name, url):
    """
    Generate a brochure for a company in a single (non-streaming) request.

    Args:
        company_name: Name of the company.
        url: URL of the company's landing page.

    Returns:
        The brochure text from the model's reply. Wrap it in
        display(Markdown(...)) to render it in the notebook.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    return completion.choices[0].message.content

In [229]:
# Alternatively, stream the brochure
def stream_brochure(company_name, url):
    """
    Stream a brochure for a company, yielding the text accumulated so far.

    Args:
        company_name: Name of the company.
        url: URL of the company's landing page.

    Yields:
        The brochure text received so far, with code-fence markers removed,
        once per streamed chunk that carries a choice.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    raw = ""
    for chunk in stream:
        if not chunk.choices:
            # Defensive: a chunk without choices has no text to add.
            continue
        raw += chunk.choices[0].delta.content or ''
        # Clean a copy of the raw text: strip only the fence markers, so the
        # ordinary word "markdown" survives inside the brochure body.
        yield raw.replace("```markdown", "").replace("```", "")


In [206]:
create_brochure(company_name="BBC NEWS", url="https://www.bbc.com/")

# BBC News Brochure

---

## Welcome to BBC News

The British Broadcasting Corporation, known as BBC, is a renowned global news organization providing trusted news coverage, in-depth analysis, and engaging features across various sectors. Our commitment to delivering accurate and impartial journalism has solidified our role as a leader in the media landscape.

---

## What We Cover

At BBC News, we bring you coverage from around the globe, including:

- **Breaking News**: Stay informed with the latest updates on critical global events.
- **International News**: In-depth reporting from regions such as Africa, Asia, Europe, and the Americas.
- **Sports**: Up-to-date news and live coverage of major sporting events, including the Premier League and international competitions.
- **Business & Innovation**: Insights into market trends, banking, technology, and sustainable practices affecting businesses today.
- **Culture & Arts**: Features on music, film, literature, and artistic expressions from diverse cultures worldwide.

---

## Company Culture

At the BBC, we foster a culture rooted in integrity, creativity, and inclusivity. Our team is comprised of diverse individuals from various backgrounds, who collaborate to bring unparalleled news insights to our audiences. We prioritize the well-being of our employees and encourage professional growth through ongoing training and developmental opportunities.

---

## Join Our Team

Are you passionate about journalism and media? The BBC is always on the lookout for dedicated individuals to join our vibrant team. Whether you are an experienced journalist, a digital media expert, or are interested in supporting roles, there is a place for you at the BBC. Visit our Careers page to explore current job openings and team initiatives.

---

## Connect With Us

Stay updated with the latest from BBC News by following us on our social media platforms and subscribing to our newsletters. Your source for trust, innovation, and world-class journalism is right here—welcome to the BBC!

---

**Contact Us:**  
Email: info@bbc.co.uk  
Website: [bbc.co.uk/news](https://www.bbc.co.uk/news)  
Social Media: [Twitter](https://twitter.com/BBCNews) | [Facebook](https://www.facebook.com/bbcnews) | [Instagram](https://www.instagram.com/bbcnews)  

--- 

We look forward to welcoming you to the BBC community!

## Create a User Interface 

In [230]:
def stream_gpt(user_input):
    """
    Turn a free-text request into a streamed brochure using the OpenAI model.

    A first request extracts the company name and website URL from
    `user_input`; a second, streaming request writes the brochure.

    Args:
        user_input: Free text that mentions a company and its website.

    Yields:
        The brochure text accumulated so far, once per streamed chunk that
        carries a choice.

    Raises:
        KeyError: If the extracted JSON lacks "company_name" or "url".
    """
    extraction = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": 'Extract the company name and website link. Respond as JSON: {"company_name": "...", "url": "..."}'},
            {"role": "user", "content": user_input},
        ],
        # Same JSON mode as filtered_links, so the reply can be decoded directly.
        response_format={'type': 'json_object'},
    )
    details = json.loads(extraction.choices[0].message.content)
    company_name = details['company_name']
    url = details['url']

    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    result = ""
    for chunk in stream:
        if not chunk.choices:
            # Defensive: a chunk without choices has no text to add.
            continue
        result += chunk.choices[0].delta.content or ""
        yield result
        
#Gradio_function(input_text)

In [232]:
def stream_claude(user_input):
    """
    Turn a free-text request into a streamed brochure using Claude.

    Mirrors stream_gpt: a first request extracts the company name and website
    URL from `user_input`; a second, streaming request writes the brochure from
    the scraped pages, using the shared `system_prompt`.

    Args:
        user_input: Free text that mentions a company and its website.

    Yields:
        The brochure text accumulated so far, once per streamed text fragment.

    Raises:
        ValueError: If the extraction reply contains no JSON object.
        KeyError: If the extracted JSON lacks "company_name" or "url".
    """
    claude_model = "claude-3-haiku-20240307"

    extraction = claude.messages.create(
        model=claude_model,
        max_tokens=200,
        temperature=0,
        system='Extract the company name and website link. Respond as JSON: {"company_name": "...", "url": "..."}',
        messages=[
            {"role": "user", "content": user_input},
        ],
    )
    reply_text = extraction.content[0].text
    # The reply may wrap the JSON in prose or a code fence; parse only the
    # outermost {...} span.
    start, end = reply_text.find("{"), reply_text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Could not find a JSON object in the extraction reply")
    details = json.loads(reply_text[start:end + 1])
    company_name = details["company_name"]
    url = details["url"]

    result = claude.messages.stream(
        model=claude_model,
        max_tokens=1000,
        temperature=0.7,
        system=system_prompt,
        messages=[
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
    )
    response = ""
    with result as stream:
        for text in stream.text_stream:
            response += text or ""
            yield response


In [235]:
def stream_function(input_text, model):
    """
    Dispatch a brochure request to the streaming function for the chosen model.

    Args:
        input_text: Free text that mentions a company and its website.
        model: 'GPT' or 'Claude'.

    Yields:
        Whatever the selected streaming function yields.

    Raises:
        ValueError: If `model` is neither 'GPT' nor 'Claude' (raised when the
            generator is first advanced).
    """
    if model != 'GPT' and model != 'Claude':
        raise ValueError('Unknown model selected')
    if model == 'GPT':
        chosen_stream = stream_gpt(input_text)
    else:
        chosen_stream = stream_claude(input_text)
    yield from chosen_stream

In [None]:
# Build and launch the UI: a message box plus a model selector, with Markdown output.
gr.Interface(
    fn=stream_function,
    inputs=[
        gr.Textbox(label='Your message'),
        # Explicit label and default so stream_function always receives a known model name.
        gr.Dropdown(['GPT', 'Claude'], label='Select model', value='GPT'),
    ],
    outputs=[gr.Markdown()],  # or gr.Textbox(lines=20) for multiline
).launch()