<div style="border: 1px solid #ccc; padding: 10px; border-radius: 5px;">
    <table style="width: 100%;">
        <tr>
            <td style="width: 10%; text-align: center; vertical-align: top;">
                <img src="./images/robotic_sales_assistent.PNG" alt="Robotic Sales Assistant" width="200">
            </td>
            <td style="width: 70%; vertical-align: top; padding-left: 15px;">
                <h3>Use Case: Sales Assistant</h3>
                <p>
                    The Sales Assistant is a tool designed to streamline the preparation process for sales professionals by generating compact, data-driven company presentations based on publicly available information from corporate websites. Users provide the company name and URL, and the system scrapes and analyzes relevant web pages such as "About Us" or "Careers," extracting key insights about company culture, offerings, and career opportunities. Leveraging AI for content generation, it outputs a concise, Markdown-formatted brochure that can be used directly in client meetings or internal planning. This solution saves time, enhances the quality of sales interactions, and ensures that representatives are well-prepared with up-to-date and relevant information.
                </p>
            </td>
        </tr>
    </table>
</div>



In [None]:
# Imports

import os
import requests
import json
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI
import gradio as gr

In [None]:
# Initialize and constants

# Read environment variables (OPENAI_API_KEY among them) from the local .env file
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key: it must exist, carry the 'sk-proj-' prefix and be longer than 10 characters
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# Model used for every chat completion in this notebook
MODEL = 'gpt-4o-mini'

# Shared OpenAI API client
openai = OpenAI()

In [None]:
# A class to represent a Webpage

class Website:
    """
    A utility class representing a single scraped webpage.

    Attributes:
        url (str): The URL of the webpage.
        body (bytes): The raw response body returned by the server.
        title (str): The page title, or "No title found" when the page has none.
        text (str): The text of the page body with scripts, styles, images and
            inputs removed; fragments are separated by newlines.
        links (list): All non-empty href values (absolute and relative) found on the page.
    """

    def __init__(self, url: str, timeout: float = 30):
        """
        Fetch the webpage and extract its title, text and links.

        Args:
            url (str): The URL of the webpage to scrape.
            timeout (float): Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: If the page cannot be fetched
                (including when the timeout expires).
        """
        self.url = url
        # Without a timeout, requests waits indefinitely on an unresponsive host
        response = requests.get(url, timeout=timeout)
        self.body = response.content

        # Parse HTML content with BeautifulSoup
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or contains nested markup,
        # so fall back to the placeholder in that case as well
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "No title found"

        # Drop elements that carry no readable text before extracting it
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Collect every hyperlink target, skipping anchors without an href
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self) -> str:
        """
        Retrieve the cleaned text content of the webpage.

        Returns:
            str: A string containing the webpage title and content.
        """
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

## First step: Have GPT-4o-mini figure out which links are relevant

### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON.  
It should decide which links are relevant, and replace relative links such as "/about" with "https://company.com/about".  
I will use "one-shot prompting", in which the prompt includes one example of how the model should respond.

In [None]:
# Create the system prompt for filtering links

# System prompt for the link-filtering call. The JSON example at the end is the
# one-shot demonstration of the expected reply, so it must itself be valid JSON.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [None]:
# Create the user prompt for filtering links

def get_links_user_prompt(website) -> str:
    """
    Build the user message asking the model to pick brochure-worthy links.

    Args:
        website (Website): The scraped page; its `url` and `links` are used.

    Returns:
        str: The instructions followed by the page's links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([intro, instructions, heading, link_lines])

In [None]:
# Ask the AI model for the most relevant links and return as JSON

def get_links(url: str) -> dict:
    """
    Ask the model which links on a webpage are relevant for a brochure.

    Args:
        url (str): The URL of the webpage to analyze.

    Returns:
        dict: The model's JSON reply, parsed into a Python dictionary.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        # Request a JSON object so the reply can be parsed with json.loads
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

## Second step: make the brochure!

Assemble all the details into another prompt for GPT-4o-mini.

In [None]:
# Crawl information from relevant links

def get_all_details(url: str) -> str:
    """
    Retrieve and aggregate content from the landing page and its relevant links.

    Links the model returned without a URL are skipped, as are links whose
    page cannot be fetched, so one bad link does not abort the whole brochure.

    Args:
        url (str): The URL of the company's webpage.

    Returns:
        str: A string containing the aggregated content from all relevant pages.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # The model's reply is not guaranteed to follow the requested schema,
    # so read every key defensively instead of indexing directly
    for link in links.get("links") or []:
        link_url = link.get("url") if isinstance(link, dict) else None
        if not link_url:
            continue
        try:
            page = Website(link_url)
        except requests.RequestException as error:
            # Covers unreachable hosts and malformed/relative URLs alike
            print(f"Skipping {link_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

In [None]:
# System prompt for brochure creation

# System prompt for brochure generation. Adjacent string literals are
# concatenated, so every fragment except the last ends with a space to keep
# sentences from running together.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [None]:
# Create user prompt for generating the brochure

def get_brochure_user_prompt(company_name: str, url: str) -> str:
    """
    Build the user message that asks the model to write the brochure.

    Args:
        company_name (str): The name of the company.
        url (str): The URL of the company's website.

    Returns:
        str: The prompt with the scraped page contents appended, cut off
            at 20,000 characters.
    """
    header = f"You are looking at a company called: {company_name}\n"
    task = "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    full_prompt = header + task + get_all_details(url)
    # Keep at most the first 20,000 characters of the prompt
    return full_prompt[:20_000]

In [None]:
# Generate the brochure

def create_brochure(company_name: str, url: str):
    """
    Stream an AI-written brochure for a company.

    Args:
        company_name (str): The name of the company.
        url (str): The URL of the company's website.

    Yields:
        str: The brochure text accumulated so far, once per streamed chunk.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )
    brochure_so_far = ""
    for chunk in stream:
        piece = chunk.choices[0].delta.content
        # A chunk's content may be empty or None; only append real text
        if piece:
            brochure_so_far += piece
        yield brochure_so_far

In [None]:
# Example usage:
# create_brochure is a generator, so calling it alone does no work; it has to
# be iterated. Each yielded value is the brochure text so far, so the last one
# is the complete brochure.
brochure = ""
for brochure in create_brochure('Durst Group AG', 'https://www.durst-group.com'):
    pass
display(Markdown(brochure))

In [None]:
# Create Gradio Interface with layout adjustments
# One row, two columns: the inputs on the left, the generated brochure on the right.
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column(scale=1):  # Input section takes 1/3 of the width
            company_name = gr.Textbox(label="Company Name:", placeholder="Enter the company name")
            url = gr.Textbox(label="Company Website URL:", placeholder="Enter company URL with http or https")
            generate_button = gr.Button("Generate Brochure")
        with gr.Column(scale=2):  # Output section takes 2/3 of the width
            output = gr.Markdown(label="Generated Company Brochure")

    # Link button click to the function.
    # create_brochure yields the text accumulated so far; presumably Gradio
    # refreshes the Markdown output on every yield (confirm in the Gradio streaming docs).
    generate_button.click(
        fn=create_brochure,  # The function to call
        inputs=[company_name, url],  # Inputs to pass to the function, in parameter order
        outputs=output  # Output to update with the result
    )

# Launch the Gradio Interface
interface.launch()