# A Complete Business Solution

### 🚀 Business Challenge

Develop a product that automatically generates a **brochure** for a company. The brochure will be tailored for **prospective clients**, **investors**, and **potential recruits**.

You will be provided with:
- The **company name**
- The company’s **primary website**

In [None]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google import genai
from google.genai.types import *
from typing import List

# Requires "jupyter.notebookFileRoot": "${workspaceFolder}" in the VS Code settings so the local `utils` imports below resolve
from utils.ai_agent import AiAgent
from utils.gemini import Gemini

In [None]:
# Load variables from a .env file; override=True lets them replace values
# that are already set in the process environment.
load_dotenv(override=True)
# Module-level agent used by the helper classes below for every LLM call.
# Annotated as AiAgent, instantiated as Gemini.
# NOTE(review): presumably Gemini() reads its API key from the environment
# loaded above - confirm in utils.gemini.
ai_agent: AiAgent = Gemini()

In [None]:
class Parser:
    """Helpers for cleaning raw model output before it is parsed."""

    @staticmethod
    def strip_json(jsonData):
        """Return ``jsonData`` without a surrounding Markdown code fence.

        Models often wrap JSON in a fenced block. The opening fence may be
        a bare ``` or carry a ``json`` language tag in any letter case; both
        forms are removed, together with the closing fence and the
        surrounding whitespace. Text without a fence is only stripped of
        leading and trailing whitespace.

        Args:
            jsonData: Raw text returned by the model.

        Returns:
            The text with any wrapping fence removed, ready for
            ``json.loads``.
        """
        fence = "```"
        json_string = jsonData.strip()

        if json_string.startswith(fence):
            json_string = json_string[len(fence):]
            # Drop the optional language tag that follows the opening fence.
            if json_string[: len("json")].lower() == "json":
                json_string = json_string[len("json"):]
            json_string = json_string.lstrip()

        if json_string.endswith(fence):
            json_string = json_string[: -len(fence)].rstrip()

        return json_string

In [None]:
class Website:
    """
    A scraped web page: its title, visible body text and outgoing links.

    Constructing an instance performs the HTTP request and parses the
    response immediately.

    Attributes set by ``__init__``:
        url: The URL that was fetched.
        headers: Request headers sent with the fetch (browser-like User-Agent).
        body: Raw response bytes.
        title: Text of the ``<title>`` tag, or "No title found".
        text: Body text with script/style/img/input elements removed;
            empty string when the page has no ``<body>``.
        links: Every non-empty ``href`` found on ``<a>`` tags, unmodified
            (relative links stay relative).
    """

    def __init__(self, url, timeout=10):
        """Fetch ``url`` and extract its title, text and links.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        self.url = url
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        }

        # Without a timeout an unresponsive host would block forever.
        response = requests.get(url, headers=self.headers, timeout=timeout)

        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")

        # .string is None for an empty <title> or one with nested tags, so
        # fall back to the placeholder in that case too.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Remove elements that carry no readable text for the brochure.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = [anchor.get("href") for anchor in soup.find_all("a")]
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as a prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [None]:
class ContentGeneratorHelper:
    """Collects the text of a company's landing page and its relevant sub-pages.

    The module-level ``ai_agent`` is asked which links on the landing page
    belong in a brochure; each selected page is then scraped with ``Website``.
    """

    @staticmethod
    def _get_all_details(url):
        """Return the landing-page contents followed by each relevant page.

        Args:
            url: Address of the company's landing page.

        Returns:
            A single string: a "Landing Page:" section, then one section per
            link chosen by the model, headed by that link's ``type``.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urljoin

        website = Website(url)
        links = ContentGeneratorHelper._get_links(website)

        sections = ["Landing Page:\n", website.get_contents()]

        # A reply without a "links" key yields the landing page only.
        for link in links.get("links", []):
            # The model is asked for absolute URLs but may echo a relative
            # href; urljoin leaves absolute URLs untouched.
            page_url = urljoin(url, link["url"])
            sections.append(f"\n\n{link['type']}:\n")
            sections.append(Website(page_url).get_contents())
        return "".join(sections)

    @staticmethod
    def _get_links(website):
        """Ask the model which of ``website.links`` are brochure-relevant.

        Returns:
            The model's reply parsed from JSON.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON after the
                code fence has been stripped.
        """
        user_prompt = ContentGeneratorHelper._get_user_prompt(website)
        system_prompt = ContentGeneratorHelper._get_system_prompt()
        result = ai_agent.generate_stripped_response_string(system_prompt, user_prompt)
        return json.loads(Parser.strip_json(result))

    @staticmethod
    def _get_system_prompt():
        """Return the system prompt describing the link-selection task.

        The prompt ends with an example of the expected JSON reply; the
        example must itself be valid JSON so the model imitates a parseable
        shape.
        """
        link_system_prompt = "You are provided with a list of links found on a webpage. \
        You are able to decide which of the links would be most relevant to include in a brochure about the company, \
        such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
        link_system_prompt += "You should respond in JSON as in this example:"
        link_system_prompt += """
        {
            "links": [
                {"type": "about page", "url": "https://full.url/goes/here/about"},
                {"type": "careers page", "url": "https://another.full.url/careers"}
            ]
        }
        """

        return link_system_prompt

    @staticmethod
    def _get_user_prompt(website):
        """Return the user prompt listing every link found on ``website``.

        Args:
            website: Object exposing ``url`` and ``links`` (a list of href
                strings).
        """
        user_prompt = f"Here is the list of links on the website of {website.url} - "
        user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\n"
        user_prompt += "Links (some might be relative links):\n"
        user_prompt += "\n".join(website.links)
        return user_prompt

In [None]:
class BrochureGenerator:
    """Builds a Markdown company brochure from scraped website content.

    All model calls go through the module-level ``ai_agent``; results are
    rendered in the notebook with IPython's display helpers.
    """

    @staticmethod
    def _get_brochure_system_prompt():
        """Return the system prompt that sets the brochure's task and tone."""
        system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
        and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
        Include details of company culture, customers and careers/jobs if you have the information."

        # Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

        # system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
        # and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
        # Include details of company culture, customers and careers/jobs if you have the information."
        return system_prompt

    @staticmethod
    def _get_brochure_user_prompt(company_name, url):
        """Return the user prompt holding the company name and scraped pages.

        Args:
            company_name: Name shown to the model as the brochure's subject.
            url: Landing page whose contents (and relevant sub-pages) are
                scraped and appended.

        Returns:
            The prompt, cut to at most 5,000 characters.
        """
        user_prompt = f"You are looking at a company called: {company_name}\n"
        user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
        user_prompt += ContentGeneratorHelper._get_all_details(url)
        user_prompt = user_prompt[:5_000]  # Truncate if more than 5,000 characters
        return user_prompt

    @staticmethod
    def _strip_markdown_fence(text):
        """Remove a code fence that wraps the whole response, if present.

        Only a leading fence whose tag is empty, ``markdown`` or ``md`` is
        treated as a wrapper; its matching trailing fence is removed too.
        Everything else - including the word "markdown" and fenced code
        inside the brochure - is left untouched.

        Args:
            text: Response text accumulated so far (may be partial).

        Returns:
            The text to render. While the opening fence line is still
            incomplete (no newline received yet) an empty string is
            returned so a half-arrived fence is never shown.
        """
        body = text.lstrip()
        if not body.startswith("```"):
            return text

        first_line, newline, rest = body.partition("\n")
        if not newline:
            return ""
        if first_line.strip().lower() not in ("```", "```markdown", "```md"):
            return text

        trimmed = rest.rstrip()
        if trimmed.endswith("```"):
            return trimmed[: -len("```")].rstrip()
        return rest

    @staticmethod
    def create_brochure(company_name, url):
        """Generate the brochure in one call and render it as Markdown."""
        system_prompt = BrochureGenerator._get_brochure_system_prompt()
        user_prompt = BrochureGenerator._get_brochure_user_prompt(company_name, url)
        response = ai_agent.generate_stripped_response_string(system_prompt, user_prompt)
        display(Markdown(response))

    @staticmethod
    def stream_brochure(company_name, url):
        """Generate the brochure and update the rendered output per part."""
        system_prompt = BrochureGenerator._get_brochure_system_prompt()
        user_prompt = BrochureGenerator._get_brochure_user_prompt(company_name, url)
        stream = ai_agent.generate_content_stream(system_prompt, user_prompt)

        raw_response = ""
        display_handle = display(Markdown(""), display_id=True)
        for candidate in stream.candidates:
            for part in candidate.content.parts:
                # NOTE(review): assumes a part may carry no text (None);
                # treat that as an empty chunk instead of raising.
                raw_response += part.text or ""
                # Re-derive the visible text from the raw stream each time so
                # only a wrapping fence is hidden, never brochure content.
                visible = BrochureGenerator._strip_markdown_fence(raw_response)
                update_display(Markdown(visible), display_id=display_handle.display_id)

In [None]:
# Demo: build a brochure for Hugging Face, updating the rendered Markdown
# output as parts of the response are processed.
BrochureGenerator.stream_brochure("HuggingFace", "https://huggingface.co")

In [None]:
# Demo: build a brochure in a single (non-streaming) call and display it once
# the full response is available.
BrochureGenerator.create_brochure(
    "Preetham Portfolio", "https://preetham-p-m.vercel.app/"
)