A web-scraping app that crafts a flyer for a company, aimed at attracting new clients, engaging investors, and appealing to potential hires.

You only need the company’s name and the URL of its main website to get started.

In [116]:
import os
import requests
import json

In [117]:
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from typing import List
from IPython.display import Markdown, display, update_display

In [118]:
import openai

In [119]:
class WebscraperFlyers:
    """Build a marketing flyer for a company from the text of its website.

    Every public method returns ``self`` so the steps can be chained in
    this order::

        extract_content -> extract_links -> create_user_prompt ->
        context_messages -> extract_all_links ->
        extract_links_and_contents -> create_flyers -> show

    The class relies on the module-level imports ``os``, ``json``,
    ``requests``, ``openai``, ``BeautifulSoup``, ``load_dotenv``,
    ``Markdown`` and ``display``.
    """

    # Chat model used for both link selection and flyer generation.
    MODEL = 'gpt-4o-mini'
    # Seconds to wait for a page; without a timeout requests.get can block
    # indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10
    # Upper bound on the scraped characters forwarded to the flyer prompt.
    MAX_CONTENT_CHARS = 20000

    def __init__(self, company, url):
        """Store the target company and prepare prompts and empty state.

        Args:
            company: Company name inserted into the flyer prompt.
            url: Address of the company's main page.
        """
        self.url = url
        self.company = company

        # The example is written as strict JSON (double quotes, quoted keys)
        # because the reply is requested in JSON mode and parsed with
        # json.loads; the keys match what extract_links_and_contents reads.
        self.system_prompt_links = """
            You read all list of all links of a website.
            
            You're task is to identify and decide carefully what link is the most relevant to be included.
            when creating a flyer to a company such as links to 'About' page or 'Company' page or 'Jobs' or
            anything related to careers.

            You need to return the answer in JSON format. Refer to example below

            Example:
            {
                "links": [
                    {"name": "About",  "url": "https://url/about"},
                    {"name": "Career", "url": "https://url/career"}
                ]
            }
        """

        self.system_prompt =""" 
        You are a very reliable assistant that read and carefully analyze contents of
        a website and all the pages included.

        I want you to create a very attractive flyer about the company.
        You need to include details of the company, a short history, cients and careers or jobs if its present.

        I want you to make it funny, engaging and entertaining.
        """
        # Loads variables from a .env file (the OpenAI key is read from the
        # environment later).
        load_dotenv()

        # Everything the pipeline fills in later gets a real initial value
        # here; a bare annotation (``self.context: list()``) does not create
        # the attribute.
        self.context: List[dict] = []
        self.all_links: dict = {}
        self.sub_contents: str = ""
        self.title: str = ""
        self.web_content: str = ""
        self.raw_links: list = []
        self.links: List[str] = []
        self.user_prompt_links: str = ""
        self.final_result: str = ""

    @staticmethod
    def _absolute_http_url(base, href):
        """Return ``href`` resolved against ``base``, or None if unusable.

        Empty or non-string values and anything that does not resolve to an
        http(s) URL (``mailto:``, ``tel:``, ``javascript:`` ...) give None,
        since such targets cannot be fetched as pages.
        """
        from urllib.parse import urljoin, urlparse

        if not isinstance(href, str):
            return None
        href = href.strip()
        if not href:
            return None
        absolute = urljoin(base, href)
        if urlparse(absolute).scheme not in ("http", "https"):
            return None
        return absolute

    @staticmethod
    def _configure_openai():
        """Hand the key from the environment to the openai module client."""
        api_key = os.getenv('OPENAI_API_KEY')
        if api_key:
            # ``api_key`` is the module-level setting the SDK reads;
            # assigning to ``openai.key`` has no effect.
            openai.api_key = api_key

    def extract_content(self, child_url = None):
        """Download one page and store its title, visible text and hrefs.

        Args:
            child_url: Page to fetch. When None, ``self.url`` is fetched.

        Returns:
            self, with ``title``, ``web_content`` and ``raw_links`` replaced
            by the values of the page just fetched.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        target = self.url if child_url is None else child_url
        response = requests.get(target, timeout=self.REQUEST_TIMEOUT)
        # NOTE(review): ``url`` is repointed to the page just fetched, so
        # after a child page is scraped it no longer names the site root.
        self.url = target

        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None for an empty or nested <title>; fall back to
        # the placeholder instead of emitting "Title:None".
        page_title = soup.title.string if soup.title else None
        self.title = page_title or "No title found"
        self.web_content = f"Title:{self.title}\n"

        if soup.body:
            # Drop elements that contribute no readable text.
            for tag in soup.body(["script", "style", "img", "input"]):
                tag.decompose()
            body_text = soup.body.get_text(separator="\n", strip=True)
            # The trailing blank line separates consecutive pages once they
            # are concatenated into ``sub_contents``.
            self.web_content += "Contents:\n" + body_text + "\n\n"

        self.raw_links = [anchor.get('href') for anchor in soup.find_all('a')]
        return self

    def extract_links(self):
        """Turn ``raw_links`` into a clean list of absolute page URLs.

        Relative hrefs are resolved against ``self.url`` (the page they were
        scraped from), non-http(s) targets and empty values are dropped, and
        duplicates are removed while keeping first-seen order.

        Returns:
            self, with the result stored in ``links``.
        """
        resolved = (
            self._absolute_http_url(self.url, href) for href in self.raw_links
        )
        # dict.fromkeys de-duplicates while preserving insertion order.
        self.links = list(dict.fromkeys(url for url in resolved if url))
        return self

    def create_user_prompt(self):
        """Compose the user prompt that lists every candidate link.

        Returns:
            self, with the prompt stored in ``user_prompt_links``.
        """
        self.user_prompt_links = f"""These are the url list of this website {self.url}

            You need retrieve all relevant links for a company flyer.
            You need to exclude TOS, Privacy and email or social media links

            These are the links:            
        """

        self.user_prompt_links += '\n'.join(self.links)
        return self

    def context_messages(self):
        """Assemble the system/user message pair for the link-selection call.

        Returns:
            self, with the messages stored in ``context``.
        """
        self.context = [
            {'role': 'system', 'content': self.system_prompt_links},
            {'role': 'user', 'content': self.user_prompt_links}
        ]
        return self

    def extract_all_links(self):
        """Ask the model which links matter and store its JSON answer.

        Returns:
            self, with the parsed reply stored in ``all_links``. A reply
            that is empty or not a JSON object is stored as ``{}``.

        Raises:
            json.JSONDecodeError: if the reply is not valid JSON.
        """
        self._configure_openai()
        completion = openai.chat.completions.create(
            model=self.MODEL,
            messages = self.context,
            response_format = {'type': 'json_object'}
        )

        reply = completion.choices[0].message.content
        parsed = json.loads(reply) if reply else {}
        self.all_links = parsed if isinstance(parsed, dict) else {}
        return self

    def extract_links_and_contents(self):
        """Scrape every page selected by the model into ``sub_contents``.

        Each entry of ``all_links['links']`` is expected to be a dict with
        ``name`` and ``url`` keys. Entries without a fetchable http(s) URL
        are skipped; a missing ``links`` key is treated as an empty list.

        NOTE(review): only the selected pages are accumulated. The text
        gathered by an earlier ``extract_content()`` call is not added.

        Returns:
            self, with the page texts appended to ``sub_contents``.

        Raises:
            requests.RequestException: if fetching a selected page fails.
        """
        # Captured up front because extract_content repoints ``self.url`` on
        # every child fetch; relative URLs must resolve against the page the
        # link list was built from.
        base_url = self.url
        sections = []
        for link in self.all_links.get('links') or []:
            if not isinstance(link, dict):
                continue
            page_url = self._absolute_http_url(base_url, link.get('url'))
            if page_url is None:
                continue
            page_text = self.extract_content(page_url).web_content
            sections.append(f"{link.get('name', '')}\n{page_text}")

        self.sub_contents += "".join(sections)
        return self

    def create_flyers(self):
        """Generate the flyer text from the accumulated page contents.

        At most ``MAX_CONTENT_CHARS`` characters of ``sub_contents`` are
        sent to the model.

        Returns:
            self, with the model's answer stored in ``final_result``.
        """
        user_prompt = f"""
            You are analyzing the company called {self.company}
            Here are the contents of the entire websites including its child pages.

        """
        user_prompt += self.sub_contents[:self.MAX_CONTENT_CHARS]

        self._configure_openai()
        completions = openai.chat.completions.create(
            model=self.MODEL,
            messages = [
                {'role': 'system', 'content': self.system_prompt},
                {'role': 'user', 'content': user_prompt}
            ]
        )

        self.final_result = completions.choices[0].message.content
        return self

    def show(self):
        """Render ``final_result`` as Markdown in the notebook.

        Returns:
            self.
        """
        display(Markdown(self.final_result))
        return self

In [120]:
# Run the whole pipeline for one company. The chain returns the scraper
# itself, so it is bound to a name: as the cell's last expression it would
# otherwise be echoed as an object repr underneath the rendered flyer.
site = "https://cohere.com/"
flyer = (
    WebscraperFlyers('cohere.com', site)
        .extract_content()              # fetch the landing page
        .extract_links()                # collect its links
        .create_user_prompt()           # list the links for the model
        .context_messages()             # build the chat messages
        .extract_all_links()            # let the model pick relevant pages
        .extract_links_and_contents()   # scrape the picked pages
        .create_flyers()                # generate the flyer text
        .show()                         # render it as Markdown
)

**🚀 💬 Welcome to Cohere: Where Language AI Meets Out-of-This-World Ideas! 💬 🚀**

---

**🧐 Who Are We?**
Cohere is not your ordinary tech company! We're heroes in the world of **language AI**, on a mission to help developers and businesses unlock the magic of words! 🚀 Our talented team adds a sprinkle of pixie dust (or maybe just some serious machine learning juice) to make language AI accessible and useful for everyone!

---

**🌟 A (Brief) History of Awesomeness**
Founded by *Aidan Gomez, Nick Frosst*, and *Ivan Zhang*, Cohere emerged from the brilliant minds of the ML/AI elite, aiming to **make machines as adept at language as humans!** We've kissed traditional development goodbye to deliver cutting-edge large language models that don’t break the bank!

*2017: A distributed research collaboration was launched (cue the superhero theme music).*

*2024: Cohere is at the forefront of language AI innovation, powered by our amazing research lab, Cohere For AI (or C4AI for the cool kids).*

---

**🤝 Our Clients**
From rising startups to industry giants, we've partnered with all types! Big names trust us to sprinkle some AI capabilities into their operations. Join the ranks of companies that have transformed their language game with Cohere! 

---

**💼 Careers: Join the Fun!**
Are you ready to be a part of the adventure? If you're an ML/AI all-star looking to bring your skills to a team that values creativity, diversity, and flexibility, check out our career opportunities! Whether you’re a wizard with code or a ninja in research, we have a place for you. 🚀

**Perks include:**
- **Flexible Work**: Remote-friendly culture! 🎉
- **Sweet Benefits**: Health, wellness, and family support offered! 👨‍👩‍👧‍👦
- **Innovative Opportunities**: Hackathons, learning, and development galore! 🚀
- **Paid Time Off**: 6 weeks of vacation 🏖️ (that’s almost too much fun).

---

**🌐 A Glimpse into Our Future**
Whether diving into the vast ocean of multilingual AI with our model *Aya* or researching the enigmatic world of machine learning, Cohere is here to bridge the gap between humans and technology.

**Join us in shaping a future where AI understands YOU!**

---

Ready to jump on the **language AI rocket** with Cohere? **[Visit us at Cohere.com](https://cohere.com)** to learn more, explore career opportunities, or conjure up your very own AI magic!

---

**Cohere: Let’s Have Conversations That Matter!** 💬✨

<__main__.WebscraperFlyers at 0x1bf5bf094d0>