In [1]:
import os 

In [2]:
import requests 
import json 

from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI 

In [3]:
# Read OPENAI_API_KEY from the .env file into the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Shape check only (present, expected prefix, minimum length); no API call is made here.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("all not good")
else:
    print("all good")

# Chat model name passed to the OpenAI calls further down.
Model = 'gpt-4o-mini'

# Client constructed with no explicit arguments.
openai = OpenAI()

all good


In [4]:
# Request headers shared by every page fetch made through the Website class.
# Some sites reject requests that do not look like they come from a browser,
# so we send a desktop-Chrome User-Agent to get past those simple checks.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

In [5]:
class Website:
    """
    A scraped web page: its title, visible text and outgoing links.

    Attributes set by the constructor:
        url   -- the address that was requested
        body  -- raw bytes of the HTTP response
        title -- text of the <title> tag, or "No title found"
        text  -- text of <body> with script/style/img/input removed,
                 one fragment per line ("" when the page has no <body>)
        links -- every non-empty href found on <a> tags, unmodified
                 (so relative paths and "#fragment" anchors are included)
    """

    def __init__(self, url, timeout=30):
        """
        Download `url` and parse it with BeautifulSoup.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait on the server before giving up.

        Raises:
            requests.exceptions.RequestException: if the request fails or times out.
        """
        self.url = url
        # Without a timeout, requests can block indefinitely on an unresponsive host.
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # soup.title.string is None when <title> is empty or holds nested tags;
        # fall back to the placeholder instead of storing None.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually have a non-empty href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as one block for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [6]:
# Fetch and parse the Cosmos Digital homepage; the following cells inspect this object.
web = Website('https://cosmosdigitalksa.com/')

In [7]:
# Show the title and text that were extracted from the page
print(web.get_contents())

Webpage Title:
Cosmos Digital – We Support Vision Digitally
Webpage Contents:
Skip to content
Home
Services
Menu Toggle
AWS Cloud Computing
Google Cloud
Azure Cloud
Data Engineering
E-Commerce
Product Development
About
Contact
Get A Quote
Get A Quote
Main Menu
We turn challenge into success
Our Mission is to achieve the reputation of a quality, high standard & reliable solution & service Provider Company in the ICT industry
Find out how!
Cosmos Digital
We
provides one stop automated solution for your trade and industry. Depending on the size and field of your organization, we have different products and services to meet your requirements. We provide the optimum and customized solutions made for your organization.
We
focused exclusively on high quality and cost-effective software development and implementation of services. We are advancing on a tremendous pace and with involvement of skilled and experienced people working in the organization.
What We Do?
We
offers scalable end-to-end ap

In [8]:
# Every non-empty href collected from the page's <a> tags, exactly as written in the HTML
web.links

['#content',
 'https://cosmosdigitalksa.com/',
 'https://cosmosdigitalksa.com/',
 'https://cosmosdigitalksa.com/services/',
 'https://cosmosdigitalksa.com/aws-cloud-computing/',
 'https://cosmosdigitalksa.com/google-cloud/',
 'https://cosmosdigitalksa.com/azure-cloud/',
 'https://cosmosdigitalksa.com/data-engineering/',
 'https://cosmosdigitalksa.com/e-commerce/',
 'https://cosmosdigitalksa.com/product-development/',
 'https://cosmosdigitalksa.com/about-2/',
 'https://cosmosdigitalksa.com/contact/',
 '#',
 '#',
 'https://cosmosdigitalksa.com/',
 '#services',
 'https://cosmosdigitalksa.com/about-2/',
 'https://cosmosdigitalksa.com/services/',
 'https://cosmosdigitalksa.com/contact/',
 'https://cosmosdigitalksa.com/wp-content/uploads/2021/05/gallery-1.jpg',
 'https://cosmosdigitalksa.com/wp-content/uploads/2021/05/gallery-2.jpg',
 'https://cosmosdigitalksa.com/wp-content/uploads/2021/05/gallery-3.jpg',
 'https://cosmosdigitalksa.com/',
 'https://cosmosdigitalksa.com/services/',
 'https:/

In [9]:
# System prompt for the link-selection call: tells the model what to pick
# and shows the JSON shape we expect back. The example must itself be valid
# JSON, otherwise the model is being shown a malformed template to imitate.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"

link_system_prompt += "You should respond in json as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [10]:
def get_links_user_prompt(website):
    """
    Build the user message for the link-selection call.

    Args:
        website: object exposing `url` (str) and `links` (list of str).

    Returns:
        A prompt that names the site, states the selection rules and then
        lists the links one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    rules = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in json format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([intro, rules, heading, link_lines])

In [11]:
# Scrape a page and let the model choose which of its links matter for a brochure

def get_links(url):
    """
    Fetch `url`, send its links to the chat model and return the model's pick.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        The model's reply parsed from JSON (the system prompt asks for an
        object with a "links" list of {"type", "url"} entries).
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]

    # response_format requests a JSON object so the reply can be fed to json.loads.
    completion = openai.chat.completions.create(
        model=Model,
        messages=conversation,
        response_format={"type": "json_object"},
    )

    return json.loads(completion.choices[0].message.content)

In [12]:
# Try the link selection end to end on NVIDIA's India homepage (fetches the page, then calls the model)
get_links("https://www.nvidia.com/en-in/")

{'links': [{'type': 'about page',
   'url': 'https://www.nvidia.com/en-in/about-nvidia/'},
  {'type': 'careers page',
   'url': 'https://www.nvidia.com/en-in/about-nvidia/careers/'},
  {'type': 'company page',
   'url': 'https://www.nvidia.com/en-in/about-nvidia/partners/'},
  {'type': 'events page', 'url': 'https://www.nvidia.com/en-in/events/'}]}

In [13]:
## Now that we have a mechanism that extracts all the relevant links from a website,
# we are going to build the brochure generator.


In [14]:
def get_all_details(url):
    """
    Collect the text of the homepage and of every link the model judged relevant.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: a "Homepage:" section followed by one section per
        selected link, each headed by the link's "type" label.

    Side effects:
        Prints the links chosen by the model.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import urljoin

    sections = ["Homepage: \n", Website(url).get_contents()]

    links = get_links(url)
    print("the following links have been found: \n", links)

    # The reply is model-generated JSON, so a "links" key is not guaranteed.
    for link in links.get('links', []):
        # Section heading: the "type" label the model gave this link.
        sections.append(f"\n {link['type']} \n ")
        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the landing page, so requests always gets a full URL.
        sections.append(Website(urljoin(url, link['url'])).get_contents())

    return "".join(sections)
    

In [15]:
# Preview the combined text (homepage plus selected pages) that will later be sent to the model
print(get_all_details("https://cosmosdigitalksa.com/"))

the following links have been found: 
 {'links': [{'type': 'about page', 'url': 'https://cosmosdigitalksa.com/about-2/'}, {'type': 'services page', 'url': 'https://cosmosdigitalksa.com/services/'}, {'type': 'contact page', 'url': 'https://cosmosdigitalksa.com/contact/'}]}
Homepage: 
Webpage Title:
Cosmos Digital – We Support Vision Digitally
Webpage Contents:
Skip to content
Home
Services
Menu Toggle
AWS Cloud Computing
Google Cloud
Azure Cloud
Data Engineering
E-Commerce
Product Development
About
Contact
Get A Quote
Get A Quote
Main Menu
We turn challenge into success
Our Mission is to achieve the reputation of a quality, high standard & reliable solution & service Provider Company in the ICT industry
Find out how!
Cosmos Digital
We
provides one stop automated solution for your trade and industry. Depending on the size and field of your organization, we have different products and services to meet your requirements. We provide the optimum and customized solutions made for your organiz

In [16]:
# System prompt for the brochure call: the model receives the scraped pages
# and is asked to write a short markdown brochure from them.
# Each continued line ends with a space so sentences do not run together.
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."


In [17]:
def get_bro_user_prompt(company_name, url):
    """
    Build the user message for the brochure call.

    Args:
        company_name: name of the company, quoted in the first line.
        url: landing page whose scraped details are appended.

    Returns:
        The prompt text, cut to at most 5,000 characters.
    """
    header = f"You are looking at a company called: {company_name}\n"
    guidance = "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    full_prompt = header + guidance + get_all_details(url)

    # Keep only the first 5,000 characters of the prompt.
    return full_prompt[:5_000]

In [18]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company and render it as Markdown in the notebook.

    Args:
        company_name: name of the company, passed through to the user prompt.
        url: landing page of the company's website.

    Returns:
        None; the brochure is displayed, not returned.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_bro_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=Model, messages=chat_messages)
    brochure_md = completion.choices[0].message.content
    display(Markdown(brochure_md))

In [19]:
# Run the full pipeline for Cosmos Digital: select links, scrape them, then render the generated brochure
create_brochure('Cosmos Digital', 'https://cosmosdigitalksa.com/')

the following links have been found: 
 {'links': [{'type': 'about page', 'url': 'https://cosmosdigitalksa.com/about-2/'}, {'type': 'services page', 'url': 'https://cosmosdigitalksa.com/services/'}, {'type': 'contact page', 'url': 'https://cosmosdigitalksa.com/contact/'}]}


# Cosmos Digital Brochure

## Welcome to Cosmos Digital
**We Support Vision Digitally**

At Cosmos Digital, our mission is to transform challenges into successes by providing top-notch, high-quality, and reliable ICT solutions tailored specifically for your organizational needs. Our extensive range of services makes us the ideal partner to help you bring your ideas to life.

---

## Our Vision
To become a trusted IT technology partner that plays a crucial role in the success of your organization's vision and mission.

## What We Offer
Our comprehensive service offerings cover various application lifecycle stages:

- **Application Development:** End-to-end development from requirement analysis to deployment.
- **Application Maintenance:** Adapting software to meet evolving business demands post-launch.
- **Application Support:** 24/7 support to ensure your systems run smoothly.
- **Application Integration/Migration:** Efficiently replacing and integrating outdated systems with modern solutions.
- **Application Management:** End-to-end management of your software applications, from project management to risk and quality management.

### Specialized Services
- AWS Cloud Computing
- Google Cloud Solutions
- Azure Cloud Services
- Data Engineering
- E-Commerce Solutions
- Product Development
- Big Data & Analytics
- Machine Learning Solutions
- Information Security
- DevOps & Continuous Delivery
- Healthcare IT Solutions

---

## Why Choose Us?
At Cosmos Digital, we pride ourselves on being more than just a service provider; we are your dedicated partners for success. Here’s why you should choose us:

- **Expertise & Knowledge:** Our skilled team aims to provide honest advice and high-quality services at competitive prices.
- **Client-Centric Approach:** We listen closely to your needs and customize our solutions accordingly, ensuring you feel heard and valued.
- **Innovative Solutions:** Our passionate team is motivated to develop creative solutions that accelerate your online presence and boost conversion rates.

---

## Company Culture
Cosmos Digital is made up of a dynamic team of young professionals, engineers, and qualified business graduates who come together to create impactful solutions. We believe in fostering a culture of collaboration, continuous learning, and innovation, ensuring that every team member contributes to the success of our clients.

---

## Join Our Team
Are you passionate about technology and looking to make an impact? At Cosmos Digital, we’re always on the lookout for talented professionals ready to embrace challenges and contribute to transformative projects. Explore exciting career opportunities with us and help shape the future of digital solutions.

---

## Connect with Us
Want to build a flourishing business together? Reach out today to discuss how we can help elevate your organization. We are just an email away!
Email: hello@cosmosdigitalksa.com

---

**Cosmos Digital - Your Trusted Partner for Digital Transformation**  
*Copyright © 2025 Cosmos Digital | Powered by Cosmos Digital*