<a href="https://colab.research.google.com/github/manisshapande/llm_engineering/blob/main/CompanyBrochure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Create Company Brochure ##


In [1]:
#Imports
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

import ollama

In [None]:
#Setting Models
# MODEL_LLAMA is the model name passed to ollama.chat(); MODEL is the model
# name passed to the OpenAI chat-completions call.
MODEL_LLAMA = 'llama3.2'
# The OpenAI model id is spelled with the letter "o" ("4o"), not the digit zero.
MODEL = 'gpt-4o-mini'

In [None]:
# Website class: downloads a page and collects its title, visible text and links

class Website:
    """A downloaded web page reduced to its title, visible text and link targets.

    Everything is fetched and parsed eagerly in ``__init__``; afterwards the
    instance exposes:

    * ``url``   - the address that was requested
    * ``body``  - the raw response bytes
    * ``title`` - the ``<title>`` text, or ``"No title found"``
    * ``text``  - the body text with script/style/img/input elements removed
    * ``links`` - every non-empty ``href`` found on ``<a>`` tags, unmodified
      (so relative links stay relative)
    """

    def __init__(self, url, timeout=30):
        """Fetch *url* and parse it with BeautifulSoup.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait on the server before ``requests`` gives
                up. Without a timeout a stalled server would block the
                notebook indefinitely.

        Raises:
            requests.exceptions.RequestException: if the download fails or
                times out.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # soup.title.string is None when <title> is empty or contains nested
        # tags, so fall back to the placeholder in that case as well.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        # Anchors without an href (or with an empty one) are discarded.
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as one prompt-ready string."""
        return f"Webpage Title: \n{self.title}\nWebpage Contents: \n{self.text}\n\n"


In [None]:
# Extract the links from a website page.
# `pg` is kept at module level; a later cell passes it to get_links_user_prompt.

pg = Website("https://anthropic.com")
pg.links

In [None]:
# setting prompts

# System prompt for the link-selection call: it describes the task and shows
# the exact JSON shape ({"links": [{"type": ..., "url": ...}]}) the model
# should return. The example must itself be valid JSON, otherwise the model is
# being shown a malformed template.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [2]:
# Print the assembled link-selection system prompt to check its wording and JSON example.
print(link_system_prompt)

In [None]:
#Get links with user prompt

def get_links_user_prompt(website):
    """Build the user message that lists a page's links for the model to filter.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instructions followed by one link per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    listing = "\n".join(website.links)
    return "".join((intro, instructions, heading, listing))

In [None]:
# Print the user prompt built from the links of the `pg` page fetched earlier.
print(get_links_user_prompt(pg))

In [None]:
#Get Llama 3.2 to answer
def get_links(url):
    """Ask the local Llama model which links on *url* belong in a brochure.

    Args:
        url: Page whose links should be classified.

    Returns:
        The model's reply parsed from JSON; the system prompt asks for the
        shape ``{"links": [{"type": ..., "url": ...}, ...]}``.

    Raises:
        json.JSONDecodeError: if the reply is still not valid JSON.
    """
    website = Website(url)
    response = ollama.chat(
        model=MODEL_LLAMA,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        # Request JSON mode, mirroring response_format={"type": "json_object"}
        # in get_links_gpt. Without it the model is free to wrap the JSON in
        # prose or code fences, which makes json.loads below fail.
        format="json",
    )
    result = response['message']['content']
    print(f"About to parse this into json: {result}")
    return json.loads(result)

In [None]:
#Get ChatGPT to answer
def get_links_gpt(url):
    """Ask the OpenAI model which links on *url* belong in a brochure.

    Args:
        url: Page whose links should be classified.

    Returns:
        The model's JSON reply parsed into Python objects; the system prompt
        asks for the shape ``{"links": [{"type": ..., "url": ...}, ...]}``.
    """
    website = Website(url)
    # The notebook imports the OpenAI class but never creates a client, so the
    # name `openai` was undefined here. Build the client locally instead.
    # load_dotenv() makes a key stored in a local .env file visible to the
    # client; it does nothing when no .env file exists.
    load_dotenv()
    client = OpenAI()
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        # JSON mode: the reply is constrained to a single JSON object.
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [None]:
# Fetch the Anthropic home page and show the raw list of links found on it.
anthropic = Website("https://anthropic.com")
anthropic.links

In [None]:
# Run the Llama-based link selection on the Anthropic home page and show the parsed result.
get_links("https://anthropic.com")

In [None]:
# Starting from the main URL, pick the relevant links and retrieve the contents of each linked page
def get_all_details(url):
    """Collect the text of the landing page and of every link the model selected.

    Args:
        url: Landing page of the company website.

    Returns:
        One string: a "Landing Page:" section followed by one section per
        selected link, each headed by the link's ``type``.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import urljoin

    # The landing page is fetched and appended once; it was previously
    # downloaded and added twice, wasting prompt space.
    sections = ["Landing Page:\n", Website(url).get_contents()]

    links = get_links(url)
    print("Found links:", links)
    # Tolerate a reply without a "links" key instead of raising KeyError.
    for link in links.get("links", []):
        sections.append(f"\n\n{link['type']}\n")
        # The model is asked for absolute URLs but may echo a relative one;
        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the landing page.
        sections.append(Website(urljoin(url, link["url"])).get_contents())
    return "".join(sections)

In [None]:
# Print the combined landing-page and linked-page text gathered for anthropic.com.
print(get_all_details("https://anthropic.com"))

In [None]:
#Set system prompt
# System prompt for the brochure-writing calls. It is built from adjacent
# string literals so that every sentence keeps its separating space (the
# earlier backslash-continued form produced "markdown.Include").
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. "
#     "Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )

In [None]:
#get brochure

def get_brochure_user_prompt(company_name, url):
    """Assemble the user message for the brochure request.

    Args:
        company_name: Name of the company, shown to the model verbatim.
        url: Landing page passed on to ``get_all_details``.

    Returns:
        The intro lines followed by the scraped page contents, cut to at most
        20,000 characters.
    """
    pieces = [
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n",
        get_all_details(url),
    ]
    prompt = "".join(pieces)
    # Truncate if more than 20,000 characters
    return prompt[:20_000]

In [None]:
# Build the brochure user prompt for Anthropic; as the cell's last expression, the returned string is shown as the cell output.
get_brochure_user_prompt("Anthropic", "https://anthropic.com")

In [None]:
# def create_brochure(company_name, url):
#     response = openai.chat.completions.create(
#         model=MODEL,
#         messages=[
#             {"role": "system", "content": system_prompt},
#             {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
#           ],
#     )
#     result = response.choices[0].message.content
#     display(Markdown(result))

def create_brochure(company_name, url):
    """Generate a brochure with the local Llama model and render it as Markdown.

    Args:
        company_name: Name of the company to describe.
        url: Landing page of the company website.

    Returns:
        None; the brochure is shown through ``IPython.display``.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    reply = ollama.chat(model=MODEL_LLAMA, messages=conversation)
    brochure = reply['message']['content']
    display(Markdown(brochure))

In [None]:
# Generate and render the brochure for Anthropic in one (non-streaming) call.
create_brochure("Anthropic", "https://anthropic.com")

In [None]:
# def stream_brochure(company_name, url):
#     stream = openai.chat.completions.create(
#         model=MODEL,
#         messages=[
#             {"role": "system", "content": system_prompt},
#             {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
#           ],
#         stream=True
#     )

# # For just a simple output you can do the following two lines;
# # for chunk in stream:
# #    print(chunk.choices[0].delta.content or '',end='')

#     response = ""
#     display_handle = display(Markdown(""), display_id=True)
#     for chunk in stream:
#         response += chunk.choices[0].delta.content or ''
#         response = response.replace("```","").replace("markdown", "")
#         update_display(Markdown(response), display_id=display_handle.display_id)

def stream_brochure(company_name, url):
    """Generate a brochure with the local Llama model, rendering it as it streams.

    Args:
        company_name: Name of the company to describe.
        url: Landing page of the company website.

    Returns:
        None; a single Markdown display is created and updated on every chunk.
    """
    stream = ollama.chat(
        model=MODEL_LLAMA,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    # For just a simple output you can do the following two lines;
    # for chunk in stream:
    #    print(chunk['message']['content'] or '', end='')

    # Keep the unmodified text separately from what is displayed, so a fence
    # that arrives split across chunks is still recognised once complete.
    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        raw += chunk['message']['content'] or ''
        # Strip only code-fence markers (with or without the "markdown" tag).
        # Removing the bare word "markdown" would also delete it from ordinary
        # brochure sentences.
        cleaned = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(cleaned), display_id=display_handle.display_id)


In [None]:
# Stream the Anthropic brochure into the cell output as it is generated.
stream_brochure("Anthropic", "https://anthropic.com")

In [None]:
# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:
# (stream_brochure reads the module-level system_prompt when it is called, so
# re-run the system prompt cell after editing it.)

stream_brochure("HuggingFace", "https://huggingface.co")

In [None]:
def test_llama_response_basic(company_name, url):
    """Debug helper: run one non-streaming brochure request and print what comes back.

    Args:
        company_name: Name of the company to describe.
        url: Landing page of the company website.

    Returns:
        The reply text when the response has ``message`` -> ``content``; the
        raw response when it does not; ``{}`` if anything raised.
    """
    try:
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ]
        reply = ollama.chat(model=MODEL_LLAMA, messages=conversation)

        # Dump the whole response first so unexpected shapes are visible.
        print("Raw response received:", reply)

        has_content = 'message' in reply and 'content' in reply['message']
        if not has_content:
            print("Response does not contain expected 'message' or 'content'")
            return reply

        text = reply['message']['content']
        print("Content from response:", text)
        return text

    except Exception as e:
        # Best-effort diagnostic: report the failure instead of propagating it.
        print(f"An error occurred: {e}")
        return {}

# Example usage: runs the debug helper for Hugging Face, which prints the raw
# response and the extracted content (or the error message on failure).
test_llama_response_basic("HuggingFace", "https://huggingface.co")
