In [6]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# Read settings from a local .env file; override=True lets them replace
# variables that are already present in the process environment.
load_dotenv(override=True)

# The key is looked up under the name OPEN_API_KEY; stop right away when it
# is missing or empty so later cells do not fail with a less obvious error.
api_key = os.environ.get("OPEN_API_KEY")
if not api_key:
    raise ValueError("OPEN_API_KEY environment variable is not set.")

# Client object shared by the cells below.
open_ai = OpenAI(api_key=api_key)


In [7]:
class Website:
    """
    A utility class for fetching and processing website content.

    Constructing an instance performs a single HTTP GET for ``url``. On a
    200 response the page title and text are stored on the instance; on any
    other status a message is printed and ``title`` / ``content`` keep their
    ``None`` defaults.

    Attributes:
        url: The address that was requested.
        title: Text of the page's ``<title>``; ``"No title found"`` when the
            tag is missing or has no plain-text value; ``None`` if the fetch
            did not return 200.
        content: Page text with script, style, img, input and nav elements
            removed, or ``None`` if the fetch did not return 200.
        summary: Not assigned anywhere in this class; defaults to ``None``.
    """
    url: str
    title: str = None
    content: str = None
    summary: str = None

    def __init__(self, url, timeout=10):
        """
        Fetch ``url`` and extract its title and text.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up, so an
                unresponsive host cannot block the notebook indefinitely.

        Raises:
            requests.RequestException: Propagated unchanged when the request
                itself fails (connection error, timeout, ...).
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch {url}: {response.status_code}")
            return

        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None when <title> is empty or contains nested tags,
        # so fall back in that case as well as when the tag is absent.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # Remove irrelevant elements before extracting the text.
        for irrelevant in soup(['script', 'style', 'img', 'input', 'nav']):
            irrelevant.decompose()

        self.content = soup.get_text(separator="\n", strip=False)

In [8]:
# Download the page once and render its title as a level-3 Markdown heading.
short_read = Website("https://edwarddonner.com")
display(Markdown("### {}".format(short_read.title)))

# Uncomment to inspect the full extracted page text:
# display(Markdown(short_read.content))

### Home - Edward Donner

In [9]:
def from_prompt(website: str, system_prompt: str = None, timeout: float = 10) -> tuple[list[dict[str, str]], int, str]:
    """
    Forms a prompt for the LLM based on system and user inputs.

    Downloads ``website``, drops script, style, img, input and nav elements,
    and wraps the remaining text in a system/user message pair.

    Args:
        website: URL of the page to summarize.
        system_prompt: Optional system message. A default summarization
            instruction is used when it is omitted or empty.
        timeout: Seconds to wait for the HTTP response, so an unresponsive
            host cannot block the notebook indefinitely.

    Returns:
        A ``(messages, status_code, error)`` tuple:

        * success: the two message dicts, ``200`` and ``None``;
        * non-200 response: ``[]``, the HTTP status and a failure message
          (which is also printed);
        * exception while fetching or parsing: ``[]``, the last known status
          (``403`` if no response was received) and ``str(exception)``.
    """
    if not system_prompt:
        system_prompt = "You are a helpful assistant that analyzes the content of a website and provides a short summary, ignoring texts that may be navigation related."

    # Placeholder status reported when the request raises before any HTTP
    # status is available; kept at 403 for compatibility with existing callers.
    status_code = 403
    try:
        response = requests.get(website, timeout=timeout)
        status_code = response.status_code
        if status_code != 200:
            error = f"Failed to fetch {website}: {status_code}"
            print(error)
            return ([], status_code, error)

        soup_props = BeautifulSoup(response.content, 'html.parser')
        for irrelevant in soup_props(['script', 'style', 'img', 'input', 'nav']):
            irrelevant.decompose()
    except Exception as e:
        # Best-effort: report the failure to the caller instead of raising.
        return ([], status_code, str(e))

    # ``.string`` is None when <title> is empty or contains nested tags; use
    # the fallback then too, so the prompt never reads "titled None".
    title_tag = soup_props.title
    title = (title_tag.string if title_tag else None) or 'No title found'

    user_prompt = f"You are looking at a website titled {title}."
    user_prompt += "\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += soup_props.get_text(separator="\n", strip=False)

    prompts: list[dict[str, str]] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return (prompts, status_code, None)


In [12]:
# Build the chat messages for the target page and print them for inspection.
the_prompt = from_prompt("https://onax.me")
print(the_prompt[0])

# `temperature` controls how random / creative the model's replies are:
# lower values (e.g. 0.2) give more focused, deterministic output,
# higher values (e.g. 0.8) give more diverse, creative output.
if the_prompt[1] != 200:
    # The fetch did not succeed, so report it instead of calling the model.
    print(f"Failed to generate response. Status Code: {the_prompt[1]} | Error: {the_prompt[2]}")
else:
    response = open_ai.chat.completions.create(
        model="gpt-4o-mini",
        messages=the_prompt[0],
        max_tokens=500,
        temperature=0.7,  # moderate value: balances creativity and coherence
    )
    display(Markdown(f"\n======\n{response.choices[0].message.content}"))

[{'role': 'system', 'content': 'You are a helpful assistant that analyzes the content of a website and provides a short summary, ignoring texts that may be navigation related.'}, {'role': 'user', 'content': 'You are looking at a website titled Onaefe Edebi.\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\n\n\n\n\n\n\n\n\n\n\nOnaefe Edebi\n\n\n\n\n\n\n\n\n\n\n🚧 Under Construction 🏗️\n\n\nIn the mean time, please check out my links below:\n\n\n\n\n\n\n\n            GitHub\n        \n\n\n\n            LinkedIn\n        \n\n\n\n            CourseRunway - AI Powered site to learn about any subject\n        \n\n\n\n\n\n\n\n'}]



======
# Summary of Onaefe Edebi

The website titled "Onaefe Edebi" is currently under construction. While the main content is not available, visitors are encouraged to explore links to the owner's GitHub, LinkedIn, and CourseRunway, an AI-powered platform for learning about various subjects.