In [1]:
# imports

import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
import ollama

In [2]:
# Constants

# Model name passed to ollama.chat() when a summary is requested.
MODEL = "llama3.2"

In [9]:
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

class WebScraper:
    """Selenium-driven scraper that returns a page's title and main text.

    Constructing an instance starts a Chrome session. Call ``close()`` when
    finished, or use the instance in a ``with`` statement so the browser is
    quit automatically.
    """

    def __init__(self):
        # Start Chrome using the driver path returned by ChromeDriverManager().install().
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    def __enter__(self):
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser on leaving the ``with`` block; never suppresses errors."""
        self.close()
        return False

    def extract_content(self, url):
        """Load ``url`` and return ``(title, main_content)``.

        Args:
            url: Address handed to the driver's ``get``.

        Returns:
            A tuple of the page title and the text of the first candidate
            content container that has non-empty text. ``main_content`` is
            ``""`` when no candidate is present.
        """
        # Open the URL
        self.driver.get(url)

        # Read the title before probing for content containers.
        title = self.driver.title

        return title, self._find_main_content()

    def _find_main_content(self):
        """Return the text of the first matching content container, else ``""``."""
        # Candidates are tried in this order; the first with non-empty text wins.
        locators = (
            (By.TAG_NAME, 'main'),
            (By.CLASS_NAME, 'main-content'),
            (By.ID, 'content'),
            (By.CLASS_NAME, 'content'),
        )

        main_content = ""
        for by, value in locators:
            try:
                main_content = self.driver.find_element(by, value).text
            except (NoSuchElementException, StaleElementReferenceException):
                # This container is absent (or vanished before it was read):
                # fall through to the next candidate. Anything else, such as
                # Ctrl-C or a programming error, is allowed to propagate
                # instead of being silently swallowed.
                continue
            if main_content:
                break

        return main_content

    def close(self):
        """Quit the browser session held by this scraper."""
        self.driver.quit()

In [4]:

# System-role instructions sent with every summarization request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [5]:
def user_prompt_for(title, text):
    """Build the user-role prompt for one page.

    The prompt is a line naming the page title, followed by the fixed
    summarization request, followed by ``text`` unchanged.

    Args:
        title: Page title; interpolated into the first line.
        text: Page text appended verbatim; must be a ``str``.

    Returns:
        The assembled prompt string.
    """
    heading = f"You are looking at a website titled {title}"
    request = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + request + text

In [6]:
def messages_for(title, text):
    """Return the two-message chat payload (system, then user) for one page.

    Args:
        title: Page title, forwarded to ``user_prompt_for``.
        text: Page text, forwarded to ``user_prompt_for``.

    Returns:
        A list of two ``{"role": ..., "content": ...}`` dicts: the system
        message carrying ``system_prompt`` and the user message carrying the
        prompt built for this page.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(title, text)}
    return [system_message, user_message]

In [12]:
# And now: call the Ollama function instead of OpenAI

def summarize(url):
    """Scrape ``url`` with Selenium and return the model's summary of it.

    Args:
        url: Address of the page to summarize.

    Returns:
        The ``['message']['content']`` string from the ``ollama.chat`` response.

    Any exception raised while scraping propagates to the caller, but only
    after the browser has been quit.
    """
    scraper = WebScraper()
    try:
        title, content = scraper.extract_content(url)
    finally:
        # Each WebScraper owns a Chrome session; quit it even when scraping
        # fails so repeated calls do not accumulate browser processes.
        scraper.close()

    # The browser is already released before the model call starts.
    messages = messages_for(title, content)
    response = ollama.chat(model=MODEL, messages=messages)
    return response['message']['content']

In [14]:
# Running this cell starts a Chrome session to scrape the page and then sends the text to the Ollama model.
summary = summarize("https://edwarddonner.com")

In [16]:
# The system prompt asks the model to respond in markdown, so render it
# instead of printing the raw markup.
display(Markdown(summary))

**Summary of Edward Donner's Website**

### Overview

Edward Donner's website appears to be a personal blog or resource site focused on Artificial Intelligence (AI), Machine Learning (ML), and related topics.

### Recent Posts
#### News and Announcements
* **January 23, 2025**: "LLM Workshop – Hands-on with Agents – resources" - This post likely provides resources for attending an LLM (Large Language Model) workshop.
* **December 21, 2024**: "Welcome, SuperDataScientists!" - An announcement welcoming new subscribers or members to the community.
* **November 13, 2024**: "Mastering AI and LLM Engineering – Resources" - A post providing resources for learning about AI and LLM engineering.
* **October 16, 2024**: "From Software Engineer to AI Data Scientist – resources" - A resource guide for transitioning from a software engineer role to an AI data scientist position.

### Summary
Edward Donner's website seems to be dedicated to sharing knowledge, resources, and insights on AI and related