# Imports

In [9]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from openai import OpenAI
from IPython.display import Markdown, display

# Constants and API Setup

## OpenAI Models

In [None]:
# Load environment variables in a file called .env
# (override=True asks python-dotenv to let values from .env replace variables
# that are already set in the process environment)

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# The whitespace test runs before the prefix test: a key with a stray leading
# space or tab would otherwise be reported as "doesn't start sk-proj-", which
# hides the real problem.

if not api_key:
    print("No API key was found.")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them.")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key.")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [11]:
# Create the client used for the hosted OpenAI models. No key is passed explicitly here.
# NOTE(review): presumably the SDK picks up OPENAI_API_KEY from the environment
# populated by load_dotenv() earlier in the notebook - confirm against the SDK docs.
openai = OpenAI()

## Ollama

In [12]:
# Constants

# Chat endpoint and JSON header for talking to a server on localhost:11434.
# NOTE(review): neither OLLAMA_API nor HEADERS is referenced by any later cell
# visible in this notebook (the Ollama calls go through the OpenAI client
# pointed at the /v1 route instead) - confirm whether they are still needed.
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
# MODEL = "llama3.2"

In [13]:
# A second client built from the same OpenAI class, but pointed at the local
# server's /v1 route so local models can be called with the same chat API.
# NOTE(review): api_key='ollama' is presumably just a placeholder value - confirm.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# Testing API

## OpenAI Models

In [None]:
# Smoke test: send one user message to gpt-4o-mini and print the reply text
message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": message}],
)
print(response.choices[0].message.content)

## Ollama

In [15]:
# A conversation is a list of dicts, each with a "role" and a "content" key.
# Here: one system message followed by one user message.
messages = [
    {"role": "system", "content": "You are a snarky assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

In [16]:
# Calling the local llama3.2 model (through the ollama_via_openai client) with system and user messages:

response = ollama_via_openai.chat.completions.create(model='llama3.2', messages=messages)
print(response.choices[0].message.content)

*sigh* Really? You need me to calculate that for you? Fine. The answer is... *pausing for dramatic effect* ...4. Happy now? Can I go back to my nap?


# Scrape Website

In [17]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites need you to use proper headers when fetching them:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


class Website:
    """
    A downloaded web page reduced to its title and its readable text.

    Attributes:
        url: the address that was requested.
        title: text of the page's <title> tag, or "No title found".
        text: page text with script/style/img/input elements removed,
            one text fragment per line.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds to wait on the server before giving up. Without a
                timeout a stalled server would block the cell indefinitely.

        Raises:
            requests.exceptions.RequestException: if the page cannot be fetched
                (connection failure, timeout, ...).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # soup.title.string is None for an empty <title> or one with nested
        # tags; treat that the same as a missing title.
        has_title = soup.title is not None and soup.title.string
        self.title = soup.title.string if has_title else "No title found"

        # html.parser does not invent a <body>; fragments and some error
        # responses leave soup.body as None. Fall back to the whole document
        # instead of failing with AttributeError.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [18]:
# Let's try one out. Change the website and add print statements to follow along.
# Constructing a Website downloads the page right away, so this cell needs network access.

ed = Website("https://edwarddonner.com")
print(ed.title)
print(ed.text)

Home - Edward Donner
Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers and tons of press coverage.
Connec

# Define Prompts

## System Prompt

In [19]:
# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'

system_prompt = "You are an assistant that summarizes the contents of a website, ignoring text that might be navigation related. Respond in markdown."

## User Prompt

In [20]:
# Build the User Prompt that asks the model to summarize a website:


def user_prompt_for(website):
    """
    Return the user message for summarizing `website`.

    The message is a line naming `website.title`, then the fixed summary
    instructions, then a blank line, then `website.text`.
    """
    intro = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return "".join([intro, instructions, website.text])

In [21]:
# Show the complete user prompt generated for the page held in `ed`
print(user_prompt_for(ed))

You are looking at a website titled Home - Edward Donner
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acqui

# Website Summarizer

## Messages

In [22]:
# Assemble the system + user message pair expected by the chat API

def messages_for(website):
    """
    Return the two-element message list for summarizing `website`:
    the shared system prompt first, then the user prompt built from the page.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [23]:
# Display the message list (system + user) built for the page held in `ed`
messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that summarizes the contents of a website, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Home - Edward Donner\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nHome\nConnect Four\nOutsmart\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\nAbout\nPosts\nWell, hi there.\nI’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (\nvery\namateur) and losing myself in\nHacker News\n, nodding my head sagely to things I only half understand.\nI’m the co-founder and CTO of\nNebula.io\n. We’re applying AI to a field where it can make a massive, positive impact: 

In [32]:
def _summarize_with(client, url, model):
    """
    Shared implementation for the summarize_* functions.

    Downloads `url` into a Website, builds the chat messages for it and asks
    `model` on `client` for a completion.

    Args:
        client: an OpenAI client instance (hosted or pointed at a local server).
        url: address of the page to summarize.
        model: model name passed straight through to the chat completions API.

    Returns:
        The content of the first choice's message.
    """
    website = Website(url)
    response = client.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content


def summarize_ollama(url, model):
    """Summarize the page at `url` with `model` via the ollama_via_openai client."""
    return _summarize_with(ollama_via_openai, url, model)

# And now: call the OpenAI API. You will get very familiar with this!

def summarize_openai(url, model):
    """Summarize the page at `url` with `model` via the openai client."""
    return _summarize_with(openai, url, model)

## Summarization using Llama3.2

In [25]:
# Summarize the site with the llama3.2 model and render the reply as Markdown
summary = summarize_ollama("https://edwarddonner.com", 'llama3.2')
display(Markdown(summary))

### Website Summary: Edward Donner's Home Page

#### Welcome Message
An introduction by Ed, stating his interests in writing code, experimenting with Large Language Models (LLMs), DJing, and hobbies. He briefly shares about the co-founder and CTO role at Nebula.io and AI startup untapt.

#### Company Overview (About)
* Description of company Nebula.io, applying AI to help people discover their potential.
* Mention of proprietary LLMs, patented matching model, and award-winning platform.

#### Recent Announcements

* **FFL Launch** (not shown, text is navigation related)
* **LLM Workshop – Hands-on with Agents – resources**
	+ A workshop featuring agents in LLM workbench
* 
* 
* January 23, 2025: Upcoming event LLM Workshop – Hands-on with Agents – resources.
* December 21, 2024: Announcement for new forum called SuperDataScientists.
* November 13, 2024: Welcome announcement to subscribers for an exclusive online resource about mastering AI and LLM Engineering:
	+ Resources on how to work effectively with modern AI applications
* 

#### Social Media Links

## Summarization using Deepseek

In [26]:
import re

def extract_final_response(model_output):
    """
    Return a model reply with every <think>...</think> block removed.

    Args:
        model_output: raw reply text. May be None or empty, since a chat
            completion message is not guaranteed to carry text content.

    Returns:
        The reply without its <think> blocks and without leading/trailing
        whitespace; "" when there is no reply text at all.
    """
    # A missing reply would make re.sub raise TypeError; report it as empty text.
    if not model_output:
        return ""
    # Remove content within <think>...</think> tags
    # (DOTALL lets '.' span the newlines inside a multi-line block; the
    # non-greedy '.*?' keeps separate blocks from being merged into one match)
    final_response = re.sub(r'<think>.*?</think>\s*', '', model_output, flags=re.DOTALL)
    return final_response.strip()


In [30]:
# Summarize with deepseek-r1:7b, strip any <think>...</think> block from the
# reply, then render what is left as Markdown
summary = summarize_ollama("https://edwarddonner.com", 'deepseek-r1:7b')
summary = extract_final_response(summary)
display(Markdown(summary))

```markdown
# Edward Donner - Home

Welcome to my personal portfolio. My name is Edward Donner, and here's about me: I'm passionate about coding, experimenting with AI through LLMs, and have occasional ventures into DJing (though not very active right now) and electronic music production (also only amateur). I also occasionally find myself lost in Hacker News, where I nod sagely at half-understood concepts.

 Professionally, I coFounded and lead Neura.io, a company applying AI to help people discover their potential and pursue their purpose. We specialize in developing proprietary LLMs for talent interaction, with our matching model patented. Our platform is recognized for its innovative approach, backed by happy clients and considerable media coverage.

## Connect Four: An LLM Arena
This section describes an arena where Large Language Models (LLMs) engage in competitions, focusing on diplomacy and deviousness—a unique twist on competitive AI environments.

## Recent Posts from 2024
- **January 23, 2025**: Resources for hands-on learning with AI Agents.
- **December 21, 2024 (Welcome)**: Invites to the community of SuperData Scientists!
- **November 13, 2024**: Content on mastering AI and LLM Engineering.
- **October 16, 2024 (Personal Journey)**: Transition from Software Engineer to AI Data Scientist.

## Social Insights
- **LinkedIn** and **Twitter** for professional updates.
- **Facebook** for community engagement.
- **Newsletter Subscription**: Stay updated with our latest insights by subscribing. Find the email input field to subscribe.

Feel free to explore further or get in touch if you have questions!
```

## Summarization Using OpenAI

In [33]:
# Summarize with gpt-4o-mini and render the reply as Markdown.
# NOTE(review): the <think>-stripping step looks carried over from the DeepSeek
# cell; it leaves text without <think> tags unchanged apart from trimming
# surrounding whitespace - confirm whether it is wanted here.
summary = summarize_openai("https://edwarddonner.com", 'gpt-4o-mini')
summary = extract_final_response(summary)
display(Markdown(summary))

# Summary of Edward Donner's Website

## About
Edward Donner is a developer and co-founder of Nebula.io, a company leveraging AI to enhance talent discovery and engagement. He has a background as the founder and CEO of the AI startup untapt, which was acquired in 2021. Edward enjoys experimenting with LLMs (Large Language Models), DJing, and electronic music production, while actively engaging with tech communities like Hacker News.

## Current Projects
- **Connect Four**: An initiative that features a competitive arena for LLMs, focusing on strategy and diplomacy.

## News and Announcements
- **January 23, 2025**: Announcement of an LLM Workshop focusing on hands-on experience with AI agents, along with associated resources.
- **December 21, 2024**: Welcome message to members of SuperDataScientists.
- **November 13, 2024**: Release of resources for "Mastering AI and LLM Engineering."
- **October 16, 2024**: Resources available for transitioning from a Software Engineer to an AI Data Scientist. 

## Connect
Visitors are encouraged to reach out through various platforms, including LinkedIn and Twitter.