In [29]:
# Extract bookmarks from a Firefox bookmarks backup (JSON) into named tuples

In [52]:
import json
from collections import namedtuple
from datetime import datetime

# Immutable record describing one bookmark: its URI, title and last-modified time
BookmarkEntry = namedtuple('BookmarkEntry', 'uri title last_modified')

# Path of the bookmarks JSON file that the extraction cell reads
bookmarks_file = "bookmarks-2025-03-15.json"

def convert_timestamp_to_datetime(timestamp_us):
    """Turn a microsecond-resolution epoch timestamp into a ``datetime``.

    Args:
        timestamp_us: Time since the Unix epoch, expressed in microseconds.

    Returns:
        The naive local-time ``datetime`` produced by
        ``datetime.fromtimestamp`` for that instant.
    """
    microseconds_per_second = 1_000_000.0
    epoch_seconds = timestamp_us / microseconds_per_second
    return datetime.fromtimestamp(epoch_seconds)

# Function to extract and store all links from the bookmarks data
def extract_bookmarks(bookmark_node, bookmarks_list):
    """Recursively collect http(s) bookmarks from a bookmarks tree.

    A dict whose ``type`` is ``"text/x-moz-place"`` and whose ``uri`` starts
    with ``http`` is appended to ``bookmarks_list`` as a ``BookmarkEntry``.
    Any other dict that has a ``children`` key is treated as a folder and
    each child is visited in order. Non-dict nodes are ignored.

    Args:
        bookmark_node: The current node of the parsed bookmarks JSON.
        bookmarks_list: List that receives the ``BookmarkEntry`` items; it is
            mutated in place.

    Returns:
        None. Results are accumulated in ``bookmarks_list``.
    """
    if not isinstance(bookmark_node, dict):
        return

    if bookmark_node.get('type') == "text/x-moz-place":
        # This is a bookmark entry with a URL
        uri = bookmark_node.get('uri')
        if uri and uri.startswith('http'):
            last_modified_us = bookmark_node.get('lastModified')
            # ``.get`` returns None when the key is absent; converting None
            # would raise TypeError and abort the whole walk, so record the
            # entry with an unknown (None) modification time instead.
            if last_modified_us is None:
                last_modified_dt = None
            else:
                last_modified_dt = convert_timestamp_to_datetime(last_modified_us)
            bookmarks_list.append(
                BookmarkEntry(
                    uri=uri,
                    title=bookmark_node.get('title'),
                    last_modified=last_modified_dt,
                )
            )
    elif 'children' in bookmark_node:
        # This is a folder containing other bookmarks or folders
        for child in bookmark_node['children']:
            extract_bookmarks(child, bookmarks_list)

# Fresh accumulator so re-running this cell does not append duplicates
bookmarks = []

# Parse the bookmarks JSON file
with open(bookmarks_file, encoding='utf-8') as f:
    file_data = json.load(f)

# Walk the tree from its root, filling `bookmarks`
extract_bookmarks(file_data, bookmarks)

# Report how many bookmarks were extracted
print(len(bookmarks))

# Show the first few entries, first as one list ...
print(bookmarks[:5])
# ... then, optionally, one entry per line to verify them
for entry in bookmarks[:5]:
    print(entry)


1995
[BookmarkEntry(uri='https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/', title='[NA ONLY] September Scavenger Hunt Thread : WorldofTanks', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://inara.cz/galaxy', title='INARA - Elite:Dangerous companion', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/', title='[NA ONLY] September Scavenger Hunt Thread : WorldofTanks', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://inara.cz/galaxy', title='INARA - Elite:Dangerous companion', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://www.arcadepunks.com/retro-pie-downloads-page-date-added/', title='Pi Images Downloads Page (Newest First) - Arcade Punks', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13))]
BookmarkE

In [28]:
# Fetch a page's HTML content and convert it to plain text

In [26]:
import primp

# Client configured with the "firefox_135" / "linux" impersonation settings
client = primp.Client(
    impersonate="firefox_135",
    impersonate_os="linux",
)

# Issue the GET request and print the response's `text_plain`
resp = client.get("https://mcgillij.dev")
print(resp.text_plain)

# Then print whatever the response stream yields, one chunk at a time
for chunk in resp.stream():
    print(chunk)

devoops
🔍About Me
🍃 AmdFan
🪵 Blog
🖳 Projects

Welcome to my corner of the internet

Enjoy your stay! You can find all the things I'm working on with regards to:
open source
hiking
travel
guitars, amps and effects
overlanding
drones
gaming (in linux and steamdeck)
Atiya my dog

Latest Blog Posts

What goes into an item in Atiya’s Quest

published on 📅 25/02/2025

Explore the intricate process of creating equipment in Atiya’s Quest, from effects and rarity to
unique names and descriptions.

| tags : Linux, Godot, GameDev, Atiya

Godot Patterns: Prefab

published on 📅 07/11/2024

In this post I’ll go over the Prefab pattern that I’ve been using in Atiya’s Quest, that allows for
dynamic content creation in Godot.

| tags : Linux, Python, GDScript, Godot, Patterns, Prefab

Godot Patterns: EventBus

published on 📅 06/11/2024

EventBus pattern in Godot

| tags : Linux, Python, GDScript, Godot, Patterns, EventBus

GDScript vs Python (from a Python Dev’s perspective)

published on 📅 04/11/2024


In [53]:
import sqlite3
from datetime import datetime

def adapt_datetime(dt):
    """Return ``dt`` rendered as ISO 8601 text, the form stored in SQLite."""
    iso_text = dt.isoformat()
    return iso_text


# Tell sqlite3 to pass every datetime parameter through adapt_datetime
sqlite3.register_adapter(datetime, adapt_datetime)

def _require_bookmark_fields(bookmark):
    """Raise ValueError (after printing the bookmark) if a required field is empty."""
    if not bookmark.uri or not bookmark.title or not bookmark.last_modified:
        print(bookmark)
        raise ValueError("Bookmark URI, title, and last_modified must be provided.")


# Function to insert or update bookmark data in the database
def insert_or_update_bookmark(bookmark, content=None, checked=False):
    """Write one bookmark row, replacing any existing row with the same URI.

    Uses the module-level ``cursor`` and ``conn`` (looked up at call time) and
    commits immediately after the write.

    Args:
        bookmark: Object exposing ``uri``, ``title`` and ``last_modified``.
        content: Value stored in the ``content`` column.
        checked: Value stored in the ``checked`` column.

    Raises:
        ValueError: If ``uri``, ``title`` or ``last_modified`` is falsy.
    """
    _require_bookmark_fields(bookmark)

    cursor.execute('''
    INSERT OR REPLACE INTO bookmarks (uri, title, last_modified, content, checked)
    VALUES (?, ?, ?, ?, ?)
    ''', (bookmark.uri, bookmark.title, bookmark.last_modified, content, checked))
    conn.commit()


def load_bookmark(bookmark):
    """Add a bookmark, or refresh an existing row, without touching its content.

    Loading through ``INSERT OR REPLACE`` with ``content=None`` and
    ``checked=False`` would overwrite the stored ``content`` and reset
    ``checked`` for every URI already in the table each time this cell is
    re-run. Here a new URI gets a row with NULL content and ``checked`` 0,
    while an existing URI only has ``title`` and ``last_modified`` updated.
    The caller is responsible for committing.

    Raises:
        ValueError: If ``uri``, ``title`` or ``last_modified`` is falsy.
    """
    _require_bookmark_fields(bookmark)

    cursor.execute('''
    INSERT OR IGNORE INTO bookmarks (uri, title, last_modified, content, checked)
    VALUES (?, ?, ?, NULL, 0)
    ''', (bookmark.uri, bookmark.title, bookmark.last_modified))
    cursor.execute('''
    UPDATE bookmarks SET title = ?, last_modified = ? WHERE uri = ?
    ''', (bookmark.title, bookmark.last_modified, bookmark.uri))


# Debugging function to print out a small bit of content from the database
def debug_database():
    """Print up to five rows of the bookmarks table with a 100-character content snippet."""
    cursor.execute('SELECT uri, title, last_modified, SUBSTR(content, 1, 100) AS snippet, checked FROM bookmarks LIMIT 5')
    rows = cursor.fetchall()
    for row in rows:
        uri, title, last_modified, snippet, checked = row
        print(f"URI: {uri}")
        print(f"Title: {title}")
        print(f"Last Modified: {last_modified}")
        print(f"Content Snippet: {snippet}...")
        print(f"Checked: {checked}")
        print("-" * 80)


# Bind the name before the try so the cleanup in `finally` is safe even when
# sqlite3.connect() itself raises.
conn = None
try:
    # Create or connect to an SQLite database
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()

    # Create the table if it doesn't exist
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS bookmarks (
        uri TEXT PRIMARY KEY,
        title TEXT NOT NULL,
        last_modified TIMESTAMP NOT NULL,
        content TEXT,
        checked BOOLEAN
    )
    ''')

    # Insert all extracted bookmarks into the database. One commit covers the
    # whole batch; the inner `finally` keeps the rows written before a failing
    # entry, as the per-row commits used to.
    try:
        for entry in bookmarks:
            load_bookmark(entry)
    finally:
        conn.commit()

    # Call the debugging function to display some data
    debug_database()

except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()


URI: about:newtab
Title: New Tab
Last Modified: 2024-04-10T14:28:13
Content Snippet: None...
Checked: 0
--------------------------------------------------------------------------------
URI: about:home
Title: New Tab
Last Modified: 2024-04-10T14:28:13
Content Snippet: None...
Checked: 0
--------------------------------------------------------------------------------
URI: javascript:location.href='http://localhost/bookmarklet?uri='+encodeURIComponent(window.location.href)
Title:  Add to Miniflux 
Last Modified: 2024-04-10T14:28:13
Content Snippet: None...
Checked: 0
--------------------------------------------------------------------------------
URI: file:///home/j/DND/Dungeons/Rappan-Athuk-5e.pdf
Title: Rappan-Athuk-5e.pdf
Last Modified: 2024-04-10T14:28:13
Content Snippet: None...
Checked: 0
--------------------------------------------------------------------------------
URI: file:///home/j/DND/adventures/D_D%205e%20Storm%20King_s%20Thunder.pdf
Title: Storm Kings Thunder
Last Modified:

In [55]:


# Function to fetch text content from a URI using primp
def fetch_text_content(uri):
    """Download ``uri`` with primp and return the response's ``text_plain``.

    Only values that start with ``http`` are fetched. For anything else,
    including ``None`` and the empty string, no request is made and ``None``
    is returned.
    """
    if not uri or not uri.startswith('http'):
        return None

    client = primp.Client(impersonate="firefox_135", impersonate_os="linux")
    return client.get(uri).text_plain

# Function to update the database with fetched text content and set checked to True
def update_bookmarks_with_text_content():
    """Fetch and store text content for every bookmark not yet checked.

    Selects the rows with ``checked = 0`` through the module-level ``cursor``,
    fetches each URI with ``fetch_text_content`` and writes the result back
    through ``insert_or_update_bookmark`` with ``checked=True``. If fetching or
    writing raises, the error is printed and processing continues with the
    next row.
    """
    cursor.execute('SELECT uri, title, last_modified FROM bookmarks WHERE checked = 0')
    pending_rows = cursor.fetchall()

    for uri, title, last_modified in pending_rows:
        bookmark = BookmarkEntry(uri=uri, title=title, last_modified=last_modified)

        try:
            text_content = fetch_text_content(uri)
            # Store the fetched text and flag the row as processed
            insert_or_update_bookmark(bookmark, content=text_content, checked=True)
            print(f"Updated bookmark for URI: {uri}")
        except Exception as e:
            print(f"Failed to fetch or update bookmark for URI: {uri}. Error: {e}")

# Bind the name first: if sqlite3.connect() raises, the `finally` clause must
# not hit an unbound name or a connection left over from an earlier cell.
conn = None
try:
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()
    update_bookmarks_with_text_content()
except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()

In [56]:
def debug_database2():
    """Print up to five checked bookmarks, or ``no entries`` if there are none.

    Runs the query through the module-level ``cursor`` and prints, for each
    row with ``checked = 1``, its URI, title, last-modified value, the first
    100 characters of its content and its checked flag.
    """
    cursor.execute('SELECT uri, title, last_modified, SUBSTR(content, 1, 100) AS snippet, checked FROM bookmarks WHERE checked = 1 LIMIT 5')
    rows = cursor.fetchall()

    # A `for ... else` clause runs whenever the loop finishes without `break`,
    # so it would print "no entries" even after listing rows. Test for the
    # empty result explicitly instead.
    if not rows:
        print("no entries")
        return

    for uri, title, last_modified, snippet, checked in rows:
        print(f"URI: {uri}")
        print(f"Title: {title}")
        print(f"Last Modified: {last_modified}")
        print(f"Content Snippet: {snippet}...")
        print(f"Checked: {checked}")
        print("-" * 80)

# Bind the name first: if sqlite3.connect() raises, the `finally` clause must
# not hit an unbound name or a connection left over from an earlier cell.
conn = None
try:
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()
    debug_database2()
except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()

URI: https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/
Title: [NA ONLY] September Scavenger Hunt Thread : WorldofTanks
Last Modified: 2024-04-10T14:28:13
Content Snippet: Skip to main content
Open menu Open navigation .snoo-cls-1 { fill: url(#snoo-radial-gragient) white;...
Checked: 1
--------------------------------------------------------------------------------
URI: https://inara.cz/galaxy
Title: INARA - Elite:Dangerous companion
Last Modified: 2024-04-10T14:28:13
Content Snippet: Main site logoElite:DangerousEliteStarfieldStarfieldKingdom Come 2KCD2MMO.RPG.SPACEM.R.S
Inara for E...
Checked: 1
--------------------------------------------------------------------------------
URI: https://bsky.app/
Title: B
Last Modified: 2025-01-21T00:36:24.038000
Content Snippet: <h1 lang="en">JavaScript Required</h1> <p lang="en">This is a heavily interactive web application,
a...
Checked: 1
----------------------------------------------------------------

In [57]:
# Try out ChromaDB: store two sample documents, then query the collection


import chromadb
chroma_client = chromadb.Client()

# get_or_create_collection (instead of create_collection) avoids creating a
# new collection every time this cell runs
collection = chroma_client.get_or_create_collection(name="my_collection")

sample_ids = ["id1", "id2"]
sample_documents = [
    "This is a document about pineapple",
    "This is a document about oranges",
]

# upsert (instead of add) avoids adding the same documents every time
collection.upsert(ids=sample_ids, documents=sample_documents)

# Chroma embeds the query text itself; ask for the two closest documents
results = collection.query(
    query_texts=["This is a query document about florida"],
    n_results=2,
)

print(results)


/home/j/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|█| 79.3M/79.3M 


{'ids': [['id2', 'id1']], 'embeddings': None, 'documents': [['This is a document about oranges', 'This is a document about pineapple']], 'uris': None, 'data': None, 'metadatas': [[None, None]], 'distances': [[1.1462138891220093, 1.3015382289886475]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}
