In [1]:
# Extract bookmarks from a Firefox bookmarks export into named tuples

In [2]:
import json
from collections import namedtuple
from datetime import datetime

# Immutable record describing one extracted bookmark
BookmarkEntry = namedtuple(
    typename='BookmarkEntry',
    field_names=('uri', 'title', 'last_modified'),
)

# JSON bookmarks export that this notebook reads
bookmarks_file = "bookmarks-2025-03-15.json"

def convert_timestamp_to_datetime(timestamp_us):
    """Turn a timestamp given in microseconds into a datetime.

    The value is scaled to seconds and handed to ``datetime.fromtimestamp``
    without a ``tz`` argument, so the result is a naive datetime.
    """
    seconds_since_epoch = timestamp_us / 1_000_000.0
    return datetime.fromtimestamp(seconds_since_epoch)

# Function to extract and store all links from the bookmarks data
def extract_bookmarks(bookmark_node, bookmarks_list):
    """Recursively collect http(s) bookmarks from a bookmarks tree.

    Appends one ``BookmarkEntry`` to ``bookmarks_list`` for every
    ``text/x-moz-place`` node whose ``uri`` starts with ``http``. Nodes of any
    other type are searched through their ``children`` when that key is
    present. Non-dict nodes are ignored. Nothing is returned;
    ``bookmarks_list`` is mutated in place.
    """
    if not isinstance(bookmark_node, dict):
        return

    is_place = bookmark_node.get('type') == "text/x-moz-place"
    if not is_place:
        # Not a bookmark entry: descend only when the node actually has children
        if 'children' in bookmark_node:
            for child_node in bookmark_node['children']:
                extract_bookmarks(child_node, bookmarks_list)
        return

    # Bookmark entry: keep it only when it points at a web URL
    uri = bookmark_node.get('uri')
    if not uri or not uri.startswith('http'):
        return

    entry = BookmarkEntry(
        uri=uri,
        title=bookmark_node.get('title'),
        last_modified=convert_timestamp_to_datetime(bookmark_node.get('lastModified')),
    )
    bookmarks_list.append(entry)

# Accumulator that extract_bookmarks() appends BookmarkEntry tuples to
bookmarks = []

# Parse the whole export (path in bookmarks_file) into memory as JSON
with open(bookmarks_file, 'r', encoding='utf-8') as f:
    file_data = json.load(f)

# Start extracting bookmarks from the root of the bookmarks data
extract_bookmarks(file_data, bookmarks)

# Sanity check: number of extracted entries, then the first five as one list repr
print(len(bookmarks))
print(bookmarks[:5])
# Print the same first five entries again, one per line
for entry in bookmarks[:5]:
    print(entry)

# Limit to 20 entries while testing.
# NOTE(review): this rebinds `bookmarks`, so every later cell only sees the first 20 — remove for a full run
bookmarks = bookmarks[:20]

1994
[BookmarkEntry(uri='https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/', title='[NA ONLY] September Scavenger Hunt Thread : WorldofTanks', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://inara.cz/galaxy', title='INARA - Elite:Dangerous companion', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/', title='[NA ONLY] September Scavenger Hunt Thread : WorldofTanks', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://inara.cz/galaxy', title='INARA - Elite:Dangerous companion', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13)), BookmarkEntry(uri='https://www.arcadepunks.com/retro-pie-downloads-page-date-added/', title='Pi Images Downloads Page (Newest First) - Arcade Punks', last_modified=datetime.datetime(2024, 4, 10, 14, 28, 13))]
BookmarkE

In [3]:
# Fetch a page's HTML content and convert it to plain text

In [4]:
import primp

# Build a primp HTTP client configured to impersonate Firefox 135 on Linux
client = primp.Client(impersonate="firefox_135", impersonate_os="linux")

# GET a sample page and print its text_plain attribute (the recorded output is tag-free text)
resp = client.get("https://mcgillij.dev")
print(resp.text_plain)
# NOTE(review): stream() is iterated after text_plain has already been read — confirm whether any chunks remain at this point
for chunk in resp.stream():
    print(chunk)

devoops
🔍About Me
🍃 AmdFan
🪵 Blog
🖳 Projects

Welcome to my corner of the internet

Enjoy your stay! You can find all the things I'm working on with regards to:
open source
hiking
travel
guitars, amps and effects
overlanding
drones
gaming (in linux and steamdeck)
Atiya my dog

Latest Blog Posts

What goes into an item in Atiya’s Quest

published on 📅 25/02/2025

Explore the intricate process of creating equipment in Atiya’s Quest, from effects and rarity to
unique names and descriptions.

| tags : Linux, Godot, GameDev, Atiya

Godot Patterns: Prefab

published on 📅 07/11/2024

In this post I’ll go over the Prefab pattern that I’ve been using in Atiya’s Quest, that allows for
dynamic content creation in Godot.

| tags : Linux, Python, GDScript, Godot, Patterns, Prefab

Godot Patterns: EventBus

published on 📅 06/11/2024

EventBus pattern in Godot

| tags : Linux, Python, GDScript, Godot, Patterns, EventBus

GDScript vs Python (from a Python Dev’s perspective)

published on 📅 04/11/2024


In [5]:
import sqlite3
from datetime import datetime

def adapt_datetime(dt):
    """Serialize ``dt`` as an ISO 8601 string for storage in SQLite."""
    iso_text = dt.isoformat()
    return iso_text

# Have sqlite3 call adapt_datetime whenever a datetime is bound as a query parameter
sqlite3.register_adapter(datetime, adapt_datetime)

def insert_or_update_bookmark(bookmark, content=None, checked=False):
    """Insert ``bookmark`` into the ``bookmarks`` table, or refresh an existing row.

    Uses the notebook-global ``cursor`` and ``conn``, which must refer to an
    open connection when this is called. The change is committed before
    returning.

    * URI not stored yet: a new row is inserted with ``content``/``checked``.
    * URI stored and ``checked`` is true: title, last_modified, content and
      checked are overwritten.
    * URI stored and ``checked`` is false: the row is left untouched, so
      re-importing the export never wipes content that was already fetched.

    Raises:
        ValueError: if ``bookmark`` has an empty uri, title or last_modified
            (the offending bookmark is printed first).
    """
    if not bookmark.uri or not bookmark.title or not bookmark.last_modified:
        print(bookmark)
        raise ValueError("Bookmark URI, title, and last_modified must be provided.")

    # Only existence matters here, so there is no need to pull the stored content back
    cursor.execute('SELECT 1 FROM bookmarks WHERE uri = ?', (bookmark.uri,))
    already_stored = cursor.fetchone() is not None

    if not already_stored:
        # Insert a new bookmark
        print("New value found inserting")
        cursor.execute('''
            INSERT INTO bookmarks (uri, title, last_modified, content, checked)
            VALUES (?, ?, ?, ?, ?)
            ''', (bookmark.uri, bookmark.title, bookmark.last_modified, content, checked))
    elif checked:
        # Existing row that has now been fetched: overwrite it with the new content
        cursor.execute('''
                UPDATE bookmarks 
                SET title = ?, last_modified = ?, content = ?, checked = ?
                WHERE uri = ?
                ''', (bookmark.title, bookmark.last_modified, content, checked, bookmark.uri))
    # else: existing row and nothing fetched -> deliberately do nothing

    conn.commit()


def debug_database():
    """Print up to five rows of the ``bookmarks`` table for a quick visual check.

    For each row the uri, title, last_modified, the first 100 characters of
    content and the checked flag are printed. Uses the notebook-global
    ``cursor``.
    """
    cursor.execute('SELECT uri, title, last_modified, SUBSTR(content, 1, 100) AS snippet, checked FROM bookmarks LIMIT 5')
    rows = cursor.fetchall()
    print(f"{len(rows)=}")
    for row in rows:
        uri, title, last_modified, snippet, checked = row
        print(f"URI: {uri}")
        print(f"Title: {title}")
        print(f"Last Modified: {last_modified}")
        print(f"Content Snippet: {snippet}...")
        print(f"Checked: {checked}")
        print("-" * 80)


# Bound before the try so the finally clause never sees an undefined name
# (fresh kernel) when sqlite3.connect itself fails.
conn = None
try:
    # Create or connect to an SQLite database
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()

    # Create the table if it doesn't exist
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS bookmarks (
        uri TEXT PRIMARY KEY,
        title TEXT NOT NULL,
        last_modified TIMESTAMP NOT NULL,
        content TEXT,
        checked BOOLEAN
    )
    ''')

    # Insert all extracted bookmarks into the database
    for entry in bookmarks:
        try:
            insert_or_update_bookmark(entry)
        except ValueError as ve:
            # One incomplete bookmark must not abort the import of the remaining ones
            print(f"Value Error: {ve}")

    # Call the debugging function to display some data
    debug_database()

except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()


len(rows)=5
URI: https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/
Title: [NA ONLY] September Scavenger Hunt Thread : WorldofTanks
Last Modified: 2024-04-10T14:28:13
Content Snippet: Skip to main content
Open menu Open navigation .snoo-cls-1 { fill: url(#snoo-radial-gragient) white;...
Checked: 1
--------------------------------------------------------------------------------
URI: https://inara.cz/galaxy
Title: INARA - Elite:Dangerous companion
Last Modified: 2024-04-10T14:28:13
Content Snippet: Main site logoElite:DangerousEliteStarfieldStarfieldKingdom Come 2KCD2MMO.RPG.SPACEM.R.S
Inara for E...
Checked: 1
--------------------------------------------------------------------------------
URI: https://bsky.app/
Title: B
Last Modified: 2025-01-21T00:36:24.038000
Content Snippet: <h1 lang="en">JavaScript Required</h1> <p lang="en">This is a heavily interactive web application,
a...
Checked: 1
----------------------------------------------------

In [7]:
# Function to fetch text content from a URI using primp
def fetch_text_content(uri):
    """Download ``uri`` with primp and return the response's ``text_plain``.

    Returns ``None`` without making a request when ``uri`` is empty or does
    not start with ``http``.
    """
    if not uri or not uri.startswith('http'):
        return None

    browser_like_client = primp.Client(impersonate="firefox_135", impersonate_os="linux")
    return browser_like_client.get(uri).text_plain

# Function to update the database with fetched text content and set checked to True
def update_bookmarks_with_text_content(limit=5):
    """Fetch page text for bookmarks that are not checked yet and store it.

    Selects up to ``limit`` rows with ``checked = 0`` through the
    notebook-global ``cursor``, downloads each URI with
    ``fetch_text_content`` and writes the text back through
    ``insert_or_update_bookmark(..., checked=True)``.

    Args:
        limit: maximum number of unchecked bookmarks handled in one call.
            Defaults to 5, the batch size that used to be hard-coded.

    A failure for one URI is printed and the loop continues with the next row.

    NOTE(review): a row whose fetch fails keeps ``checked = 0`` and is
    therefore selected again on the next call; enough permanently dead URIs
    can fill every batch. Decide how failures should be recorded.
    """
    # Query for bookmarks where checked is False
    cursor.execute(
        'SELECT uri, title, last_modified FROM bookmarks WHERE checked = 0 LIMIT ?',
        (limit,),
    )

    for uri, title, last_modified in cursor.fetchall():
        bookmark = BookmarkEntry(uri=uri, title=title, last_modified=last_modified)

        try:
            # Fetch text content
            text_content = fetch_text_content(uri)

            # Update the database with the fetched text content and set checked to True
            insert_or_update_bookmark(bookmark, content=text_content, checked=True)
            print(f"Updated bookmark for URI: {uri}")

        except Exception as e:
            # Best effort: network errors for one bookmark should not stop the batch
            print(f"Failed to fetch or update bookmark for URI: {uri}. Error: {e}")

# Reset first: if connect() fails, the finally clause must not see an undefined
# name (fresh kernel) or a stale handle left over from an earlier cell.
conn = None
try:
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()
    update_bookmarks_with_text_content()
except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()

Updated bookmark for URI: https://tankrewards.com/en/
Failed to fetch or update bookmark for URI: https://volknn.ru/fire/#/RE4GD. Error: error sending request for url (https://volknn.ru/fire/#/RE4GD): client error (Connect)

Caused by:
    0: client error (Connect)
    1: tcp connect error: Connection refused (os error 111)
    2: Connection refused (os error 111)
Failed to fetch or update bookmark for URI: https://www.saltyjedi.com/obj-279e. Error: error sending request for url (https://www.saltyjedi.com/obj-279e): client error (Connect)

Caused by:
    0: client error (Connect)
    1: dns error: failed to lookup address information: Name or service not known
    2: failed to lookup address information: Name or service not known
Failed to fetch or update bookmark for URI: https://thetrove.net/Books/Shadow%20of%20the%20Demon%20Lord/index.html. Error: error sending request for url (https://thetrove.net/Books/Shadow%20of%20the%20Demon%20Lord/index.html): client error (Connect)

Caused by

In [8]:
def debug_database2():
    """Print up to five bookmarks whose content has already been fetched.

    Queries rows with ``checked = 1`` through the notebook-global ``cursor``
    and prints uri, title, last_modified, the first 100 characters of content
    and the checked flag for each. Prints ``no entries`` only when the query
    returned no rows.
    """
    cursor.execute('SELECT uri, title, last_modified, SUBSTR(content, 1, 100) AS snippet, checked FROM bookmarks WHERE checked = 1 LIMIT 5')
    rows = cursor.fetchall()

    # A `for ... else` clause runs whenever the loop finishes without `break`,
    # i.e. also after printing rows, so the empty case is tested explicitly.
    if not rows:
        print("no entries")
        return

    for uri, title, last_modified, snippet, checked in rows:
        print(f"URI: {uri}")
        print(f"Title: {title}")
        print(f"Last Modified: {last_modified}")
        print(f"Content Snippet: {snippet}...")
        print(f"Checked: {checked}")
        print("-" * 80)

# Reset first: if connect() fails, the finally clause must not see an undefined
# name (fresh kernel) or a stale handle left over from an earlier cell.
conn = None
try:
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()
    debug_database2()
except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()

URI: https://www.reddit.com/r/WorldofTanks/comments/cxhh4w/na_only_september_scavenger_hunt_thread/
Title: [NA ONLY] September Scavenger Hunt Thread : WorldofTanks
Last Modified: 2024-04-10T14:28:13
Content Snippet: Skip to main content
Open menu Open navigation .snoo-cls-1 { fill: url(#snoo-radial-gragient) white;...
Checked: 1
--------------------------------------------------------------------------------
URI: https://inara.cz/galaxy
Title: INARA - Elite:Dangerous companion
Last Modified: 2024-04-10T14:28:13
Content Snippet: Main site logoElite:DangerousEliteStarfieldStarfieldKingdom Come 2KCD2MMO.RPG.SPACEM.R.S
Inara for E...
Checked: 1
--------------------------------------------------------------------------------
URI: https://bsky.app/
Title: B
Last Modified: 2025-01-21T00:36:24.038000
Content Snippet: <h1 lang="en">JavaScript Required</h1> <p lang="en">This is a heavily interactive web application,
a...
Checked: 1
----------------------------------------------------------------

In [13]:
def get_content():
    """Return ``(title, uri, content)`` tuples for every bookmark with ``checked = 1``.

    Runs on the notebook-global ``cursor``.
    """
    checked_rows_query = 'SELECT title, uri, content FROM bookmarks WHERE checked = 1'
    cursor.execute(checked_rows_query)
    return cursor.fetchall()
        
# Stays empty when the query below fails, so later cells still find a list
content = []
# Reset first: if connect() fails, the finally clause must not see an undefined
# name (fresh kernel) or a stale handle left over from an earlier cell.
conn = None
try:
    conn = sqlite3.connect('bookmarks.db')
    cursor = conn.cursor()
    content = get_content()
except sqlite3.Error as e:
    print(f"SQLite error: {e}")
except ValueError as ve:
    print(f"Value Error: {ve}")
except Exception as ex:
    print(f"An unexpected error occurred: {ex}")

finally:
    # Ensure the database connection is closed
    if conn is not None:
        conn.close()
        
# One text document per checked bookmark. The `=` specifier writes the variable
# name into the string, so title/uri/c must keep exactly these names.
content_list = [f"{title=}, {uri=}, {c=}" for title, uri, c in content]

# Positional ids: 'id1' for the first document, 'id2' for the second, ...
id_array = [f'id{position}' for position in range(1, len(content_list) + 1)]
print(id_array)
print(len(content_list))

['id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8', 'id9', 'id10', 'id11', 'id12', 'id13', 'id14', 'id15', 'id16', 'id17', 'id18', 'id19', 'id20', 'id21', 'id22', 'id23', 'id24', 'id25', 'id26', 'id27', 'id28', 'id29', 'id30', 'id31', 'id32', 'id33', 'id34', 'id35', 'id36', 'id37', 'id38', 'id39', 'id40', 'id41', 'id42', 'id43', 'id44', 'id45', 'id46', 'id47', 'id48', 'id49', 'id50', 'id51', 'id52', 'id53', 'id54', 'id55', 'id56', 'id57', 'id58', 'id59', 'id60', 'id61', 'id62', 'id63', 'id64', 'id65', 'id66', 'id67', 'id68', 'id69', 'id70', 'id71', 'id72', 'id73', 'id74', 'id75', 'id76', 'id77', 'id78', 'id79', 'id80', 'id81', 'id82', 'id83', 'id84', 'id85', 'id86', 'id87', 'id88', 'id89', 'id90', 'id91', 'id92', 'id93', 'id94', 'id95', 'id96', 'id97', 'id98', 'id99', 'id100', 'id101', 'id102', 'id103', 'id104', 'id105', 'id106', 'id107', 'id108', 'id109', 'id110', 'id111', 'id112', 'id113', 'id114', 'id115', 'id116', 'id117', 'id118', 'id119', 'id120', 'id121', 'id122', 'id123', 

In [24]:
# test out some chromadb stuff


import chromadb

# Directory handed to the persistent Chroma client below
embedding_directory = "./content/chroma_db"

chroma_client = chromadb.PersistentClient(path=embedding_directory)

# get_or_create_collection (rather than create_collection) so re-running this cell reuses "my_collection"
collection = chroma_client.get_or_create_collection(name="my_collection")

# upsert (rather than add) so re-running this cell does not add the same documents again.
# NOTE(review): ids are positional ('id1', 'id2', ...), so an id can end up pointing at a
# different bookmark if the row order behind content_list changes — confirm this is acceptable.
collection.upsert(
    documents=content_list,
    ids=id_array
)

# Smoke test: ask for the two stored documents closest to a sample question
results = collection.query(
    query_texts=["how to write some python"], # Chroma will embed this for you
    n_results=2 # how many results to return
)
from pprint import pprint
pprint(results)



{'data': None,
 'distances': [[0.8909375459945122, 0.9188441239116446]],
 'documents': [["title='Understanding all of Python, through its builtins', "
                "uri='https://sadh.life/post/builtins/', "
                "c='TS\\nRSS\\n\\nSunday, 10 October 2021\\n\\nUnderstanding "
                'all of Python, through its builtins\\n\\nPython has a whole '
                'lot of builtins that are unknown to most people. This guide '
                'aims to introduce you\\nto everything that Python has to '
                'offer, through its seemingly obscure '
                'builtins.\\n\\nUnderstanding all of Python, through its '
                'builtins\\n\\n@tusharsadhwani\\n\\nPython as a language is '
                'comparatively simple. And I believe, that you can learn quite '
                'a lot about\\nPython and its features, just by learning what '
                'all of its builtins are, and what they do. And to\\nback up '
                "that claim,

In [17]:
import torch

In [16]:
# Report whether torch sees a usable CUDA device (needs the `import torch` cell to have run first)
print(torch.cuda.is_available())

True


In [23]:
from langchain.llms import OpenAI
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import prompt
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import textwrap
import gradio
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
import os

os.environ["TOKENIZERS_PARALLELISM"] = "true"
# Placeholder value; the same placeholder is passed explicitly to OpenAI(...) below
os.environ["OPENAI_API_KEY"] = "dummy-key"


temperature = 0.1 # Use a value between 0 and 2. Lower = factual, higher = creative
# NOTE(review): n_gpu_layers and n_batch are not referenced anywhere else in the visible notebook
n_gpu_layers = 43  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# LLM client pointed at the server given in openai_api_base.
# `temperature` was defined above but never handed to the model, so the
# library default applied; it is passed explicitly now.
# NOTE(review): OpenAI-compatible servers commonly serve under a /v1 prefix — confirm the base URL.
llm = OpenAI(
       openai_api_base='http://192.168.2.35:1234',
       openai_api_key='dummy-key',
       temperature=temperature,
       )

## Follow the default prompt style from the OpenOrca-Platypus2 huggingface model card.

def get_prompt():
    """Return the QA prompt template text with ``{context}`` and ``{question}`` placeholders."""
    prompt_text = """Use the following Context information to answer the user's question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
### Instruction:
Context: {context}
User Question: {question}
###
Response:
"""
    return prompt_text

def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own with ``textwrap.fill``
    and the wrapped pieces are joined back together with newlines.
    """
    return '\n'.join(
        textwrap.fill(original_line, width=width)
        for original_line in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print an answer together with its sources and return the answer text.

    Args:
        llm_response: mapping with a ``'result'`` string and a
            ``'source_documents'`` iterable, or a falsy value.

    Returns:
        ``"Please enter a question"`` when ``llm_response`` is falsy; otherwise
        ``llm_response['result']`` cut off at the first ``"### Response"``
        marker.
    """
    if not llm_response:
        return "Please enter a question"

    answer = llm_response['result']
    print(wrap_text_preserve_newlines(answer))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        # Documents stored without metadata have no 'source' key; indexing it
        # directly raised KeyError and the answer was never returned.
        metadata = getattr(source, 'metadata', None) or {}
        print(metadata.get('source', '(no source metadata)'))

    return answer.split("### Response")[0]

def startChat():
    """Build the retrieval QA chain over "my_collection" and launch a Gradio chat UI.

    Relies on the notebook globals ``chroma_client`` and ``llm``.
    """
    # Earlier variant, kept for reference: build the store from a persist directory
    # embedding_directory = "./content/chroma_db"
    # embedding_model=HuggingFaceBgeEmbeddings(model_name='BAAI/bge-base-en', model_kwargs={'device':'cpu'})
    # embedding_db = Chroma(persist_directory=embedding_directory, embedding_function=embedding_model)
    bookmark_store = Chroma(client=chroma_client, collection_name="my_collection")

    qa_prompt = PromptTemplate(
        template=get_prompt(),
        input_variables=["context", "question"],
    )

    # Chain that passes the retrieved documents to the LLM and also returns them as sources
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=bookmark_store.as_retriever(search_type="mmr", search_kwargs={'k': 5}),
        chain_type_kwargs={"prompt": qa_prompt},
        return_source_documents=True,
    )

    def runChain(query, history):
        """Chat callback: answer ``query``; ``history`` is accepted but not used."""
        return process_llm_response(qa_chain(query))

    chat_app = gradio.ChatInterface(runChain)
    chat_app.queue()
    chat_app.launch(share=False, debug=True)
