In [1]:
import chromadb
import ollama
from pyzotero import zotero
from tqdm import tqdm
import os
import subprocess
from pushover import Client

# Yet Another RAG system (YARAGS)

## Setup

In [2]:
# Chromadb
chroma_client = chromadb.PersistentClient()
collection = chroma_client.get_or_create_collection("annotations")
# Backup "all-my-documents"

In [3]:
zot_client = zotero.Zotero(
        library_id=os.environ.get("ZOTERO_USER_ID"),
        library_type="user",
        api_key=os.environ.get("ZOTERO_API_KEY"),
        local=False
    )

## Import annotations from Zotero to Chromadb

When inserting into Chromadb, automatic tokenization, embedding, and indexing.

In [10]:
annotations = zot_client.everything(zot_client.items(itemType="annotation"))

In [11]:
for annotation in tqdm(annotations):
    annot = annotation['data']
    if 'annotationText' in annot:
        content = annot['annotationText']
        collection.add(
            documents=[content],
            metadatas=[{"parent": annot['parentItem']}],
            ids=[annot["key"]],
        )

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 932/932 [02:27<00:00,  6.31it/s]


## Query most similar results

In [4]:
QUERY = "Auscultation is not reliable"

In [16]:
results = collection.query(
    query_texts=[QUERY],
    n_results=20
)

TypeError: string indices must be integers

In [6]:
for i, key in enumerate(results['ids'][0]):
    parent = results['metadatas'][0][i]['parent']
    text = results['documents'][0][i]
    print(f"\nzotero://open-pdf/library/items/{parent}?annotation={key.ljust(40)}: {results['distances'][0][i]}")
    print(text)


zotero://open-pdf/library/items/Z25QYFM3?annotation=5AMIITIM                                : 0.9542385339736938
Based on this review, the diagnostic utility of auscultation is unclear and medical doctors should not rely too much on auscultation alone.

zotero://open-pdf/library/items/5CPSK3X2?annotation=HVB9655E                                : 1.0933171510696411
Characteristics and Performance of Independent Clinical Auscultators

zotero://open-pdf/library/items/EACS73U9?annotation=WIFTLZTA                                : 1.1441478729248047
Our findings show that auscultation is good for finding AS, including mild cases.

zotero://open-pdf/library/items/EACS73U9?annotation=DDDSMWUN                                : 1.1681032180786133

zotero://open-pdf/library/items/Z25QYFM3?annotation=BLIGZHRH                                : 1.201717734336853
Sensitivity of auscultation ranged from 30% to 100%, and specificity ranged from 28% to 100%. LRs ranged from 1.35 to 26.


## LLM

In [7]:
# We make sure our LLM is running

#MODEL = "phi3:mini"
MODEL = "gpt-oss:20b"
process = subprocess.Popen(["ollama", "run", MODEL])

In [8]:
context=""
for key, text in zip(results['ids'][0], results['documents'][0]):
    context = context + f"{text} [{key}]\n"
print(context)

Based on this review, the diagnostic utility of auscultation is unclear and medical doctors should not rely too much on auscultation alone. [5AMIITIM]
Characteristics and Performance of Independent Clinical Auscultators [HVB9655E]
Our findings show that auscultation is good for finding AS, including mild cases. [WIFTLZTA]
Sensitivity of auscultation ranged from 30% to 100%, and specificity ranged from 28% to 100%. LRs ranged from 1.35 to 26. [BLIGZHRH]



In [None]:
response = ollama.chat(model=MODEL, messages=[
        {"role": "system",
         "content": "You are a helpful scientific assistant." +
                    "Using the context provided, answer the user's question." +
                    "Make sure to insert the references (in square braces) found in the context appropriately."
        },
        {"role": "user", "content": f"Context:\n{context}\nUser query: Write a short paragraph on this topic: {QUERY}"}
    ])
response_text = response['message']['content']
print(response_text)

### Bonus

In [12]:
client = Client(os.environ.get("PUSHOVER_USER_KEY"), api_token=os.environ.get("PUSHOVER_API_TOKEN"))
client.send_message(response_text, title="Done!")

<pushover.MessageRequest at 0x128d088ada0>