In [None]:
# !pip install langchain
# !pip install pypdf
# !pip install yt_dlp
# !pip install pydub
# !pip install chromadb
# !pip install langchain_openai
# !pip install sentence_transformers

In [1]:
import os

import shutil

In [2]:
# Load dependencies
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFacePipeline, HuggingFaceHub
from transformers import AutoTokenizer, pipeline, logging
from openai import OpenAI
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.retrievers.self_query.base import SelfQueryRetriever

In [None]:

# Load the protocol PDF and cut it up into chunks for embedding.
loader = PyPDFLoader("TITRE-protocol.pdf")
pages = loader.load()
# chunk_size / chunk_overlap are presumably measured in characters (the
# splitter's default length function) — confirm if token counts matter.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
splits = text_splitter.split_documents(pages)
# Show how many chunks were produced.
print(len(splits))

In [None]:
# Embed the chunks and store them in Chroma (`vectordb` is used by getRagData).
model_name = "BAAI/bge-base-en"
# Passed through to the embedding model; asks for normalized embeddings.
encode_kwargs = {'normalize_embeddings': True}
embedding_function = HuggingFaceBgeEmbeddings(model_name=model_name,
                                              encode_kwargs=encode_kwargs)
persist_directory = 'docs/chroma/'
# NOTE(review): the store is written to `persist_directory`; presumably
# re-running this cell against an existing directory adds the same chunks
# again — confirm, and clear or reuse the directory if so.
# Disabled reset step, kept for reference. Note that it removes the whole
# "docs/" tree, not just `persist_directory`:
# if os.path.exists("docs/chroma/") and os.path.isdir(persist_directory):
#     shutil.rmtree("docs/")
#     print(f"The directory {persist_directory} has been deleted.")
# else:
#     print(f"The directory {persist_directory} did not exist.")
# persist_directory = 'docs/chroma/'
vectordb = Chroma.from_documents(documents=splits,
                                 embedding=embedding_function,
                                 persist_directory=persist_directory)


In [25]:
# Set up my local LLM, which is running on the M3 laptop.
# The OpenAI client is pointed at a server on the local network via base_url;
# the api_key value is a placeholder string.
llm = OpenAI(base_url="http://192.168.1.157:1234/v1/", api_key="not-needed")
# Disabled alternative on port 11434 (the port askQuestionOllama talks to):
# llm = OpenAI(base_url="http://192.168.1.157:11434", api_key="not-needed")

In [13]:
def getRagData(query: str, k=5, vectorstore=None):
    """Build the LLM prompt: the query followed by the k most similar chunks.

    Args:
        query: Question text; also used as the similarity-search query.
        k: Number of chunks to retrieve.
        vectorstore: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute. Defaults to the
            notebook-level ``vectordb``.

    Returns:
        ``query``, a newline, then the retrieved chunks' text separated by
        blank lines. With no results this is just ``query + "\\n"``.
    """
    store = vectordb if vectorstore is None else vectorstore
    chunks = store.similarity_search(query, k=k)
    # Keep a blank line between chunks: concatenating them back-to-back fused
    # the last word of one chunk with the first word of the next.
    context = "\n\n".join(doc.page_content for doc in chunks)
    return query + "\n" + context

In [22]:
def askQuestion(query: str, k=5):
    """Print the chat model's answer to `query`, using retrieved text as context.

    The user message is whatever getRagData(query, k) returns. The content of
    the first returned choice is printed; nothing is returned and errors are
    not caught here.
    """
    chat_messages = [
        {"role": "system", "content": "You are an expert helpful assistant."},
        {"role": "user", "content": getRagData(query, k)},
    ]
    reply = llm.chat.completions.create(
        model="local-model",  # placeholder; original note: this field is currently unused
        messages=chat_messages,
        temperature=0.01,
    )
    first_choice = reply.choices[0]
    print(first_choice.message.content)

In [32]:
import requests
import json

def askQuestionOllama(query: str, k=5, model="deepseek-r1:8b", timeout=None):
    """Answer `query` through the Ollama generate endpoint, with RAG context.

    Args:
        query: Question text; passed to getRagData to build the prompt.
        k: Number of chunks getRagData should retrieve.
        model: Ollama model name to run.
        timeout: Seconds to wait for the HTTP response, forwarded to
            ``requests.post``. ``None`` (the default) waits indefinitely,
            which matches the previous behaviour.

    Returns:
        The generated text (also printed), or ``None`` if the request failed
        or the reply had no "response" field. Failures are printed together
        with a traceback rather than raised.
    """
    prompt = getRagData(query, k)

    # Ollama API endpoint
    url = "http://192.168.1.157:11434/api/generate"

    payload = {
        "model": model,
        "prompt": prompt,
        "system": "You are an expert helpful assistant.",
        "stream": False,
        # Sampling parameters go under "options" for /api/generate; a
        # top-level "temperature" key is not a documented request field, so
        # the intended near-deterministic setting was not being applied.
        "options": {"temperature": 0.01},
    }

    try:
        # json= serialises the payload and sets the Content-Type header.
        response = requests.post(url, json=payload, timeout=timeout)
        response.raise_for_status()

        result = response.json()

        # Ollama returns the response in the "response" field
        if "response" in result:
            print(result["response"])
            return result["response"]
        else:
            print(f"Unexpected response structure: {result}")
            return None
    except Exception as e:
        # Best-effort by design: report the problem and keep the notebook running.
        print(f"Error during Ollama API call: {e}")
        import traceback
        traceback.print_exc()
        return None

In [9]:
def askQuestion(query: str, k=5):
    """Print the chat model's answer to `query`, using retrieved text as context.

    The prompt is built by getRagData(query, k) before the guarded section, so
    retrieval errors propagate. Any exception raised while calling the model
    or printing its reply is reported with print() instead of being raised.
    Nothing is returned.
    """
    user_content = getRagData(query, k)
    request = dict(
        model="meta-llama-3.1-8b-instruct",
        messages=[
            {"role": "system", "content": "You are an expert helpful assistant."},
            {"role": "user", "content": user_content},
        ],
        temperature=0.01,
    )
    try:
        answer = llm.chat.completions.create(**request).choices[0].message.content
        print(answer)
    except Exception as e:
        print(f"Error during API call: {e}")

In [None]:
# Retrieve 15 chunks (instead of the default 5) as context for this question.
askQuestion("Describe the inclusion and exclusion criteria of the study.",15)

In [None]:
# Site start-up checklist, using the default k=5 retrieved chunks.
askQuestion("Please write a detailed checklist for starting up and activating a clinical site for the study.")


In [None]:
# Connectivity check: verbose request to port 11434 on the LLM host
# (the port askQuestionOllama sends its requests to).
!curl -v http://192.168.1.157:11434

In [None]:
import requests
import json

# Smoke test: call the chat-completions endpoint directly with requests,
# bypassing both the OpenAI client and the retrieval step.
url = "http://192.168.1.157:1234/v1/chat/completions"
payload = {
    "model": "meta-llama-3.1-8b-instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, how are you?"}
    ],
    "temperature": 0.01
}
headers = {"Content-Type": "application/json"}

try:
    # Short timeout so an unreachable server is reported quickly.
    response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=5)
    # Turn HTTP error statuses into exceptions handled below.
    response.raise_for_status()
    print(response.json())
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

In [None]:
# Study summary via the Ollama endpoint, with the default k and model.
askQuestionOllama("Please write a descriptive summary of the study, including the background, significance, and reasons for doing the study.")

In [None]:
from openai import OpenAI
# NOTE(review): this rebinds the notebook-level name `logging`, which the
# dependency cell imported from transformers; from here on `logging` is the
# standard-library module.
import logging

# Enable debugging: configure logging at DEBUG level.
logging.basicConfig(level=logging.DEBUG)

# Initialize the OpenAI client (replaces the `llm` created earlier; note this
# base_url has no trailing slash, unlike the earlier one).
llm = OpenAI(base_url="http://192.168.1.157:1234/v1", api_key="not-needed")

def getRagData(query: str, k=5, vectorstore=None):
    """Build the LLM prompt: the query followed by the k most similar chunks.

    Args:
        query: Question text; also used as the similarity-search query.
        k: Number of chunks to retrieve.
        vectorstore: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute. Defaults to the
            notebook-level ``vectordb``, which must already be defined.

    Returns:
        ``query``, a newline, then the retrieved chunks' text separated by
        blank lines. With no results this is just ``query + "\\n"``.
    """
    store = vectordb if vectorstore is None else vectorstore
    rag_return = store.similarity_search(query, k=k)
    # Join with a blank line between chunks: appending them back-to-back ran
    # the end of one chunk straight into the start of the next.
    context = "\n\n".join(r.page_content for r in rag_return)
    return query + "\n" + context

def askQuestion(query: str, k=5):
    """Print the chat model's reply to `query`, grounded in retrieved chunks.

    getRagData(query, k) supplies the user message and runs before the guarded
    section, so its errors propagate. Exceptions from the model call or from
    printing the reply are caught and reported with print(). Returns nothing.
    """
    rag_prompt = getRagData(query, k)
    system_turn = {"role": "system", "content": "You are an expert helpful assistant."}
    user_turn = {"role": "user", "content": rag_prompt}
    try:
        response = llm.chat.completions.create(
            model="meta-llama-3.1-8b-instruct",
            messages=[system_turn, user_turn],
            temperature=0.01,
        )
        print(response.choices[0].message.content)
    except Exception as e:
        print(f"Error during API call: {e}")

# Run the function with k=15 retrieved chunks instead of the default 5.
askQuestion("Describe the inclusion and exclusion criteria of the study.", 15)

In [None]:
# Endpoints and hypotheses, using the default k=5 retrieved chunks.
askQuestion("What are the primary and secondary endpoints of the study. What are the main hypotheses?")


In [None]:
# Risks and benefits, using the default k=5 retrieved chunks.
askQuestion("What are the risks and benefits of participating in the study?")


In [None]:
# Adverse events and their definitions, using the default k=5 retrieved chunks.
askQuestion("What are the potential adverse events and how are these defined? ")


In [None]:
# Visit list via the Ollama endpoint; the next cell asks the same question
# through askQuestion.
askQuestionOllama("List all the visits in the study")

In [None]:
# Visit list via the OpenAI-compatible client, using the default k=5.
askQuestion("List all the visits in the study")