In [105]:
## RAG Hands-on tutorial
# Outline of the pipeline built in this notebook. The bare string below is the
# cell's last expression, so the notebook echoes it as the cell output.
"""
Step 1: Vector Store
Step 2: Search the vector store and retrieve relevant documents
Step 3: Call LLM with the user query and the retrieved documents
Step 4: Return the LLM response to the user
"""


'\nStep 1: Vector Store\nStep 2: Search the vector store and retrieve relevant documents\nStep 3: Call LLM with the user query and the retrieved documents\nStep 4: Return the LLM response to the user\n'

In [106]:
import json
import logging
import os
import sys
import time

from dotenv import load_dotenv
load_dotenv(override=True)

# create and configure logger
# The format string below ends every timestamp with a literal "Z" (the UTC
# designator), but logging renders local time by default. Switch the formatter's
# time converter to UTC so the suffix is truthful.
logging.Formatter.converter = time.gmtime
# Records go both to the file "llm.log" and to stdout (so they show up under the cell).
logging.basicConfig(level=logging.INFO, datefmt='%Y-%m-%dT%H:%M:%S',
                    format='%(asctime)-15s.%(msecs)03dZ %(levelname)-7s : %(name)s - %(message)s',
                    handlers=[logging.FileHandler("llm.log"), logging.StreamHandler(sys.stdout)])
# create log object with current module name
log = logging.getLogger(__name__)

In [112]:
%%sh
# Print the path of the `python` the shell resolves, then its version.
# NOTE(review): this is the shell's interpreter; presumably the same one the kernel uses — confirm.
which python

python --version


/Users/minum/.pyenv/versions/3.12.0/bin/python
Python 3.12.0


In [None]:
%%sh
pip install -r requirements.txt

Collecting jupyter (from -r requirements.txt (line 1))
  Obtaining dependency information for jupyter from https://files.pythonhosted.org/packages/83/df/0f5dd132200728a86190397e1ea87cd76244e42d39ec5e88efd25b2abd7e/jupyter-1.0.0-py2.py3-none-any.whl.metadata
  Using cached jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)
Collecting notebook (from jupyter->-r requirements.txt (line 1))
  Obtaining dependency information for notebook from https://files.pythonhosted.org/packages/32/b4/b0cdaf52c35a3a40633136bee5152d6670acb555c698d23a3458dca65781/notebook-7.2.1-py3-none-any.whl.metadata
  Downloading notebook-7.2.1-py3-none-any.whl.metadata (10 kB)
Collecting qtconsole (from jupyter->-r requirements.txt (line 1))
  Obtaining dependency information for qtconsole from https://files.pythonhosted.org/packages/f2/3f/de5e5eb44900c1ed1c1567bc505e3b6e6f4c01cf29e558bf2f8cee29af5b/qtconsole-5.5.2-py3-none-any.whl.metadata
  Downloading qtconsole-5.5.2-py3-none-any.whl.metadata (5.1 kB)
Collecti

Collecting argon2-cffi-bindings (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->notebook->jupyter->-r requirements.txt (line 1))
  Obtaining dependency information for argon2-cffi-bindings from https://files.pythonhosted.org/packages/5a/e4/bf8034d25edaa495da3c8a3405627d2e35758e44ff6eaa7948092646fdcc/argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl.metadata
  Using cached argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl.metadata (6.7 kB)
Collecting jsonschema-specifications>=2023.03.6 (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook->jupyter->-r requirements.txt (line 1))
  Obtaining dependency information for jsonschema-specifications>=2023.03.6 from https://files.pythonhosted.org/packages/ee/07/44bd408781594c4d0a027666ef27fab1e441b109dc3b76b4f836f8fd04fe/jsonschema_specifications-2023.12.1-py3-none-any.whl.metadata
  Using cached jsonschema_specifications-2023.12.1-py3-none-any.whl.metadata (3.0 kB)
Collecting referencing>=0.28.4 (fro


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
# data loaders
from langchain_community.document_loaders import CSVLoader, DataFrameLoader, PyPDFLoader, Docx2txtLoader, UnstructuredRSTLoader, DirectoryLoader


class DataLoaders:
    """
    Builds ``DirectoryLoader`` objects for the file types used in this notebook.

    Every ``*_loader`` method only constructs and returns a loader that selects
    files under ``data_dir_path`` by glob pattern; the caller is expected to
    call ``.load()`` on the returned object.
    """
    def __init__(self, data_dir_path):
        """
        :param data_dir_path: directory handed to every ``DirectoryLoader``.
        """
        self.data_dir_path = data_dir_path

    def csv_loader(self):
        """
        Return a multithreaded directory loader for ``**/*.csv`` files, parsed
        by ``CSVLoader`` with a comma delimiter and a double-quote quote character.
        """
        csv_loader_kwargs = {
            "csv_args": {
                "delimiter": ",",
                "quotechar": '"',
            },
        }
        dir_csv_loader = DirectoryLoader(
            self.data_dir_path,
            glob="**/*.csv",
            use_multithreading=True,
            loader_cls=CSVLoader,
            loader_kwargs=csv_loader_kwargs,
        )
        return dir_csv_loader

    def pdf_loader(self):
        """
        Return a directory loader for ``**/*.pdf`` files, parsed by ``PyPDFLoader``.
        """
        dir_pdf_loader = DirectoryLoader(
            self.data_dir_path,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
        )
        return dir_pdf_loader

    def word_loader(self):
        """
        Return a directory loader for ``**/*.docx`` files, parsed by ``Docx2txtLoader``.
        """
        dir_word_loader = DirectoryLoader(
            self.data_dir_path,
            glob="**/*.docx",
            loader_cls=Docx2txtLoader,
        )
        return dir_word_loader

    def rst_loader(self):
        """
        Return a directory loader for ``**/*.rst`` files, parsed by
        ``UnstructuredRSTLoader`` with ``mode="single"``.
        """
        rst_loader_kwargs = {
            "mode": "single"
        }
        dir_rst_loader = DirectoryLoader(
            self.data_dir_path,
            glob="**/*.rst",
            loader_cls=UnstructuredRSTLoader,
            loader_kwargs=rst_loader_kwargs,
        )
        return dir_rst_loader

    @staticmethod
    def get_text_metadatas(csv_data=None, pdf_data=None, word_data=None, rst_data=None):
        """
        Format text and metadata content

        Flattens the loaded documents into two parallel lists, in the order
        csv, pdf, word, rst. Each metadata entry has the keys ``source`` and
        ``row_page``: the CSV ``row``, the PDF ``page``, or ``''`` for Word and
        RST documents. An argument left as ``None`` (or passed empty)
        contributes nothing.

        Declared as a static method because it reads no instance state; it can
        be called on the class or on an instance.

        :param csv_data: documents whose metadata has ``source`` and ``row``
        :param pdf_data: documents whose metadata has ``source`` and ``page``
        :param word_data: documents whose metadata has ``source``
        :param rst_data: documents whose metadata has ``source``
        :return: tuple ``(texts, metadatas)`` of equal length
        :raises KeyError: if a document's metadata lacks one of the keys above
        """
        # (documents, metadata key that locates the text inside its source file)
        groups = (
            (csv_data, 'row'),    # metadata={'source': 'filename.csv', 'row': 0}
            (pdf_data, 'page'),   # metadata={'source': 'data/filename.pdf', 'page': 8}
            (word_data, None),
            (rst_data, None),     # metadata={'source': 'docs/images/architecture/index.rst'}
        )

        texts = []
        metadatas = []
        for documents, position_key in groups:
            # `or ()` lets callers omit a file type instead of crashing on None
            for doc in documents or ():
                texts.append(doc.page_content)
                metadatas.append({
                    'source': doc.metadata['source'],
                    'row_page': doc.metadata[position_key] if position_key else '',
                })
        return texts, metadatas

In [None]:
# load data
# The directory to scan comes from DATA_DIR_PATH and falls back to "data".
data_dir_path = os.getenv('DATA_DIR_PATH', "data")
data_loader = DataLoaders(data_dir_path=data_dir_path)
log.info("Loading files from directory %s", data_dir_path)

# Build one directory loader per file type ...
dir_csv_loader, dir_word_loader, dir_pdf_loader, dir_rst_loader = (
    data_loader.csv_loader(),
    data_loader.word_loader(),
    data_loader.pdf_loader(),
    data_loader.rst_loader(),
)

# ... then load them in the same csv, word, pdf, rst order.
csv_data, word_data, pdf_data, rst_data = (
    loader.load()
    for loader in (dir_csv_loader, dir_word_loader, dir_pdf_loader, dir_rst_loader)
)

# Flatten the loaded documents into parallel text / metadata lists.
texts, metadatas = DataLoaders.get_text_metadatas(
    csv_data=csv_data,
    pdf_data=pdf_data,
    word_data=word_data,
    rst_data=rst_data,
)






In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from typing import List

# Separators are listed from coarsest to finest: paragraphs first, then lines,
# sentences, words and finally single characters.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    separators=["\n\n", "\n", ". ", " ", ""],
    chunk_overlap=200,
    chunk_size=1000,
)

# Split every loaded text into chunks; each chunk carries its source metadata.
docs: List[Document] = text_splitter.create_documents(metadatas=metadatas, texts=texts)


In [None]:
# Show the first chunk, then the total number of chunks, one per line.
print(docs[0], len(docs), sep="\n")


page_content='Delta Login Methods

Direct Access Login Nodes

Direct access to the Delta login nodes is via SSH using your NCSA
username, password, and NCSA Duo MFA. See the NCSA
Allocation and Account Management page for links to NCSA Identity
and NCSA Duo services. The login nodes provide access to the CPU and GPU
resources on Delta.

See NCSA
Allocation and Account Management for the steps to change your NCSA
password for direct access and set up NCSA Duo.

For ACCESS awarded projects, to find your local NCSA username go to
your ACCESS Profile
page and scroll to the bottom for the Resource Provider Site
Usernames table. If you do not know your NCSA username, submit
a support request (help) for assistance.


In January 2024, Delta was upgraded to Slingshot11. Please use the
round robin login, login.delta.ncsa.illinois.edu, to SSH into the
system. For single host SSH, use dt-login03.delta.ncsa.illinois.edu or
dt-login04.delta.ncsa.illinois.edu. See the ACCESS Delta Notice:
Delta maint

In [None]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings

# The collection name is configurable through QDRANT_COLLECTION_NAME.
collection_name = os.getenv('QDRANT_COLLECTION_NAME', "data-collection")
embeddings = OpenAIEmbeddings()

# create vector Store
# Embeds the chunks in `docs` and indexes them under `collection_name`.
vectorstore = Qdrant.from_documents(
    collection_name=collection_name,
    location=":memory:",  # Local mode with in-memory storage only
    embedding=embeddings,
    documents=docs,
)

2024-08-05T11:34:49.587Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-08-05T11:34:50.420Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [None]:
from langchain import hub
from langchain_openai import ChatOpenAI

# Retrieve and generate using the relevant snippets
# The retriever wraps the vector store; the prompt template is pulled from the hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

# NOTE(review): max_retries=500 looks very high for an interactive notebook — confirm it is intended.
llm: ChatOpenAI = ChatOpenAI(model="gpt-4o", temperature=0, max_retries=500)

In [None]:
from langchain_community.llms import Ollama

# Read the API key for the hosted Ollama endpoint from the environment.
ollama_api_key = os.getenv('OLLAMA_API_KEY')
if ollama_api_key:
    ollama_headers = {"Authorization": f"Bearer {ollama_api_key}"}
else:
    # Without this guard the header would be the literal string "Bearer None".
    logging.getLogger(__name__).warning(
        "OLLAMA_API_KEY is not set; calling the Ollama endpoint without an Authorization header"
    )
    ollama_headers = {}

# Client for the llama3 model served at the URL below.
ollamallm: Ollama = Ollama(
    base_url="https://sd-gpu.ncsa.illinois.edu/ollama",
    model="llama3:latest",
    headers=ollama_headers,
    )

In [None]:
# Call the Ollama model directly (no retrieval step) to check that the endpoint responds.
ollamallm.invoke("What is the capital of the world?")

'There is no single "capital of the world." The concept of a capital city typically refers to the seat of government for a country, state, or province. Each country has its own capital city, and there is no one city that serves as the capital of the entire world.\n\nHowever, there are a few cities that have been referred to as the "capital of the world" in various contexts:\n\n* The United Nations (UN) headquarters in New York City, USA, is often referred to as the "capital of the world" because it is the site where world leaders gather to discuss global issues and make important decisions.\n* Geneva, Switzerland has been called the "capital of the world" due to its long history as a hub for international diplomacy, human rights organizations, and humanitarian work.\n* Brussels, Belgium serves as the capital of the European Union (EU) and is often referred to as the "capital of Europe" because it is home to many EU institutions and agencies.\n\nIt\'s worth noting that these cities are 

In [None]:
def format_docs(docs):
    """
    Join the ``page_content`` of every document into one string, separating
    consecutive documents with a blank line.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Inputs fed to the prompt: "context" is the retrieved documents joined into
# text by format_docs, "question" is the incoming query passed through unchanged.
openai_rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> OpenAI chat model -> plain string
openai_rag_chain = openai_rag_inputs | prompt | llm | StrOutputParser()

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Inputs fed to the prompt: "context" is the retrieved documents joined into
# text by format_docs, "question" is the incoming query passed through unchanged.
ollama_rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> Ollama model -> plain string
ollama_rag_chain = ollama_rag_inputs | prompt | ollamallm | StrOutputParser()

In [104]:
# Run the OpenAI-backed RAG chain on a sample question; the returned string is the cell output.
openai_rag_chain.invoke("What is Delta?")



2024-08-05T16:22:32.612Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


'Delta appears to be a high-performance computing (HPC) system designed to help applications transition from CPU-only to GPU or hybrid CPU-GPU codes. It features multiple node types, including CPU-only and GPU nodes, with specific architectural details such as AMD CPUs, NVIDIA A100/A40 GPUs, and a HPE/Cray Slingshot interconnect.'

In [16]:
# Run the Ollama-backed RAG chain on a sample question; the returned string is the cell output.
# NOTE(review): the saved output logs a request to OpenAI's chat completions endpoint, which
# suggests it was produced before this chain used `ollamallm` — re-run on a fresh kernel to confirm.
ollama_rag_chain.invoke("Who is the president of USA?")

2024-08-05T11:39:43.576Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-08-05T11:39:44.388Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


"I don't know."

In [17]:
## adding sources
from langchain_core.runnables import RunnableParallel

# Replace the "context" entry (a list of documents) with its joined text,
# leaving the other keys of the input dict untouched.
flatten_context = RunnablePassthrough.assign(
    context=lambda inputs: format_docs(inputs["context"])
)

# Answer generation from documents that have already been retrieved.
openai_rag_chain_from_docs = flatten_context | prompt | llm | StrOutputParser()

# Retrieve documents alongside the raw question, then add the generated text
# under "answer" so the result keeps the retrieved documents as well.
retrieve_with_question = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
openai_rag_chain_with_source = retrieve_with_question.assign(answer=openai_rag_chain_from_docs)

In [18]:
# Run the chain that also returns its sources; the result is a dict that includes the
# retrieved documents under 'context'.
openai_rag_chain_with_source.invoke("What is Delta?")

2024-08-05T11:40:01.412Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-08-05T11:40:04.526Z INFO    : httpx - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'context': [Document(metadata={'source': 'docs_copy/acknowledge.rst', 'row_page': '', '_id': '51435aea3cdb48cfaf92d1d19ebeb420', '_collection_name': 'delta-collection'}, page_content='Acknowledging Delta\n\nSee Delta\nCitations for information on how to properly acknowledge the NCSA\nDelta system or Delta Project/NCSA Staff.\n\nSee Acknowledging\nACCESS for information on how to properly acknowledge ACCESS.'),
  Document(metadata={'source': 'docs_copy/architecture.rst', 'row_page': '', '_id': 'ae4cc6f4c73740afb16a4b4b35da2abd', '_collection_name': 'delta-collection'}, page_content='System Architecture\n\nDelta is designed to help applications transition from CPU-only to\nGPU or hybrid CPU-GPU codes. Delta has some important architectural\nfeatures to facilitate new discovery and insight:\n\nA single processor architecture (AMD) across all node types: CPU and\nGPU\n\nSupport for NVIDIA A100 MIG GPU partitioning, allowing for\nfractional use of the A100s if your workload is not able to 