In [2]:
%pip install --q unstructured langchain
%pip install --q "unstructured[all-docs]"
!ollama pull nomic-embed-text
!ollama list
%pip install --q chromadb
%pip install --q langchain-text-splitters

Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [None]:
# Location of the PDF to index, relative to the notebook's working directory.
# A forward slash is used because it resolves on every platform; a backslash
# separator is Windows-only, and "\F" is an invalid escape sequence that newer
# Python versions warn about.
local_path = "books/Fundamentals-of-Psychological-Disorders.pdf"

# Local PDF file uploads
if local_path:
    # Parse the PDF into LangChain documents
    loader = UnstructuredPDFLoader(file_path=local_path)
    data = loader.load()
else:
    # NOTE: `data` stays undefined on this branch, so later cells will fail
    # until a path is provided.
    print("Upload a PDF file")

In [None]:
# Preview the beginning of the first loaded document. The text is truncated so
# the cell does not dump the full content into the notebook output
# (NOTE(review): with the loader's default settings the whole PDF is presumably
# returned as a single document rather than one per page — confirm).
data[0].page_content[:1000]



In [None]:
# Break the loaded document(s) into overlapping character-based chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=7500,
)
chunks = text_splitter.split_documents(data)

In [None]:
# Embed the chunks with the Ollama embedding model and store them in a Chroma
# collection named "local-rag"
vector_db = Chroma.from_documents(
    collection_name="local-rag",
    embedding=OllamaEmbeddings(show_progress=True, model="nomic-embed-text"),
    documents=chunks,
)

OllamaEmbeddings: 100%|██████████| 129/129 [05:08<00:00,  2.39s/it]


## Retrieval

In [None]:
# Chat model served by Ollama; change `local_model` to use a different one
local_model = "phi3"
llm = ChatOllama(model=local_model)

In [None]:
# Prompt asking the LLM to rewrite the user's question as five alternative
# phrasings, one per line; `{question}` is the only input variable.
MULTI_QUERY_TEMPLATE = """You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""

QUERY_PROMPT = PromptTemplate(
    template=MULTI_QUERY_TEMPLATE,
    input_variables=["question"],
)

In [None]:
# RAG prompt: instructs the model to answer only from the supplied context
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Multi-query retriever built from the vector store's retriever, the chat
# model, and the query-rewriting prompt
base_retriever = vector_db.as_retriever()
retriever = MultiQueryRetriever.from_llm(
    base_retriever,
    llm,
    prompt=QUERY_PROMPT,
)

In [None]:
def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the prompt's {context} slot receives the string form of
    a list of Document objects (including their metadata) instead of plain text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the incoming question is sent to the retriever (whose results
# are flattened to text) and passed through unchanged as "question"; the filled
# prompt goes to the LLM and the reply is parsed to a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Read a question typed by the user and run it through the chain
user_question = input("")
chain.invoke(user_question)

OllamaEmbeddings: 100%|██████████| 1/1 [00:06<00:00,  6.85s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.10s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]


'The book "Fundamentals of Psychological Disorders" provides a comprehensive overview of various psychological disorders, including Autism Spectrum Disorder (ASD). Chapter 8 specifically focuses on ASD and details the clinical presentation of the disorder. It explains that ASD is characterized by two main features: impairments in social communication and interaction, as well as restricted, repetitive patterns of behavior, interests, or activities. The book covers both Criterion A (social communication) and Criterion B (restricted/repetitive behaviors), offering insights into the diagnostic process for ASD by outlining specific symptoms under each criterion. This chapter also delves into early diagnosis and highlights why individuals with varying levels of intellectual functioning, such as those with exceptionally high or low IQ, might have their symptoms overlooked. For more in-depth information on Intellectual Developmental Disorder (IDD) and specific learning disorders related to rea

In [None]:
# Run a fixed sample question through the chain
sample_question = "What is Epidemiology?"
chain.invoke(sample_question)

OllamaEmbeddings: 100%|██████████| 1/1 [00:06<00:00,  6.40s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.08s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.10s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


"Clinical assessment refers to the process of gathering information about a patient through various methods such as observation, psychological tests, neurological evaluations, and interviews. Reliability in this context relates to the consistency with which these methods produce stable and consistent results over time, while validity ensures that the assessments accurately measure what they are intended to measure. Standardization is crucial as it provides a uniform approach to administering tests and evaluations, thereby minimizing variability in how each patient's data is collected.\n\nSection 3.1 Review Questions\n\n1. Clinical assessment involves multiple methods that have their own strengths (e.g., objectivity of psychological testing) and limitations (e.g., subjective interpretation of interviews). Reliability ensures consistent results across repeated tests, whereas validity guarantees the assessment measures accurately. Standardization allows for uniform administration, critica

In [None]:
# Run a sample question that also constrains the answer length
chain.invoke("explain clinical disorder in 20 words or less")

OllamaEmbeddings: 100%|██████████| 1/1 [00:05<00:00,  5.77s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.07s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.12s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.12s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


'A clinical disorder is a psychological condition diagnosed through symptoms, causing distress and impairment.'

In [None]:
import asyncio
import threading
from typing import List
from fastapi import FastAPI
from langchain_community.llms import Ollama
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from langserve import add_routes
import uvicorn

# Stand-alone LangServe demo. Dedicated names are used so this cell does not
# overwrite the RAG `chain` / `template` defined earlier in the notebook.
joke_llm = Ollama(model="mistral")
joke_prompt = PromptTemplate.from_template("Tell me a joke about {topic}.")
joke_chain = joke_prompt | joke_llm | CommaSeparatedListOutputParser()

# Expose the chain over HTTP under the /chain path
app = FastAPI(title="LangChain", version="1.0", description="The first server ever!")
add_routes(app, joke_chain, path="/chain")

if __name__ == "__main__":
    server_options = {"host": "localhost", "port": 8000}
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running (plain script): serve in the foreground.
        uvicorn.run(app, **server_options)
    else:
        # An event loop is already running (e.g. inside a Jupyter kernel), where
        # a direct uvicorn.run() call fails because it starts its own loop.
        # Serve from a daemon thread so the notebook stays usable.
        server_thread = threading.Thread(
            target=uvicorn.run,
            args=(app,),
            kwargs=server_options,
            daemon=True,
        )
        server_thread.start()

In [None]:
# Clean-up: delete the collection backing `vector_db`
# (NOTE(review): presumably this removes only the "local-rag" collection, not
# every collection in the database — confirm against the Chroma wrapper docs)
vector_db.delete_collection()