In [None]:
# Step 1: Install All Dependencies and Setup Ollama

print("Step 1.1: Installing libraries...")
# PyPDF is needed for the PDF loader
!pip install -q langchain langchain-community langchain-huggingface faiss-cpu pandas kaggle pypdf

print("\nStep 1.2: Installing and starting Ollama...")
# Install Ollama using the official script
!curl -fsSL https://ollama.com/install.sh | sh

# Run Ollama in the background
!nohup ollama serve > ollama.log 2>&1 &

# Give the server a moment to start
import time
time.sleep(5)

print("\nStep 1.3: Pulling the 'mistral' model (this may take 5-10 minutes)...")
!ollama pull mistral

print("\n✅ Full Setup Complete! You are ready for the next step.")

Step 1.1: Installing libraries...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m94.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m67.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.0/444.0 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have request

In [None]:
from pathlib import Path

# Location of the PDF that will be indexed. The path is relative, so it is
# resolved against the kernel's current working directory.
# NOTE(review): on Colab this presumably requires Google Drive to be mounted
# under ./drive - confirm where the mount happens.
pdf_file_path = "drive/MyDrive/Colab Notebooks/mental_health_at_work-7-133.pdf" # <-- CHANGE THIS

# Fail fast with an actionable message instead of letting a later cell fail
# inside the PDF loader with a less obvious error.
if not Path(pdf_file_path).is_file():
    raise FileNotFoundError(
        f"PDF not found at '{pdf_file_path}'. Check the path and, on Colab, "
        "make sure Google Drive is mounted."
    )

print(f"Using your uploaded PDF: {pdf_file_path}")

Using your uploaded PDF: drive/MyDrive/Colab Notebooks/mental_health_at_work-7-133.pdf


In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Tunable parameters for chunking and embedding, named so they are easy to find.
CHUNK_SIZE = 1000       # maximum characters per chunk
CHUNK_OVERLAP = 100     # characters shared between neighbouring chunks
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

print("Step 3.1: Loading the PDF document...")
# The PyPDFLoader loads the PDF page by page into a list of Document objects.
loader = PyPDFLoader(pdf_file_path)
documents = loader.load()
print(f"Successfully loaded {len(documents)} pages from the PDF.")

print("\nStep 3.2: Splitting the document into smaller chunks...")
# RAG works best with smaller, focused chunks of text.
# RecursiveCharacterTextSplitter is good at keeping related text together.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
)
split_documents = text_splitter.split_documents(documents)
print(f"Split the document into {len(split_documents)} chunks.")

# Without this guard an empty result would surface as a bare IndexError on the
# preview line below, which says nothing about the real cause.
if not split_documents:
    raise ValueError(
        f"No text chunks were produced from '{pdf_file_path}'. The PDF may be "
        "empty or contain only scanned images, so there is nothing to index."
    )

# Preview the first chunk as a sanity check of the extraction.
print(split_documents[0])

print("\nStep 3.3: Loading the embedding model...")
# Use a local, open-source embedding model for this.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

print("\nStep 3.4: Creating the vector store (knowledge base)...")
# This step creates the embeddings for each chunk and stores them in FAISS.
# This can take a moment depending on the size of the PDF.
vector_store = FAISS.from_documents(split_documents, embeddings)
print("\n✅ Vector store created successfully!")

Step 3.1: Loading the PDF document...
Successfully loaded 127 pages from the PDF.

Step 3.2: Splitting the document into smaller chunks...
Split the document into 366 chunks.
page_content='Soumya Swaminathan
WHO Chief Scientist
             v
Foreword
Working people, like all people, deserve an inherent right to the highest attainable standard of mental health at work, regardless 
of their type of employment. And people living with mental health conditions have a right to access, participate and thrive in 
work. Governments and employers have a responsibility to uphold that right by providing work that simultaneously prevents 
workers from experiencing excessive stress and mental health risks; protects and promotes workers’ mental health and well-
being; and supports people to fully and effectively participate in the workforce, free from stigma, discrimination or abuse.
Yet the world of work is changing. Across the globe, technology, globalization, demographic shifts, emergencies and c

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Step 3.4: Creating the vector store (knowledge base)...

✅ Vector store created successfully!


In [None]:
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate

print("Step 4.1: Building the QA Chain that returns sources...")

# Prompt that tells the model to answer only from the retrieved context and
# to admit when the context does not contain the answer.
prompt_template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer from the context, just say that you don't know, don't try to make up an answer.
Be concise and helpful.

Context: {context}

Question: {question}

Helpful Answer:"""
QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Local 'mistral' model, reached through the ChatOllama wrapper
llm = ChatOllama(temperature=0, model="mistral")

# Retriever view over the vector store built in the earlier cell
retriever = vector_store.as_retriever()

# "stuff" chain with the custom prompt; return_source_documents=True makes the
# result carry the retrieved documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=QA_PROMPT),
    return_source_documents=True,
)

print("✅ QA Chain is ready and will now return source documents.")

import textwrap

def ask_and_evaluate(question, chain):
    """Run one question through ``chain`` and print the answer with its sources.

    Args:
        question: The question text; it is passed unchanged to ``chain.invoke``.
        chain: An object whose ``invoke(question)`` returns a mapping with a
            ``'result'`` string and a ``'source_documents'`` iterable. Each
            source must expose ``page_content`` and a ``metadata`` mapping.

    Returns:
        None. Everything is written to standard output, wrapped at 80 columns.
    """
    divider = "=" * 50
    print(f"-> Question: {question}")

    # The response holds both the answer and the documents it was built from.
    response = chain.invoke(question)

    print("\n-> LLM Answer:")
    answer_text = response['result']
    print(textwrap.fill(answer_text, width=80))

    print("\n" + divider)
    print("-> Source Documents Used:")

    for position, source in enumerate(response['source_documents'], start=1):
        print(f"\n--- Source {position} ---")
        print(textwrap.fill(source.page_content, width=80))
        if 'page' not in source.metadata:
            continue
        # NOTE(review): the value is printed exactly as stored; with PyPDFLoader
        # this is presumably a 0-based page index - confirm before relying on it.
        print(f"\nPage Number: {source.metadata['page']}")

    print(divider)

# Try the chain on one sample question and print the answer with its sources.
sample_question = "I feel tired after work, and don't want to go to office. Do I have mental health problem?"
ask_and_evaluate(sample_question, qa_chain)

Step 4.1: Building the QA Chain that returns sources...
✅ QA Chain is ready and will now return source documents.
-> Question: I feel tired after work, and don't want to go to office. Do I have mental health problem?

-> LLM Answer:
 While feeling tired after work can be a common experience, it doesn't
necessarily indicate a mental health problem. However, if this persists over
time or is accompanied by other symptoms such as difficulty concentrating,
changes in appetite or sleep patterns, feelings of anxiety or depression, it
might be worth discussing with a healthcare professional to rule out any
underlying issues.

-> Source Documents Used:

--- Source 1 ---
Recommendations for  returning to work after  absence associated with  mental
health conditions © WHO / Lindsay Mackenzie

Page Number: 66

--- Source 2 ---
14 Guidelines on mental health at work Overall, no outcomes were reported for
positive mental  health, quality of life and functioning, suicidal behaviours,
substance use an

In [None]:
import os
import subprocess

# Check if the Ollama server process is running
try:
    result = subprocess.check_output("ps -ef | grep '[o]llama serve'", shell=True)
    print("Ollama server is already running.")
    print(result.decode('utf-8'))
except subprocess.CalledProcessError:
    # The command returns a non-zero exit code if no process is found
    print("Ollama server not found. Starting it now...")

    # Run Ollama in the background using nohup
    # This is the same command from our setup cell
    !nohup ollama serve > ollama.log 2>&1 &

    # Give the server a few seconds to start up
    import time
    time.sleep(5)
    print(" Ollama server has been started.")