<a href="https://colab.research.google.com/github/maarishkhan/PathwayPal/blob/main/AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PathwayPal

In [4]:
!pip install streamlit
!pip install langchain
!pip install langchain_community

Collecting streamlit
  Downloading streamlit-1.35.0-py2.py3-none-any.whl (8.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.

In [5]:
import requests
import json
import streamlit as st
from langchain.llms import Ollama
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.document_loaders import PDFPlumberLoader
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, AIMessage
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.history_aware_retriever import create_history_aware_retriever
import os

# Local Ollama text-generation endpoint and the headers sent with each request
url = "http://localhost:11434/api/generate"
headers = {"Content-Type": "application/json"}

# Instruction text that generate_response() places at the start of a new conversation
initial_prompt = " ".join(
    [
        "As a Humanitarian Aid Worker, your primary goal is to assist refugees by addressing their questions promptly and clearly.",
        "Regardless of the language in which the queries are posed, strive to respond in the same language for effective communication and understanding.",
        "Offer concise and professional answers to ensure the refugees receive the support they need in a timely manner.",
        "Remember, your responses should be both informative and empathetic to meet the refugees' diverse needs and situations.",
        "Remember, your role is crucial in providing essential support and information to refugees in need.",
        "Your professionalism and compassion can make a significant difference in their lives.",
    ]
)

# Plain-text turns exchanged through the raw HTTP endpoint (see generate_response)
conversation_history = []

# Message objects recorded by the PDF retrieval path (see handle_pdf_query)
chat_history = []
# Directory scanned for PDFs; load_pdf_data() also passes it as the Chroma persist directory
folder_path = "data"

# Shared Ollama handles: the "gemma" model for generation, library defaults for embeddings
cached_llm = Ollama(model="gemma")
embedding = OllamaEmbeddings()

# Chunking settings applied to the loaded PDF documents before they are embedded
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=80,
    length_function=len,
    is_separator_regex=False,
)

# Answer-generation template for the PDF path; it exposes the {input} and {context} slots.
# NOTE(review): the text opens a second [INST] before the first is closed, and this tag
# style may not be what the "gemma" model expects — confirm before changing the wording.
_RAW_PROMPT_TEMPLATE = """
    <s>[INST] As a Humanitarian Aid Worker, your primary goal is to assist refugees by addressing their questions
    promptly and clearly. Regardless of the language in which the queries are posed, strive to respond in the same
    language for effective communication and understanding. Offer concise and professional answers to ensure the
    refugees receive the support they need in a timely manner. Remember, your responses should be both informative and
    empathetic to meet the refugees' diverse needs and situations.
    Remember, your role is crucial in providing essential support and information to refugees in need. Your
    professionalism and compassion can make a significant difference in their lives.
    [INST] {input}
           Context: {context}
           Answer:
    [/INST]
    """

raw_prompt = PromptTemplate.from_template(template=_RAW_PROMPT_TEMPLATE)


# Load PDF data and create the Chroma vector store at startup
def load_pdf_data():
    """Index every PDF in ``folder_path`` into a Chroma vector store.

    Each PDF is read with ``PDFPlumberLoader``, chunked once with the module-level
    ``text_splitter``, embedded with ``embedding`` and written to a Chroma store
    whose persist directory is ``folder_path``.

    Returns:
        The Chroma vector store, or ``None`` when ``folder_path`` is not a
        directory or no documents could be loaded from it.
    """
    # A missing path (or a plain file with that name) means there is nothing to index.
    if not os.path.isdir(folder_path):
        return None

    docs = []
    # Sorted so the documents are indexed in a stable order from run to run.
    for filename in sorted(os.listdir(folder_path)):
        # Accept ".PDF" as well as ".pdf".
        if not filename.lower().endswith(".pdf"):
            continue
        loader = PDFPlumberLoader(os.path.join(folder_path, filename))
        # load(), not load_and_split(): the latter already chunks with the loader's
        # default splitter, so splitting again below would chunk the text twice.
        docs.extend(loader.load())

    if not docs:
        return None

    chunks = text_splitter.split_documents(docs)
    vector_store = Chroma.from_documents(
        documents=chunks, embedding=embedding, persist_directory=folder_path
    )
    vector_store.persist()
    return vector_store


# Build the vector store when this script is executed. The value is None when
# load_pdf_data() found nothing to index; main() checks it before running the PDF query.
vector_store = load_pdf_data()


def generate_response(prompt):
    """Send the running conversation plus ``prompt`` to the Ollama HTTP endpoint.

    The request goes straight to ``url`` with ``requests``; on the first turn the
    conversation is seeded with ``initial_prompt``.

    Args:
        prompt: The user's latest message.

    Returns:
        The model's reply text, or ``None`` when the request fails. Failures are
        reported through ``st.error``.
    """
    try:
        # Turns added by this call. They are committed to the shared history only
        # after a successful reply, so a failed request is not resent next time.
        pending = [prompt] if conversation_history else [initial_prompt, prompt]

        # Prepare the data payload
        data = {
            "model": "gemma",
            # Ask for one JSON object; response.json() below cannot parse a
            # streamed body made of several JSON objects.
            "stream": False,
            "prompt": "\n".join(conversation_history + pending),
        }

        # Make a POST request to the API
        response = requests.post(url, headers=headers, data=json.dumps(data))

        # Check if the request was successful
        if response.status_code != 200:
            st.error(f"Error: {response.status_code} - {response.text}")
            return None

        actual_response = response.json().get("response", "")
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None

    conversation_history.extend(pending)
    conversation_history.append(actual_response)
    return actual_response


def handle_query(question):
    """Answer ``question`` directly with the shared Ollama LLM, without retrieval.

    Args:
        question: The text sent to ``cached_llm``.

    Returns:
        Whatever ``cached_llm`` returns for the question.
    """
    # invoke() is the current entry point; predict() is deprecated.
    return cached_llm.invoke(question)


def handle_pdf_query(queries):
    """Answer ``queries`` from the indexed PDFs, taking earlier turns into account.

    Builds a history-aware retrieval chain over ``vector_store``, runs it with the
    accumulated ``chat_history``, then records the new question/answer pair in
    ``chat_history``.

    Args:
        queries: The user's question.

    Returns:
        A dict with ``"answer"`` (the chain's answer) and ``"sources"`` (one dict
        per retrieved document, with its ``"source"`` metadata and ``"page_content"``).
    """
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 20, "score_threshold": 0.1},
    )

    # Prompt used to turn the conversation so far into a stand-alone search query.
    retriever_prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
            (
                "human",
                "Given the above conversation, generate a search query to lookup in order to get information relevant "
                "to the conversation",
            ),
        ]
    )

    history_aware_retriever = create_history_aware_retriever(
        llm=cached_llm, retriever=retriever, prompt=retriever_prompt
    )

    document_chain = create_stuff_documents_chain(cached_llm, raw_prompt)
    retrieval_chain = create_retrieval_chain(history_aware_retriever, document_chain)

    # The chain is a Runnable, so it is run with invoke(). chat_history must be passed
    # explicitly, otherwise the history-aware retriever never sees the earlier turns.
    result = retrieval_chain.invoke({"input": queries, "chat_history": chat_history})

    # Record this turn only after the chain has run, so the history handed to the
    # chain above contains previous turns only.
    chat_history.append(HumanMessage(content=queries))
    chat_history.append(AIMessage(content=result["answer"]))

    sources = [
        # .get(): a document without "source" metadata should not abort the whole answer.
        {"source": doc.metadata.get("source"), "page_content": doc.page_content}
        for doc in result["context"]
    ]
    return {"answer": result["answer"], "sources": sources}


def main():
    """Render the Pathway Pal page and answer the prompt when the button is pressed."""
    st.title("Pathway Pal")

    # Text box holding the user's question
    prompt = st.text_input("Enter your prompt here...", "")

    # Nothing to do until the button is clicked
    if not st.button("Generate Response"):
        st.write("Awaiting your prompt...")
        return

    # The button was clicked with an empty text box
    if not prompt:
        st.warning("Please enter a prompt.")
        return

    # Direct model answer via the HTTP endpoint
    response = generate_response(prompt)
    if response:
        st.text_area("Response", value=response, height=200)

    # Retrieval-backed answer, only when a vector store was built
    if vector_store:
        pdf_response = handle_pdf_query(prompt)
        st.write("PDF Sources:")
        st.write(pdf_response)

# Render the app only when this code runs as the main script, not when it is imported.
if __name__ == "__main__":
    main()

2024-06-09 02:47:14.139 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-06-09 02:47:14.146 Session state does not function when running a script without `streamlit run`


In [None]:
# NOTE(review): the path below is Colab's kernel launcher script, not a file containing
# the app code from the cell above — presumably the app first has to be saved to its own
# .py file and that path passed to `streamlit run`; confirm.
!streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.199.180.204:8501[0m
[0m
