# Import Libraries

In [1]:
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_mongodb import MongoDBAtlasVectorSearch
from pymongo import MongoClient
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    HarmBlockThreshold,
    HarmCategory,
)
import os
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv
load_dotenv()

True

# LLM Model

In [2]:
# Gemini chat model used by the cells below.
# The dangerous-content category is set to BLOCK_NONE; no other harm category is overridden here.
llm_safety_settings = {
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    safety_settings=llm_safety_settings,
)

In [3]:
# Smoke test: send a single prompt to confirm the configured model responds.
llm.invoke("what is your name")

AIMessage(content="I am a large language model, trained by Google.  I don't have a name.\n", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-b8fb7380-8c3a-4890-804e-28e5118113bc-0', usage_metadata={'input_tokens': 5, 'output_tokens': 21, 'total_tokens': 26, 'input_token_details': {'cache_read': 0}})

# Retriever

In [16]:
def vector_store(model_name,embedding_dim,cluster_uri,db_name,collection_name,
                 index_name="langchain-test-index-vectorstores"):
    """Build a MongoDB Atlas vector store that embeds text with a SentenceTransformer model.

    Args:
        model_name: SentenceTransformer model name used to embed queries and documents.
        embedding_dim: Width of the embedding vectors. Currently unused by this
            function; kept in the signature for the one-time index-creation
            step described in the body.
        cluster_uri: MongoDB connection string for the Atlas cluster.
        db_name: Name of the database that holds the collection.
        collection_name: Name of the collection that stores the embedded documents.
        index_name: Name of the Atlas vector search index to query. Defaults to
            the index name this notebook has always used.

    Returns:
        A ``MongoDBAtlasVectorSearch`` bound to the requested collection.

    Raises:
        ValueError: If ``cluster_uri`` is empty or ``None``.
    """
    # A missing env var yields None, and MongoClient(None) would silently
    # target localhost; fail here with a message that names the real problem.
    if not cluster_uri:
        raise ValueError(
            "cluster_uri is empty; set the CLUSTER_URL environment variable "
            "to your MongoDB Atlas connection string"
        )

    embeddings = SentenceTransformerEmbeddings(model_name=model_name)

    # Initialize the MongoDB Python client and pick the target collection.
    client = MongoClient(cluster_uri)
    collection = client[db_name][collection_name]

    store = MongoDBAtlasVectorSearch(
        collection=collection,
        embedding=embeddings,
        index_name=index_name,
        relevance_score_fn="cosine",
    )

    # The vector search index is not created here. To create it once on a new
    # collection, call `store.create_vector_search_index(dimensions=embedding_dim)`;
    # the dimensions must match the embedding model's output width
    # (this notebook pairs all-MiniLM-L6-v2 with 384).

    return store

In [17]:
# Embedding model and the dimensionality configured for it.
model_name, embedding_dim = "all-MiniLM-L6-v2", 384
# Connection string read from the environment; None when CLUSTER_URL is unset.
cluster_uri = os.environ.get("CLUSTER_URL")
# Database and collection handed to the vector store.
db_name, collection_name = "langchain", "vector"


In [18]:
# NOTE(review): this assignment rebinds `vector_store` from the factory function
# to the store it returns, so re-running this cell without first re-running the
# `def vector_store` cell fails. Consider giving the instance a distinct name.
vector_store = vector_store(
    model_name=model_name,
    embedding_dim=embedding_dim,
    cluster_uri=cluster_uri,
    db_name=db_name,
    collection_name=collection_name,
)
# Sanity-check retrieval with a sample query, asking for 3 results.
vector_store.similarity_search("when to eat salad",k=3)

[Document(metadata={'_id': 'd3e3bfe9-edf9-4e04-a3dc-84999f759dde', 'source': 'https://www.everydayhealth.com/diet-nutrition-pictures/best-salad-greens-for-your-health.aspx', 'head': 'Healthy Recipes', 'title': 'Ranking_14_Top_Salad_Greens_From_Best_to_Worst'}, page_content='9. RomainePortion:\xa01 cup shreddedCalories:\xa05Carbs:\xa01 gFiber:\xa00.05 gProtein:\xa00.05 gWhy It’s Healthy\xa0Romaine lettuce’s dark green color, long leaves, and crunchy texture make it a very popular salad base. Two cups of romaine bring about 30 percent of your daily vitamin A, and nearly three-quarters of your vitamin K.[18]Meal Prep Inspo“Romaine lettuce has a great crunch and is delicious served in a salad or on top of a sandwich,” says Kennedy. “Romaine can even be lightly grilled for a unique, slightly charred flavor.”To boost the nutritional value of your salad, mix romaine with some spinach or kale to pack in more antioxidants, or opt for a premixed blend. “Prepackaged salad green mixtures offer a w

In [19]:
# Expose the vector store as a retriever; search_kwargs requests k=2 documents per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

In [20]:
def docs2str(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        One string with the contents joined by ``"\\n\\n"``; empty when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# History Aware Chat Prompt Template

In [21]:
# System instruction for the question-rewriting step: turn a follow-up that
# leans on earlier turns into a standalone question, without answering it.
contextualize_q_system_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

# Message order: system instruction, prior turns, then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Retriever wrapper that is given the LLM and the rewrite prompt above.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [22]:
from langchain_core.messages import HumanMessage, AIMessage
# Conversation so far, passed to the chain under the "chat_history" key; starts empty.
chat_history = []

In [23]:
# System prompt for the answering step. Adjacent string literals are joined
# with nothing in between, so each fragment must end with its own separator
# (the second fragment previously ran straight into the third: "doctorUse").
# The "{context}" placeholder is the slot for the retrieved documents.
system_prompt = (
    "You are an health doctor A.I who provide medical advices for queries. "
    "communicate as a real doctor. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use five sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
# Answering prompt: system instructions (with the context slot), prior turns,
# then the user's question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Chain that feeds the documents and the prompt to the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [24]:
# Only the question and the history are inputs. The retrieval chain fills in
# "context" itself by running the retriever, so passing a retriever object
# under that key was never used and has been dropped.
response = rag_chain.invoke({"input":"symptoms of brest cancer?","chat_history":chat_history})

In [25]:
# Display the generated answer from the chain output.
response["answer"]

'Breast cancer symptoms can include a lump or mass, breast or nipple pain, nipple retraction (turning inward), and skin changes like dimpling, redness, flakiness, or thickening.  You may also experience nipple discharge or swollen lymph nodes.  If you notice any of these changes, please consult a doctor immediately for proper evaluation and diagnosis. Early detection is crucial for successful treatment.\n'

In [34]:
# Inspect the metadata of the first document returned under "context".
response["context"][0].metadata

{'head': '',
 'source': 'https://www.everydayhealth.com/breast-cancer/guide/',
 'title': 'What_Is_Breast_Cancer_Symptoms_Causes_Diagnosis_Treatment_and_Prevention'}

In [None]:
import sqlite3
from datetime import datetime
import uuid

# SQLite database file used by the helpers below; a relative path, so it
# resolves against the kernel's current working directory.
DB_NAME = "rag_app.db"

def get_db_connection():
    """Open a new SQLite connection to ``DB_NAME``.

    Returns:
        A ``sqlite3.Connection`` whose rows are ``sqlite3.Row`` objects, so
        columns can be read by name. The caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_NAME)
    connection.row_factory = sqlite3.Row
    return connection

def create_application_logs():
    """Create the ``application_logs`` table if it does not already exist.

    The table stores one row per question/answer exchange: an auto-incrementing
    ``id``, the ``session_id``, the ``user_query``, the ``gpt_response``, the
    ``model`` name, and a ``created_at`` timestamp that defaults to the
    insertion time. Safe to call repeatedly.
    """
    conn = get_db_connection()
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS application_logs
    (id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT,
    user_query TEXT,
    gpt_response TEXT,
    model TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
        # Explicit commit so the schema change does not depend on the
        # connection's implicit transaction handling.
        conn.commit()
    finally:
        # Always release the connection, even if the statement fails.
        conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model):
    """Append one question/answer exchange to ``application_logs``.

    Args:
        session_id: Identifier that groups the exchanges of one conversation.
        user_query: The user's question text.
        gpt_response: The model's answer text.
        model: Name of the model that produced the answer.
    """
    conn = get_db_connection()
    try:
        conn.execute(
            'INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
            (session_id, user_query, gpt_response, model),
        )
        conn.commit()
    finally:
        # Close on every path; an uncommitted insert is discarded when a
        # failure prevents the commit above.
        conn.close()

def get_chat_history(session_id):
    """Return the stored conversation for a session as role/content messages.

    Args:
        session_id: Identifier of the conversation to load.

    Returns:
        A list of dicts in chronological order. Each logged exchange contributes
        ``{"role": "human", "content": ...}`` followed by
        ``{"role": "ai", "content": ...}``. Empty when the session has no rows.
    """
    conn = get_db_connection()
    try:
        # `created_at` only has one-second resolution, so `id` (autoincrement)
        # breaks ties and keeps exchanges logged within the same second in
        # insertion order.
        rows = conn.execute(
            'SELECT user_query, gpt_response FROM application_logs WHERE session_id = ? ORDER BY created_at, id',
            (session_id,),
        ).fetchall()
    finally:
        # Rows are fully fetched above, so the connection can be released
        # on every path before building the result.
        conn.close()

    messages = []
    for row in rows:
        messages.append({"role": "human", "content": row['user_query']})
        messages.append({"role": "ai", "content": row['gpt_response']})
    return messages

# Initialize the database. Safe to re-run: the table is created with IF NOT EXISTS.
create_application_logs()