In [None]:
%pip install -r requiremnets.txt

In [None]:
# einops is installed on its own, separately from the requirements file.
# NOTE(review): presumably required by one of the model libraries used below — confirm,
# and consider moving it into the requirements file with a pinned version.
%pip install einops

In [None]:
# Set paths in your notebook.
# These are absolute, machine-specific paths; change them to match your own setup.
# NOTE(review): none of these three names is referenced again in the visible cells
# (later cells hard-code their own paths under the same root) — confirm they are still needed.
model_path = r"E:/CSE299/chatbot/llm"  # presumably where local model files live — TODO confirm
embedding_save_path = r"E:/CSE299/chatbot/Embedding"  # presumably the root for persisted embeddings — TODO confirm
pdfs_path = r"E:/CSE299/chatbot/docs"  # presumably the source PDF directory — TODO confirm

In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


In [None]:
import subprocess
import os
def stream_output(process):
    """Echo a subprocess's stdout to this process's stdout, line by line.

    Args:
        process: An object whose ``stdout`` attribute is an iterable of text
            lines (for example a ``subprocess.Popen`` opened in text mode).

    Each item is printed as-is with no extra newline appended, so the lines
    are expected to carry their own line endings.
    """
    pipe = process.stdout
    for text in pipe:
        # Items are already strings, so they are printed without decoding.
        print(text, end='')

# Function to start Ollama
def start_ollama():
    """Launch ``ollama serve`` as a background child process.

    The child's stdout and stderr are discarded. The function returns as soon
    as the process has been spawned; it does not wait for the server to become
    ready.

    Returns:
        The ``subprocess.Popen`` handle of the spawned process, or ``None`` if
        the process could not be started.
    """
    try:
        ollama_process = subprocess.Popen(
            ["ollama", "serve"],
            stdout=subprocess.DEVNULL,  # Discard stdout
            stderr=subprocess.DEVNULL,  # Discard stderr
            # Only go through the shell on Windows, as before. On POSIX,
            # shell=True combined with an argument *list* executes just
            # "ollama" and passes "serve" to the shell itself, so the server
            # would never actually be started.
            shell=(os.name == "nt"),
        )
    except (OSError, ValueError) as e:
        print(f"Error starting Ollama: {e}", flush=True)
        return None

    print("Ollama is starting...", flush=True)
    return ollama_process



# When Backend starts
# Launch the Ollama server as soon as this cell runs and keep the returned handle
# (start_ollama returns None if the launch failed) for later inspection.
ollama_process = start_ollama()

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings


def get_embedding_model(model_name, model_kwargs, path):
    """Build a ``HuggingFaceEmbeddings`` instance for the given model.

    Args:
        model_name: Model identifier forwarded as ``model_name``.
        model_kwargs: Dict forwarded unchanged as ``model_kwargs``.
        path: Directory forwarded as ``cache_folder``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` object.
    """
    return HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        # Always request normalized embeddings from the encoder.
        encode_kwargs={'normalize_embeddings': True},
        cache_folder=path,
    )

In [None]:
# @title get Ollama model

from langchain_community.llms import Ollama

def get_ollama_model(model):
    """Return an ``Ollama`` LLM wrapper configured for the given model name.

    Args:
        model: Model name/tag forwarded as ``Ollama(model=...)``.
    """
    return Ollama(model=model)


In [None]:
# import os
from transformers import AutoModel


# Embedding model identifier and the options handed to the loader.
embedding_model_name = "BAAI/bge-small-en-v1.5"
model_kwargs = dict(device='cpu', trust_remote_code=True)

# Directory passed to get_embedding_model as its `path` (used there as the cache folder).
embedding_model_save_path = r"E:\CSE299\chatbot\llm\baai"

# Build the embedding object with the helper defined earlier in this notebook.
embedding = get_embedding_model(
    embedding_model_name,
    model_kwargs,
    path=embedding_model_save_path,
)

In [None]:
# Use the maintained `langchain_chroma` integration (the same class the import cell
# already brings in). Importing Chroma from the deprecated `langchain.vectorstores`
# path here would silently shadow it with the legacy implementation.
from langchain_chroma import Chroma

embedding_vector_db_path = r"E:\CSE299\chatbot\Embedding\baai\recursive"

# Open the Chroma store persisted at the path above, using the embedding object
# built earlier for query embedding.
vectorstore = Chroma(persist_directory=embedding_vector_db_path, embedding_function=embedding)

# Similarity-search retriever that returns the top 6 matches per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [None]:
# Alternative prompt template with step-by-step instructions.
# Placeholders to fill in: {history}, {question}, {context}.
# NOTE(review): this constant is not referenced by any other visible cell — confirm
# whether it is still in use.
NEW_COT_PROMPT_WITH_HISTORY = """
Here's the conversation so far:
{history}

If any information from the previous conversation matches the user's current question or provides useful background, feel free to incorporate it into your response. 

1. Identify the type of question based on the context and prior conversation:
   - For greetings or casual questions, respond with a friendly, concise message.
   - For straightforward information requests, provide a direct answer.
   - For complex or multi-step questions, break down each part and answer carefully.

2. Evaluate the current question:
   Question: {question}
   Context: {context}

3. Use relevant context to support accuracy and clarity in your response if applicable.

Begin crafting your response:
"""

In [None]:
# Create the LLM used by the rest of the notebook via the get_ollama_model helper,
# selecting the "qwen2.5:1.5b" model tag.
llm = get_ollama_model("qwen2.5:1.5b")

In [None]:
from langchain.memory import ConversationEntityMemory, ConversationBufferMemory

# Entity memory backed by the notebook's LLM; conversation turns are labelled
# "User" / "Chatbot".
entity_memory = ConversationEntityMemory(
    llm=llm,
    human_prefix="User",
    ai_prefix="Chatbot",
    memory_key="entities",
    return_messages=True,
)

# Initialize conversation buffer memory
# return_messages=False makes the "history" variable a plain-text transcript that
# uses the User/Chatbot prefixes. With return_messages=True the prefixes are ignored
# and a list of message objects would be interpolated into the string prompt's
# {history} slot as its Python repr.
conversation_buffer = ConversationBufferMemory(
    human_prefix="User",
    ai_prefix="Chatbot",
    memory_key="history",
    return_messages=False,
)

In [None]:
# Define the prompt template
# Plain-text template with four placeholders that must be supplied when it is
# formatted: {context}, {history}, {entities}, {question}.
PROMPT_TEMPLATE = """
Use the following context, conversation history, and extracted entities to answer the question clearly:
Context:
{context}

Conversation History:
{history}

Entities:
{entities}

Question:
{question}

Answer in simple and detailed terms.
"""

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableMap, RunnableSequence

# llm = get_ollama_model("gemma2:2b")

# Build a ChatPromptTemplate from the PROMPT_TEMPLATE string
# (placeholders: context, history, entities, question).
prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

# Define a function to extract memory outputs
def extract_memory_data(user_input):
    """Gather the values needed to fill the prompt for one user turn.

    Args:
        user_input: The user's current question as a string.

    Returns:
        A dict with the keys ``"context"`` (whatever the retriever returns for
        the question), ``"history"`` (from the conversation buffer),
        ``"entities"`` (from the entity memory) and ``"question"`` (the
        unchanged ``user_input``).
    """
    history = conversation_buffer.load_memory_variables({}).get("history", "")

    # The entity memory extracts entities from the current user text, so it must
    # receive that text under exactly one input key; calling it with an empty
    # dict raises "One input key expected".
    entities = entity_memory.load_memory_variables({"input": user_input}).get("entities", "")

    # `invoke` is the supported retriever entry point; `get_relevant_documents`
    # is deprecated.
    context = retriever.invoke(user_input)

    return {
        "context": context,
        "history": history,
        "entities": entities,
        "question": user_input,
    }


# # Build the RAG retrieval chain
# retrieval_chain = RunnableSequence(
#     steps=[
#         RunnableMap(extract_memory_data),  # Prepare inputs for the chain
#         prompt,
#         llm,
#         StrOutputParser(),
#     ]
# )

# for chunk in retrieval_chain.stream("explain newtons first law with example"):
#     print(chunk, end="", flush=True)

In [None]:

# Terminal chat function
def chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Each turn reads a line from the user, gathers prompt inputs with
    ``extract_memory_data``, runs them through ``prompt | llm | StrOutputParser()``
    and prints the answer. The exchange is then saved to both memory objects so
    later turns can use it. Typing ``exit`` (any case) ends the loop. Errors
    raised while answering a turn are printed and the loop continues.
    """
    # Build the chain once: it does not depend on per-turn state. The chain must
    # start from the `prompt` runnable; a raw template string cannot be piped.
    retrieval_chain = prompt | llm | StrOutputParser()

    print("Chatbot: Hello! Ask me anything. Type 'exit' to end the chat.")
    while True:
        # Do not assign to a local called `input` anywhere in this function:
        # that would shadow the builtin and make this call raise UnboundLocalError.
        user_input = input("You: ").strip()
        if user_input.lower() == "exit":
            print("Chatbot: Goodbye!")
            break
        if not user_input:
            # Nothing to answer; ask again instead of sending an empty question.
            continue

        try:
            # Echo the question so it appears in the output next to the answer.
            print(f"You: {user_input}")
            inputs = extract_memory_data(user_input)

            response = retrieval_chain.invoke(inputs)
            print(response)

            # Record the turn in both memories; without the second call the
            # entity store is never updated and {entities} stays empty.
            turn_in, turn_out = {"input": user_input}, {"output": response}
            conversation_buffer.save_context(turn_in, turn_out)
            entity_memory.save_context(turn_in, turn_out)
        except Exception as e:
            # Best-effort loop: report the failure and keep the chat alive.
            print(f"Error: {e}")

# Start the interactive chat loop when this code is executed as the main module.
if __name__ == "__main__":
    chat()