# Chatbot with document and chat history
This script demonstrates how to create a chatbot that answers questions about a web document using LangChain and FAISS, passing the running chat history to the model on every turn.


In [11]:
import os
import sys
import numpy as np
from dotenv import load_dotenv


# Add the parent of the working directory to sys.path so that the local
# Update_Git module (imported just below) can be found.
current_path = os.getcwd()
parrent_path = os.path.abspath(os.path.join(current_path, '..'))
sys.path.append(parrent_path)

from Update_Git import git_add, git_commit, git_push

# Hand this notebook's path, a commit message, and the branch name 'main' to
# the Update_Git helpers (presumably stage / commit / push; the helper bodies
# are not visible here, so confirm against Update_Git).
file_path = os.path.join(current_path, 'Test_5.ipynb')
git_add(file_path)
git_commit('Updated test 5')
git_push('main')

# Remove any OPENAI_API_KEY already set in the process environment (it was
# being picked up from an unidentified source) before loading the .env file
# that sits next to this notebook.
os.environ.pop("OPENAI_API_KEY", None)
env_path = os.path.join(current_path, ".env")
load_dotenv(dotenv_path=env_path)

True

In [10]:
# LangChain Core
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser, JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages import HumanMessage, AIMessage

# LangChain OpenAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# LangChain Chains
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# LangChain Community
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.faiss import FAISS

# LangChain Utilities
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Function to load and split documents from a given URL
def document_loader(url, chunk_size=400, chunk_overlap=50):
    """Load a web page and split it into overlapping text chunks.

    Args:
        url: Address of the page passed to ``WebBaseLoader``.
        chunk_size: Maximum number of characters per chunk (default 400).
        chunk_overlap: Number of characters shared by consecutive chunks so
            that context carries across chunk boundaries (default 50).

    Returns:
        The chunked documents produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    loader = WebBaseLoader(url)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

# Function to create a FAISS vector database
def create_db(docs):
    """Embed ``docs`` with OpenAI embeddings and index them in FAISS.

    Args:
        docs: Documents to index, as accepted by ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from ``docs``.
    """
    return FAISS.from_documents(docs, embedding=OpenAIEmbeddings())

# Function to create a retrieval chain using the vector store
def create_chain(vector_store):
    """Build a retrieval chain that answers from ``vector_store`` content.

    The chain expects ``input`` (the user's question) and ``chat_history``
    (a list of prior messages) when invoked; the prompt's ``{context}`` slot
    is where the combined documents are inserted.

    Args:
        vector_store: Vector store exposing ``as_retriever``.

    Returns:
        The runnable produced by ``create_retrieval_chain``.
    """
    model = ChatOpenAI(
        model='gpt-3.5-turbo',
        temperature=0.4,
    )

    # System message carries the retrieved context, followed by the earlier
    # conversation turns and finally the new user question.
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer the user's question based on the given context: {context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}")
    ])

    # Stuff the documents into the prompt and return the reply as a string.
    chain = create_stuff_documents_chain(
        llm=model,
        prompt=prompt,
        output_parser=StrOutputParser(),
    )

    # The keyword must be spelled ``search_kwargs`` for the k=3 limit to be
    # handed to the retriever under the name it reads.
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    return create_retrieval_chain(retriever, chain)


def process_chat(chain, question, history):
    """Send one user turn through ``chain`` and return the answer.

    Only ``input`` and ``chat_history`` are supplied. No ``context`` entry is
    passed: the function no longer reads a module-level ``docs`` variable, so
    it works no matter where it is called from. A chain built with
    ``create_retrieval_chain`` populates ``context`` from its retriever.

    Args:
        chain: Object whose ``invoke`` accepts a dict and returns a mapping
            containing an ``"answer"`` key.
        question: The user's current message.
        history: Prior conversation messages, forwarded as ``chat_history``.

    Returns:
        The value stored under ``"answer"`` in the chain's response.
    """
    response = chain.invoke({
        "input": question,
        "chat_history": history,
    })

    return response["answer"]


if __name__ == "__main__":
    # One-time setup: fetch and chunk the page, index it, build the chain.
    docs = document_loader("https://en.wikipedia.org/wiki/FreeSurfer")
    vector_store = create_db(docs)
    chain = create_chain(vector_store)

    # Conversation so far, alternating human and AI messages.
    chat_history = []

    # Keep prompting until the user types "exit" (case-insensitive).
    user_input = input("You: ")
    while user_input.lower() != "exit":
        response = process_chat(chain, user_input, chat_history)
        # Record the turn only after the answer has been produced.
        chat_history.extend((
            HumanMessage(content=user_input),
            AIMessage(content=response),
        ))
        print("AI: ", response)
        user_input = input("You: ")






For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)
USER_AGENT environment variable not set, consider setting it to identify your requests.


AI:  Hello! How can I assist you today?
AI:  The document attached seems to be a list of selected references related to FreeSurfer, a software suite used for the analysis and visualization of structural and functional neuroimaging data. If you need more specific information or assistance regarding FreeSurfer or any related topic, feel free to ask!
AI:  FreeSurfer is a brain imaging software package originally developed by Bruce Fischl, Anders Dale, Martin Sereno, and Doug Greve. It is now primarily maintained by the Laboratory for Computational Neuroimaging at the Athinoula A. Martinos Center for Biomedical Imaging. FreeSurfer includes a set of programs designed for analyzing magnetic resonance imaging (MRI) data of the brain. It is used for tasks such as segmentation, cortical surface reconstruction, volumetric analysis, and more. FreeSurfer is available for Mac OS and Linux operating systems, and while it can be downloaded and installed for free, a license key is required to run the 