In [3]:
from langchain.prompts import PromptTemplate
from langchain.llms.openai import OpenAI
from langchain.chains import LLMChain, RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma, DocArrayInMemorySearch
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process
# environment before the API key is read.
load_dotenv()

# This import comes after the earlier `from langchain.chat_models import ChatOpenAI`,
# so the `langchain_openai` class is the one bound to `ChatOpenAI` from here on.
from langchain_openai import ChatOpenAI

new_api_key = os.getenv("OPENAI_API_KEY")
if not new_api_key:
    # Fail here with an actionable message instead of letting a missing key
    # surface later inside an embedding/LLM constructor.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in the environment or in a .env file "
        "before running this notebook."
    )


In [4]:
# Chat model used by the chain built in this cell.
llm = ChatOpenAI(model_name="gpt-4o", api_key=new_api_key, temperature=0.1)

# Open the Chroma store kept under the "db" directory, using OpenAI embeddings,
# and expose it through its default retriever.
embedding = OpenAIEmbeddings(api_key=new_api_key)
vectordb = Chroma(embedding_function=embedding, persist_directory="db")
retriever = vectordb.as_retriever()

# Conversation memory handed to the chain; stored under the "chat_history" key
# and returned as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational retrieval chain wired to the model, retriever and memory above.
qa = ConversationalRetrievalChain.from_llm(llm, memory=memory, retriever=retriever)

  embedding = OpenAIEmbeddings(api_key=new_api_key)
  vectordb = Chroma(persist_directory="db", embedding_function=embedding)
  memory = ConversationBufferMemory(


In [5]:
def load_db(file, chain_type, k, chunk_size=1000, chunk_overlap=150,
            model_name="gpt-4", temperature=0):
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded, split into overlapping chunks, embedded with OpenAI
    embeddings into an in-memory DocArray index, and wrapped in a
    ``ConversationalRetrievalChain``. No memory object is attached to the
    chain, so the caller supplies ``chat_history`` with every call.

    Args:
        file: Path of the PDF to index.
        chain_type: Passed through as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm`` (e.g. ``"stuff"``).
        k: Number of chunks the similarity retriever returns per query.
        chunk_size: ``chunk_size`` for the recursive character splitter.
        chunk_overlap: ``chunk_overlap`` for the recursive character splitter.
        model_name: OpenAI chat model used to answer questions.
        temperature: Sampling temperature for the chat model.

    Returns:
        A chain configured to also return the source documents and the
        generated (rephrased) question alongside the answer.
    """
    # Load the PDF and split it into chunks.
    pages = PyPDFLoader(file).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(pages)

    # Embed the chunks into an in-memory vector index.
    embeddings = OpenAIEmbeddings(api_key=new_api_key)
    db = DocArrayInMemorySearch.from_documents(chunks, embeddings)

    # Similarity search returning the top-k chunks.
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # Chatbot chain; conversation memory is managed by the caller.
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=model_name, api_key=new_api_key, temperature=temperature),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )

In [9]:
import param

class cbfs(param.Parameterized):
    """Console chatbot session over one PDF document.

    Holds the retrieval chain built by ``load_db`` for the currently loaded
    file, together with the running chat history and the details of the most
    recent retrieval (generated question, source documents, answer).
    """

    # Settings used whenever a chain is built, kept in one place instead of
    # being repeated at each load_db call.
    DEFAULT_FILE = "data/AML_IEEE_ACCESS_2024.pdf"
    CHAIN_TYPE = "stuff"
    TOP_K = 4

    # List of (user query, bot answer) tuples, oldest first.
    chat_history = param.List([])
    # Answer text from the most recent query.
    answer = param.String("")
    # Question the chain generated for the most recent retrieval.
    db_query = param.String("")
    # Source documents returned by the most recent retrieval.
    db_response = param.List([])

    def __init__(self, **params):
        """Create the session and index the default file."""
        super().__init__(**params)
        self.loaded_file = self.DEFAULT_FILE
        self.qa = load_db(self.loaded_file, self.CHAIN_TYPE, self.TOP_K)

    def call_load_db(self, file_path=None):
        """Load a new database file.

        When ``file_path`` is empty the current chain is kept. Otherwise a new
        chain is built for ``file_path`` and the chat history and last-lookup
        details are reset, since they refer to the previous document.
        """
        if not file_path:
            print(f"No file specified. Using default file: {self.loaded_file}")
            return

        # Build the chain first: if load_db raises, the session keeps pointing
        # at the previous file and chain instead of a half-updated state.
        new_chain = load_db(file_path, self.CHAIN_TYPE, self.TOP_K)
        self.loaded_file = file_path
        self.qa = new_chain

        self.clr_history()
        # The last query/sources/answer came from the previous document.
        self.db_query = ""
        self.db_response = []
        self.answer = ""
        print(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Process user query and update chat history."""
        if not query:
            print("No query provided!")
            return

        # `invoke` replaces the deprecated direct call on the chain object.
        result = self.qa.invoke({"question": query, "chat_history": self.chat_history})
        reply = result["answer"]

        # Rebind rather than append in place so the change goes through
        # param's attribute assignment (validation and any watchers).
        self.chat_history = self.chat_history + [(query, reply)]
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = reply

        print(f"User: {query}")
        print(f"LLM Chatbot: {self.answer}")

    def get_lquest(self):
        """Display the last database query."""
        if not self.db_query:
            print("No database accesses so far!")
        else:
            print(f"Last question to DB: {self.db_query}")

    def get_sources(self):
        """Display the source documents from the last database response."""
        if not self.db_response:
            print("No database response available!")
        else:
            print("Result of DB lookup:")
            for doc in self.db_response:
                print(f"- {doc}")

    def get_chats(self):
        """Display the chat history."""
        if not self.chat_history:
            print("No chat history yet!")
        else:
            print("Chat History:")
            for user_query, bot_answer in self.chat_history:
                print(f"User: {user_query}")
                print(f"Bot: {bot_answer}")

    def clr_history(self):
        """Clear the chat history."""
        self.chat_history = []
        print("Chat history cleared.")

# Start a chatbot session; the constructor indexes the class's default PDF.
cb = cbfs()

# Ask a single question about the document.
cb.convchain("What is the title of paper?")

# Then print, in order: the chat history, the question sent to the retriever,
# and the source chunks that were returned.
for show in (cb.get_chats, cb.get_lquest, cb.get_sources):
    show()


User: What is the title of paper?
LLM Chatbot: The title of the paper is "Scalable Semi-Supervised Graph Learning Techniques for Anti Money Laundering".
Chat History:
User: What is the title of paper?
Bot: The title of the paper is "Scalable Semi-Supervised Graph Learning Techniques for Anti Money Laundering".
Last question to DB: What is the title of paper?
Result of DB lookup:
- page_content='2024 The Authors. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 License.
For more information, see https://creativecommons.org/licenses/by-nc-nd/4.0/ VOLUME 12, 2024' metadata={'source': 'data/AML_IEEE_ACCESS_2024.pdf', 'page': 0}
- page_content='of an AML model. Nevertheless, we hope the approach
presented in this paper will make non-trivial contributions
and give network security and financial crime analysis some
insights into how to employ semi-supervised graph learning
VOLUME 12, 2024 50027' metadata={'source': 'data/AML_IEEE_ACCESS_2024.pdf', 'pa