This notebook showcases an agent designed to interact with a SQL database. The agent builds on SQLDatabaseChain and is designed to answer more general questions about a database, as well as recover from errors.

In [None]:
# Importing required libraries
from langchain.memory import ConversationBufferWindowMemory, ConversationSummaryBufferMemory
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BlipProcessor
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma
from langchain.tools import BaseTool
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import requests
import sqlite3
import json
import os

In [3]:
# Configuring OpenAI API key
# Use setdefault so a key that is already exported in the environment is not
# overwritten by the blank placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "")
llmModel = OpenAI(organization='')

In [4]:
# Loading data from SQLite databases
conn = sqlite3.connect('SQLite/AdvisorData.db')
try:
    advisors_df = pd.read_sql_query("SELECT * FROM advisors", conn)
    users_df = pd.read_sql_query("SELECT * FROM users", conn)
    user_1p_data_df = pd.read_sql_query("SELECT * FROM user_1p_data", conn)
finally:
    # Close the database connection even when one of the queries fails
    conn.close()

# Join each user with their first-party data rows on the hashed user id
users_data_df = pd.merge(users_df, user_1p_data_df, how='inner', left_on='ID', right_on='User ID (Hashed)')

In [5]:
# Loading data from XML file
# Attributes copied from each <Firm>/<Info> element, in output column order
_FIRM_INFO_FIELDS = ('FirmCrdNb', 'FirmName', 'FirmCity', 'FirmState', 'FirmCountry')

tree = ET.parse('dataset/firm/IA_FIRM_SEC_Feed_06_08_2024.xml')
root = tree.getroot()

firms_data = []
for firm in root.findall('Firm'):
    info = firm.find('Info')
    # One record per firm, keyed by the attribute names listed above
    firms_data.append({field: info.attrib[field] for field in _FIRM_INFO_FIELDS})

firms_df = pd.DataFrame(firms_data)

In [6]:
# Function to fetch news data from API
def fetch_news_data(query):
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Free-text search string, sent as the ``q`` request parameter.

    Returns:
        The value stored under ``"articles"`` in the JSON response, or an
        empty list when the response has no such key.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
    """
    # The key may be supplied through the environment; the blank fallback
    # mirrors the placeholder that used to be hard-coded here.
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")

    # Passing params lets requests URL-encode the free-text query (spaces,
    # '&', '#', ...) instead of splicing it raw into the URL.
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": query, "apiKey": NEWS_API_KEY},
        timeout=10,  # never hang the chat loop on a stalled connection
    )
    response.raise_for_status()

    return response.json().get("articles", [])

In [11]:
# Function to fetch stock data from API
def fetch_stock_data(ticker):
    """Fetch the previous-day aggregate for ``ticker`` from the Polygon API.

    Args:
        ticker: Ticker symbol placed in the request path.

    Returns:
        The decoded JSON body of the response, returned as-is (including
        error payloads, which callers can inspect).

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    from urllib.parse import quote  # local import: only needed here

    # The key may be supplied through the environment; the blank fallback
    # mirrors the placeholder that used to be hard-coded here.
    POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY", "")

    # Percent-encode the ticker so characters such as '/', '?' or spaces
    # cannot alter the request path.
    safe_ticker = quote(str(ticker), safe=":")
    url = f"https://api.polygon.io/v2/aggs/ticker/{safe_ticker}/prev"
    response = requests.get(
        url,
        params={"apiKey": POLYGON_API_KEY},
        timeout=10,  # never hang the chat loop on a stalled connection
    )

    return response.json()

In [14]:
# Conversation memory shared by the retrieval chain and the tools.
# Questions are stored under "question" and replies under "answer"; the
# history is exposed to prompts as "chat_history".
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    input_key="question",
    output_key="answer",
    human_prefix="Human",
    ai_prefix="AI",
    return_messages=True,
    k=100,
    llm=llmModel,
)
# Alternative memory implementations that were tried:
#memory = ConversationSummaryBufferMemory(llm=llmModel, memory_key="chat_history", max_token_limit=100)
#memory = ConversationSummaryMemory(llm=llmModel, memory_key="chat_history", return_messages=True)

In [17]:
# Define a tool to gather user information
class UserInfoGatherer(BaseTool):
    """Tool that asks the user for their name and loads their stored profile.

    The name typed by the user is looked up in the module-level ``users_df``.
    When a matching row exists, a JSON summary of it is saved into the
    module-level conversation ``memory``.
    """

    # Annotated so the field overrides are accepted by pydantic-based BaseTool
    name: str = "user_info_gatherer"
    description: str = "Gathers user information by asking for their name and additional preferences."

    def _run(self, query: str) -> str:
        """Prompt for the user's name and return it.

        Args:
            query: The user's original question; not used by the lookup.

        Returns:
            The ``Name`` value of the matching profile row, or the text the
            user typed when no profile matches.
        """
        user_input = input(
            "To assist you better, may I know your full name and any additional preferences you might have?"
        ).strip()
        user_profile = users_df.loc[users_df['Name'] == user_input]

        if user_profile.empty:
            # Previously this message was built but never shown, and the
            # method then failed on an unbound ``user_name``.
            print("Sorry, I couldn't find your profile in our database. Let's continue our conversation, and I'll do my best to assist you.")
            return user_input

        row = user_profile.iloc[0]
        user_name = str(row['Name'])

        def _plain(value):
            # numpy scalars are not JSON serializable; unwrap them
            return value.item() if hasattr(value, "item") else value

        try:
            obj_user_profile = {
                "name": user_name,
                # This value was previously stored under the "age" key
                "location": _plain(row['Location']),
                "occupation": _plain(row['Occupation']),
                "net_worth_USD": int(np.int64(row['Estimated Net Worth (USD Million)'])),
                "investment_focus": _plain(row['Investment Focus']),
                "risk_tolerance": _plain(row['Risk Tolerance']),
                "financial_goals": _plain(row['Financial Goals']),
                "topics_of_interest": _plain(row['Topics of Interest']),
            }
            # NOTE(review): assumes the users table may carry an 'Age' column - confirm
            if 'Age' in user_profile.columns:
                obj_user_profile["age"] = _plain(row['Age'])

            json_user_profile = json.dumps(obj_user_profile)

            # The memory object is configured with input_key="question" and
            # output_key="answer"; other keys make save_context raise KeyError.
            memory.save_context(
                {"question": f"User profile: {json_user_profile}"},
                {"answer": ""},
            )
        except Exception as e:
            # Best effort: a failed profile save must not end the conversation
            print(f"Error accessing user profile: {e}")

        return user_name

In [53]:
def load_prompt():
    """Build the chat prompt (system + human message) for the financial assistant.

    The system message carries the assistant instructions and has
    ``{question}`` and ``{context}`` placeholders; the human message is the
    bare ``{question}``.

    Returns:
        A ``ChatPromptTemplate`` assembled from the two message templates.
    """
    # An unused local ``context`` guidance string used to be defined here; it
    # was never passed to the prompt, so it has been removed.
    system_template="""
  You are an expert in financial investment who provides great insights on investment, financial advisors, stocks and news. 
  Use the following pieces of context to answer the users question. If you don't know the answer, 
  just say that you don't know, don't try to make up an answer. 
  Remember to only use the given context to answer the question, very important.
  
  question: {question}
  ```
  Begin!
  ----------------
  {context}"""
    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]

    return ChatPromptTemplate.from_messages(messages)

# Prompt that rewrites a follow-up question, given the chat history, into a
# standalone question. Placeholders: {chat_history} and {question}.
_template = (
    "Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question:"
)
Condense_Question_Prompt = PromptTemplate.from_template(_template)

In [20]:
def load_chat_prompt():
    """Build the answer prompt used by the conversational retrieval chain.

    The template has three placeholders: ``{context}``, ``{chat_history}``
    and ``{question}``.

    Returns:
        A ``PromptTemplate`` declaring those three input variables.
    """
    # An unused local ``context`` guidance string used to be defined here; it
    # was never passed to the prompt, so it has been removed. A stray,
    # unmatched double quote after "very important." was also dropped from
    # the template text.
    template = """
        You are an expert in financial investment who provides great insights on investment, financial advisors, stocks and news. Use the following pieces of context to answer the users question. If you don't know the answer, 
        just say that you don't know, don't try to make up an answer. Remember to only use the given context to answer the question, very important.

        Context:
        {context}

        History chat information in summary:
        {chat_history}

        Question: {question}
        answer: 
    """

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"], template=template
    )
    return prompt

In [21]:
# Creating vector store and retrieval QA chain
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# Columns rendered, in this order, into one " - "-separated line per record
_ADVISOR_COLUMNS = [
    'Name', 'Location', 'FirmID', 'Areas of Expertise',
    'Assets Handled (in millions USD)', 'Clientele', 'Expertise',
    'Education', 'Years of Experience', 'Certifications',
    'Languages Spoken', 'Previous Employers',
]
_USER_COLUMNS = [
    'Name', 'email', 'Age', 'Location', 'Occupation',
    'Estimated Net Worth (USD Million)', 'Investment Focus',
    'Risk Tolerance', 'Financial Goals', 'Topics of Interest',
    'Content Type', 'Headline/Title', 'Keywords', 'Clickstream Data',
    'Device Type', 'Investment Account Type', 'Geographic Location',
    'Previous Visits', 'Time Spent on Page',
]
_FIRM_COLUMNS = ['FirmName', 'FirmCity', 'FirmState', 'FirmCountry']


def _rows_as_lines(frame, columns):
    """Render every row of ``frame`` as its ``columns`` values joined by " - "."""
    return [
        " - ".join(f"{record[column]}" for column in columns)
        for _, record in frame.iterrows()
    ]


# Advisors first, then users joined with their first-party data, then firms
texts = _rows_as_lines(advisors_df, _ADVISOR_COLUMNS)
texts += _rows_as_lines(users_data_df, _USER_COLUMNS)
texts += _rows_as_lines(firms_df, _FIRM_COLUMNS)

# Split each line into chunks and flatten the result into a single list
texts = [chunk for line in texts for chunk in text_splitter.split_text(line)]

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_texts(texts, embeddings)


# Create the conversational retrieval chain
qa = ConversationalRetrievalChain.from_llm(
    llm=llmModel,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 1}),
    chain_type="stuff", #"ConversationalRetrievalChain",
    combine_docs_chain_kwargs={"prompt": load_chat_prompt()},
    #CONDENSE_QUESTION_PROMPT=Condense_Question_Prompt,
    memory=memory,
    output_key="answer",
    # Hand the stored history to the chain unchanged
    get_chat_history=lambda history: history
    #return_source_documents=True,
    #tools=[user_info_tool]
)

In [24]:
def processStockResponse(response):
    """Convert an aggregates response dict into a list of OHLCV records.

    Args:
        response: Already-decoded JSON mapping; its ``"results"`` entry is
            expected to be a list of mappings with the keys ``o``, ``h``,
            ``l``, ``c`` and ``v``.

    Returns:
        One dict per result with the keys ``Open``, ``High``, ``Low``,
        ``Close`` (floats) and ``Volume`` (int). A field that is missing from
        a result is reported as ``None``. An absent or ``None`` ``"results"``
        entry yields an empty list.
    """
    data = response

    print(f"stock data-{data}")
    # ``or []`` also covers an explicit ``"results": None`` in the payload
    results = data.get("results") or []

    print(f"results-{results}")

    def _convert(value, cast):
        # Leave missing fields as None instead of failing on the conversion
        return None if value is None else cast(value)

    # Prices stay floats: casting them to int would drop the fractional part
    stocks = [
        {
            "Open": _convert(result.get("o"), float),
            "High": _convert(result.get("h"), float),
            "Low": _convert(result.get("l"), float),
            "Close": _convert(result.get("c"), float),
            "Volume": _convert(result.get("v"), int),
        }
        for result in results
    ]

    # Print the array of key-value pairs
    print(stocks)
    return stocks

In [None]:
# Interactive chatbot
# Reads questions until the user types "exit". News and stock questions
# (detected by keyword) first fetch external data; everything else goes
# through a similarity search before the retrieval chain is called.
print("Welcome to the Financial Assistant Chatbot!")
chat_history = []
user_name = ""
actual_query = ""

# Create an instance of the custom tool
user_info_tool = UserInfoGatherer()

# Substrings that route a query to the news or the stock branch
_NEWS_KEYWORDS = ("news", "headlines", "articles")
_STOCK_KEYWORDS = ("stock", "stocks", "market")

while True:
    try:
        query = input("How can I help you? ")
    except (EOFError, KeyboardInterrupt):
        # Treat a closed or interrupted prompt like an explicit "exit"
        break
    if query.strip().lower() == "exit":
        break

    chat_history.append({"question": query})

    try:
        print(f"User Name : {user_name}")
        if not user_name:
            # First interaction: ask who the user is before answering
            actual_query = query
            result = user_info_tool.run(query)
            chat_history.append({"answer": result})
            user_name = result
            response = f"Hello {user_name}, nice to meet you! Let me look into that for you."
            print(response)

        lowered_query = query.lower()

        if any(keyword in lowered_query for keyword in _NEWS_KEYWORDS):
            articles = fetch_news_data(query)

            print(f"Articles : {articles}")
            # NOTE(review): the chain appears to fill {context} from the
            # retriever, so this extra "context" input may never reach the
            # prompt - confirm against ConversationalRetrievalChain.
            result = qa({"question": query, "context": [articles]})

            print(f"Question: {query}")
            print(f"answer: {result}")

            chat_history.append({"answer": result})
            print("-" * 50)

        elif any(keyword in lowered_query for keyword in _STOCK_KEYWORDS):
            # NOTE(review): the ticker is hard-coded; presumably it should be
            # taken from the query - confirm.
            stock_data = fetch_stock_data("AAPL")

            print(f"Stock Data: {stock_data}")
            # Process the response once and reuse the result
            stock_details = processStockResponse(stock_data)
            print(f"Process Stock Response : {stock_details}")

            # NOTE(review): see the note on "context" in the news branch
            result = qa({"question": query, "context": [stock_details]})

            print(f"Question: {query}")
            print(f"answer: {result}")

            chat_history.append({"answer": result})
            print("-" * 50)

        else:
            advisor_query = f"{user_name} {query}"
            print(f"advisor_query : {advisor_query}")

            relevant_docs = vectorstore.similarity_search(advisor_query, k=1)
            print(f"similarity_search : {relevant_docs}")

            if relevant_docs:
                result = qa({"question": query, "context": [doc.page_content for doc in relevant_docs]})
            else:
                result = qa({"question": query})
                print("I'm sorry, I couldn't find any relevant information related to advisors in my database. Please try rephrasing your query or provide more context.")

            print(f"question: {query}")
            print(f"answer: {result}")
            print(f"Chat History - {result['chat_history']}\n")

            chat_history.append({"answer": result})
            print("-" * 50)

    except Exception as e:
        # Keep the chat loop alive: report the failure and ask again
        print(f"Error processing your request : {e}")