<a href="https://colab.research.google.com/github/josephananda/GenAI_in_Finance/blob/main/Generative_AI_Workshop_(Modified_Exercise2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Name: Joseph Ananda Sugihdharma

Email: jose

In [1]:
# Import Necessary Libraries
!pip install langchain
!pip install langchain-groq

import os
import json
import requests
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain.agents import create_tool_calling_agent, AgentExecutor

Collecting langchain
  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.3.0,>=0.2.23 (from langchain)
  Downloading langchain_core-0.2.26-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.95-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.23->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4

In [2]:
# Load the API keys from Colab's `userdata` store so that no credential is
# hardcoded in the notebook. Both secrets must be defined under these exact names.
from google.colab import userdata

# Sent as the `Authorization` header on Sectors API requests.
SECTORS_API_KEY = userdata.get('SECTORS_API_KEY')
# Passed to ChatGroq as `groq_api_key`.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

### Warm up
Here's a quick warm-up exercise. Below, we're using one of the [endpoints available at Sectors API](https://docs.sectors.app) to make our first HTTP request.  

In [3]:
# Sample Endpoint Calls

import requests
import pprint

# Module-level request headers read by get_overview(); the Sectors API key is
# sent as the raw Authorization header value.
headers = {
    "Authorization": SECTORS_API_KEY
}


def get_overview(stock: str, section: str, timeout: float = 30.0) -> requests.Response:
    """
    Request one section of a company report from the Sectors API.

    Args:
        stock: Ticker symbol of the company, e.g. "BBCA".
        section: Name of the report section to request, e.g. "financials".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The raw ``requests.Response``. The status code is not checked here, so
        callers should inspect it (or call ``raise_for_status()``) before
        trusting ``response.json()``.
    """
    url = f"https://api.sectors.app/v1/company/report/{stock}/?sections={section}"
    # requests has no default timeout; without one a stalled connection would
    # block the notebook cell indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response


# Warm-up call: fetch the "financials" section of the BBCA report and
# pretty-print the decoded JSON body.
response = get_overview("BBCA", "financials")
pprint.pprint(response.json())

{'company_name': 'PT Bank Central Asia Tbk.',
 'financials': {'cash_flow_debt_ratio': 4.305466746529966,
                'dar_mrq': 0.0095859653450628,
                'der_mrq': 0.0556534961571916,
                'historical_financials': [{'cash_and_equivalents': None,
                                           'cash_only': None,
                                           'current_liabilities': None,
                                           'earnings': 25855154000000,
                                           'earnings_before_tax': 32706064000000,
                                           'fixed_assets': None,
                                           'gross_profit': None,
                                           'operating_pnl': 32512504000000,
                                           'revenue': 63028090000000,
                                           'tax': 6854404000000,
                                           'total_assets': 824787944000000,
                        

### Self Assessment

Complete **any one of the following two** exercises to get a certificate.



# Exercise 1
Modify and improve the orchestration script below. Here are some things you might want to try:



*   Sam says [the `llama3-groq-70b-8192-tool-use-preview`](https://console.groq.com/docs/models) is a superior model specialized in tool use and function calling tasks. It might be worth swapping in that model.
*   Could the system prompt be improved?
*   Could the docstring in that tool be improved?



In [4]:
from langchain_core.tools import tool

def get_today_date() -> str:
    """
    Return today's date as a ``YYYY-MM-DD`` string.

    The value is appended to the system prompt so the model has a concrete
    reference point when it has to work out relative date ranges.
    """
    import datetime

    # format() delegates to date.strftime with the same format specifier.
    return format(datetime.date.today(), "%Y-%m-%d")

def retrieve_from_endpoint(url: str, timeout: float = 30.0) -> str:
    """
    GET a Sectors API URL and return the decoded JSON payload as a string.

    Args:
        url: Fully built endpoint URL, including any query string.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body, parsed as JSON and re-serialised with ``json.dumps``
        (a ``str``, not a ``dict``), ready to be handed back to the agent as a
        tool result.

    Raises:
        SystemExit: If the server answers with an HTTP error status.
    """
    headers = {"Authorization": SECTORS_API_KEY}

    try:
        # requests has no default timeout; without one a stalled connection
        # would hang the tool call (and the whole agent run) indefinitely.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as err:
        # Kept as SystemExit for backward compatibility with the original cell.
        raise SystemExit(err)
    return json.dumps(data)


@tool
def get_top_companies_by_tx_volume(
    start_date: str, end_date: str, top_n: int = 5
) -> str:
    """
    Get the top companies by transaction volume for each day in a date range.

    start_date and end_date must be dates formatted as YYYY-MM-DD; pass the same
    date for both to query a single day. top_n is the number of companies to
    return per day (at least 1, default 5).

    Returns a JSON string keyed by date; each date maps to a list of records with
    the stock symbol, the company name, the traded volume, and the stock price.
    """
    # NOTE: this docstring is what the model reads as the tool description, so
    # it spells out the date format and the meaning of every argument.
    from urllib.parse import urlencode

    # Encode the query string instead of interpolating model-supplied values
    # verbatim; well-formed inputs produce exactly the same URL as before.
    query_string = urlencode(
        {"start": start_date, "end": end_date, "n_stock": top_n}
    )
    url = f"https://api.sectors.app/v1/most-traded/?{query_string}"

    return retrieve_from_endpoint(url)

# Exercise 1 orchestration: a single tool, the tool-use model, a system prompt,
# and one sample query run through a tool-calling agent.
tools = [get_top_companies_by_tx_volume]
# Tuned with suggested model
llm = ChatGroq(
    temperature=0,
    model_name="llama3-groq-70b-8192-tool-use-preview",
    groq_api_key=GROQ_API_KEY,
)

# NOTE: the system prompt is a runtime string (leading whitespace included), and
# get_today_date() is evaluated once, when this cell runs, so the date is baked
# into the prompt at that moment.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Answer the following queries, being as factual and analytical as you can.
            If you need the start and end dates but they are not explicitly provided,
            infer from the query. If the volume was about a single day,
            the start and end parameter should be the same.
            If there's no data today since the market hasn't open today, take the data from yesterday.
            The minimum amount of top n chosen stocks is 1.
            Today's date is
            """
            + get_today_date(),
        ),
        ("human", "{input}"),
        # msg containing previous agent tool invocations and corresponding tool outputs
        MessagesPlaceholder("agent_scratchpad"),
    ]
)
# Bind the model, tools and prompt into an agent; verbose=True prints each tool
# invocation and its raw output as the chain runs.
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Sample query: the model has to derive the date range from "last 7 days".
query = "What are the top 3 companies by transaction volume over the last 7 days?"
result = agent_executor.invoke({"input": query})
print("Answer:", "\n", result["output"], "\n\n======\n\n")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_top_companies_by_tx_volume` with `{'start_date': '2024-07-25', 'end_date': '2024-07-31', 'top_n': 3}`


[0m[36;1m[1;3m{"2024-07-25": [{"symbol": "BSBK.JK", "company_name": "PT Wulandari Bangun Laksana Tbk", "volume": 1513766400, "price": 76}, {"symbol": "GOTO.JK", "company_name": "PT GoTo Gojek Tokopedia Tbk", "volume": 911091700, "price": 54}, {"symbol": "BBKP.JK", "company_name": "PT Bank KB Bukopin Tbk", "volume": 432596400, "price": 56}], "2024-07-26": [{"symbol": "BSBK.JK", "company_name": "PT Wulandari Bangun Laksana Tbk", "volume": 2408506000, "price": 72}, {"symbol": "GOTO.JK", "company_name": "PT GoTo Gojek Tokopedia Tbk", "volume": 913457100, "price": 53}, {"symbol": "MSJA.JK", "company_name": "PT Multi Spunindo Jaya Tbk.", "volume": 450977300, "price": 342}], "2024-07-29": [{"symbol": "GOTO.JK", "company_name": "PT GoTo Gojek Tokopedia Tbk", "volume": 1621366300, "price": 54}, {"symbol": "ATLA.JK

If you'd like another level of challenge, try and apply your tool-calling LLM model on the following, arguably more challenging, queries.

You need to successfully execute this exercise with these 3 queries returning correct results to be considered successful at this challenge!

# Exercise 2

Our users have been using our tool and having a lot of success with it. It is now time to take it one step further: by collecting users' feedback!

Here are the sample queries that performed sub-optimally according to user feedback:


*   What is the performance of GOTO (symbol: GOTO) since its IPO listing?
*   If i had invested into GOTO vs BREN on their respective IPO listing date, which one would have given me a better return over a 90 day horizon?

Clearly, it's time to expand the tools that our language model has access to. For the second exercise, you'd be implementing **an additional** tool directly into the orchestrator to give your financial AI model a direct pathway to answering questions relating to stock performance since their listing date.


> You may have to [refer to Sectors API Documentation](https://docs.sectors.app) for a list of endpoints and pick the one most suitable for the job.

To help you get started, I've also added two new tools. Use them as a base reference! If you proceed to run the exercise without adding the right tool(s), `query_4` and `query_5` are most likely going to fail or cause the LLM to answer incorrectly.

In [7]:
@tool
def get_company_overview(stock: str) -> str:
    """
    Get company overview, such as phone, email, website, market cap.
    """
    # The docstring is deliberately left as-is: it is the description the model
    # sees for this tool, so rewording it would change agent behaviour.
    report_url = (
        "https://api.sectors.app/v1/company/report/"
        f"{stock}/?sections=overview"
    )
    return retrieve_from_endpoint(report_url)


@tool
def get_daily_tx(stock: str, start_date: str, end_date: str) -> str:
    """
    Get daily transaction data for a stock between a start date and an end date.

    stock is the ticker symbol (for example BBCA). start_date and end_date must
    be dates formatted as YYYY-MM-DD.
    """
    # NOTE: this docstring is what the model reads as the tool description, so
    # it states the expected symbol and date formats explicitly.
    from urllib.parse import urlencode

    # Encode the query string instead of interpolating model-supplied values
    # verbatim; well-formed dates produce exactly the same URL as before.
    query_string = urlencode({"start": start_date, "end": end_date})
    url = f"https://api.sectors.app/v1/daily/{stock}/?{query_string}"

    return retrieve_from_endpoint(url)

@tool
def get_performance_since_ipo(stock: str) -> str:
    """
    Get a stock's price performance since its initial public offering (IPO) listing.

    stock is the ticker symbol (for example GOTO); it is the only parameter.
    Returns the price change measured 7 days (chg_7d), 30 days (chg_30d),
    90 days (chg_90d), and 365 days (chg_365d) after the listing date, not over
    the most recent days.
    """
    # NOTE: this docstring is what the model reads as the tool description. The
    # earlier wording ("over the last 7 days") described the fields as trailing
    # windows, which misleads the model on questions about returns from the IPO
    # date; confirm the field semantics against the Sectors API documentation.
    url = f"https://api.sectors.app/v1/listing-performance/{stock}/"

    return retrieve_from_endpoint(url)


# Tools exposed to the agent in Exercise 2. This rebinds the `tools` name used
# by the Exercise 1 cell.
tools = [
    get_company_overview,
    # we created this in the earlier code chunk under Exercise 1
    # (so make sure you've run that cell),
    get_top_companies_by_tx_volume,
    get_daily_tx,
    get_performance_since_ipo
]

# Evaluation queries; query_4 and query_5 are the ones that need the
# listing-performance tool (get_performance_since_ipo).
query_1 = "What are the top 3 companies by transaction volume over the last 7 days?"
query_2 = "Based on the closing prices of BBCA between 1st and 30th of June 2024, are we seeing an uptrend or downtrend? Try to explain why."
query_3 = "What is the company with the largest market cap between BBCA and BREN? For said company, retrieve the email, phone number, listing date and website for further research."
query_4 = "What is the performance of GOTO (symbol: GOTO) since its IPO listing?"
query_5 = "If i had invested into GOTO vs BREN on their respective IPO listing date, which one would have given me a better return over a 90 day horizon?"

queries = [query_1, query_2, query_3, query_4, query_5]

# Exercise 2 orchestration: same model configuration as Exercise 1, rebuilt here
# so this cell defines its own llm, prompt and agent.
llm = ChatGroq(
    temperature=0,
    model_name="llama3-groq-70b-8192-tool-use-preview",
    groq_api_key=GROQ_API_KEY,
)

# NOTE: the system prompt is a runtime string (leading whitespace included), and
# get_today_date() is evaluated once, when this cell runs, so the date is baked
# into the prompt at that moment.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Answer the following queries, being as factual and analytical as you can.
            If you need the start and end dates but they are not explicitly provided,
            infer from the query. Whenever you return a list of names, return also the
            corresponding values for each name. If the volume was about a single day,
            the start and end parameter should be the same. Note that the endpoint for
            performance since IPO has only one required parameter, which is the stock.
            If a comparison needed between two stock or company, invoke queries for both stock.
            For each query, select one of the available and suitable tool.
            Today's date is
            """
            + get_today_date(),
        ),
        ("human", "{input}"),
        # msg containing previous agent tool invocations and corresponding tool outputs
        MessagesPlaceholder("agent_scratchpad"),
    ]
)

# Bind the model, tools and prompt into an agent; verbose=True prints each tool
# invocation and its raw output as the chain runs.
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Run every query through the same executor, one invoke() call per query, and
# print the final answer after each run.
for query in queries:
    print("Question:", query)
    result = agent_executor.invoke({"input": query})
    print("Answer:", "\n", result["output"], "\n\n======\n\n")

Question: What are the top 3 companies by transaction volume over the last 7 days?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_top_companies_by_tx_volume` with `{'start_date': '2024-07-25', 'end_date': '2024-07-31', 'top_n': 3}`


[0m[33;1m[1;3m{"2024-07-25": [{"symbol": "BSBK.JK", "company_name": "PT Wulandari Bangun Laksana Tbk", "volume": 1513766400, "price": 76}, {"symbol": "GOTO.JK", "company_name": "PT GoTo Gojek Tokopedia Tbk", "volume": 911091700, "price": 54}, {"symbol": "BBKP.JK", "company_name": "PT Bank KB Bukopin Tbk", "volume": 432596400, "price": 56}], "2024-07-26": [{"symbol": "BSBK.JK", "company_name": "PT Wulandari Bangun Laksana Tbk", "volume": 2408506000, "price": 72}, {"symbol": "GOTO.JK", "company_name": "PT GoTo Gojek Tokopedia Tbk", "volume": 913457100, "price": 53}, {"symbol": "MSJA.JK", "company_name": "PT Multi Spunindo Jaya Tbk.", "volume": 450977300, "price": 342}], "2024-07-29": [{"symbol": "GOTO.JK", "company_name": "PT