<a href="https://colab.research.google.com/github/lutfilaili/Gen-AI/blob/main/Generative_AI_for_Finance_IDX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# INSTALLING AND IMPORTING REQUIRED LIBRARIES

In [28]:
!pip install langchain
!pip install langchain-groq
!pip install --upgrade langchain langchain-community langchain-groq

from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
import os
import json
import requests
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain.agents import create_tool_calling_agent, AgentExecutor



**Application Programming Interface (API) Key**

Obtain an API Key from sectors.app to support your data (https://sectors.app/api)
and GROQ API Key (https://console.groq.com/keys).


In [29]:
from google.colab import userdata

SECTORS_API_KEY = userdata.get('SECTORS_API_KEY')
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

***

#SETTING UP THE TOOLS
creating tools as many as possible to bolster up.
It is important to write proper docstrings to help models answer the prompt nicely.

In [50]:
from langchain_core.tools import tool
from datetime import date, timedelta

def retrieve_from_endpoint(url: str) -> dict:
    headers = {"Authorization": SECTORS_API_KEY}

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as err:
        raise SystemExit(err)
    return json.dumps(data)


@tool
def get_top_companies_by_tx_volume(
    start_date: str = (date.today() - timedelta(days=7)).strftime('%Y-%m-%d'),
    end_date: str = date.today().strftime('%Y-%m-%d'),
    top_n: int = 5
) -> str:
    """
    Retrieves the top (n) companies based on transaction volume within a specified date range from what is being asked.
    The date must be up-to-date.
    """
    url = f'https://api.sectors.app/v1/most-traded/?start={start_date}&end={end_date}&n_stock={top_n}'

    return retrieve_from_endpoint(url)

@tool
def get_company_overview(stock: str) -> str:
    """
    Get company overview, provide the information only if being asked.
    """
    url = f"https://api.sectors.app/v1/company/report/{stock}/?sections=overview"

    return retrieve_from_endpoint(url)

@tool
def get_daily_tx(start_date: str, end_date: str, top_n: int) -> str:
    """
    Get daily transaction for stocks in a specified date range when being asked.
    """
    url = f"https://api.sectors.app/v1/daily/BBCA/?start={start_date}&end={end_date}"

    return retrieve_from_endpoint(url)

@tool
def get_companies_performance_since_ipo(stock: str) -> str:
    """
    Get companies performance since its IPO listing.
    You must answer the queries with corresponding data, and describe it afterwards.
    """
    url = f'https://api.sectors.app/v1/listing-performance/{stock}/'

    return retrieve_from_endpoint(url)

@tool
def get_index_daily_transaction(index_code: str, start_date: str, end_date: str, top_n: int) -> str:
    """
    Get index daily transaction for stocks in a specified date range when being asked.
    """
    url = f'https://api.sectors.app/v1/index-daily/{index_code}/?start={start_date}&end={end_date}'

    return retrieve_from_endpoint(url)

@tool
def get_top_companies_ranked(classifications: str, stock: str) -> str:
    """
    Get top companies ranked, be flexible with the questions.
    """
    # Handle cases where classifications might be empty
    url = 'https://api.sectors.app/v1/companies/top/'

    if classifications:
        # Replace spaces with plus signs to properly encode the URL
        classifications = classifications.replace(' ', '+')
        url += f'?classifications={classifications}'

    if stock:
        url += f'&stock={stock}'

    url += '&n_stock=5&year=2024' # Add the default parameters

    return retrieve_from_endpoint(url)

@tool
def get_subsector_aggregated_statistics(sector: str) -> str:
    """
    Get subsector aggregated statistics, be flexible with the questions.
    """
    url = f'https://api.sectors.app/v1/subsector/report/{sector}/?sections={sections}'

    return retrieve_from_endpoint(url)

@tool
def get_company_revenue_and_cost_segment(stock: str) -> str:
    """
    Get company revenue and cost segment, be flexible with the questions.
    """
    url = f'https://api.sectors.app/v1/company/get-segments/{stock}/'

    return retrieve_from_endpoint(url)

@tool
def get_company_report(stock: str) -> str:
    """
    Get company report, be flexible with the questions.
    """
    url = f'https://api.sectors.app/v1/company/report/{stock}/?sections={sections}'

    return retrieve_from_endpoint(url)

tools = [
    get_top_companies_by_tx_volume,
    get_company_overview,
    get_daily_tx,
    get_companies_performance_since_ipo,
    get_index_daily_transaction,
    get_top_companies_ranked,
    get_subsector_aggregated_statistics,
    get_company_revenue_and_cost_segment,
    get_company_report
    ]

llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    groq_api_key=GROQ_API_KEY,
)

def get_today_date() -> str:
    """
    Get today's date
    """
    from datetime import date

    today = date.today()
    return today.strftime("%Y-%m-%d")

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Answer the following queries as you are a financial-robo advisor.
            In case the queries are not explicitly stated, you may focus on concluding the queries and you can have a follow-up question.
            If the questions mention a date reference, you must restate the date before giving data accurately.
            It would be beneficial if you provide data in a tabular, ensuring it is easy to understand, after giving a tabular data, you must paraphrase the questions and describe it logically, and tell a little bit good or bad story behind the data if necessary.
            When being asked in a long sentence, you have to ensure answering all possible questions in it, sometimes users do not put the question mark at the end.
            It is compulsory to understand the queries and answer them approprately when users ask from the date they state, you must provide fully data. Please be mindful when attributing date.
            """
        ),
        ("human", "{input}"),
        # msg containing previous agent tool invocations and corresponding tool outputs
        MessagesPlaceholder("agent_scratchpad"),
    ]
)
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

To handle unexpected errors

In [40]:
import time

def handle_llm_response(response):
  try:
    # Process the LLM response here
    processed_response = process_response(response)
    return processed_response
  except Exception as e:
    print(f"Error processing LLM response: {e}")
    # Handle the error, e.g., retry, log, or provide a default response
    if isinstance(e, TimeoutError):
      return "LLM response timed out. Please try again later."
    elif isinstance(e, ValueError):
      return "Invalid LLM response format."
    else:
      return "An unexpected error occurred."

# Example usage:
# Assuming 'prompt' is a string containing the user's query
# Format the prompt template with user input and an empty agent_scratchpad
formatted_prompt = prompt.format_prompt(input="Your user query here", agent_scratchpad=[]).to_messages()
response = llm(formatted_prompt)  # Pass the formatted messages
result = handle_llm_response(response)

Error processing LLM response: name 'process_response' is not defined


#INPUTING QUERY
We may place our questions here, and you will get the answer as soon as you run it.

In [54]:
# Put yours
query_1 = "What are the top 5 companies by transaction volume on the first of this month?",
query_2 = "What are the most traded stock yesterday?",
query_3 = "What are the top 7 most traded stocks between 6th June to 10th June this year?",
query_4 = "What are the top 3 companies by Transaction Volume over the last 7 days?",
query_5 = "what is the best stock over the past 3 years, and is it worth to invest for the 5 years ahead?"

queries = [query_1, query_2, query_3, query_4, query_5]

for query in queries:
    print("Question:", query)
    result = agent_executor.invoke({"input": query})
    print("Answer:", "\n", result["output"], "\n\n======\n\n")

Question: ('What are the top 5 companies by transaction volume on the first of this month?',)


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_top_companies_by_tx_volume` with `{'end_date': '2024-08-01', 'start_date': '2024-08-01', 'top_n': 5}`


[0m[36;1m[1;3m{"2024-08-01": [{"symbol": "GOTO.JK", "company_name": "PT GoTo Gojek Tokopedia Tbk", "volume": 1425057600, "price": 53}, {"symbol": "BIPI.JK", "company_name": "PT Astrindo Nusantara Infrastruktur Tbk.", "volume": 521955600, "price": 61}, {"symbol": "NICL.JK", "company_name": "PT PAM Mineral Tbk", "volume": 362601100, "price": 252}, {"symbol": "WIKA.JK", "company_name": "PT Wijaya Karya (Persero) Tbk", "volume": 314787400, "price": 220}, {"symbol": "DEWA.JK", "company_name": "Darma Henwa Tbk", "volume": 294681400, "price": 67}]}[0m[32;1m[1;3mHere is the top 5 companies by transaction volume on the first of this month:

| Rank | Company Name | Symbol | Volume | Price |
| --- | --- | --- | --- | ---