In [1]:
import os
import sys

# Get the current working directory and add the parent directory to the Python path
current_working_directory = os.getcwd()
print(os.path.join(current_working_directory, ".."))
sys.path.append(os.path.join(current_working_directory, ".."))

/Users/L024258/lilly_work/github-copilot/exploration/applications/..


In [2]:
from decouple import AutoConfig
config = AutoConfig(search_path='./../.env')

os.environ["AZURE_OPENAI_API_KEY"] = config('AZURE_OPENAI_API_KEY')
os.environ["AZURE_OPENAI_ENDPOINT"] = config('AZURE_ENDPOINT')

In [3]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

urls = ('http://www.moneycontrol.com/news/business/', 
        'http://www.moneycontrol.com/news/business/markets/',
        'http://www.moneycontrol.com/news/business/stocks/',
        'http://www.moneycontrol.com/news/business/economy/',
        'http://www.moneycontrol.com/news/business/companies/',
        'http://www.moneycontrol.com/news/business/economy/',
        'http://www.moneycontrol.com/news/business/ipo/',)
tag = ('li',)
tag_classes = ('clearfix',)

# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(tag,
            class_=tag_classes
        )
    ),
)

docs = loader.load()
doc = docs[0]
print(len(docs))

USER_AGENT environment variable not set, consider setting it to identify your requests.


7


In [4]:
print(doc.page_content)

  
  Singapore sees opportunities in India's growing economy Lee highlighted the importance of the Comprehensive Economic Cooperation Agreement (CECA), a nearly two-decade-old Singapore-India free trade agreement that has helped foster extensive trade, investment, and travel links between the two countries. 
 
  Daily Voice: Here's why private banks, IT, energy could benefit in 2025 According to Jitendra Sriram of Baroda BNP Paribas Mutual Fund, the Budget may be little bit of a non-event given the shorter time gap between the post electoral final budget of 2024 and the Budget scheduled for February 2025. Already the policy thrust areas are known, he said. 
 
  Five IPOs gear up to debut on Dalal Street next week despite the bearish mood A total of eight companies will make their debut on the bourses next week including three from the mainboard segment. 
 
  Stock broker Rikhav Securities' IPO to open on January 15: Check details here Rikhav Securities IPO | The IPO consists of fresh i

In [5]:
from pydantic import BaseModel, Field
class Overview(BaseModel):
    """Overview of a section of text."""
    summary: str = Field(description="Provide a detailed summary of the content.")
    highlights: str = Field(description="Provide the highlights of the content.")
    keywords: str = Field(description="Provide keywords related to the content.")

In [6]:
from langchain.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
    ("human", "{input}")
])

In [7]:
from langchain_openai import AzureChatOpenAI
model = AzureChatOpenAI(
                openai_api_version=config('AZURE_CHAT_OPENAI_API_VERSION'),
                azure_deployment=config('AZURE_GPT4o_mini_CHAT_OPENAI_DEPLOYMENT'),
                temperature=0,
                max_tokens=4096
            )

In [8]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

overview_tagging_function = [
    convert_pydantic_to_openai_function(Overview)
]
tagging_model = model.bind(
    functions=overview_tagging_function,
    function_call={"name":"Overview"}
)
tagging_chain = prompt | tagging_model | JsonOutputFunctionsParser()

  convert_pydantic_to_openai_function(Overview)


In [9]:
result = tagging_chain.invoke({"input": docs})
for key, value in result.items():
    print(f"{key}")
    print(f"{value}")

summary
The documents provide a comprehensive overview of recent developments in the Indian economy, stock market, and upcoming IPOs. Key highlights include the anticipated impact of the Comprehensive Economic Cooperation Agreement (CECA) between Singapore and India, insights on the performance of private banks, IT, and energy sectors, and the upcoming IPOs of various companies including Rikhav Securities, Vidya Wires, and EMA Partners India. The documents also discuss market trends, including the bearish sentiment affecting small-cap stocks, and the performance of major companies like TCS and DMart. Additionally, there are mentions of economic forecasts from the IMF regarding a weaker Indian economy in 2025, and the need for reforms in the debt market and corporate tax rates.
highlights
1. Singapore-India CECA fosters trade and investment. 2. Anticipated benefits for private banks, IT, and energy sectors in 2025. 3. Upcoming IPOs: Rikhav Securities, Vidya Wires, EMA Partners India. 4.

In [10]:
from typing import List

class Tip(BaseModel):
    """Information about papers mentioned."""
    company_name: str = Field(description="The name or ticker symbol of the company.")
    sentiment: str = Field(description="The sentiment associated with the company in the text.")
    reason: str = Field(description='Extracted text snippet that conveys the sentiment.')
    signal: str = Field(description="Infer BUY or SELL signal associated with the company in the text.")


class Info(BaseModel):
    """Information to extract"""
    tips: List[Tip]

In [14]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess or extract partial info"),
    ("human", "{input}")
])

In [15]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

tip_extraction_function = [
    convert_pydantic_to_openai_function(Info)
]
extraction_model = model.bind(
    functions=tip_extraction_function, 
    function_call={"name":"Info"}
)
extraction_chain = prompt | extraction_model | JsonOutputFunctionsParser()

In [16]:
result = extraction_chain.invoke({"input": docs})
result
# for item in result:
#     for key, value in item.items():
#         print(f"{key}: {value}")
#     print("\n")

{'tips': [{'company_name': 'Rikhav Securities',
   'sentiment': 'neutral',
   'reason': 'The IPO consists of fresh issue of 83.28 lakh shares worth Rs 71.62 crore, and an offer-for-sale of 20 lakh shares worth Rs 17.2 crore by public shareholders.',
   'signal': 'N/A'},
  {'company_name': 'Vidya Wires',
   'sentiment': 'neutral',
   'reason': 'The initial share sale is a mix of fresh issuance of equity shares worth Rs 320 crore, and an offer-for-sale of 1 crore shares by promoters.',
   'signal': 'N/A'},
  {'company_name': 'EMA Partners India',
   'sentiment': 'neutral',
   'reason': 'The company plans to raise Rs 76.01 crore via initial share sale which comprises of fresh issue up to 53.34 lakh shares, and an offer-for-sale of 7.96 lakh shares.',
   'signal': 'N/A'},
  {'company_name': 'Laxmi Dental',
   'sentiment': 'positive',
   'reason': 'The initial share sale will open for public subscription on January 13 and close on January 15, with a price band of Rs 407-428 per share.',
   