#**FinNewsBot – A Q&A chatbot using financial news.**

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no cell visible in this notebook reads from or writes to the
# mount — confirm it is still needed.
drive.mount("/content/drive")

In [None]:
%pip install langchain_groq

In [None]:
%pip install langchain_huggingface

In [None]:
!pip install bs4

In [None]:
%pip install langchain_chroma

In [None]:
%pip install feedparser

In [None]:
%pip install langchain_community

In [None]:
import os
from langchain_groq import ChatGroq
from google.colab import userdata # Import userdata to access secrets

# Read the Groq API key from Colab's secret store.
groq_api_key = userdata.get("GROQ_API_KEY")

# Stop early with a clear message when no usable key came back.
if not groq_api_key:
    raise ValueError("GROQ_API_KEY not found in Colab secrets. Please add it to Colab's Secrets Manager.")

# Groq-hosted chat model; the model name selects Llama 3.3 70B (versatile).
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="llama-3.3-70b-versatile",
)

llm  # Echo the configured client as the cell output.

In [None]:
import os
from langchain_huggingface import HuggingFaceEmbeddings
from google.colab import userdata

# Read the Hugging Face token from Colab's secret store.
hf_token = userdata.get("HF_TOKEN")

# Guard clause: without a token there is nothing to export below.
if not hf_token:
    raise ValueError("HF_TOKEN not found in Colab secrets. Please add it to Colab's Secrets Manager.")

# Publish the token through the process environment
# (presumably so the Hugging Face libraries can pick it up — confirm).
os.environ["HF_TOKEN"] = hf_token

# Embedding model used later to index the news chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
embeddings # Echo the embedding model object to confirm it was constructed

In [None]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [None]:
# 1. Load the CNBC World landing page with WebBaseLoader.
# BeautifulSoup is restricted (via SoupStrainer) to elements whose CSS class
# is one of the names listed below.
# NOTE(review): "post-content" / "post-title" / "post-header" look like class
# names for a blog layout — confirm they exist on the CNBC page, otherwise the
# loaded documents may contain no text.
# NOTE(review): `docs` is reassigned in a later cell from the RSS entries, so
# the result of this cell is not what gets split and indexed.
import bs4
loader = WebBaseLoader(
    web_paths=("https://www.cnbc.com/world/?region=world",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

docs=loader.load()
docs

In [None]:
pip install feedparser beautifulsoup4


In [None]:
import feedparser
from bs4 import BeautifulSoup

# CNBC World News RSS feed.
rss_url = "https://www.cnbc.com/id/100727362/device/rss/rss.html"

# Fetch and parse the feed.
feed = feedparser.parse(rss_url)

# feedparser reports fetch/parse problems through `bozo` / `bozo_exception`
# instead of raising, so a failed download would otherwise pass silently and
# leave nothing to index.  Fail here, with the underlying cause if available.
if not feed.entries:
    raise RuntimeError(
        f"No news items retrieved from {rss_url}: "
        f"{feed.get('bozo_exception', 'the feed contained no entries')}"
    )

# One "Title/Summary" string per feed entry, collected in `doc`.
doc = []

for entry in feed.entries:
    title = entry.get("title", "")
    # Summaries may carry HTML markup; keep only the text content.
    summary_html = entry.get("summary", "")
    summary_text = BeautifulSoup(summary_html, "html.parser").get_text()

    doc.append(f"Title: {title}\nSummary: {summary_text}")

# Optional: preview the first 5 items.
for i, news in enumerate(doc[:5], start=1):
    print(f"{i}.\n{news}\n")



In [None]:
# Show the full list of "Title/Summary" strings built from the feed.
doc

In [None]:
from langchain.schema import Document

# Wrap every news string from `doc` in a Document so it can be split and indexed.
docs = [Document(page_content=news_text) for news_text in doc]

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk the documents; consecutive chunks share an overlap so context that
# straddles a boundary is not lost.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)


In [None]:
# Show the chunks produced by the text splitter.
splits

In [None]:
# Embed the chunks with `embeddings` and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

# Retriever view over the store; this is what the RAG chain queries.
retriever = vectorstore.as_retriever()
retriever

In [None]:
## Prompt Template
# System instructions: one sentence per item, joined by single spaces, then a
# blank line and the {context} placeholder that receives the retrieved news.
system_prompt = " ".join(
    (
        "You are a Global Financial Market News assistant for question-answering tasks.",
        "Use the following retrieved news context to answer the question.",
        "These news articles are the top trending financial news.",
        "If you don't know the answer, say that you don't know.",
        "Keep your answer concise and use no more than three sentences.",
    )
) + "\n\n{context}"

# Two-message chat prompt: the system instructions, then the user's question
# supplied through the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [None]:
# Answering step: the LLM driven by the prompt defined above.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG pipeline: retrieve with `retriever`, then run the answering step.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Ask the chain a general question and print the question with its answer.
response = rag_chain.invoke(
    {"input": "What is the current hot topic news about financial markets?"}
)
print("Input:", response["input"])
print("Answer:", response["answer"])

In [None]:
# Ask the chain a targeted question and print the question with its answer.
response = rag_chain.invoke(
    {"input": "What powell said?"}
)
print("Input:", response["input"])
print("Answer:", response["answer"])