<a href="https://colab.research.google.com/github/mems250005048-wq/GDG/blob/main/Task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install -qU langchain \
                langchain-community \
                langchain-groq \
                langchain-huggingface \
                langchain-text-splitters \
                chromadb \
                newsapi-python

In [7]:
import os
from google.colab import userdata
from newsapi import NewsApiClient
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

import hashlib  # stdlib; used below to derive stable chunk ids

# 0. CONFIGURATION: the values a reader is most likely to change
NEWS_QUERY = "Nvidia"         # search term sent to NewsAPI (e.g. 'Nvidia' or 'Tesla')
NEWS_PAGE_SIZE = 5            # number of articles requested
PERSIST_DIR = "./finance_db"  # folder in the Colab file system that holds the Chroma store

# 1. SETUP KEYS (read from Colab secrets; never hard-code them)
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
news_api_key = userdata.get('NEWS_API')
newsapi = NewsApiClient(api_key=news_api_key)

# 2. FETCH REAL DATA: request the most relevant English articles for NEWS_QUERY
print("Fetching live news...")
raw_news = newsapi.get_everything(
    q=NEWS_QUERY,
    language='en',
    sort_by='relevancy',
    page_size=NEWS_PAGE_SIZE,
)

# 3. TRANSFORM: Convert JSON news into LangChain Documents
docs = []
for article in raw_news.get('articles') or []:
    # Fields may be missing or None; normalise them so the literal
    # text "None" is never embedded into the vector store.
    title = (article.get('title') or "").strip()
    description = (article.get('description') or "").strip()
    if not (title or description):
        continue  # nothing useful to index for this article
    source_name = (article.get('source') or {}).get('name') or "Unknown"
    content = f"Title: {title}\nSource: {source_name}\nContent: {description}"
    docs.append(Document(page_content=content, metadata={"source": article.get('url') or ""}))

if not docs:
    # Fail early with a clear message instead of an opaque error from the vector store.
    raise ValueError(f"NewsAPI returned no usable articles for query {NEWS_QUERY!r}.")

# 4. CHUNK: Split into smaller pieces for the LLM
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
raw_splits = text_splitter.split_documents(docs)

# Give every chunk an id derived from its source URL and text. Keying a dict by
# that id removes duplicates within this batch, and passing the ids to the store
# lets a re-run of this cell overwrite existing entries instead of appending copies.
chunks_by_id = {
    hashlib.sha256(
        f"{chunk.metadata.get('source', '')}\n{chunk.page_content}".encode("utf-8")
    ).hexdigest(): chunk
    for chunk in raw_splits
}
splits = list(chunks_by_id.values())

# 5. EMBED & STORE: The "Library" setup
print("Encoding and saving to library...")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# This creates a folder named after PERSIST_DIR in your Colab files
vector_db = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=list(chunks_by_id.keys()),
    persist_directory=PERSIST_DIR,
)

print(f"Success! Saved {len(splits)} news chunks to your local library.")

Fetching live news...
Encoding and saving to library...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Success! Saved 1 news chunks to your local library.


In [9]:
!pip install -qU langchain-chroma

In [10]:
import os
from google.colab import userdata
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma  # <--- UPDATED IMPORT
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field

# 1. INITIALIZE BRAIN AND LIBRARY
# The Groq client reads its key from the environment; the value comes from Colab secrets.
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

# Chat model: Llama 3.3 (70B, "versatile") served by Groq, called with temperature 0.
llm = ChatGroq(temperature=0, model="llama-3.3-70b-versatile")

# Same embedding model name that the ingestion cell used to build the store.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Re-open the persisted store from ./finance_db via the langchain_chroma package,
# then expose it as a retriever that returns the 3 closest chunks per query.
vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory="./finance_db",
)
retriever = vector_db.as_retriever(search_kwargs=dict(k=3))

# 2. DEFINE THE JSON TEMPLATE
class StockAnalysis(BaseModel):
    """Schema of the structured answer the LLM is asked to produce.

    The field descriptions and constraints end up in the JSON schema that is
    passed to the model as format instructions, so they act as part of the prompt.
    """

    # Overall market mood inferred from the retrieved news.
    sentiment: str = Field(description="The market sentiment: Bullish, Bearish, or Neutral")
    # Short explanation of the movement.
    summary: str = Field(description="A 1-sentence summary of why the stock is moving")
    # The 1..10 range is declared as a constraint so it appears in the schema,
    # not only in the free-text description.
    risk_level: int = Field(ge=1, le=10, description="Risk score from 1 to 10")

# Parses the model's reply as JSON; the schema of StockAnalysis supplies the format instructions.
parser = JsonOutputParser(pydantic_object=StockAnalysis)


def _format_docs(documents):
    """Join the text of the retrieved documents into one plain-text context block."""
    return "\n\n".join(doc.page_content for doc in documents)


# 3. THE PROMPT ARCHITECTURE
# The system message carries the role, the retrieved context and the JSON format
# instructions. The "not found" rule is phrased so it still yields valid JSON.
system_template = """You are a financial analyst. Use the following news context to answer the user's question.
If the news does not contain the answer, use 'Information not found' as the summary.
You MUST return your answer in valid JSON format.

Context: {context}
{format_instructions}"""

# The user's question goes in its own human message. The previous template had
# no {question} placeholder, so the question never reached the model.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("human", "{question}"),
])

# 4. THE RAG CHAIN
# The input string is used twice: as the retrieval query (whose results are
# flattened to text) and, unchanged, as the question shown to the model.
chain = (
    {"context": retriever | _format_docs, "question": lambda x: x}
    | prompt.partial(format_instructions=parser.get_format_instructions())
    | llm
    | parser
)

# 5. ASK A QUESTION
question = "What is the latest outlook on Nvidia based on the news?"
response = chain.invoke(question)

print("--- ANALYST JSON RESPONSE ---")
print(response)

--- ANALYST JSON RESPONSE ---
{'sentiment': 'Neutral', 'summary': "Universal Music Group is partnering with Nvidia to bring a new AI model to one of the world's largest music catalogs", 'risk_level': 5}


In [11]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# 1. CREATE UI COMPONENTS
# Output area that the click handler prints into.
output = widgets.Output()

# Button that triggers the analysis.
button = widgets.Button(button_style='success', description="Analyze News")

# Free-text box for the user's question, taking 70% of the row width.
text_input = widgets.Text(
    description='Question:',
    placeholder='Ask about a stock (e.g., "What is the outlook for Nvidia?")',
    layout=widgets.Layout(width='70%'),
)

# 2. DEFINE THE CHAT LOGIC
def on_button_clicked(b):
    """Run the RAG chain on the text box content and print the analysis.

    Args:
        b: The widget that fired the click event (unused).

    All output, including error messages, is written into the `output` widget.
    """
    with output:
        clear_output()
        # Strip so that whitespace-only input counts as empty.
        question = text_input.value.strip()
        if not question:
            print("Please enter a question!")
            return

        print(f"🔍 Searching library for: {question}...")
        try:
            # Using the 'chain' we built in the previous step
            response = chain.invoke(question)
        except Exception as e:
            # Catch broadly on purpose: this is a UI callback, so failures are
            # shown to the user instead of being lost.
            print(f"❌ Error: {str(e)}")
            return

        if not isinstance(response, dict):
            print(f"❌ Error: unexpected response format: {response!r}")
            return

        # Use .get so a key the model left out shows as N/A instead of a bare KeyError.
        print("\n--- 📈 FINANCIAL ANALYSIS ---")
        print(f"SENTIMENT: {response.get('sentiment', 'N/A')}")
        print(f"RISK LEVEL: {response.get('risk_level', 'N/A')}/10")
        print(f"SUMMARY: {response.get('summary', 'N/A')}")

# 3. CONNECT BUTTON TO LOGIC
# Register the handler so each click runs the analysis.
button.on_click(on_button_clicked)

# 4. DISPLAY EVERYTHING
# Question box and button share one row; the output area sits underneath.
query_row = widgets.HBox([text_input, button])
app_layout = widgets.VBox([query_row, output])
display(app_layout)

VBox(children=(HBox(children=(Text(value='', description='Question:', layout=Layout(width='70%'), placeholder=…