In [2]:
# !uv add langchain
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from langchain_core.prompts import PromptTemplate
from typing import List, Dict

In [3]:
# Usage
news = """Apple Inc. reported quarterly earnings that beat Wall Street expectations, 
with revenue up 8% year-over-year driven by strong iPhone sales in emerging markets."""

In [4]:
class FinBERTSentimentAnalyzer:
    def __init__(self):
        """initialize FinBERT model"""
        model_name = "ProsusAI/finbert"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.labels = ["positive", "negative", "neutral"]
    
    def analyze(self, news_text: str) -> Dict[str, any]:
        """Analyze sentiment with probabilities"""
        inputs = self.tokenizer(news_text, return_tensors="pt",
                                truncation=True, max_length=512)
        
        with torch.no_grad():
            outputs = self.model(**inputs)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        
        scores = predictions[0].tolist()
        sentiment_dict = {label: score for label, score in zip(self.labels, scores)}
        
        # Get primary sentiment
        primary_sentiment = self.labels[scores.index(max(scores))]
        confidence = max(scores)
        
        return {
            "sentiment": primary_sentiment,
            "confidence": confidence,
            "scores": sentiment_dict,
            "text": news_text
        }
    
    def batch_analyze(self, news_list: List[str]) -> List[Dict]:
        """Analyze multiple news items"""
        return [self.analyze(news) for news in news_list]


In [None]:

# Usage
news = """Apple Inc. reported quarterly earnings that beat Wall Street expectations, 
with revenue up 8% year-over-year driven by strong iPhone sales in emerging markets."""

analyzer = FinBERTSentimentAnalyzer()
result = analyzer.analyze(news)
print(f"Sentiment: {result['sentiment']}")
print(f"Confidence: {result['confidence']:.2%}")
print(f"All scores: {result['scores']}")

In [None]:
# from langchain_core.chains import LLMChain
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate

class HybridFinancialAnalyzer:
    def __init__(self):
        self.finbert = FinBERTSentimentAnalyzer()
        self.llm = OllamaLLM(model="minimax-m2:cloud", temperature=0.1)
        
        self.explanation_prompt = PromptTemplate(
            input_variables=["news_text", "sentiment", "confidence"],
            template="""The following financial news has been classified as {sentiment} with {confidence:.1%} confidence:

News: {news_text}

Provide a detailed analysis explaining:
1. Why this sentiment classification makes sense
2. Key financial indicators or events mentioned
3. Potential market implications
4. Any risks or uncertainties

Analysis:"""
        )
        
        # self.chain = LLMChain(llm=self.llm, prompt=self.explanation_prompt)
    
    def analyze(self, news_text: str) -> Dict:
        # Get quick sentiment from FinBERT
        print("Analyzing sentiment with FinBERT...")
        finbert_result = self.finbert.analyze(news_text)

        # Create the chain using the | operator (modern LangChain syntax)
        print("Generating detailed analysis with LLM...")
        chain = self.explanation_prompt | self.llm
        # Get detailed explanation from LLM
        explanation = chain.invoke({
            "news_text": news_text,
            "sentiment": finbert_result['sentiment'],
            "confidence": finbert_result['confidence']
        })
        print("Analysis complete!")
        return {
            **finbert_result,
            "detailed_analysis": explanation
        }
# print(result)

In [14]:
# Usage
hybrid_analyzer = HybridFinancialAnalyzer()
result = hybrid_analyzer.analyze(news)
print(f"Sentiment: {result['sentiment']}")
print(f"Confidence: {result['confidence']:.2%}")
print(f"All scores: {result['scores']}")
print(result['detailed_analysis'])

Analyzing sentiment with FinBERT...
Generating detailed analysis with LLM...
Analysis complete!
Sentiment: positive
Confidence: 95.90%
All scores: {'positive': 0.9590075612068176, 'negative': 0.02066086418926716, 'neutral': 0.020331567153334618}
## Analysis of Apple Earnings News Classification

### 1. Why This Sentiment Classification Makes Sense

The 95.9% confidence positive classification is justified for several compelling reasons:

**Fundamental Beat**: Apple "beat Wall Street expectations" represents a fundamental positive surprise for investors, as beating earnings estimates typically drives stock prices higher and signals company management is outperforming market consensus.

**Growth Confirmation**: The 8% year-over-year revenue increase demonstrates continuous business growth, which is particularly important for a mature, large-cap company like Apple where investors often worry about growth deceleration.

**Market Expansion**: Strong iPhone sales in "emerging markets" (likel

In [15]:
import requests
from datetime import datetime, timedelta

class NewsRetriever:
    def get_financial_news(self, query: str = "stocks", days_back: int = 1):
        """
        Free news sources you can use:
        - NewsAPI (free tier)
        - RSS feeds from financial sites
        - Alpha Vantage news API
        """
        # Example with RSS (completely free)
        import feedparser
        
        rss_feeds = [
            "https://feeds.finance.yahoo.com/rss/2.0/headline",
            "https://www.cnbc.com/id/100003114/device/rss/rss.html",
        ]
        
        articles = []
        for feed_url in rss_feeds:
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:5]:  # Get top 5 from each
                articles.append({
                    'title': entry.title,
                    'summary': entry.get('summary', ''),
                    'link': entry.link,
                    'published': entry.get('published', '')
                })
        
        return articles


In [16]:

# Complete workflow
news_retriever = NewsRetriever()
analyzer = HybridFinancialAnalyzer()

# Get news and analyze
news_items = news_retriever.get_financial_news()
for item in news_items:
    text = f"{item['title']}. {item['summary']}"
    sentiment = analyzer.analyze(text)
    print(f"\nNews: {item['title']}")
    print(f"Sentiment: {sentiment['sentiment']} ({sentiment['confidence']:.1%})")
    llm_result = analyzer.analyze(text)
    print(llm_result['detailed_analysis'])

Analyzing sentiment with FinBERT...
Generating detailed analysis with LLM...
Analysis complete!

News: Shutdown means another missed jobs report Friday. Here's what it probably would have shown
Sentiment: negative (96.1%)
Analyzing sentiment with FinBERT...
Generating detailed analysis with LLM...
Analysis complete!
**Analysis of Negative Sentiment Classification (96.1% Confidence)**

## 1. Why This Sentiment Classification Makes Sense

The 96.1% negative confidence classification is highly justified because this news represents a significant institutional failure with multiple negative implications:

- **Governance Disruption**: A "record-long government shutdown" signals systemic dysfunction in federal operations
- **Information Asymmetry**: The absence of official jobs data creates a critical information void in financial markets
- **Economic Monitoring Failure**: Jobs reports are among the most closely watched economic indicators; their absence hampers market participants' ability 

In [17]:
import requests
from bs4 import BeautifulSoup

In [18]:
page = requests.get('https://www.moneycontrol.com/')
soup = BeautifulSoup(page.content, 'html.parser')

In [19]:
weblinks = soup.find_all('article')

In [34]:
page = requests.get('https://qz.com/africa/latest') 
soup = BeautifulSoup(page.content, 'html.parser')
weblinks = soup.find_all('a', class_="hover:text-neutral-500")
# pagelinks = []

# for link in weblinks[5:]:    
#     url = link.contents[0].find_all('a')[0]   
#     pagelinks.append('http://qz.com'+url.get('href'))

In [None]:
<div class="basis-0 grow"><a class="hover:text-neutral-500" href="/nvidia-blackwell-ai-chips-china-jensen-huang"><h4 class="font-bold mb-1 text-xl md:text-xl md:font-semibold"><span>Nvidia isn't going back to China anytime soon, CEO signals</span></h4><p class="font-sans text-sm text-ellipsis overflow-hidden">Jensen Huang had previously said that Nvidia's business in China is now “100% out,” describing a collapse from “95% market share to 0%”</p></a></div>

In [28]:
import requests
from bs4 import BeautifulSoup

def extract_qz_articles():
    page = requests.get('https://qz.com/africa/latest')
    soup = BeautifulSoup(page.content, 'html.parser')
    
    articles = []
    
    # Target the specific div structure you showed
    article_divs = soup.find_all('div', class_='basis-0 grow')
    
    for div in article_divs:
        # Find the link within this div
        link = div.find('a', href=True)
        
        if link:
            # Extract href
            href = link.get('href')
            full_url = f'https://qz.com{href}' if href.startswith('/') else href
            
            # Extract title from the span inside h4
            title_element = link.find('h4')
            title = ''
            if title_element:
                span = title_element.find('span')
                title = span.get_text(strip=True) if span else title_element.get_text(strip=True)
            
            # Extract summary from the p tag
            summary_element = link.find('p')
            summary = summary_element.get_text(strip=True) if summary_element else ''
            
            if title:  # Only add if we found a title
                articles.append({
                    'title': title,
                    'summary': summary,
                    'url': full_url,
                    'href': href
                })
    
    return articles

# Usage
articles = extract_qz_articles()
for i, article in enumerate(articles[:5]):
    print(f"{i+1}. Title: {article['title']}")
    print(f"   Summary: {article['summary'][:100]}...")
    print(f"   URL: {article['url']}\n")