[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Hawksight-AI/semantica/blob/main/cookbook/use_cases/trading/02_News_Sentiment_Analysis.ipynb)

# News Sentiment Analysis - Semantic Extraction & Correlation

## Overview

This notebook demonstrates **news sentiment analysis** using Semantica, with a focus on **sentiment extraction**, **correlation analysis**, **financial knowledge graph (KG) construction**, and **price movement prediction**. The pipeline correlates news sentiment with price movements using a financial knowledge graph.

### Key Features

- **Sentiment Extraction**: Extracts sentiment from financial news articles
- **Correlation Analysis**: Correlates sentiment with price movements
- **Financial Knowledge Graph**: Builds financial KGs for correlation analysis
- **Price Movement Prediction**: Predicts price movements based on sentiment
- **Semantic Extraction**: Uses semantic chunking and custom entity types (Article, Sentiment, Price, Correlation, Trend) to extract structured facts from news text

### Pipeline Architecture

1. **Phase 0**: Setup & Configuration
2. **Phase 1**: Financial News Ingestion
3. **Phase 2**: Sentiment Extraction
4. **Phase 3**: Financial Entity Extraction (Article, Sentiment, Price, Correlation, Trend)
5. **Phase 4**: Financial Knowledge Graph Construction
6. **Phase 5**: Sentiment-Price Correlation Analysis
7. **Phase 6**: Price Movement Prediction
8. **Phase 7**: Visualization & Export

---

## Installation


In [None]:
# Install Semantica and the supporting packages for this notebook
# (-q keeps pip output quiet, -U upgrades packages that are already installed).
%pip install -qU semantica networkx matplotlib plotly pandas groq


---

## Phase 0: Setup & Configuration


In [None]:
import os
from semantica.core import Semantica, ConfigManager
from semantica.semantic_extract import SentimentAnalyzer

# Keep GROQ_API_KEY as-is when the environment already provides it; otherwise
# fall back to the "your-key" placeholder value.
os.environ.setdefault("GROQ_API_KEY", "your-key")

# Settings for the extraction provider/model and the knowledge-graph backend.
extraction_settings = {"provider": "groq", "model": "llama-3.1-8b-instant"}
graph_settings = {"backend": "networkx"}
config_dict = {
    "project_name": "News_Sentiment_Analysis",
    "extraction": extraction_settings,
    "knowledge_graph": graph_settings,
}

# Load the settings, then create the framework entry point and the sentiment
# analyzer that the later cells use.
config = ConfigManager().load_from_dict(config_dict)
core = Semantica(config=config)
sentiment_analyzer = SentimentAnalyzer()
print("Configured for news sentiment analysis with semantic extraction focus")


---

## Phase 1: Real Data Ingestion (Financial RSS Feeds)

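Ingest financial news from RSS feeds. If no feed URLs are configured (or every feed fails), the cell falls back to a small sample news file written to `data/financial_news.txt`.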


In [None]:
from semantica.ingest import FeedIngestor, FileIngestor
from semantica.normalize import TextNormalizer
from semantica.split import TextSplitter
from semantica.context import AgentContext
from semantica.vector_store import VectorStore
import os

# The sample file below is written into ./data, so create it up front.
os.makedirs("data", exist_ok=True)

# RSS sources to ingest. The list is empty by default, in which case the
# sample-data fallback further down is used.
financial_feeds = [
    # Add financial news RSS feeds here
]

# Ingest each feed on its own so that a failing feed is reported and skipped
# instead of aborting the whole cell.
documents = []
for feed_url in financial_feeds:
    try:
        documents.extend(FeedIngestor().ingest(feed_url, method="rss"))
    except Exception as error:
        print(f"Feed ingestion failed: {error}")

# Fallback: nothing was ingested from feeds, so write a small sample file and
# ingest that instead.
if not documents:
    news_data = """
    2024-01-01: Positive news about AAPL earnings beat expectations. Stock price rose 5%.
    2024-01-02: Negative sentiment: MSFT faces regulatory concerns. Stock price dropped 3%.
    2024-01-03: Mixed sentiment: GOOGL announces new product. Stock price stable.
    Correlation: Positive news correlates with price increases. Negative news correlates with price drops.
    """
    with open("data/financial_news.txt", "w") as sample_file:
        sample_file.write(news_data)
    documents = FileIngestor().ingest("data/financial_news.txt")
    print(f"Ingested {len(documents)} documents from sample data")

# Normalize every ingested document before chunking.
normalizer = TextNormalizer()

# The same normalization switches are applied to every document.
normalize_options = {
    "clean_html": True,
    "normalize_entities": True,
    "remove_extra_whitespace": True,
}

# Documents exposing a `content` attribute contribute that attribute; anything
# else is converted with str() first.
normalized_documents = [
    normalizer.normalize(
        doc.content if hasattr(doc, 'content') else str(doc),
        **normalize_options,
    )
    for doc in documents
]

print(f"Normalized {len(normalized_documents)} documents")

# Split the normalized text with the "semantic_transformer" method.
splitter = TextSplitter(method="semantic_transformer", chunk_size=1000, similarity_threshold=0.7)
# Alternative: entity_aware for financial entity preservation
# splitter = TextSplitter(method="entity_aware", ner_method="llm", chunk_size=1000)

# Flatten the chunks of all documents into one list of plain strings; a chunk
# with a `content` attribute contributes that attribute, otherwise its str().
chunked_docs = [
    piece.content if hasattr(piece, 'content') else str(piece)
    for text in normalized_documents
    for piece in splitter.split(text)
]

print(f"Created {len(chunked_docs)} semantic chunks")

# Run sentiment analysis over a copy of the chunk list.
sentiments = sentiment_analyzer.analyze_sentiment(list(chunked_docs))

# Build the knowledge base from the same chunks, requesting both a graph and
# embeddings, with the finance-specific entity types.
financial_entity_types = ["Article", "Sentiment", "Price", "Correlation", "Trend"]
result = core.build_knowledge_base(
    sources=chunked_docs,
    custom_entity_types=financial_entity_types,
    graph=True,
    embeddings=True,
)

kg = result["knowledge_graph"]

# Vector store for GraphRAG.
# NOTE(review): dimension=384 is hard-coded; presumably it must match the size
# of the embedding vectors in `result` - confirm.
vector_store = VectorStore(backend="faiss", dimension=384)
embedding_payload = result.get("embeddings")
if embedding_payload:
    # Only populate the store when the build actually returned embeddings.
    vector_store.store_vectors(
        vectors=embedding_payload["vectors"],
        metadata=embedding_payload["metadata"],
    )
context = AgentContext(vector_store=vector_store, knowledge_graph=kg)

# Summary of what this cell produced.
entity_count = len(kg.get('entities', []))
print(f"Built financial KG with {entity_count} entities")
print(f"Extracted {len(sentiments)} sentiment scores")
print("Focus: Sentiment extraction, enhanced GraphRAG, correlation analysis, financial KG, price movement prediction")


In [None]:
# Correlate sentiment with price movements
from semantica.reasoning import GraphReasoner

# Query the reasoner with "Sentiment" as the source type and "Price" as the
# target type.
reasoner = GraphReasoner(kg)
correlations = reasoner.find_correlations(source_types=["Sentiment"], target_types=["Price"])

# Collect the relationships whose predicate text mentions "correlate" or
# "sentiment" (compared case-insensitively; a missing predicate counts as "").
sentiment_price_rels = []
for rel in kg.get("relationships", []):
    predicate = str(rel.get("predicate", "")).lower()
    if "correlate" in predicate or "sentiment" in predicate:
        sentiment_price_rels.append(rel)

print(f"Correlation analysis: {len(correlations)} sentiment-price correlations found")
print(f"Sentiment-price relationships: {len(sentiment_price_rels)} relationships identified")
print("This cookbook emphasizes semantic extraction and correlation analysis")


---

## Phase 7: Visualization & Export


In [None]:
from semantica.visualization import KGVisualizer

# Visualize the knowledge graph, passing news_sentiment_kg.html as the output path.
visualizer = KGVisualizer()
visualizer.visualize(kg, output_path="news_sentiment_kg.html")

# Closing summary, emitted as two separate lines.
print(
    "News sentiment analysis complete",
    "Emphasizes: Sentiment extraction, correlation analysis, financial KG, price movement prediction",
    sep="\n",
)
