<a href="https://colab.research.google.com/github/navneethiu/GenAI_Architect_Azure/blob/main/Navneeth_Assignment1_Market_Sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
pip install -r /content/sample_data/requirements.txt



In [11]:
import os
import json
import mlflow
import yfinance as yf
from typing import Dict, List, Optional
from pydantic import BaseModel, Field
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool
from langchain.chains import LLMChain
from langchain.schema import BaseOutputParser
import requests
from datetime import datetime, timedelta
import time
import logging

In [12]:
# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv('/content/sample_data/.env')

# Load environment variables
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") # Add this line
azure_api_version = os.getenv("AZURE_OPENAI_API_VERSION") # Add this line
openai_api_type = os.getenv("OPENAI_API_TYPE") # Add this line




In [13]:
# Pydantic model for structured output
class SentimentAnalysis(BaseModel):
    company_name: str = Field(description="Name of the company")
    stock_code: str = Field(description="Stock ticker symbol")
    newsdesc: str = Field(description="Summary of news articles")
    sentiment: str = Field(description="Overall sentiment: Positive/Negative/Neutral")
    people_names: List[str] = Field(description="Names of people mentioned in news")
    places_names: List[str] = Field(description="Places mentioned in news")
    other_companies_referred: List[str] = Field(description="Other companies mentioned")
    related_industries: List[str] = Field(description="Related industries mentioned")
    market_implications: str = Field(description="Potential market implications")
    confidence_score: float = Field(description="Confidence score between 0 and 1")

In [14]:
class MarketSentimentAnalyzer:
    def __init__(self, azure_endpoint: str, azure_api_key: str, azure_deployment: str, openai_api_version: str, openai_api_type: str):
        """Initialize the Market Sentiment Analyzer"""
        self.azure_endpoint = azure_endpoint
        self.azure_api_key = azure_api_key
        self.azure_deployment = azure_deployment
        self.openai_api_version = openai_api_version
        self.openai_api_type = openai_api_type

        # Print the values being passed to the MarketSentimentAnalyzer constructor
        print(f"Values passed to MarketSentimentAnalyzer:") # Add this line
        print(f"  azure_endpoint: {azure_endpoint}") # Add this line
        print(f"  azure_api_key: {azure_api_key}") # Add this line
        print(f"  azure_deployment: {azure_deployment}") # Add this line
        print(f"  azure_api_version: {azure_api_version}") # Add this line
        print(f"  openai_api_type: {openai_api_type}") # Add this line


        self.llm = AzureChatOpenAI(
              azure_endpoint=azure_endpoint,
              openai_api_key=azure_api_key,
              azure_deployment=azure_deployment,
              openai_api_version=openai_api_version,
              openai_api_type=openai_api_type,
              temperature=0.1,
              streaming=False,
          )

        # Initialize Yahoo Finance News tool
        self.news_tool = YahooFinanceNewsTool()

        # Initialize MLflow
        mlflow.set_tracking_uri("http://20.75.92.162:5000/")
        mlflow.set_experiment("navneeth-market-sentiment-analysis")

        # Output parser
        self.output_parser = PydanticOutputParser(pydantic_object=SentimentAnalysis)

        # Create the sentiment analysis prompt
        self.sentiment_prompt = PromptTemplate(
            template="""
            You are a financial analyst expert in market sentiment analysis.

            Analyze the following news articles about {company_name} (Stock: {stock_code}) and provide a structured sentiment analysis.

            News Content:
            {news_content}

            Instructions:
            1. Determine the overall sentiment (Positive/Negative/Neutral)
            2. Extract people names mentioned
            3. Extract places/locations mentioned
            4. Identify other companies referenced
            5. Identify related industries
            6. Analyze potential market implications
            7. Provide a confidence score (0.0 to 1.0)

            {format_instructions}

            Provide your analysis as a JSON object following the exact structure specified.
            """,
            input_variables=["company_name", "stock_code", "news_content"],
            partial_variables={"format_instructions": self.output_parser.get_format_instructions()}
        )

        # Create LangChain chain
        self.sentiment_chain = LLMChain(
            llm=self.llm,
            prompt=self.sentiment_prompt,
            output_parser=self.output_parser
        )

    def get_stock_code(self, company_name: str) -> Optional[str]:
        """Extract stock code from company name using yfinance"""
        with mlflow.start_span(name="stock_code_extraction") as span:
            try:
                # Common company name to ticker mappings
                common_tickers = {
                    "apple": "AAPL", "microsoft": "MSFT", "google": "GOOGL",
                    "amazon": "AMZN", "tesla": "TSLA", "meta": "META",
                    "nvidia": "NVDA", "netflix": "NFLX", "disney": "DIS"
                }

                company_lower = company_name.lower()
                for key, ticker in common_tickers.items():
                    if key in company_lower:
                        span.set_attribute("method", "common_lookup")
                        span.set_attribute("ticker_found", ticker)
                        return ticker

                # Try to search for ticker using yfinance
                try:
                    # Search for the company
                    ticker_obj = yf.Ticker(company_name)
                    info = ticker_obj.info
                    if 'symbol' in info:
                        span.set_attribute("method", "yfinance_lookup")
                        span.set_attribute("ticker_found", info['symbol'])
                        return info['symbol']
                except:
                    pass

                span.set_attribute("ticker_found", None)
                return None

            except Exception as e:
                span.set_attribute("error", str(e))
                mlflow.log_param("stock_extraction_error", str(e))
                return None

    def fetch_company_news(self, company_name: str, stock_code: str) -> str:
        """Fetch recent news about the company using Yahoo Finance"""
        with mlflow.start_span(name="news_fetching") as span:
            try:
                span.set_attribute("stock_code", stock_code)
                span.set_attribute("company_name", company_name)

                if not stock_code or stock_code == "N/A":
                    logging.warning(f"No valid stock code for {company_name}, using company name for news search")
                    # Fallback: try to get news without ticker - Yahoo Finance tool may still work
                    news_content = "No stock ticker available for news lookup."
                    span.set_attribute("news_method", "no_ticker_fallback")
                    return news_content

                # Use Yahoo Finance news tool - much more reliable than web search
                try:
                    # The tool expects just the ticker symbol
                    news_result = self.news_tool.run(stock_code)
                    span.set_attribute("news_method", "yahoo_finance_success")

                    # Yahoo Finance tool returns structured news data
                    if news_result and len(str(news_result)) > 50:
                        news_content = str(news_result)
                        # Trim if too long to avoid token limits
                        if len(news_content) > 2000:
                            news_content = news_content[:2000] + "... [content truncated for processing]"
                    else:
                        news_content = f"Limited news data available for {company_name} ({stock_code})"

                except Exception as news_error:
                    logging.warning(f"Yahoo Finance news fetch failed for {stock_code}: {news_error}")
                    span.set_attribute("news_error", str(news_error))

                    # Fallback: try with yfinance directly for basic info
                    try:
                        ticker = yf.Ticker(stock_code)
                        # Get basic company info as fallback
                        info = ticker.info
                        news_content = f"Company: {info.get('longName', company_name)}. "
                        news_content += f"Sector: {info.get('sector', 'Unknown')}. "
                        news_content += f"Industry: {info.get('industry', 'Unknown')}. "
                        news_content += f"Market Cap: {info.get('marketCap', 'N/A')}. "
                        news_content += "Note: Recent news data unavailable - analysis based on company profile."
                        span.set_attribute("news_method", "yfinance_fallback")
                    except:
                        news_content = f"Unable to fetch news data for {company_name} ({stock_code}). Analysis will be limited."
                        span.set_attribute("news_method", "complete_fallback")

                span.set_attribute("news_content_length", len(news_content))
                mlflow.log_param("news_content_length", len(news_content))

                logging.info(f"Fetched {len(news_content)} characters of news content for {stock_code}")
                return news_content

            except Exception as e:
                span.set_attribute("error", str(e))
                mlflow.log_param("news_fetch_error", str(e))
                logging.error(f"Error fetching news: {e}")
                return f"News fetch failed for {company_name}. Error: {str(e)[:100]}..."

    def analyze_sentiment(self, company_name: str, stock_code: str, news_content: str) -> Dict:
        """Analyze sentiment using Azure OpenAI LLM"""
        with mlflow.start_span(name="sentiment_analysis") as span:
            try:
                # Run the LangChain chain with retries
                max_retries = 2
                for attempt in range(max_retries):
                    try:
                        result = self.sentiment_chain.run(
                            company_name=company_name,
                            stock_code=stock_code,
                            news_content=news_content
                        )
                        break
                    except Exception as e:
                        if "rate limit" in str(e).lower() or "quota" in str(e).lower():
                            if attempt < max_retries - 1:
                                logging.warning(f"Rate limit hit, waiting 5 seconds...")
                                time.sleep(5)
                                continue
                        raise e

                # Handle different return types - the parser doesn't always work perfectly
                if isinstance(result, SentimentAnalysis):
                    result_dict = result.dict()
                elif isinstance(result, dict):
                    result_dict = result
                else:
                    # Sometimes the LLM returns a string that needs parsing
                    try:
                        result_dict = json.loads(str(result))
                    except:
                        raise ValueError(f"Could not parse LLM output: {result}")

                # Validate required fields are present
                required_fields = ["company_name", "stock_code", "sentiment", "confidence_score"]
                for field in required_fields:
                    if field not in result_dict:
                        result_dict[field] = "Unknown" if field != "confidence_score" else 0.0

                span.set_attribute("sentiment", result_dict.get("sentiment", "Unknown"))
                span.set_attribute("confidence_score", result_dict.get("confidence_score", 0.0))

                logging.info(f"Analysis complete: {result_dict.get('sentiment')} sentiment with {result_dict.get('confidence_score', 0):.2f} confidence")
                return result_dict

            except Exception as e:
                span.set_attribute("error", str(e))
                mlflow.log_param("sentiment_analysis_error", str(e))
                logging.error(f"Sentiment analysis failed: {e}")

                # Return structured fallback when LLM fails
                return {
                    "company_name": company_name,
                    "stock_code": stock_code,
                    "newsdesc": news_content[:200] + "..." if len(news_content) > 200 else news_content,
                    "sentiment": "Neutral",
                    "people_names": [],
                    "places_names": [],
                    "other_companies_referred": [],
                    "related_industries": [],
                    "market_implications": f"Analysis failed due to processing error: {str(e)[:100]}",
                    "confidence_score": 0.0
                }

    def process_company(self, company_name: str) -> Dict:
        """Main processing pipeline"""
        with mlflow.start_run(run_name=f"sentiment_analysis_{company_name}"):
            # Log input parameters
            mlflow.log_param("company_name", company_name)
            mlflow.log_param("timestamp", datetime.now().isoformat())

            try:
                # Step 1: Get stock code
                stock_code = self.get_stock_code(company_name)
                if not stock_code:
                    stock_code = "N/A"

                mlflow.log_param("stock_code", stock_code)

                # Step 2: Fetch news
                news_content = self.fetch_company_news(company_name, stock_code)

                # Step 3: Analyze sentiment
                result = self.analyze_sentiment(company_name, stock_code, news_content)

                # Log results
                mlflow.log_param("sentiment", result.get("sentiment"))
                mlflow.log_param("confidence_score", result.get("confidence_score"))
                mlflow.log_param("market_implications", result.get("market_implications"))

                # Log the complete result as an artifact
                mlflow.log_dict(result, "sentiment_analysis_result.json")

                return result

            except Exception as e:
                mlflow.log_param("pipeline_error", str(e))
                return {
                    "company_name": company_name,
                    "stock_code": "ERROR",
                    "newsdesc": f"Pipeline error: {str(e)}",
                    "sentiment": "Neutral",
                    "people_names": [],
                    "places_names": [],
                    "other_companies_referred": [],
                    "related_industries": [],
                    "market_implications": "Unable to process due to error",
                    "confidence_score": 0.0
                }

In [15]:
def main():
    """Main function for testing - used this during development"""

    # Load environment variables
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
    azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") # Add this line
    azure_api_version = os.getenv("AZURE_OPENAI_API_VERSION") # Add this line
    openai_api_type = os.getenv("OPENAI_API_TYPE") # Add this line

    # Initialize analyzer
    print("Initializing analyzer with Yahoo Finance news...")
    analyzer = MarketSentimentAnalyzer(azure_endpoint, azure_api_key, azure_deployment, azure_api_version, openai_api_type)

    # Test with Microsoft - worked well during testing
    test_companies = ["Microsoft", "Apple", "Tesla"]

    for company_name in test_companies:
        print(f"\n{'='*50}")
        print(f"Analyzing sentiment for: {company_name}")
        print(f"{'='*50}")

        start_time = time.time()
        result = analyzer.process_company(company_name)
        end_time = time.time()

        print(f"\nAnalysis completed in {end_time - start_time:.2f} seconds")
        print(f"Sentiment: {result.get('sentiment', 'N/A')}")
        print(f"Confidence: {result.get('confidence_score', 0):.2f}")
        print(f"Stock Code: {result.get('stock_code', 'N/A')}")

        # Save result for review
        filename = f"{company_name.lower()}_analysis.json"
        with open(filename, 'w') as f:
            json.dump(result, f, indent=2)
        print(f"Results saved to {filename}")

        # Brief pause between analyses to avoid rate limits
        if company_name != test_companies[-1]:
            time.sleep(2)

if __name__ == "__main__":
    main()

Initializing analyzer with Yahoo Finance news...
Values passed to MarketSentimentAnalyzer:
  azure_endpoint: https://eastus.api.cognitive.microsoft.com/
  azure_api_key: a66b314ed6934fe3baff2b1393b78751
  azure_deployment: gpt4o
  azure_api_version: 2024-08-01-preview
  openai_api_type: azure


2025/09/17 06:06:20 INFO mlflow.tracking.fluent: Experiment with name 'navneeth-market-sentiment-analysis' does not exist. Creating a new experiment.



Analyzing sentiment for: Microsoft


/tmp/ipython-input-2897799663.py:195: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  result_dict = result.dict()


🏃 View run sentiment_analysis_Microsoft at: http://20.75.92.162:5000/#/experiments/224617341359404100/runs/670d318b285841c19caf0d438d9b230b
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/224617341359404100

Analysis completed in 3.68 seconds
Sentiment: Neutral
Confidence: 0.80
Stock Code: MSFT
Results saved to microsoft_analysis.json

Analyzing sentiment for: Apple


/tmp/ipython-input-2897799663.py:195: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  result_dict = result.dict()


🏃 View run sentiment_analysis_Apple at: http://20.75.92.162:5000/#/experiments/224617341359404100/runs/1757038e763f4f6f9bbcb689f3724405
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/224617341359404100

Analysis completed in 58.32 seconds
Sentiment: Neutral
Confidence: 0.80
Stock Code: AAPL
Results saved to apple_analysis.json

Analyzing sentiment for: Tesla


/tmp/ipython-input-2897799663.py:195: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  result_dict = result.dict()


🏃 View run sentiment_analysis_Tesla at: http://20.75.92.162:5000/#/experiments/224617341359404100/runs/01df71a79281494a8911689077ac8a1b
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/224617341359404100

Analysis completed in 60.45 seconds
Sentiment: Neutral
Confidence: 0.70
Stock Code: TSLA
Results saved to tesla_analysis.json
