In [1]:
import yfinance as yf
import pandas as pd
from pprint import pprint
from markitdown import MarkItDown
import requests
import re
import traceback

In [39]:
def get_news(stock: str) -> list:
    """
    Fetch relevant news articles for a given stock ticker.

    Only feed items whose ``content['contentType']`` equals ``'STORY'`` are kept.

    Parameters:
    - stock (str): The stock ticker symbol.

    Returns:
    - list: A list of dictionaries with the keys ``title``, ``summary``, ``url``
      and ``pubdate`` (the part of ``pubDate`` before the ``'T'``). Fields that
      are missing or ``None`` in the feed come back as ``None`` (``''`` for
      ``pubdate``). An empty list is returned when there is no news or when
      fetching fails, so callers can always iterate over the result.
    """
    try:
        # Fetch the ticker object and retrieve its news
        news = yf.Ticker(stock).news

        if not news:
            print(f"No news found for {stock}.")
            return []

        all_news = []
        for i, item in enumerate(news):
            # Filtering happens inside the per-item try so that one malformed
            # entry is skipped instead of aborting the whole fetch.
            # Note: `i` is the position in the raw feed, not in the filtered list.
            try:
                # `or {}` / `or ''` also cover keys that are present but None.
                content = item.get('content') or {}
                if content.get('contentType') != 'STORY':
                    continue
                all_news.append({
                    'title': content.get('title'),
                    'summary': content.get('summary'),
                    'url': (content.get('canonicalUrl') or {}).get('url'),
                    'pubdate': (content.get('pubDate') or '').split('T')[0],
                })
            except Exception as e:
                print(f"Error processing news {i}: {e}")
                continue

        return all_news

    except Exception as e:
        print(f"An error occurred while fetching news for {stock}: {e}")
        # Keep the documented return type: an empty list, never None.
        return []


    
# Quick check of the fetcher: pull the SOFI feed and display the second article (index 1).
# NOTE(review): the indexing assumes get_news returned at least two items; it raises otherwise.
news = get_news('SOFI')
news[1]

{'title': "This Cathie Wood Fintech Stock Just Hit a New 52-Week High -- but I'm Not Selling a Single Share",
 'summary': "Cathie Wood's ARK Invest offers several popular exchange-traded funds (ETFs), and they tend to be rather concentrated, with all of them holding three dozen or fewer stocks.  The banking innovator is the sixth-largest holding in the ARK Fintech Innovation ETF (NYSEMKT: ARKF), making up 5% of the fund's total assets.  You'll also find about $95 million worth of SoFi stock in the flagship ARK Innovation ETF (NYSEMKT: ARKK), and it's also worth noting that the SoFi app is the exclusive distribution partner for the ARK Venture Fund (NASDAQMUTFUND: ARKVX), which allows investors to get exposure to companies like SpaceX and OpenAI before their initial public offering.",
 'url': 'https://www.fool.com/investing/2025/01/25/this-cathie-wood-fintech-stock-just-hit-a-new-52-w/?source=eptyholnk0000202&utm_source=yahoo-host-full&utm_medium=feed&utm_campaign=article&referring_guid

In [6]:
# Build one requests session with explicit default headers ...
session = requests.Session()
session.headers.update(
    {
        'User-Agent': 'python-requests/2.32.3',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
)

# ... and hand it to the MarkItDown converter used by extract_news.
md = MarkItDown(requests_session=session)

In [3]:
# Function to clean unnecessary links and special characters
def remove_links(text: str) -> str:
    """
    Strip URLs, bracketed link text and markdown punctuation from extracted text.

    Line breaks and runs of whitespace are collapsed to a single space so that
    words on adjacent lines (or separated by several spaces) are not fused
    together.

    Parameters:
    - text (str): Raw text, e.g. markdown produced from a web page.

    Returns:
    - str: The cleaned text on a single line.
    """
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'\[.*?\]', '', text)  # Remove markdown-style link text
    # Turn newlines into spaces *before* the slash rule below; otherwise
    # "a/b\nnext" would become "a/bnext" and the slash rule would eat "next".
    text = text.replace('\n', ' ')
    # Remove special characters (hyphens included, so "year-over-year" is joined)
    text = re.sub(r'[#*()+\-]', '', text)
    text = re.sub(r'/\S*', '', text)  # Remove a slash and whatever follows it up to whitespace
    text = re.sub(r'\s+', ' ', text)  # Collapse whitespace runs to one space
    return text

In [42]:
# Function to extract news content from a URL
def extract_news(link: str) -> str:
    """
    Convert the page at ``link`` with MarkItDown and return its title and cleaned text.

    Parameters:
    - link (str): URL of the article to convert.

    Returns:
    - str: The stripped title, a newline, then the page text passed through
      ``remove_links``. When the converter reports no title, the title part is
      an empty string.
    """
    # Use MarkItDown to extract the content
    converted = md.convert(link)

    # The converter may report no title (None); treat that as empty text
    # instead of failing on `.strip()`.
    text_title = (converted.title or '').strip()
    text_content = (converted.text_content or '').strip()

    # Clean and combine the title and content
    return text_title + '\n' + remove_links(text_content)

# pprint(extract_news(news[1]['url']))

In [34]:
def extract_full_news(stock: str) -> list:
    """
    Fetch news for a ticker and attach the full article text to each item.

    Parameters:
    - stock (str): The stock ticker symbol.

    Returns:
    - list: The dictionaries returned by ``get_news``, each with an added
      ``full_news`` key. The value is the text returned by ``extract_news`` for
      the item's URL, or ``None`` when extraction failed, so every item has the
      key. An empty list is returned when ``get_news`` yields nothing.
    """
    # Step 1: Fetch news using the get_news function.
    # `or []` guards against a None result so the loop below never fails on it.
    news = get_news(stock) or []

    # Step 2: Iterate through each news article
    for i, item in enumerate(news):
        try:
            # Step 3: Extract the full news content using the URL
            item['full_news'] = extract_news(item['url'])
        except Exception as e:
            # Step 4: Report the failure and keep the schema uniform
            print(f"Error extracting news {i}: {e}")
            item['full_news'] = None

    # Step 5: Return the list of enriched news articles
    return news


In [35]:
# Ticker analysed by the rest of the notebook; change it here only.
stock = 'SOFI'
# Reuse `stock` so the fetched articles always match the ticker named in the prompts.
full_news = extract_full_news(stock)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

# Step 1: Initialize the LLM with DeepSeek-R1 model
llm = OllamaLLM(model="deepseek-r1:1.5b")

# Step 2: Define the prompt template
# ({articles} is filled at invoke time; doubled braces are literal braces.)
PROMPT = """
You are an expert financial analyst. I will provide you with a list of news articles related to a specific stock. Your tasks are as follows:

1. **Sentiment Analysis:**
   - For each news article, evaluate its sentiment as 'Positive', 'Negative', or 'Neutral'.
   - Present your evaluation in a dictionary format where each key is the article's title, and the corresponding value is the assessed sentiment.

2. **Comprehensive Summary and Investment Recommendation:**
   - After analyzing all the articles, provide a concise summary that encapsulates the overall sentiment and key points from the news.
   - Based on this summary, advise whether investing in the stock is advisable at this time, supporting your recommendation with reasons derived from the news analysis.

**News Articles:**  

{articles}

**Output Format:**  

1. **Sentiment Analysis Dictionary:**  

   ```json
   {{
       "Article Title 1": "Positive",
       "Article Title 2": "Negative",
       "Article Title 3": "Neutral",
       ...
   }}
   ```
2. Summary: [Your summary here]
3. Investment Recommendation: [Your recommendation here]
"""

# Step 3: Create a ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_messages(
    [
        ('system', PROMPT),
        ('human', "I would like to analyze the news articles related to the stock {stock}.")
    ]
)

# Step 4: Compose prompt and model into one runnable with the pipe operator
structure = prompt_template | llm

# Step 5: Build the article text. Items whose extraction failed have no usable
# 'full_news' value and are skipped. The articles are joined into one string so
# the prompt receives plain text rather than the repr of a Python list.
articles_text = "\n\n---\n\n".join(
    article['full_news'] for article in full_news if article.get('full_news')
)

# Step 6: Invoke the pipeline with the news articles and stock data
result = structure.invoke(
    {
        "stock": stock,
        "articles": articles_text,
    }
)


In [38]:
# Pretty-print the raw model response; the recorded output below starts with the model's <think> section.
pprint(result)

('<think>\n'
 'Alright, so I need to analyze some news articles about the stock SOFI. '
 'First, let me look at each article provided and try to extract relevant '
 'information.\n'
 '\n'
 "Starting with Mike Bailey's CNBC interview from January 23, 2025. He "
 'mentioned that the macroeconomic conditions in the U.S., especially job '
 "growth beyond 2025, are positive. So, I can note that he's confident large "
 'caps outperform small caps for long-term growth. That seems like a key '
 'point.\n'
 '\n'
 "Looking at SOFI's performance: they had a 12.5% return over 30 days from "
 '$15.63 to $17.59. Their revenue grew by 30%, mainly due to two segments '
 'increasing their market share. The customer base is over 10 million, and '
 "they've raised funding of $73 billion in loans.\n"
 '\n'
 "Next, I should check if there's any conflict between Bailey's optimism and "
 "SOFI's positive stock performance. His confidence in large caps being better "
 'positioned for earnings growth might sug

In [25]:
from pydantic import BaseModel, Field
from typing import Dict, Optional

# Schema for the structured LLM response; it is passed to `llm.with_structured_output` below.
# NOTE(review): the lowercase name `news` is also bound to the article list in an
# earlier cell, so running this cell rebinds that name to the class.
class news(BaseModel):
    # Mapping described as "Dictionary of news rating"; typed Optional, but `Field(...)` makes it required.
    # presumably keyed by article title with the sentiment as value, as the prompt requests — confirm
    news_rating: Optional[Dict[str, str]] = Field(..., description="Dictionary of news rating")
    # Free-text summary across all articles (required).
    overall_news_summary: str = Field(..., description="Overall news summary")
    # Free-text recommendation derived from the news analysis (required).
    investment_recommendation: str = Field(..., description="Investment recommendation based on news analysis")

from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o-mini")


# Bind the `news` schema so the model's reply is returned as a `news` instance.
llm_with_structure = llm.with_structured_output(news)

# ({articles} is filled at invoke time; doubled braces are literal braces.)
PROMPT = """
You are an expert financial analyst. I will provide you with a list of news articles related to a specific stock. Your tasks are as follows:

1. **Sentiment Analysis:**
   - For each news article, evaluate its sentiment as 'Positive', 'Negative', or 'Neutral'.
   - Present your evaluation in a dictionary format where each key is the article's title, and the corresponding value is the assessed sentiment.

2. **Comprehensive Summary and Investment Recommendation:**
   - After analyzing all the articles, provide a concise summary that encapsulates the overall sentiment and key points from the news.
   - Based on this summary, advise whether investing in the stock is advisable at this time, supporting your recommendation with reasons derived from the news analysis.

**News Articles:**

{articles}

**Output Format:**

1. **Sentiment Analysis Dictionary:**

   ```json
   {{
       "Article Title 1": "Positive",
       "Article Title 2": "Negative",
       "Article Title 3": "Neutral",
       ...
   }}
   ```
2. Summary: [Your summary here]
3. Investment Recommendation: [Your recommendation here]
"""

from langchain_core.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_messages(
    [('system', PROMPT),
    ('human', "I would like to analyze the news articles related to the stock {stock}.")]
)

stucture = prompt_template | llm_with_structure

# Use the articles produced by extract_full_news (`full_news`, key 'full_news').
# The previous reference to `dd` / 'full_news_extracted' is not defined anywhere
# in this notebook and fails on a fresh kernel. Items without extracted text are
# skipped, and the rest are joined into one plain-text block.
articles_text = "\n\n---\n\n".join(
    article['full_news'] for article in full_news if article.get('full_news')
)

result = stucture.invoke(
    {
        "stock": stock,
        "articles": articles_text,
    }
)

    

In [26]:
# Display the three fields of the structured result together as a tuple.
result.news_rating, result.overall_news_summary, result.investment_recommendation

({"This Cathie Wood Fintech Stock Just Hit a New 52-Week High -- but I'm Not Selling a Single Share": 'Positive',
  'SoFi and Affirm: Top Analyst Chooses the Best Digital Financial Stocks to Buy': 'Positive',
  'SoFi Gears Up for Strong Q4, Loan Growth and Financial Services in the Spotlight': 'Positive',
  'Pre-Q4 Earnings: Is SoFi Technologies Stock a Portfolio Must Have?': 'Positive',
  "SOFI Technologies to Report Q4 Earnings: Here's What to Expect": 'Positive',
  'Stocks to watch next week: ASML, Intel, LVMH, Shell and Glencore': 'Neutral',
  'Where Will SoFi Stock Be in 5 Years?': 'Positive',
  'Zacks Investment Ideas feature highlights WisdomTree and SoFi Technologies': 'Positive',
  'Why SoFi Technologies, Inc. (SOFI) Is Skyrocketing Now': 'Positive',
  '2 Cheap Growth Stocks to Buy Now for Under $20': 'Positive'},
 "The recent news articles about SoFi Technologies reflect a strong positive sentiment regarding its performance and growth prospects. Analysts highlight SoFi's impr

In [27]:
# Pretty-print the summary field on its own so the long string is wrapped across lines.
pprint(result.overall_news_summary)

('The recent news articles about SoFi Technologies reflect a strong positive '
 'sentiment regarding its performance and growth prospects. Analysts highlight '
 "SoFi's impressive revenue growth, expanding member base, and successful "
 'diversification into financial services beyond traditional lending. The '
 'company is expected to report strong earnings in the upcoming quarter, with '
 'significant year-over-year growth anticipated. The positive outlook from '
 "analysts and the market's favorable response to SoFi's recent achievements "
 "indicate a robust confidence in the company's future.")


In [28]:
# Pretty-print the recommendation field on its own so the long string is wrapped across lines.
pprint(result.investment_recommendation)

('Given the overwhelmingly positive sentiment surrounding SoFi Technologies, '
 'along with its strong financial performance and growth trajectory, it is '
 "advisable to invest in the stock at this time. The company's expansion into "
 'diverse financial services, coupled with favorable market conditions and '
 'analyst recommendations, presents a compelling case for potential long-term '
 'gains.')
