In [1]:
!pip install requests transformers pandas datetime vaderSentiment requests yfinance finnhub-python
!pip install nltk
!pip install vaderSentiment
!pip install yahoo-fin
!pip install finvizfinance
!pip install "websockets>=11.0"
!pip install yfinance yahoo_fin nltk textblob matplotlib

Collecting datetime
  Downloading DateTime-5.5-py3-none-any.whl.metadata (33 kB)
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting finnhub-python
  Downloading finnhub_python-2.4.23-py3-none-any.whl.metadata (9.2 kB)
Collecting zope.interface (from datetime)
  Downloading zope.interface-7.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Downloading DateTime-5.5-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m344.8 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading finnhub_python-2.4.23-py3-none-a

In [7]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import pytz
from yahoo_fin import news as yahoo_fin_news
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Setup
nltk.download('vader_lexicon')
vader = SentimentIntensityAnalyzer()
tz = pytz.timezone('US/Eastern')
now = datetime.now(tz)
start_window = now - timedelta(days=90)

TICKERS = ["AAPL", "MSFT", "GOOG", "AMZN", "TSLA", "NVDA", "PFE", "MRNA", "DIS", "T", "RYDE", "META", "NFLX", "AVGO", "AMD", "ORCL", "INTC",
           "QCOM", "IBM", "CRM", "JPM", "BAC", "GS", "MS", "WFC", "XOM", "CVX", "COP", "SLB", "PSX", "JNJ", "LLY", "UNH", "ABBV", "BMY",
           "WMT", "COST", "HD", "MCD", "NKE", "ADBE", "SHOP", "TSM", "PLTR", "UBER" ]

INTERVALS = {
    "1 Hour Ago": timedelta(hours=1),
    "4 Hours Ago": timedelta(hours=4),
    "Previous Day Close": timedelta(days=1),
    "7 Days Ago": timedelta(days=7)
}

class NewsAnalyzer:
    def __init__(self, ticker):
        self.ticker = ticker
        self.stock = yf.Ticker(ticker)

    def fetch_articles(self):
        try:
            all_articles = [
                {
                    "Ticker": self.ticker,
                    "Title": art["title"],
                    "Summary": art["summary"],
                    "URL": art["link"],
                    "Datetime": datetime.strptime(art["published"], "%a, %d %b %Y %H:%M:%S %z").astimezone(tz)
                }
                for art in yahoo_fin_news.get_yf_rss(self.ticker)
                if start_window <= datetime.strptime(art["published"], "%a, %d %b %Y %H:%M:%S %z").astimezone(tz) <= now
            ]
            # Sort by datetime (most recent first), and take top 5
            all_articles.sort(key=lambda x: x["Datetime"], reverse=True)
            return all_articles[:5]
        except Exception as e:
            print(f"Error fetching news for {self.ticker}: {e}")
            return []

    def get_price_near(self, timepoint):
        try:
            hist = self.stock.history(
                start=timepoint.strftime('%Y-%m-%d'),
                end=(timepoint + timedelta(days=1)).strftime('%Y-%m-%d'),
                interval='1h'
            )
            if not hist.empty:
                hist['Delta'] = abs(hist.index - timepoint)
                return round(hist.sort_values('Delta').iloc[0]['Close'], 2)
        except:
            return None
        return None

    def analyze_article(self, article):
        base_time = article["Datetime"]
        base_price = self.get_price_near(base_time)
        if base_price is None:
            return None

        prices = {}
        pct_changes = {}
        for label, delta in INTERVALS.items():
            time_check = base_time - delta
            past_price = self.get_price_near(time_check)
            prices[label] = past_price if past_price else 'N/A'
            pct_changes[label + " % Change"] = (
                round(((base_price - past_price) / past_price) * 100, 2) if isinstance(past_price, (int, float)) else 'N/A'
            )

        return {
            **article,
            "News Age (Days)": (now - base_time).days,
            "News Time": base_time,
            "Current Price": base_price,
            **prices,
            **pct_changes,
            "Headline Sentiment": round(TextBlob(article["Title"]).sentiment.polarity, 2),
            "Summary Sentiment": round(vader.polarity_scores(article["Summary"])["compound"], 2)
        }

def run_full_analysis():
    full_dataset = []
    for ticker in TICKERS:
        analyzer = NewsAnalyzer(ticker)
        articles = analyzer.fetch_articles()
        for article in articles:
            result = analyzer.analyze_article(article)
            if result:
                full_dataset.append(result)

    df = pd.DataFrame(full_dataset)
    df.to_csv("refactored_yahoo_sentiment_7daytrack.csv", index=False)
    print("Saved to refactored_yahoo_sentiment_7daytrack.csv")
    return df

# Run it
df_result = run_full_analysis()
print(df_result.head())

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
ERROR:yfinance:$CRM: possibly delisted; no price data found  (1h 2025-06-15 -> 2025-06-16)
ERROR:yfinance:$COP: possibly delisted; no price data found  (1h 2025-06-14 -> 2025-06-15)
ERROR:yfinance:$PSX: possibly delisted; no price data found  (1h 2025-06-15 -> 2025-06-16)
ERROR:yfinance:$PSX: possibly delisted; no price data found  (1h 2025-06-14 -> 2025-06-15)
ERROR:yfinance:$BMY: possibly delisted; no price data found  (1h 2025-06-15 -> 2025-06-16)
ERROR:yfinance:$HD: possibly delisted; no price data found  (1h 2025-06-15 -> 2025-06-16)
ERROR:yfinance:$HD: possibly delisted; no price data found  (1h 2025-06-15 -> 2025-06-16)


Saved to refactored_yahoo_sentiment_7daytrack.csv
  Ticker                                              Title  \
0   AAPL  Apple Just Erased a $300 Million Problem--But ...   
1   AAPL  Texas Instruments to invest more than $60 bill...   
2   AAPL  Credit Card Annual Fees Are Going Up. What It ...   
3   AAPL  The Trump phone probably won’t be built in Ame...   
4   AAPL  Apple Just Snatched Back the Top Spot in China...   

                                             Summary  \
0  A surprise courtroom reversal gives Apple brea...   
1  Texas Instruments will invest more than $60 bi...   
2  Credit card issuers are trying to attract more...   
3  President Trump's family is launching a smartp...   
4  Data showed that iPhones claimed the top spot ...   

                                                 URL  \
0  https://finance.yahoo.com/news/apple-just-eras...   
1  https://www.cnn.com/2025/06/18/tech/texas-inst...   
2  https://finance.yahoo.com/m/60052a48-3fff-3a76...   
3  https:/

In [8]:
from google.colab import files
files.download("refactored_yahoo_sentiment_7daytrack.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>