In [1]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import time
from datetime import datetime

In [2]:
def scrape_finviz_news(tickers):
    headers = {'User-Agent': 'Mozilla/5.0'}
    all_data = []

    for ticker in tickers:
        print(f"Scraping news for {ticker}...")
        url = f'https://finviz.com/quote.ashx?t={ticker}'
        r = requests.get(url, headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')
        news_table = soup.find('table', class_='fullview-news-outer')

        if news_table:
            for row in news_table.findAll('tr'):
                cols = row.findAll('td')
                if len(cols) == 2:
                    date_time = cols[0].text.strip()
                    headline = cols[1].text.strip()
                    all_data.append([ticker, date_time, headline])
        time.sleep(1)  # Sleep to avoid getting blocked

    return pd.DataFrame(all_data, columns=['Ticker', 'DateTime', 'Headline'])


In [3]:
tickers=['AAPL','GOOGL','MSFT','TSLA']
df_news = scrape_finviz_news(tickers)


Scraping news for AAPL...
Scraping news for GOOGL...
Scraping news for MSFT...
Scraping news for TSLA...


In [4]:
df_news.head()

Unnamed: 0,Ticker,DateTime,Headline
0,AAPL,Today 09:36AM,S&P 500 Rally Faces $11 Trillion Gauntlet of B...
1,AAPL,09:33AM,"Stock market today: Dow, S&P 500, Nasdaq nudge..."
2,AAPL,09:07AM,Apple Isn't Leaving China. Its Footprint Is Ge...
3,AAPL,09:05AM,"US-EU trade deal, Big Tech earnings, Fed meeti..."
4,AAPL,09:00AM,Avalon GloboCare Enters into Distribution Agre...


In [5]:
df_news.shape

(400, 3)

In [6]:
df_news['Headline'] = df_news['Headline'].str.replace(r'\\n', ' ', regex=True).str.strip()

In [7]:
df_news.head()

Unnamed: 0,Ticker,DateTime,Headline
0,AAPL,Today 09:36AM,S&P 500 Rally Faces $11 Trillion Gauntlet of B...
1,AAPL,09:33AM,"Stock market today: Dow, S&P 500, Nasdaq nudge..."
2,AAPL,09:07AM,Apple Isn't Leaving China. Its Footprint Is Ge...
3,AAPL,09:05AM,"US-EU trade deal, Big Tech earnings, Fed meeti..."
4,AAPL,09:00AM,Avalon GloboCare Enters into Distribution Agre...


In [8]:
def parse_finviz_datetime(datetime_str):
    today = datetime.now().date()
    
    # Check if the string contains 'Today'
    if 'Today' in datetime_str:
        time_part = datetime_str.replace('Today', '').strip()
        # Combine today's date with the parsed time
        dt = datetime.strptime(time_part, '%I:%M%p').time()
        return datetime.combine(today, dt)
    else:
        # If it's just a time, we assume it's also for today's date
        try:
            dt = datetime.strptime(datetime_str, '%I:%M%p').time()
            return datetime.combine(today, dt)
        except ValueError:
            # Return None or NaT if format is unexpected
            return pd.NaT 

# Apply the function to the 'DateTime' column
df_news['DateTime'] = df_news['DateTime'].apply(parse_finviz_datetime)

# We can also drop any rows where the date couldn't be parsed
df_news.dropna(subset=['DateTime'], inplace=True)

print("--- Cleaned DataFrame ---")
print(df_news.head())
print("\n--- Data Types ---")
print(df_news.info())

--- Cleaned DataFrame ---
  Ticker            DateTime  \
0   AAPL 2025-07-28 09:36:00   
1   AAPL 2025-07-28 09:33:00   
2   AAPL 2025-07-28 09:07:00   
3   AAPL 2025-07-28 09:05:00   
4   AAPL 2025-07-28 09:00:00   

                                            Headline  
0  S&P 500 Rally Faces $11 Trillion Gauntlet of B...  
1  Stock market today: Dow, S&P 500, Nasdaq nudge...  
2  Apple Isn't Leaving China. Its Footprint Is Ge...  
3  US-EU trade deal, Big Tech earnings, Fed meeti...  
4  Avalon GloboCare Enters into Distribution Agre...  

--- Data Types ---
<class 'pandas.core.frame.DataFrame'>
Index: 387 entries, 0 to 399
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Ticker    387 non-null    object        
 1   DateTime  387 non-null    datetime64[ns]
 2   Headline  387 non-null    object        
dtypes: datetime64[ns](1), object(2)
memory usage: 12.1+ KB
None


In [9]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [10]:
analyzer = SentimentIntensityAnalyzer()

In [11]:
df_news['compound'] = df_news['Headline'].apply(lambda headline: analyzer.polarity_scores(headline)['compound'])

In [12]:
df_news.head()

Unnamed: 0,Ticker,DateTime,Headline,compound
0,AAPL,2025-07-28 09:36:00,S&P 500 Rally Faces $11 Trillion Gauntlet of B...,0.0
1,AAPL,2025-07-28 09:33:00,"Stock market today: Dow, S&P 500, Nasdaq nudge...",0.3182
2,AAPL,2025-07-28 09:07:00,Apple Isn't Leaving China. Its Footprint Is Ge...,0.0
3,AAPL,2025-07-28 09:05:00,"US-EU trade deal, Big Tech earnings, Fed meeti...",0.0
4,AAPL,2025-07-28 09:00:00,Avalon GloboCare Enters into Distribution Agre...,0.4939


In [13]:
def classify_sentiment(compound_score):
    if compound_score >= 0.05:
        return 'Positive'
    elif compound_score <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'

In [14]:
df_news['Sentiment'] = df_news['compound'].apply(classify_sentiment)

In [15]:
df_news.head()

Unnamed: 0,Ticker,DateTime,Headline,compound,Sentiment
0,AAPL,2025-07-28 09:36:00,S&P 500 Rally Faces $11 Trillion Gauntlet of B...,0.0,Neutral
1,AAPL,2025-07-28 09:33:00,"Stock market today: Dow, S&P 500, Nasdaq nudge...",0.3182,Positive
2,AAPL,2025-07-28 09:07:00,Apple Isn't Leaving China. Its Footprint Is Ge...,0.0,Neutral
3,AAPL,2025-07-28 09:05:00,"US-EU trade deal, Big Tech earnings, Fed meeti...",0.0,Neutral
4,AAPL,2025-07-28 09:00:00,Avalon GloboCare Enters into Distribution Agre...,0.4939,Positive


In [18]:
# --- See the distribution of sentiments ---
print("\n--- Sentiment Distribution ---")
sentiment_counts = df_news['Sentiment'].value_counts()
print(sentiment_counts)

# --- See the most positive headlines ---
print("\n--- Top 5 Most Positive Headlines ---")
print(df_news.nlargest(5, 'compound')[['Headline', 'compound', 'Sentiment']])

# --- See the most negative headlines ---
print("\n--- Top 5 Most Negative Headlines ---")
print(df_news.nsmallest(5, 'compound')[['Headline', 'compound', 'Sentiment']])


--- Sentiment Distribution ---
Sentiment
Neutral     170
Positive    146
Negative     71
Name: count, dtype: int64

--- Top 5 Most Positive Headlines ---
                                              Headline  compound Sentiment
147  Trumps big beautiful bill to boost big techs c...    0.8126  Positive
132  What Are the 3 Best Bargain Artificial Intelli...    0.7351  Positive
207  NBIS Eyes Positive EBITDA in H2 2025: What Cou...    0.7351  Positive
67   Stock market today: S&P 500 clears 5th-straigh...    0.6597  Positive
68   S&P 500 clears 5th-straight record this week, ...    0.6597  Positive

--- Top 5 Most Negative Headlines ---
                                              Headline  compound Sentiment
334  Bad News Keeps Rolling in for Tesla\n\n\n(Motl...   -0.7506  Negative
83   Apple Faces Existential Threat If It Fails To ...   -0.7351  Negative
178  Apple Faces Existential Threat If It Fails To ...   -0.7351  Negative
269  Microsoft Corp (MSFT) Patches SharePoint Vulne...  