This notebook mines recent news sources for the ticker set in `stock` and saves them to a file. It is run daily.

In [56]:
import os 
from finvizfinance.news import News
from finvizfinance.screener.overview import Overview
from finvizfinance.quote import finvizfinance
import pandas as pd 
import yfinance as yf
from datetime import timedelta, datetime
from config import STOCK_PATH, STOCK_DIR
import time 

# Load the tracked-ticker list from the same CSV that the update cell further down rewrites.
stock_list = pd.read_csv('../stocks/stocks.csv')
# Ticker that the news and price-change cells below operate on.
stock = 'AAPL'

# Display the loaded list.
stock_list


Unnamed: 0,Ticker,Company,Sector,Industry,Country,Price,Market Cap (billions)
0,A,Agilent Technologies Inc,Healthcare,Diagnostics & Research,USA,147.36,42.09
1,AA,Alcoa Corp,Basic Materials,Aluminum,USA,39.53,10.21
2,AAL,American Airlines Group Inc,Industrials,Airlines,USA,18.27,12.01
3,AAON,AAON Inc,Industrials,Building Products & Equipment,USA,130.18,10.58
4,AAPL,Apple Inc,Technology,Consumer Electronics,USA,229.98,3458.41
...,...,...,...,...,...,...,...
672,ZBRA,Zebra Technologies Corp,Technology,Communication Equipment,USA,405.71,20.93
673,ZG,Zillow Group Inc,Communication Services,Internet Content & Information,USA,74.83,18.02
674,ZM,Zoom Communications Inc,Technology,Software - Application,USA,78.07,23.93
675,ZS,Zscaler Inc,Technology,Software - Infrastructure,USA,187.58,28.78


**Update the stock list with stocks newly added to the S&P 500.** Since the S&P 500 is self-cleansing, companies get acquired / delisted frequently.

In [4]:
# Refresh the ticker universe from the finviz screener:
# US companies in the "+Large (over $10bln)" market-cap bucket.

foverview = Overview()
foverview.set_filter(filters_dict={'Market Cap.': '+Large (over $10bln)', 'Country': 'USA'})
df = foverview.screener_view()

# Express market cap in billions, then discard the screener columns that are not stored.
df['Market Cap (billions)'] = df['Market Cap'] / 1e9
df = df.drop(columns=['P/E', 'Change', 'Market Cap', 'Volume'])

# Fresh screener rows are concatenated first, so for a repeated Ticker the
# row that survives de-duplication is the new one, not the one already in stock_list.
stock_list = (
    pd.concat([df, stock_list])
    .drop_duplicates(subset=['Ticker'])
)
stock_list.to_csv('../stocks/stocks.csv', index=False)

stock_list

[Info] loading page [#######-----------------------] 8/34 

KeyboardInterrupt: 

Mine news for stocks and gather price changes

In [58]:

def get_news(ticker, whitelist=('Bloomberg', 'Reuters')):
    """Fetch whitelisted finviz headlines for ``ticker`` that are newer than its local CSV.

    Nothing is written to disk here; persisting the result is left to the caller.

    Parameters
    ----------
    ticker : str
        Symbol to query. It also names the local file ``{STOCK_PATH}/{ticker}.csv``.
    whitelist : iterable of str, optional
        Values of the ``Source`` column to keep. Defaults to Bloomberg and Reuters.

    Returns
    -------
    pandas.DataFrame or None
        Whitelisted rows sorted by ``Date`` ascending. If the ticker's CSV exists, only
        rows dated at or after the file's last-modified time are kept; if it does not
        exist yet, every whitelisted row is returned (previously this case fell into the
        error path and produced ``None``). ``None`` is returned when fetching or
        filtering raises; the error is printed rather than propagated.
    """
    file = f"{STOCK_PATH}/{ticker}.csv"

    try:
        time.sleep(0.2)       # pause before every request
        news_df = finvizfinance(ticker).ticker_news()
        news_df = news_df[news_df['Source'].isin(list(whitelist))]              # whitelist

        # A ticker seen for the first time has no file yet: treat all of its news as new
        # instead of letting getmtime() raise.
        if os.path.exists(file):
            last_modified = datetime.fromtimestamp(os.path.getmtime(file))
            news_df = news_df[news_df['Date'] >= last_modified]       # keep news between modified date and today

        news_df = news_df.sort_values('Date')
    except Exception as e:
        # Best effort: report the failure and let the caller carry on with None.
        print(f'Failed to get news for ${ticker}: {e}')
        return None

    print(f'Found {len(news_df)} new sources of ${ticker} ...')
    return news_df

# Fetch the whitelisted headlines for the ticker selected in `stock` and display them.
df = get_news(stock)
df


Found 3 new sources of $AAPL ...


Unnamed: 0,Date,Title,Link,Source
24,2025-01-18 23:21:00,US TikTok Users Lose Access to App Ahead of Su...,https://finance.yahoo.com/m/4c39988c-fa5a-3189...,Bloomberg
17,2025-01-19 07:19:00,"TikTok Goes Dark for US Users, Disappears From...",https://finance.yahoo.com/m/4c39988c-fa5a-3189...,Bloomberg
11,2025-01-19 10:36:00,Trump Promises Executive Order Aiding TikTok A...,https://finance.yahoo.com/m/4c39988c-fa5a-3189...,Bloomberg


In [59]:
def get_price_changes(stock, oldest_date):
    """Download price history for ``stock`` and compute the % change between consecutive closes.

    Parameters
    ----------
    stock : str
        Ticker passed to ``yf.download``.
    oldest_date : datetime-like
        Date of the oldest news item. The download starts one week before it
        (presumably so the earliest rows of interest have a previous close available).

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with flat column labels plus two added columns:
        ``Prev Close`` (the ``Close`` of the preceding row) and ``% Change``
        (rounded to 2 decimals). Rows are sorted by ``% Change``, largest first.
    """
    # `start` alone defines the window, so no `period` is passed alongside it.
    prices = yf.download(stock, start=oldest_date - timedelta(weeks=1))       # prices from [oldest_date - 1 week, today]

    # Drop the second column level only when there is one; a frame whose columns
    # are already flat would make an unconditional droplevel(1) raise.
    if isinstance(prices.columns, pd.MultiIndex):
        prices.columns = prices.columns.droplevel(1)
    prices.columns.name = None

    # assumes rows arrive in chronological order, so shifting down by one yields the previous close -- TODO confirm
    prices['Prev Close'] = prices['Close'].shift(1)
    prices['% Change'] = ((prices['Close'] - prices['Prev Close']) / prices['Prev Close'] * 100).round(2)

    return prices.sort_values('% Change', ascending=False)

# Price history reaching back to (one week before) the oldest headline.
price_changes = get_price_changes(stock, df['Date'].min())


def lookup_date(timestamp):
    """Return the % change recorded for the calendar day of ``timestamp``, or None if that day has no price row."""
    day = str(timestamp.date())
    if day not in price_changes.index:
        return None
    return price_changes.loc[day, '% Change']


# Attach the same-day price move to every headline.
df['% Change'] = df['Date'].apply(lookup_date)

df


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Title,Link,Source,% Change
24,2025-01-18 23:21:00,US TikTok Users Lose Access to App Ahead of Su...,https://finance.yahoo.com/m/4c39988c-fa5a-3189...,Bloomberg,
17,2025-01-19 07:19:00,"TikTok Goes Dark for US Users, Disappears From...",https://finance.yahoo.com/m/4c39988c-fa5a-3189...,Bloomberg,
11,2025-01-19 10:36:00,Trump Promises Executive Order Aiding TikTok A...,https://finance.yahoo.com/m/4c39988c-fa5a-3189...,Bloomberg,


In [None]:
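# NOTE: `news_df` and `file` are local to get_news() and are not defined at cell level;
# enabling the line below requires the enriched frame and the ticker's CSV path to be in scope here.
# news_df.to_csv(file, mode='a', header=False, index=False)         # append enriched news back to local file storage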