This notebook enriches the news articles collected for `stock` with two signals: price movement (the stock's % price change on the article's publish date) and search hits (a rough measure of how 'popular' the article was).

In [1]:
import requests 
from datetime import timedelta, datetime
import pandas as pd
import yfinance as yf 
import requests
from investor_watch.constants import TICKER_PATH, GOOGLE_API_KEY, SEARCH_ENGINE_ID

# Optional display tweaks: uncomment to show every row / column of a displayed frame.
# pd.set_option('display.max_rows', None)  # Display all rows

# pd.set_option('display.max_columns', None)  # Display all columns
# Ticker whose news CSV is loaded and enriched by the cells below.
stock = 'AAPL'
# NOTE(review): not read by any cell visible here — confirm whether a later cell uses it.
append_mode = False

# The per-ticker news file is read from <TICKER_PATH>/<stock>.csv.
file = f'{TICKER_PATH}/{stock}.csv'
# 'Date' is parsed to datetimes because the price lookup below calls .date() on each value.
news_df = pd.read_csv(file, parse_dates=['Date'])
# Independent copy of the frame as loaded; news_df itself is modified by later cells.
snapshot = news_df.copy()

news_df.head(5)

c:\users\patrick4\documents\projects\investor_watch c:\users\patrick4\documents\projects\investor_watch\stocks\tickers c:\users\patrick4\documents\projects\investor_watch\stocks\stocks.csv


Unnamed: 0,Date,Title,Link,Source,% Change,hits
0,2024-11-20 19:20:00,Indonesia to Assess $100 Million Apple Bid to ...,https://finance.yahoo.com/news/indonesia-asses...,Bloomberg,0.32,
1,2024-11-21 23:00:00,Western Australia on Track for Unexpectedly La...,https://finance.yahoo.com/news/western-austral...,Bloomberg,-0.21,
2,2024-11-21 16:00:00,Apple Readies More Conversational Siri in Bid ...,https://finance.yahoo.com/news/apple-readies-m...,Bloomberg,-0.21,
3,2024-11-21 04:49:00,Apple Rival Oppo Bets on Markets Beyond China ...,https://finance.yahoo.com/news/apple-rival-opp...,Bloomberg,-0.21,
4,2024-11-22 08:30:00,"Apple, Google Risk UK Probe Over Mobile Browse...",https://finance.yahoo.com/news/apple-google-ri...,Bloomberg,0.59,


Populate `news_df` with price changes

In [72]:
# Daily close-to-close % change per trading day, computed from yfinance quotes.
def get_stock_quotes(stock, oldest_date):
    '''Download daily quotes for `stock` and add the day-over-day % change.

    Parameters
    ----------
    stock : ticker symbol passed straight to `yf.download`.
    oldest_date : datetime-like; the download starts one week before it, so the
        first day of interest already has a previous close to compare against.

    Returns
    -------
    DataFrame indexed by quote date with the downloaded columns plus
    'Prev Close' (previous row's close) and '% Change' (close vs. previous
    close, in percent, rounded to 2 decimals). Rows are sorted by '% Change'
    descending, NOT by date; rows without a previous close (NaN) sort last.
    '''
    # Only `start` is passed: yfinance documents `period` and `start`/`end` as
    # alternatives, so the former `period='max'` was redundant at best.
    data = yf.download(stock, start=oldest_date - timedelta(weeks=1))

    # yf.download may return (field, ticker) multi-level columns or flat ones depending
    # on version/settings; only drop the ticker level when it is actually there.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.droplevel(1)
    data.columns.name = None

    # shift(1) runs before the sort below, so each row sees the preceding row's close.
    data['Prev Close'] = data['Close'].shift(1)
    pct_change = (data['Close'] - data['Prev Close']) / data['Prev Close'] * 100
    data['% Change'] = pct_change.round(2)

    return data.sort_values('% Change', ascending=False)

# Daily quotes reaching back one week before the earliest article in news_df.
quotes = get_stock_quotes(stock, news_df['Date'].min())               # rows sorted by '% Change', largest first (not by date)

def lookup_date(timestamp):
    '''Return the '% Change' stored in `quotes` for the calendar day of `timestamp`.

    The day is looked up in the module-level `quotes` frame by the string form of
    `timestamp.date()`. Returns None when `quotes` has no row for that day.
    '''
    day_key = str(timestamp.date())
    try:
        pct_move = quotes.loc[day_key, '% Change']
    except KeyError:
        # No quote for this day; the caller gets a missing value instead of an error.
        pct_move = None
    return pct_move

# Overwrite '% Change' with the move looked up for each article's publish date;
# lookup_date returns None for dates that have no row in `quotes`.
news_df['% Change'] = news_df['Date'].apply(lookup_date)

news_df.head(5)


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Title,Link,Source,% Change,hits
0,2024-11-20 19:20:00,Indonesia to Assess $100 Million Apple Bid to ...,https://finance.yahoo.com/news/indonesia-asses...,Bloomberg,0.32,
1,2024-11-21 23:00:00,Western Australia on Track for Unexpectedly La...,https://finance.yahoo.com/news/western-austral...,Bloomberg,-0.21,
2,2024-11-21 16:00:00,Apple Readies More Conversational Siri in Bid ...,https://finance.yahoo.com/news/apple-readies-m...,Bloomberg,-0.21,
3,2024-11-21 04:49:00,Apple Rival Oppo Bets on Markets Beyond China ...,https://finance.yahoo.com/news/apple-rival-opp...,Bloomberg,-0.21,
4,2024-11-22 08:30:00,"Apple, Google Risk UK Probe Over Mobile Browse...",https://finance.yahoo.com/news/apple-google-ri...,Bloomberg,0.59,


Populate `news_df` with search hits

In [None]:
def get_google_results(query, timeout=30):      # using google Custom Search JSON API
    '''Return the number of results Google reports for `query` as an exact phrase.

    Parameters
    ----------
    query : text to search for; it is wrapped in double quotes before sending.
    timeout : seconds to wait for the HTTP request before giving up.

    Returns
    -------
    int: the 'totalResults' value of the first request entry in the response,
    or 0 when that entry carries no 'totalResults' field.

    Raises
    ------
    Whatever the request, the JSON decoding or the payload lookup raises. The
    payload (when one was received) is printed first to aid debugging.
    '''
    params = {
        'q': f'"{query}"',
        'key': GOOGLE_API_KEY,
        'cx': SEARCH_ENGINE_ID,
    }

    # Bound before the try so the handler can tell "no payload yet" apart from a bad
    # payload; previously a failing request surfaced as UnboundLocalError instead.
    response = None
    try:
        response = requests.get(
            'https://www.googleapis.com/customsearch/v1',
            params=params,
            timeout=timeout,
        ).json()
        request_info = response['queries']['request'][0]
        if 'totalResults' not in request_info:
            return 0
        num_results = int(request_info['totalResults'])
        print(query, num_results)
        return num_results

    except Exception:
        if response is not None:
            print(response)
        raise       # bare raise keeps the original exception and traceback

    
# Mine the top N days with the largest absolute price change for news catalysts.
n = 3          # keep this low because of google api limits: one request per selected title :)

# Rank the distinct moves by magnitude. Missing values (no quote was found for the
# publish date) are dropped first: `isin` matches NaN against NaN, so a NaN that slipped
# into the top-n would send every unpriced article to the search API.
largest_price_changes = (
    news_df['% Change']
    .dropna()
    .sort_values(key=abs, ascending=False)
    .drop_duplicates()
    .head(n)
)

# Build the row mask once and use it on both sides of the assignment.
is_top_mover = news_df['% Change'].isin(largest_price_changes)
news_df.loc[is_top_mover, 'hits'] = news_df.loc[is_top_mover, 'Title'].apply(get_google_results)

news_df[news_df['hits'] > 0]