In [16]:
import logging
import os
import re
import time
from datetime import timedelta, datetime

import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup

import constants

# ticker under analysis and the folder holding one news CSV per ticker
PATH = 'stocks/tickers'
stock = 'NFLX'

# never truncate frames when a cell displays them
pd.set_option('display.max_columns', None)  # show every column
pd.set_option('display.max_rows', None)  # show every row

# load this ticker's news file, parsing the 'Date' column as datetimes
file = f'{PATH}/{stock}.csv'
news = pd.read_csv(file, parse_dates=['Date'])




In [17]:


# calculate the daily close-to-close % change with yfinance
def get_intraday_return(stock, oldest_date): 
    '''Download daily prices for `stock` and rank the days by % price change.

    Despite the name, the change is close-to-close: each row's Close is
    compared with the Close of the previous row.

    Args:
        stock: ticker symbol handed to yf.download.
        oldest_date: earliest date of interest; the download starts one week
            before it so the first dates of interest have a previous close.

    Returns:
        DataFrame of the downloaded prices with two extra columns,
        'Prev Close' and '% Change' (percent, rounded to 2 decimals), sorted
        by '% Change' in descending order. Rows without a previous close have
        NaN in '% Change' and sort last.
    '''
    data = yf.download(stock, period = 'max', start = oldest_date - timedelta(weeks = 1))       # prices from one week before oldest_date onwards

    # Only a multi-level column index has a second level to drop; calling
    # droplevel(1) on flat columns would raise instead of being a no-op.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.droplevel(1)
    data.columns.name = None

    # the shift must happen while rows are still in download order
    data['Prev Close'] = data['Close'].shift(1)     # get previous close by shifting it down by 1
    data['% Change'] = (((data['Close'] - data['Prev Close']) / data['Prev Close']) * 100).round(2)
    return data.sort_values('% Change', ascending = False)

# Price table for `stock`, requested from the oldest news date in `news`;
# lookup_date reads its '% Change' column by date.
swings = get_intraday_return(stock, news['Date'].min())               # largest % price changes 

def lookup_date(timestamp):
    '''Return the '% Change' recorded in `swings` for the calendar day of `timestamp`.

    The time of day is discarded; only the date part is used as the lookup key.
    Gives None when `swings` has no row for that day.
    '''
    day_key = str(timestamp.date())
    try:
        change = swings.loc[day_key, '% Change']
    except KeyError:
        # no price movement data for this date
        change = None
    return change

# Add a '% Change' column: each row's 'Date' is looked up via lookup_date,
# which returns None for dates that have no entry in `swings`.
news['% Change'] = news['Date'].apply(lookup_date)         

# display the frame with the new column
news


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Title,Link,Source,% Change
0,2024-11-22 13:12:00,Netflix set for biggest weekly gain since Janu...,https://finance.yahoo.com/news/netflix-set-big...,Reuters,0.03
1,2024-11-16 19:05:00,Netflix says 60 million households worldwide t...,https://finance.yahoo.com/news/netflix-says-50...,Reuters,
2,2024-11-15 21:28:00,Netflix back up for most users in US after out...,https://finance.yahoo.com/news/netflix-down-th...,Reuters,-1.59
3,2024-11-14 21:58:00,Netflix hopes for live sports knockout with Pa...,https://finance.yahoo.com/news/netflix-hopes-l...,Reuters,0.82
4,2024-11-14 06:00:00,Squid Game Returns in Test of Netflix Global M...,https://finance.yahoo.com/news/squid-game-retu...,Bloomberg,0.82
5,2024-11-12 09:02:00,Netflix's ad-supported tier hits 70 million users,https://finance.yahoo.com/news/netflixs-ad-sup...,Reuters,1.75
6,2024-11-26 15:57:00,Netflix defeats shareholder lawsuit over growt...,https://finance.yahoo.com/news/netflix-defeats...,Reuters,0.81
7,2024-12-03 17:32:00,Netflix Japan subscribers top 10 million with ...,https://finance.yahoo.com/news/netflix-japan-s...,Reuters,0.49
8,2024-12-07 12:52:00,South Koreas Yoon Apologizes Hours Before Impe...,https://finance.yahoo.com/m/3261e8b7-a733-32b5...,Bloomberg,
9,2024-12-07 20:46:00,South Koreas Yoon Survives Impeachment After M...,https://finance.yahoo.com/m/7d45d9ef-4174-3e46...,Bloomberg,


In [18]:
def get_google_results(query):      # using google Custom Search JSON API
    '''Return the total number of Google results for an exact-phrase search.

    The query is wrapped in double quotes before being sent, and the key and
    search engine id are read from the `constants` module.

    Args:
        query: text to search for (a news title in this notebook).

    Returns:
        The 'totalResults' count as an int, or 0 when the response carries no
        'totalResults' field. The query and count are printed on success.

    Raises:
        Whatever the request, JSON decoding or response parsing raised. If a
        response payload had already been decoded it is printed first to help
        diagnose the failure.
    '''
    params = {
        'q': f'"{query}"',
        'key': constants.GOOGLE_API_KEY, 
        'cx': constants.SEARCH_ENGINE_ID, 
    }

    # Bound before the try so the error handler can always refer to it, even
    # when the request itself fails and nothing was decoded.
    response = None
    try: 
        # timeout (seconds) keeps a stalled connection from hanging the notebook
        response = requests.get('https://www.googleapis.com/customsearch/v1', params = params, timeout = 10).json() 
        request_info = response['queries']['request'][0]
        if 'totalResults' not in request_info:
            return 0
        num_results = int(request_info['totalResults'])
        print(query, num_results)
        return num_results

    except Exception:
        if response is not None:
            print(response)
        raise       # bare raise keeps the original exception and traceback

    
'''
Mines the top N days with the highest price change for news catalysts
'''
n = 1           # keep this low: every selected title costs one google api request

# the n largest distinct % changes by absolute size, NaN excluded
volatility = (
    news['% Change']
    .drop_duplicates()
    .dropna()
    .sort_values(key=abs, ascending=False)
    .head(n)
)

# rows whose % change is one of those values get their title's result count in 'hits'
on_volatile_day = news['% Change'].isin(volatility)
news.loc[on_volatile_day, 'hits'] = news.loc[on_volatile_day, 'Title'].apply(get_google_results)



Dutch watchdog fines Netflix for not properly informing customers about data use 1920


In [19]:
# show the row(s) whose 'hits' equals the column maximum
news.loc[news['hits'].eq(news['hits'].max())]

Unnamed: 0,Date,Title,Link,Source,% Change,hits
12,2024-12-18 04:33:00,Dutch watchdog fines Netflix for not properly ...,https://finance.yahoo.com/news/dutch-watchdog-...,Reuters,-3.22,1920.0


In [None]:
# write the enriched news data back to csv
OUTPUT_PATH = 'stocks/tickers_e'
os.makedirs(OUTPUT_PATH, exist_ok = True)       # to_csv does not create a missing output directory
news.to_csv(f'{OUTPUT_PATH}/{stock}.csv', index = False)

