In [29]:
import pandas as pd
import requests
import json
from datetime import datetime
import time

In [49]:
def get_stock_news_headlines(ticker, news_count=800, sleep=2):
    """
    Scrape stock news headlines for ``ticker`` from the NASDAQ news API.

    Web page showing the same headlines:
    https://www.nasdaq.com/market-activity/stocks/aapl/news-headlines

    Requests are made in pages of 8 articles. Paging stops as soon as
    ``news_count`` headlines have been collected or a page comes back with no
    rows, so asking for more headlines than are available returns only what
    was found.

    ticker: str
        Stock symbol; it is lower-cased before being placed in the query.
    news_count: int
        Maximum number of headlines to return.
    sleep: int
        Seconds to wait between two consecutive API calls.

    returns: list[dict]
        One dict per headline with the keys "title", "uploaded", "url" and
        "timestamp" (local time at which the row was scraped).

    raises: requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    _limit = 8      # articles requested per API call
    _timeout = 30   # seconds; without it a stalled connection blocks forever
    _headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0'}

    news_count = int(news_count)
    news = []

    # Stepping the offset by the page size requests ceil(news_count / _limit)
    # pages, so a news_count that is not a multiple of 8 is still honoured.
    for _offset in range(0, news_count, _limit):
        if _offset:
            # Pause between calls only; no pause before the first request or
            # after the last one.
            time.sleep(sleep)

        api_call = f"https://api.nasdaq.com/api/news/topic/articlebysymbol?q={ticker.lower()}|stocks&offset={_offset}&limit={_limit}&fallback=false"

        response = requests.get(api_call, headers=_headers, timeout=_timeout)
        response.raise_for_status()
        payload = response.json() or {}

        # Treat a missing/null "data" or "rows" entry as an empty page instead
        # of crashing on it.
        rows = (payload.get("data") or {}).get("rows") or []

        for row in rows:
            news.append(
                {
                    "title": row["title"],
                    "uploaded": row["ago"],
                    "url": row["url"],
                    "timestamp": datetime.now().strftime("%d.%m.%Y;%H:%M:%S"),
                }
            )

        print(f"No. of scraped news: {len(news)}")
        print("______________________________________________")
        print(f"API call -> {api_call}...")

        if not rows:
            # Nothing came back for this offset, so further pages are skipped.
            break

    # The last page may overshoot the requested amount; trim the surplus.
    return news[:news_count]


def export_news_data(news, out_file="output.csv"):
    """
    Write scraped news rows to disk and return them as a DataFrame.

    The output format is chosen from the name of ``out_file``: a name ending
    in ".xlsx" (in any letter case) is written as an Excel workbook with one
    sheet called "news"; any other name is written as CSV. In both cases the
    DataFrame index is written as the first column.

    news: list[dict]
        Rows such as those returned by 'get_stock_news_headlines'.
    out_file: str
        Destination path.

    returns: pd.DataFrame
        The frame that was written to ``out_file``.
    """
    df = pd.DataFrame(news)

    # Compare case-insensitively so "report.XLSX" is not written as CSV text.
    if out_file.lower().endswith(".xlsx"):
        df.to_excel(out_file, sheet_name="news")
    else:
        df.to_csv(out_file)

    # Return the frame that was written rather than building a second copy.
    return df

In [None]:
# Scrape up to 2000 AAPL headlines. This is slow: one API call per 8
# headlines, with a pause between calls.
news = get_stock_news_headlines("aapl", news_count=2000)

In [None]:
# Save the scraped headlines as CSV; the returned DataFrame is the cell output.
export_news_data(news, out_file="output.csv")