In [2]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

In [3]:
# Function to collect historical stock data
def get_stock_data(tickers, start_date, end_date):
    """
    Fetch historical stock data for multiple tickers from Yahoo Finance.

    Args:
        tickers (list): List of stock ticker symbols (e.g., ['AAPL', 'MSFT']).
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
        pd.DataFrame: Stock price data for all tickers.
    """
    stock_data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker')
    all_data = []
    for ticker in tickers:
        if ticker in stock_data.columns.levels[0]:
            df = stock_data[ticker].copy()
            df.reset_index(inplace=True)
            df['Ticker'] = ticker
            all_data.append(df)
    return pd.concat(all_data, ignore_index=True)

In [4]:
# Generating realistic news headlines for multiple stocks
def get_news_data(tickers, start_date, end_date):
    """
    Generate news headlines for multiple stocks.

    Args:
        tickers (list): List of stock ticker symbols.
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
        pd.DataFrame: News data with dates, tickers, and headlines.
    """
    dates = pd.date_range(start=start_date, end=end_date, freq='W')
    all_news = []
    headlines = [
        "launches new product",
        "market declines",
        "Strong earnings report boosts stock",
        "Regulatory concerns affect sector",
        "Market rallies on positive sentiment"
    ]
    
    for ticker in tickers:
        news = pd.DataFrame({
            'Date': dates,
            'Ticker': ticker,
            'Headline': [f"{ticker} {headlines[i % len(headlines)]}" for i in range(len(dates))]
        })
        all_news.append(news)

    return pd.concat(all_news, ignore_index=True)

In [5]:
def align_data(stock_data, news_data):
    """
    Align stock price data with news data by merging based on the closest dates.

    Args:
        stock_data (pd.DataFrame): Historical stock price data.
        news_data (pd.DataFrame): News headlines with dates.

    Returns:
        pd.DataFrame: Merged dataset of stock prices and news headlines.
    """
    # Ensure Date columns are datetime
    news_data['Date'] = pd.to_datetime(news_data['Date'])
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])

    # Validate presence of Ticker column
    if 'Ticker' not in stock_data.columns:
        raise KeyError("'Ticker' column is missing from stock_data. Ensure the data is properly formatted.")

    # Sort both DataFrames by Ticker and Date
    news_data = news_data.sort_values(['Ticker', 'Date']).reset_index(drop=True)
    stock_data = stock_data.sort_values(['Ticker', 'Date']).reset_index(drop=True)

    # Debugging: Print sorted DataFrames
    print("News Data (Sorted):")
    print(news_data.head())
    print("\nStock Data (Sorted):")
    print(stock_data.head())

    # Perform merge_asof
    merged_data = pd.merge_asof(
        news_data,
        stock_data,
        on='Date',
        by='Ticker',
        direction='backward'
    )
    return merged_data

In [7]:
if __name__ == "__main__":
    tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']  # Example tickers
    start_date = '2015-01-01'
    end_date = '2023-01-01'

    # Fetch stock data for multiple tickers
    stock_data = get_stock_data(tickers, start_date, end_date)

    # Generate news data for multiple tickers
    news_data = get_news_data(tickers, start_date, end_date)

    # Align the datasets
    try:
        merged_data = align_data(stock_data, news_data)
        print(merged_data.head())
        merged_data.to_csv("stock_news_data_multi.csv", index=False)
    except Exception as e:
        print(f"Error: {e}")

[*********************100%***********************]  5 of 5 completed

News Data (Sorted):
        Date Ticker                                   Headline
0 2015-01-04   AAPL                  AAPL launches new product
1 2015-01-11   AAPL                       AAPL market declines
2 2015-01-18   AAPL   AAPL Strong earnings report boosts stock
3 2015-01-25   AAPL     AAPL Regulatory concerns affect sector
4 2015-02-01   AAPL  AAPL Market rallies on positive sentiment

Stock Data (Sorted):
Price       Date       Open       High        Low      Close     Volume Ticker
0     2015-01-02  24.805914  24.817049  23.906229  24.347164  212818400   AAPL
1     2015-01-05  24.115569  24.195739  23.474210  23.661272  257142000   AAPL
2     2015-01-06  23.725858  23.924056  23.300511  23.663504  263188400   AAPL
3     2015-01-07  23.872831  24.095525  23.761484  23.995314  160423600   AAPL
4     2015-01-08  24.324895  24.975162  24.206866  24.917261  237458000   AAPL
Error: left keys must be sorted



