### Yahoo Finance data collection

In [2]:
import yfinance as yf
import pandas as pd
import os

# Ticker symbols whose price history is downloaded and saved, one CSV per symbol.
COMPANIES = ['MSFT', 'AAPL', 'NVDA', 'AMZN', 'GOOG', 'META', 'LLY', 'TSM', 'AVGO', 'V', 'NVO', 'TSLA', 'WMT', 'XOM', 'MA', 'UNH', 'ASML', 'JNJ', 'PG']

# Date range passed to yfinance. Note that this spans roughly one year
# (2023-05-01 to 2024-05-10), not one month.
start_date = "2023-05-01"
end_date = "2024-05-10"

data_folder = 'Data'  # Folder to store CSV files

# Create the output directory if needed; exist_ok avoids a separate
# exists() check and the race between checking and creating.
os.makedirs(data_folder, exist_ok=True)

for ticker_symbol in COMPANIES:
    # Each ticker is handled independently so one failure does not stop the rest.
    try:
        ticker = yf.Ticker(ticker_symbol)
        hist_data = ticker.history(start=start_date, end=end_date)

        # history() can hand back an empty DataFrame instead of raising
        # (e.g. unknown symbol or no rows in the range). Do not write an
        # empty CSV and report it as a successful save.
        if hist_data.empty:
            print(f"Failed to get or save data for {ticker_symbol}: no data returned for {start_date} to {end_date}")
            continue

        csv_filename = os.path.join(data_folder, f"{ticker_symbol}_historical_data.csv")
        hist_data.to_csv(csv_filename)
        print(f"Saved data for {ticker_symbol} to {csv_filename}")
    except Exception as e:
        print(f"Failed to get or save data for {ticker_symbol}: {str(e)}")


Saved data for MSFT to Data\MSFT_historical_data.csv
Saved data for AAPL to Data\AAPL_historical_data.csv
Saved data for NVDA to Data\NVDA_historical_data.csv
Saved data for AMZN to Data\AMZN_historical_data.csv
Saved data for GOOG to Data\GOOG_historical_data.csv
Saved data for META to Data\META_historical_data.csv
Saved data for LLY to Data\LLY_historical_data.csv
Saved data for TSM to Data\TSM_historical_data.csv
Saved data for AVGO to Data\AVGO_historical_data.csv
Saved data for V to Data\V_historical_data.csv
Saved data for NVO to Data\NVO_historical_data.csv
Saved data for TSLA to Data\TSLA_historical_data.csv
Saved data for WMT to Data\WMT_historical_data.csv
Saved data for XOM to Data\XOM_historical_data.csv
Saved data for MA to Data\MA_historical_data.csv
Saved data for UNH to Data\UNH_historical_data.csv
Saved data for ASML to Data\ASML_historical_data.csv
Saved data for JNJ to Data\JNJ_historical_data.csv
Saved data for PG to Data\PG_historical_data.csv


### Finance news data collection

In [1]:
import json
import os
import requests
from datetime import datetime, timedelta

# Alpha Vantage API key. Read from the environment so the secret is not stored
# in the notebook; falls back to an empty string when the variable is unset.
API_KEY = os.environ.get('ALPHAVANTAGE_API_KEY', '')
# Ticker symbols to fetch news for, one request and one JSON file per symbol.
COMPANIES = ['MSFT', 'AAPL', 'NVDA', 'AMZN', 'GOOG', 'META', 'LLY', 'TSM', 'AVGO', 'V', 'NVO', 'TSLA', 'WMT', 'XOM', 'MA', 'UNH', 'ASML', 'JNJ', 'PG']

# Define the start and end dates
start_date = datetime(2023, 8, 31)
end_date = datetime(2023, 9, 30)

# Define the API endpoint (same for every ticker, so set once outside the loop)
API_ENDPOINT = 'https://www.alphavantage.co/query'

# Upper bound, in seconds, on how long a single request may block.
# Without a timeout, requests.get can wait indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Create the "Data" folder if it doesn't exist
os.makedirs('Data', exist_ok=True)

# Date-range suffix used in every output file name.
file_date = start_date.strftime('%Y%m%d') + '_' + end_date.strftime('%Y%m%d')

for STOCK_SYMBOL in COMPANIES:
    # Set the parameters for the request
    params = {
        'function': 'NEWS_SENTIMENT',
        'tickers': STOCK_SYMBOL,
        'time_from': start_date.strftime('%Y%m%dT%H%M'),
        'time_to': end_date.strftime('%Y%m%dT%H%M'),
        'limit': 1000,
        'apikey': API_KEY
    }

    # Send the request to the Alpha Vantage API. A network failure for one
    # ticker is reported and skipped so the remaining tickers are still fetched.
    try:
        response = requests.get(API_ENDPOINT, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"Failed to retrieve news for {STOCK_SYMBOL}: {exc}")
        continue

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve news for {STOCK_SYMBOL}: ", response.status_code)
        continue

    # Parse the response JSON; a non-JSON body raises a ValueError subclass.
    try:
        news_data = response.json()
    except ValueError as exc:
        print(f"Failed to retrieve news for {STOCK_SYMBOL}: invalid JSON ({exc})")
        continue

    # NOTE(review): Alpha Vantage appears to report problems (missing/invalid
    # key, rate limit) inside an HTTP 200 JSON body that has no 'feed' list.
    # Such payloads are skipped instead of being saved as news data. Confirm
    # the exact error keys against the API documentation.
    if not isinstance(news_data, dict) or 'feed' not in news_data:
        print(f"Failed to retrieve news for {STOCK_SYMBOL}: ", news_data)
        continue

    # Define the JSON file name with the date included
    json_file_name = f"Data/{STOCK_SYMBOL}_news_data_{file_date}.json"

    # Save the data to a JSON file
    with open(json_file_name, 'w', encoding='utf-8') as json_file:
        json.dump(news_data, json_file, ensure_ascii=False, indent=4)
