In [11]:
#!pip install pandas
#!pip install finnhub-python
#!pip install yfinance

In [53]:
import os
import finnhub
import pandas as pd
import time

# Initialize Finnhub client with your API key
finnhub_client = finnhub.Client(api_key="css2cf9r01qj3u0ovmegcss2cf9r01qj3u0ovmf0")

# Define the base directory path where all data files will be saved
path = "/Users/maxwe/portfolio_optimizer/news_data"

# Define function to download and save news data for multiple companies individually
def download_news_for_multiple_companies(symbols, from_date, to_date, output_directory=path):
    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)
    
    for symbol in symbols:
        try:
            # Fetch news data for each symbol
            news_data = finnhub_client.company_news(symbol, _from=from_date, to=to_date)
            
            if news_data:
                # Convert news data to DataFrame
                news_df = pd.DataFrame(news_data)
                
                # Set the file path for each individual company's data
                output_file = os.path.join(output_directory, f"{symbol}_news.csv")
                
                # Save each company's data to its own CSV file
                news_df.to_csv(output_file, index=False)
                print(f"Data for {symbol} saved to {output_file}")
            else:
                print(f"No data available for {symbol} in the specified date range.")
            
            # Optional: Pause to avoid hitting API rate limits
            time.sleep(1)  # Adjust sleep time based on API rate limits
        except Exception as e:
            print(f"An error occurred for {symbol}: {e}")

# Example usage
company_symbols = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]
from_date = "2023-12-01"
to_date = "2024-11-17"
download_news_for_multiple_companies(company_symbols, from_date, to_date)

Data for AAPL saved to /Users/maxwe/portfolio_optimizer/news_data/AAPL_news.csv
Data for MSFT saved to /Users/maxwe/portfolio_optimizer/news_data/MSFT_news.csv
Data for GOOGL saved to /Users/maxwe/portfolio_optimizer/news_data/GOOGL_news.csv
Data for AMZN saved to /Users/maxwe/portfolio_optimizer/news_data/AMZN_news.csv
Data for TSLA saved to /Users/maxwe/portfolio_optimizer/news_data/TSLA_news.csv


In [57]:
import pandas as pd

# Define the unique paths to each company's news data CSV file
aapl_news_path = path + "/AAPL_news.csv"
msft_news_path = path + "/MSFT_news.csv"
googl_news_path = path + "/GOOGL_news.csv"
amzn_news_path = path + "/AMZN_news.csv"
tsla_news_path = path + "/TSLA_news.csv"

In [59]:
# Load each CSV file as a Pandas DataFrame
aapl_df = pd.read_csv(aapl_news_path)
msft_df = pd.read_csv(msft_news_path)
googl_df = pd.read_csv(googl_news_path)
amzn_df = pd.read_csv(amzn_news_path)
tsla_df = pd.read_csv(tsla_news_path)

In [61]:
aapl_df.head()

Unnamed: 0,category,datetime,headline,id,image,related,source,summary,url
0,company,1731861041,"IBM CEO on Trump: ‘Less regulation, more innov...",131411474,,AAPL,Yahoo,Tech executives may be warming to President-el...,https://finnhub.io/api/news?id=b4f6822d195b9ae...
1,company,1731856400,Analyst: Apple (AAPL)’s iPhone Sell-Throughs ‘...,131411475,https://s.yimg.com/ny/api/res/1.2/e1QwhqQiBt5e...,AAPL,Yahoo,We recently published a list of 10 AI Stocks t...,https://finnhub.io/api/news?id=54c311ce2c806c0...
2,company,1731835800,STK: A Technology Equity Fund With Call Overwr...,131410145,https://static.seekingalpha.com/cdn/s3/uploads...,AAPL,SeekingAlpha,Discover why STK is a solid investment for inc...,https://finnhub.io/api/news?id=5e857f9d2489713...
3,company,1731828384,Tata seals deal with Pegatron for iPhone plant...,131408365,https://media.zenfs.com/en/reuters-finance.com...,AAPL,Yahoo,India's Tata Electronics has agreed to buy a m...,https://finnhub.io/api/news?id=ca7d6af4e9ddf57...
4,company,1731819660,China’s smartphone makers head upmarket in Eur...,131404195,https://s.yimg.com/cv/apiv2/social/images/yaho...,AAPL,Yahoo,Chinese smartphone manufacturers are intensify...,https://finnhub.io/api/news?id=6d2ce466b7c42d1...


In [101]:
# Start with the original DataFrame, aapl_df
# Rename columns first in aapl_df
aapl_df = aapl_df.rename(columns={'headline': 'title', 'datetime': 'date', 'related': 'stock'})

# Now select specific columns and make a copy
df = aapl_df[['title', 'date', 'stock']].copy()

df.head()

Unnamed: 0,title,date,stock
0,"IBM CEO on Trump: ‘Less regulation, more innov...",1731861041,AAPL
1,Analyst: Apple (AAPL)’s iPhone Sell-Throughs ‘...,1731856400,AAPL
2,STK: A Technology Equity Fund With Call Overwr...,1731835800,AAPL
3,Tata seals deal with Pegatron for iPhone plant...,1731828384,AAPL
4,China’s smartphone makers head upmarket in Eur...,1731819660,AAPL


In [103]:
# Define a reasonable range for Unix timestamps (between 1970-01-01 and a future date within bounds)
min_valid_timestamp = 0              # Start of Unix epoch (1970-01-01)
max_valid_timestamp = 2147483647      # Roughly represents dates up to 2038-01-19

# Filter out-of-bounds dates
df = df[(df['date'] >= min_valid_timestamp) & (df['date'] <= max_valid_timestamp)]

In [105]:
import datetime as dt

In [109]:
# Convert 'date' column from Unix timestamp to datetime
df['date'] = pd.to_datetime(df['date'], unit='s')

# Function to remove timezone information
def localize_time(date):
    return date.tz_localize(None)

# Remove timezone info
df['date'] = df['date'].apply(localize_time)

# Extract 'time' and 'day' parts
df['time'] = df['date'].apply(lambda x: x.time())
df['day'] = df['date'].apply(lambda x: x.date())

In [111]:
df.head()

Unnamed: 0,title,date,stock,time,day
0,"IBM CEO on Trump: ‘Less regulation, more innov...",2024-11-17 16:30:41,AAPL,16:30:41,2024-11-17
1,Analyst: Apple (AAPL)’s iPhone Sell-Throughs ‘...,2024-11-17 15:13:20,AAPL,15:13:20,2024-11-17
2,STK: A Technology Equity Fund With Call Overwr...,2024-11-17 09:30:00,AAPL,09:30:00,2024-11-17
3,Tata seals deal with Pegatron for iPhone plant...,2024-11-17 07:26:24,AAPL,07:26:24,2024-11-17
4,China’s smartphone makers head upmarket in Eur...,2024-11-17 05:01:00,AAPL,05:01:00,2024-11-17


In [163]:
market_open = dt.time(9, 30, 0)
market_close = dt.time(16, 0, 0)

import yfinance as yf

aapl_price = yf.download("AAPL", start="2023-12-01", end="2024-11-17")

[*********************100%***********************]  1 of 1 completed


In [165]:
nvda_price.head()

Price,Adj Close,Close,High,Low,Open,Volume
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2011-01-03 00:00:00+00:00,0.362732,0.3955,0.39925,0.3875,0.388,817448000
2011-01-04 00:00:00+00:00,0.361586,0.39425,0.398,0.3855,0.39625,651384000
2011-01-05 00:00:00+00:00,0.389329,0.4245,0.425,0.3975,0.4015,1428216000
2011-01-06 00:00:00+00:00,0.443212,0.48325,0.4835,0.43425,0.4355,3493312000
2011-01-07 00:00:00+00:00,0.455593,0.49675,0.49825,0.467,0.47775,2579984000


In [213]:
# Ensure the index is a datetime type
aapl_price.index = pd.to_datetime(aapl_price.index)

# Strip the time and timezone, leaving only the date
aapl_price.index = aapl_price.index.normalize()  # Removes time, keeps only the date part

# For what we are looking at, we only need the "Open" and "Adj Close" columns
aapl = aapl_price[['Open', 'Adj Close']].copy()

# If there are multiple levels in the column names, flatten them by renaming
aapl.columns = ['Open', 'Adj Close']

In [215]:
aapl.head()

Unnamed: 0,Open,Adj Close
2023-12-01,190.330002,190.307693
2023-12-04,189.979996,188.5065
2023-12-05,190.210007,192.477051
2023-12-06,194.449997,191.382416
2023-12-07,193.630005,193.322906


In [217]:
# We also need to make sure that the Date index in this dataframe is a datetime.date
# object so that we can compare it with our ['day'] column in the dataframe with out data

aapl.index = aapl.index.to_series().apply(get_day)

In [219]:
# Lets check our index type now to see make sure that it is a datetime.date object so that we
# can use ti for the correct comparisons

type(aapl.index[0])

datetime.date

In [221]:
from datetime import timedelta

def price_change_for_one_day(day, time, aapl):
    # Check if the day is a trading day in the `aapl` index
    if day in aapl.index:
        # Case: Before market close
        if time < market_close:
            dollar_change = aapl.loc[day]['Adj Close'] - aapl.loc[day]['Open']
            pct_change = dollar_change / aapl.loc[day]['Open']
        else:  # After market close
            close_price = aapl.loc[day]['Adj Close']
            next_day = day + timedelta(days=1)
            
            # Find the next trading day, stopping if we go past available dates
            while next_day not in aapl.index:
                next_day += timedelta(days=1)
                if next_day > aapl.index[-1]:  # No next trading day available
                    return None
            
            next_day_open = aapl.loc[next_day]['Open']
            pct_change = (next_day_open - close_price) / close_price
    
    else:  # Non-trading day case
        next_day = day + timedelta(days=1)
        
        # Find the next trading day
        while next_day not in aapl.index:
            next_day += timedelta(days=1)
            if next_day > aapl.index[-1]:  # No next trading day available
                return None
        
        previous_day = day - timedelta(days=1)
        
        # Find the previous trading day
        while previous_day not in aapl.index:
            previous_day -= timedelta(days=1)
            if previous_day < aapl.index[0]:  # No previous trading day available
                return None

        next_day_open = aapl.loc[next_day]['Open']
        previous_day_close = aapl.loc[previous_day]['Adj Close']
        pct_change = (next_day_open - previous_day_close) / previous_day_close

    return pct_change * 100 if pct_change is not None else None

In [223]:
# We apply this function to the dataframe and sort the price changes in a new column called pct_change

df['pct_change'] = df.apply(lambda row: price_change_for_one_day(row['day'], row['time'], aapl), axis=1)

In [229]:
# Display the first 20 rows of the DataFrame
df.head(50)

Unnamed: 0,title,date,stock,time,day,pct_change
0,"IBM CEO on Trump: ‘Less regulation, more innov...",2024-11-17 16:30:41,AAPL,16:30:41,2024-11-17,
1,Analyst: Apple (AAPL)’s iPhone Sell-Throughs ‘...,2024-11-17 15:13:20,AAPL,15:13:20,2024-11-17,
2,STK: A Technology Equity Fund With Call Overwr...,2024-11-17 09:30:00,AAPL,09:30:00,2024-11-17,
3,Tata seals deal with Pegatron for iPhone plant...,2024-11-17 07:26:24,AAPL,07:26:24,2024-11-17,
4,China’s smartphone makers head upmarket in Eur...,2024-11-17 05:01:00,AAPL,05:01:00,2024-11-17,
5,"Qualcomm Q4: Look Beyond Short-Term Headwinds,...",2024-11-17 03:09:14,AAPL,03:09:14,2024-11-17,
6,How To Earn $500 A Month From Apple Stock,2024-11-16 15:00:13,AAPL,15:00:13,2024-11-16,
7,"Nvidia, Meta, Apple, and Microsoft Could Help ...",2024-11-16 10:21:00,AAPL,10:21:00,2024-11-16,
8,Prediction: This Warren Buffett Stock Will Out...,2024-11-16 10:05:00,AAPL,10:05:00,2024-11-16,
9,Will Trump’s tariffs cause price hikes in elec...,2024-11-16 09:32:00,AAPL,09:32:00,2024-11-16,
