In [1]:
import pandas as pd
import yfinance as yf

from rich import print

%load_ext rich

In [2]:
def get_stock_price(ticker_name: str, start_date="2020-06-30", end_date="2021-06-30"):
    """Return the first and last "Close" values of a ticker's 3-month-interval history.

    Args:
        ticker_name: Symbol passed to ``yf.Ticker``.
        start_date: Passed through as ``start`` to ``Ticker.history``.
        end_date: Passed through as ``end`` to ``Ticker.history``.

    Returns:
        A two-row DataFrame (first and last history rows) holding the former
        index as a column, the ``Close`` column, and a ``Ticker`` column set to
        ``ticker_name``. Row labels are reset to 0 and 1.

    Note:
        Positional selection of rows 0 and -1 presumably raises ``IndexError``
        when the history is empty, and yields the same row twice when there is
        only one row -- TODO confirm against the installed pandas.
        NOTE(review): with ``interval="3mo"`` the notebook's recorded output
        shows bar dates of 2020-06-01 for a 2020-06-30 start, so the selected
        "Close" may not be the price at ``start_date`` itself -- confirm this is
        the intended sampling.
    """
    history = yf.Ticker(ticker_name).history(
        interval="3mo",
        start=start_date,
        end=end_date,
    )

    # Keep only the two endpoints of the period: first bar and last bar.
    endpoints = history.iloc[[0, -1], :]

    # Move the index into a regular column next to "Close", then tag every
    # row with the symbol so frames from different tickers can be stacked.
    closes = endpoints[["Close"]].reset_index()
    return closes.assign(Ticker=ticker_name)

In [3]:
def get_stock_prices(
    ticker_names: list, start_date="2020-06-30", end_date="2021-06-30"
):
    """Fetch first/last closing prices for several tickers and save them to CSV.

    Each ticker is fetched with ``get_stock_price``. A ticker whose fetch raises
    is skipped (best effort) and reported together with the reason. The
    per-ticker frames are concatenated as-is, so row labels repeat for every
    ticker; the CSV is written without the index.

    Args:
        ticker_names: Symbols to fetch.
        start_date: Forwarded to ``get_stock_price``.
        end_date: Forwarded to ``get_stock_price``.

    Returns:
        The concatenated DataFrame, which is also written to
        ``./data/stock_prices.csv``.

    Raises:
        ValueError: If no ticker produced any data (empty input or every fetch
            failed). Nothing is written to disk in that case.
    """
    print(f"Getting stock prices for {ticker_names} from {start_date} to {end_date}")

    frames = []
    failed = []

    for ticker in ticker_names:
        try:
            frames.append(get_stock_price(ticker, start_date, end_date))
        except Exception as e:
            # Deliberately best-effort: one bad symbol must not abort the batch,
            # but surface the cause instead of silently discarding it.
            failed.append(ticker)
            print(f"Error getting data for {ticker}: {type(e).__name__}: {e}")

    if not frames:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # fail with the same exception type but an actionable message.
        raise ValueError(
            "No stock prices could be retrieved; "
            f"requested={list(ticker_names)}, failed={failed}"
        )

    prices = pd.concat(frames)

    prices.to_csv("./data/stock_prices.csv", index=False)

    return prices


In [4]:
# Load the company list, keeping only the columns used below.
top_100_companies = pd.read_csv('./data/s&p500_top100.csv')[['Company', 'Symbol', 'Portfolio%']]
# Swap "." for "-" in the symbols before they are handed to the price lookup.
# regex=False pins a literal replacement: under regex semantics "." matches
# every character, and the default for `regex` differs between pandas versions.
top_100_companies['Symbol'] = top_100_companies['Symbol'].str.replace('.', '-', regex=False)

In [8]:
# Fetch prices for every symbol using the function's default date range.
# The call also writes ./data/stock_prices.csv; the returned frame is shown
# as this cell's output.
get_stock_prices(top_100_companies['Symbol'].tolist())

Unnamed: 0,Date,Close,Ticker
0,2020-06-01 00:00:00-04:00,218.529816,MSFT
1,2021-06-01 00:00:00-04:00,265.033173,MSFT
0,2020-06-01 00:00:00-04:00,126.193336,AAPL
1,2021-06-01 00:00:00-04:00,134.841141,AAPL
0,2020-06-01 00:00:00-04:00,133.304092,NVDA
...,...,...,...
1,2021-06-01 00:00:00-04:00,83.541824,MU
0,2020-06-01 00:00:00-04:00,57.171944,GILD
1,2021-06-01 00:00:00-04:00,61.500305,GILD
0,2020-06-01 00:00:00-04:00,45.996845,BX
