In [None]:
from dotenv import load_dotenv
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.fundamentaldata import FundamentalData
import pandas as pd
import os
import threading

# Load variables from a local .env file into the process environment first.
# load_dotenv is imported at the top of this cell; without calling it the key
# is only found when it has already been exported in the shell.
load_dotenv()
api_key = os.getenv("ALPHA_VANTAGE_KEY")

# Shared Alpha Vantage clients, both configured for pandas output.
ts = TimeSeries(key=api_key, output_format="pandas")
fd = FundamentalData(key=api_key, output_format="pandas")

In [21]:
# Symbols whose daily data is downloaded by the threaded fetch cell.
tickers = ["AMZN"]

In [None]:
# Per-ticker results; fetch_data writes one entry per symbol it downloads.
data_dict = dict()


def fetch_data(ticker):
    """Download the daily series for ``ticker`` and store it in ``data_dict``.

    The request uses ``outputsize="full"``. The second element returned by
    ``ts.get_daily`` is discarded; nothing is returned to the caller.
    """
    daily_frame, _unused_meta = ts.get_daily(symbol=ticker, outputsize="full")
    data_dict[ticker] = daily_frame


# Start one worker thread per ticker; each worker runs fetch_data for its symbol.
threads = []
for ticker in tickers:
    threads.append(threading.Thread(target=fetch_data, args=(ticker,)))
    threads[-1].start()

# Block until every worker has finished.
for worker in threads:
    worker.join()

In [None]:
# Symbol used by the single-ticker cells that follow.
symbol = "AAPL"
# Daily series for `symbol` (outputsize="full"); the second returned element is discarded.
prices, _ = ts.get_daily(symbol=symbol, outputsize="full")

In [None]:
# Company overview for `symbol`; the second element of the returned pair is discarded.
overview, _ = fd.get_company_overview(symbol)

In [None]:
# Keep only the first element returned by get_balance_sheet_quarterly
# (presumably the data frame, with metadata second - TODO confirm).
bs = fd.get_balance_sheet_quarterly(symbol)[0]

In [None]:
# Display the quarterly balance sheet result for inspection.
bs

In [None]:
# Fetch fundamental data
overview_raw, _ = fd.get_company_overview(symbol)

# Transpose the overview and name its single column after the symbol
overview = overview_raw.T
overview.columns = [symbol]

# Combine with the daily prices held in `prices`. This cell previously
# referenced `daily_data`, which is never defined in the notebook and raised
# NameError on a fresh kernel.
# NOTE(review): concat(axis=1) aligns rows on the index. `prices` and the
# transposed overview presumably have unrelated indexes, so the result would
# be an outer union with NaN padding rather than fundamentals repeated on each
# daily row - confirm the intended shape.
fundamental_data = pd.concat([prices, overview], axis=1)

# Save to a CSV file
fundamental_data.to_csv(f"{symbol}_daily_fundamental_data.csv")

# Bare last expression so the notebook renders the frame with rich display
fundamental_data.head()

In [19]:
import yfinance as yf
import pandas as pd
import config


def pull_prices():
    """Download adjusted closing prices for every ticker in ``config.TICKERS``.

    The request covers ``config.START_DATE`` to ``config.END_DATE``.

    Returns:
        The "Adj Close" portion of the download, with its index turned into a
        regular column by ``reset_index()``.
    """
    # auto_adjust=False is passed explicitly: newer yfinance releases default
    # to auto_adjust=True, which omits the "Adj Close" column and would make
    # the lookup below raise KeyError.
    downloaded = yf.download(
        config.TICKERS,
        start=config.START_DATE,
        end=config.END_DATE,
        auto_adjust=False,
    )
    return downloaded["Adj Close"].reset_index()


def pull_fundamental_data(ticker):
    """Collect info and quarterly statements for one ticker as tagged frames.

    Args:
        ticker: Symbol passed to ``yf.Ticker``; also written into a ``ticker``
            column on every returned frame.

    Returns:
        Tuple ``(info_df, financials_df, balance_sheet_df, cash_flow_df)``.
        ``info_df`` is a one-row frame built from ``stock.info``. The other
        three are the quarterly statements transposed, with the former index
        moved into a column by ``reset_index()``.
    """
    stock = yf.Ticker(ticker)

    def _tagged_rows(statement):
        # Transpose, move the index into a column, then add the ticker identifier.
        rows = statement.T.reset_index()
        rows["ticker"] = ticker
        return rows

    # Same attribute access order as before: info, financials, balance sheet, cash flow.
    info_df = pd.DataFrame([stock.info])
    info_df["ticker"] = ticker

    financials_df = _tagged_rows(stock.quarterly_financials)
    balance_sheet_df = _tagged_rows(stock.quarterly_balance_sheet)
    cash_flow_df = _tagged_rows(stock.quarterly_cashflow)

    return info_df, financials_df, balance_sheet_df, cash_flow_df


def create_full_dataset():
    """Build a dict of price and fundamental frames for ``config.TICKERS``.

    Returns:
        Dict with keys ``"prices"``, ``"info"``, ``"financials"``,
        ``"balance_sheet"`` and ``"cash_flow"``. ``"prices"`` is the result of
        ``pull_prices()``; each other entry is the concatenation, with a fresh
        index, of that section's frame across all tickers.
    """
    prices_df = pull_prices()

    # One (info, financials, balance_sheet, cash_flow) tuple per ticker.
    per_ticker = [pull_fundamental_data(symbol) for symbol in config.TICKERS]

    full_dataset = {"prices": prices_df}
    section_names = ("info", "financials", "balance_sheet", "cash_flow")
    for position, section in enumerate(section_names):
        # pd.concat is still called when there are no tickers, so an empty
        # ticker list raises exactly as it did with explicit per-section lists.
        section_frames = [parts[position] for parts in per_ticker]
        full_dataset[section] = pd.concat(section_frames, ignore_index=True)

    return full_dataset


# Example usage: build the dataset once...
full_dataset = create_full_dataset()

# ...then bind each part of it to its own name.
prices_df, info_df, financials_df, balance_sheet_df, cash_flow_df = (
    full_dataset[key]
    for key in ("prices", "info", "financials", "balance_sheet", "cash_flow")
)

[*********************100%%**********************]  30 of 30 completed
  info_df = pd.concat(info_list, ignore_index=True)


In [20]:
# AAPL rows only, restricted to the ticker / index / Total Debt columns.
balance_sheet_df.loc[
    balance_sheet_df["ticker"] == "AAPL", ["ticker", "index", "Total Debt"]
]

Unnamed: 0,ticker,index,Total Debt
0,AAPL,2024-03-31,104590000000.0
1,AAPL,2023-12-31,108040000000.0
2,AAPL,2023-09-30,111088000000.0
3,AAPL,2023-06-30,109280000000.0
4,AAPL,2023-03-31,109615000000.0
5,AAPL,2022-12-31,
