In [5]:
import pandas as pd
from pathlib import Path
import os
from urllib import parse
import dotenv

# --- Configuration -----------------------------------------------------------
# Load secrets from the .env file three directories above the working
# directory, then read the API key; a missing key raises KeyError here.
dotenv.load_dotenv("../../../.env")
ALPHA_VANTAGE_API_KEY = os.environ["ALPHA_VANTAGE_API_KEY"]

# Query endpoint plus the parameters shared by every request; the per-asset
# "symbol" parameter is added at call time.
ALPHA_VANTAGE_API = "https://www.alphavantage.co/query"
ALPHA_VANTAGE_PARAMS = {
    "function": "TIME_SERIES_DAILY",
    "outputsize": "full",
    "datatype": "csv",
    "apikey": ALPHA_VANTAGE_API_KEY,
}

# Ticker symbols to download.
ASSETS = ["VT", "VOO", "VO", "VB", "VXUS", "VWO", "COIN", "HOOD", "AAAU", "VNQ"]

# The data folder sits three levels above the working directory, mirroring the
# relative .env path used above.
DATA_DIR = Path.cwd().parent.parent.parent / "data"

# NOTE(review): INPUT_DIR is not referenced by any cell visible here — confirm
# whether it is still needed.
INPUT_DIR = DATA_DIR / "prices" / "raw"
# NOTE(review): the file is named "crypto_clean.csv" although ASSETS holds
# stock/ETF tickers — confirm this is the intended destination.
OUTPUT_FILE = DATA_DIR / "prices" / "clean" / "crypto_clean.csv"

In [6]:
def format_asset_df(asset: str) -> pd.DataFrame:
    """Download the daily price history for one asset as a tidy frame.

    The request is built from ``ALPHA_VANTAGE_PARAMS`` plus a ``symbol``
    parameter and read directly from ``ALPHA_VANTAGE_API`` as CSV.

    Args:
        asset: Ticker symbol to request (e.g. ``"VOO"``).

    Returns:
        A DataFrame with exactly the columns ``asset``, ``date`` and ``price``,
        where ``date`` is the response's ``timestamp`` column and ``price`` is
        its ``close`` column. Values are kept as parsed by ``pd.read_csv``.

    Raises:
        ValueError: If the response lacks the ``timestamp`` or ``close``
            column, i.e. the API did not return a price table.
    """
    params = parse.urlencode({**ALPHA_VANTAGE_PARAMS, "symbol": asset})
    raw = pd.read_csv(f"{ALPHA_VANTAGE_API}?{params}")

    # The API reports problems such as rate limits or unknown symbols in the
    # response body, so read_csv can "succeed" on something that is not a price
    # table. Fail here with a clear message instead of an opaque KeyError.
    # The request URL is deliberately left out of the message because it
    # contains the API key.
    missing = {"timestamp", "close"} - set(raw.columns)
    if missing:
        raise ValueError(
            f"Unexpected Alpha Vantage response for {asset!r}: "
            f"missing column(s) {sorted(missing)}; got columns {list(raw.columns)}"
        )

    # Build the tidy frame without adding scratch columns to the download.
    tidy = raw.rename(columns={"timestamp": "date", "close": "price"}).assign(asset=asset)
    return tidy[["asset", "date", "price"]]


# Fetch every asset in ASSETS, stack the per-asset frames, and order the rows
# by date and then by ticker with a fresh 0..n-1 index.
asset_frames = []
for ticker in ASSETS:
    asset_frames.append(format_asset_df(ticker))

stock_df = (
    pd.concat(asset_frames)
    .sort_values(["date", "asset"])
    .reset_index(drop=True)
)
stock_df.head()

Unnamed: 0,asset,date,price
0,VB,2004-01-30,49.0
1,VO,2004-01-30,49.35
2,VB,2004-02-02,49.2
3,VO,2004-02-02,49.71
4,VB,2004-02-03,49.07


In [9]:
# Sanity-check the combined table. The data comes from a live daily series, so
# an exact row count would start failing as soon as a new trading day is
# published; check structural invariants and a row-count floor instead.
assert list(stock_df.columns) == ["asset", "date", "price"], "unexpected columns"
# 36774 is the row count previously pinned in this notebook; the history is
# expected to only grow from there — TODO confirm if ASSETS is ever changed.
assert len(stock_df) >= 36774, f"too few rows: {len(stock_df)}"
assert set(stock_df["asset"]) == set(ASSETS), "some requested assets are missing"
assert not stock_df.isna().any().any(), "null values in the price table"
assert not stock_df.duplicated(["asset", "date"]).any(), "duplicate (asset, date) rows"

In [10]:
# Persist the cleaned price table. DataFrame.to_csv does not create missing
# parent directories, so make sure the output folder exists first; otherwise
# the write fails on a fresh checkout.
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
stock_df.to_csv(OUTPUT_FILE, index=False)