In [1]:
import pandas as pd
from pathlib import Path
import os
from urllib import parse
import dotenv

# Load environment variables from the .env file three directories above the
# working directory; the API key lookup raises KeyError if it is not defined.
dotenv.load_dotenv("../../../.env")
ALPHA_VANTAGE_API_KEY = os.environ["ALPHA_VANTAGE_API_KEY"]

# Endpoint and the query parameters shared by every request; the per-asset
# "symbol" parameter is added at request time.
ALPHA_VANTAGE_API = "https://www.alphavantage.co/query"
ALPHA_VANTAGE_PARAMS = {
    "function": "TIME_SERIES_DAILY",
    "outputsize": "full",
    "datatype": "csv",
    "apikey": ALPHA_VANTAGE_API_KEY,
}

# Ticker symbols to download.
ASSETS = ["VT", "VOO", "VO", "VB", "VXUS", "VWO", "COIN", "HOOD", "AAAU", "VNQ"]

# All data paths hang off a "data" folder three levels above the working directory.
DATA_DIR = Path.cwd().parent.parent.parent / "data"

INPUT_DIR = DATA_DIR.joinpath("prices", "raw")
OUTPUT_FILE = DATA_DIR.joinpath("prices", "clean", "stocks_clean.csv")

In [None]:
def format_asset_df(asset: str) -> pd.DataFrame:
    """Download one asset's price history and reshape it to (asset, date, price).

    Requests ``ALPHA_VANTAGE_API`` with ``ALPHA_VANTAGE_PARAMS`` plus
    ``symbol=asset`` and parses the response body as CSV.

    Args:
        asset: Ticker symbol, sent as the ``symbol`` query parameter.

    Returns:
        A new DataFrame with exactly three columns: ``asset`` (the ticker
        repeated on every row), ``date`` (the response's ``timestamp`` column,
        left as parsed by ``read_csv``) and ``price`` (the response's ``close``
        column).

    Raises:
        KeyError: If the parsed response lacks the ``timestamp`` or ``close``
            column, e.g. when the service answers with a message instead of
            price data (NOTE(review): confirm the error format against the
            API documentation). KeyError is kept because that is what the
            bare column lookup raised before; the message now says what was
            actually received.
    """
    # Shared query parameters plus the per-request ticker symbol.
    params = parse.urlencode({**ALPHA_VANTAGE_PARAMS, "symbol": asset})
    raw = pd.read_csv(f"{ALPHA_VANTAGE_API}?{params}")

    missing = [col for col in ("timestamp", "close") if col not in raw.columns]
    if missing:
        # The request URL is deliberately kept out of the message: it carries the API key.
        preview = raw.head(3).to_string(index=False)[:300]
        raise KeyError(
            f"Response for {asset!r} is missing column(s) {missing}; "
            f"got columns {list(raw.columns)}. Start of payload: {preview}"
        )

    # Build the result from a fresh selection so the parsed frame is not mutated.
    prices = raw[["timestamp", "close"]].rename(
        columns={"timestamp": "date", "close": "price"}
    )
    prices.insert(0, "asset", asset)
    return prices


# Download every asset, stack the per-asset frames, then order the rows by
# date and ticker with a fresh 0..n-1 index.
asset_frames = [format_asset_df(asset) for asset in ASSETS]
stock_df = (
    pd.concat(asset_frames)
    .sort_values(["date", "asset"])
    .reset_index(drop=True)
)
stock_df.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,131.77,131.880,130.4800,131.04,2398660
1,2025-08-06,130.36,130.920,130.0600,130.80,2104937
2,2025-08-05,130.52,130.625,129.6091,129.95,1751894
3,2025-08-04,129.50,130.230,129.3500,130.21,2642351
4,2025-08-01,129.08,129.080,127.7900,128.41,7426971
...,...,...,...,...,...,...
4301,2008-07-02,49.60,49.690,48.3800,48.38,74700
4302,2008-07-01,49.00,49.600,47.7400,49.10,55100
4303,2008-06-30,49.50,51.720,49.3600,49.54,23900
4304,2008-06-27,49.41,51.450,49.2600,49.50,26800


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,584.87,585.52,578.3127,581.290,4594260
1,2025-08-06,578.25,582.27,577.4100,581.640,4545015
2,2025-08-05,580.79,581.50,576.4000,577.350,4647381
3,2025-08-04,575.17,580.21,575.0800,580.140,6640089
4,2025-08-01,575.73,575.74,569.2900,571.450,8781254
...,...,...,...,...,...,...
3746,2010-09-15,51.31,51.69,51.2000,51.650,18400
3747,2010-09-14,51.42,51.74,51.1900,51.519,118800
3748,2010-09-13,51.48,51.57,51.2500,51.530,67400
3749,2010-09-10,50.84,50.93,50.6480,50.890,17200


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,286.54,286.7400,283.0701,284.36,525455
1,2025-08-06,284.97,285.1935,283.2950,284.37,376942
2,2025-08-05,286.21,286.4530,283.5600,284.67,454159
3,2025-08-04,283.50,286.4500,283.5000,286.21,738155
4,2025-08-01,283.05,283.1200,279.0000,281.94,680665
...,...,...,...,...,...,...
5410,2004-02-05,49.00,49.1500,49.0000,49.09,700
5411,2004-02-04,49.30,49.3000,48.9600,48.96,2600
5412,2004-02-03,49.48,49.6000,49.4400,49.51,4100
5413,2004-02-02,49.53,49.7900,49.2500,49.71,8100


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,243.25,243.2500,239.0567,240.33,439666
1,2025-08-06,241.84,241.8400,239.9800,240.73,568169
2,2025-08-05,242.00,242.3100,239.2900,241.39,542158
3,2025-08-04,239.26,241.4601,238.9350,241.30,504048
4,2025-08-01,238.72,238.9500,234.4900,237.58,2422841
...,...,...,...,...,...,...
5410,2004-02-05,48.39,48.4900,48.1000,48.20,4000
5411,2004-02-04,48.83,48.8300,48.0500,48.05,1600
5412,2004-02-03,49.15,49.2500,49.0700,49.07,1200
5413,2004-02-02,49.25,49.6000,49.1000,49.20,3400


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,70.36,70.4600,69.9450,70.21,3182763
1,2025-08-06,69.51,69.7750,69.4650,69.71,3021012
2,2025-08-05,69.28,69.3800,69.0250,69.26,3469204
3,2025-08-04,68.98,69.1050,68.8600,69.08,3399095
4,2025-08-01,68.41,68.4137,67.8500,68.24,4699342
...,...,...,...,...,...,...
3648,2011-02-03,50.99,50.9900,50.3600,50.78,51100
3649,2011-02-02,51.00,51.0000,50.5400,50.69,47700
3650,2011-02-01,50.62,50.9500,50.1500,50.95,26300
3651,2011-01-31,49.75,50.0000,49.6299,49.88,46200


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,50.830,50.9350,50.6500,50.75,5728731
1,2025-08-06,50.270,50.4200,50.1200,50.38,6444001
2,2025-08-05,50.230,50.3299,50.0700,50.16,9528826
3,2025-08-04,50.110,50.1200,49.8700,50.00,7863418
4,2025-08-01,49.765,49.7850,49.3699,49.54,14479361
...,...,...,...,...,...,...
5131,2005-03-16,48.630,48.7700,48.2500,48.30,98200
5132,2005-03-15,49.250,49.3600,48.7000,48.72,63900
5133,2005-03-14,49.960,50.1100,49.6000,49.80,56400
5134,2005-03-11,50.380,50.6285,50.0900,50.10,92200


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,311.230,317.9400,306.9800,310.79,13857155
1,2025-08-06,297.550,304.7700,293.3101,303.58,15982739
2,2025-08-05,307.110,312.8500,297.2000,297.99,16863874
3,2025-08-04,319.540,324.0000,311.0000,318.17,12106371
4,2025-08-01,335.140,337.5825,310.5500,314.69,31342212
...,...,...,...,...,...,...
1080,2021-04-20,333.425,334.8300,312.0201,320.82,18082257
1081,2021-04-19,337.260,341.0100,326.7900,333.00,11405556
1082,2021-04-16,327.500,345.9900,321.0300,342.00,22654542
1083,2021-04-15,348.900,349.2000,317.2701,322.75,39777858


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,107.890,112.6344,107.4000,111.23,44736013
1,2025-08-06,104.505,105.9000,101.1800,105.65,33023516
2,2025-08-05,105.970,107.3000,103.2200,105.46,33460912
3,2025-08-04,101.229,106.7900,97.1532,106.37,46466004
4,2025-08-01,97.800,104.5900,93.3600,99.90,76745146
...,...,...,...,...,...,...
1006,2021-08-04,54.450,85.0000,54.4000,70.39,172986777
1007,2021-08-03,37.911,48.5900,37.1594,46.80,93819198
1008,2021-08-02,35.970,37.9800,35.3000,37.68,19948534
1009,2021-07-30,34.930,36.9795,33.2500,35.15,40632228


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,33.42,33.5800,33.32,33.580,2209741
1,2025-08-06,33.28,33.3800,33.23,33.300,3072892
2,2025-08-05,33.29,33.4850,33.27,33.365,2948772
3,2025-08-04,33.34,33.4262,33.28,33.335,1850385
4,2025-08-01,33.09,33.1900,32.97,33.150,4025816
...,...,...,...,...,...,...
1749,2018-08-21,11.92,11.9500,11.89,11.930,30625
1750,2018-08-20,11.88,11.9100,11.85,11.900,28728
1751,2018-08-17,11.80,11.8200,11.77,11.820,52373
1752,2018-08-16,11.78,11.8000,11.74,11.740,428440


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-08-07,90.00,90.000,89.240,89.80,3345583
1,2025-08-06,90.30,90.390,89.500,89.54,3604809
2,2025-08-05,89.90,90.335,89.510,90.24,3752663
3,2025-08-04,89.11,90.055,89.075,89.83,3385713
4,2025-08-01,90.03,90.095,88.290,88.92,5395749
...,...,...,...,...,...,...
5243,2004-10-05,51.40,51.500,51.350,51.39,11900
5244,2004-10-04,51.39,51.550,51.390,51.40,8100
5245,2004-10-01,50.35,51.250,50.250,51.18,129800
5246,2004-09-30,50.00,50.330,49.990,50.25,27900


Unnamed: 0,asset,date,price
0,VB,2004-01-30,49.0
1,VO,2004-01-30,49.35
2,VB,2004-02-02,49.2
3,VO,2004-02-02,49.71
4,VB,2004-02-03,49.07


In [3]:
# The data comes from a live API whose history grows as new trading days are
# added, so an exact row count only passes on the day it was recorded. Check
# invariants instead, and treat the previously recorded count as a lower bound.
assert list(stock_df.columns) == ["asset", "date", "price"], "unexpected columns"
assert set(stock_df["asset"]) == set(ASSETS), "downloaded assets do not match ASSETS"
assert not stock_df.duplicated(["asset", "date"]).any(), "duplicate (asset, date) rows"
assert stock_df["price"].gt(0).all(), "missing or non-positive prices"
assert len(stock_df) >= 36774, "fewer rows than the previously recorded download"

In [4]:
# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before writing.
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
stock_df.to_csv(OUTPUT_FILE, index=False)