
# Stock Correlation Explorer — Data Acquisition (Alpha Vantage)

This notebook implements the **Data Acquisition** stage of the Stock Correlation Explorer project.  
It fetches daily adjusted stock prices for selected tickers using the [Alpha Vantage API](https://www.alphavantage.co/).

Notes:
- You need a free API key from Alpha Vantage.  
- Free tier allows 5 requests per minute.  


In [29]:

import os, time, requests
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
# Load API key from .env if available
load_dotenv()
API_KEY = os.getenv("ALPHAVANTAGE_API_KEY")

# Directory for saving prices
DATA_DIR_RAW = Path(os.getcwd() + '/../' + os.getenv("DATA_DIR_RAW"))
DATA_DIR_RAW.mkdir(parents=True, exist_ok=True)

# Example tickers
TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "META"]



## 2. Helper Functions


In [30]:
def fetch_daily_adjusted(symbol: str, api_key: str) -> pd.DataFrame:
    url = 'https://www.alphavantage.co/query'
    params = {'function':'TIME_SERIES_DAILY','symbol':symbol,'apikey':api_key}

    r = requests.get(url, params=params, timeout=30)
    data = r.json()
    print(data)
    if "Time Series (Daily)" not in data:
        import yfinance as yf
        df = yf.download(symbol, period='3mo', interval='1d').reset_index()[['Date', 'Close']]
        df.columns = ['date','close']
        df["ticker"] = symbol
    
    else:
        ts = data["Time Series (Daily)"]
        df = pd.DataFrame.from_dict(ts, orient="index").rename(columns={
            'index':'date',
            '4. close':'close'
        })
        df.index = pd.to_datetime(df.index)
        for col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df["ticker"] = symbol
    return df.sort_index()

def cache_ticker(symbol: str, api_key: str, pause: int = 15) -> Path:
    out_path = DATA_DIR_RAW.absolute() / f"{symbol}_daily.csv"
    if out_path.exists():
        print(f"Cached file exists for {symbol}, skipping fetch.")
        return out_path
    df = fetch_daily_adjusted(symbol, api_key)
    df.to_csv(out_path, index=False)
    print(f"Saved {symbol} → {out_path}")
    time.sleep(pause)  # respect rate limits
    return out_path



## 3. Fetch Data


In [31]:

for ticker in TICKERS:
    try:
        cache_ticker(ticker, API_KEY)
    except Exception as e:
        print("Error:", e)


{'Information': 'We have detected your API key as HCRXXKWCVDCGJENI and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}


  df = yf.download(symbol, period='3mo', interval='1d').reset_index()[['Date', 'Close']]
[*********************100%***********************]  1 of 1 completed


Saved AAPL → /Users/sanjangadde/bootcamp_sanjan_gadde/project/notebooks/../data/raw/AAPL_daily.csv


  df = yf.download(symbol, period='3mo', interval='1d').reset_index()[['Date', 'Close']]
[*********************100%***********************]  1 of 1 completed

{'Information': 'We have detected your API key as HCRXXKWCVDCGJENI and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
Saved MSFT → /Users/sanjangadde/bootcamp_sanjan_gadde/project/notebooks/../data/raw/MSFT_daily.csv



  df = yf.download(symbol, period='3mo', interval='1d').reset_index()[['Date', 'Close']]
[*********************100%***********************]  1 of 1 completed

{'Information': 'We have detected your API key as HCRXXKWCVDCGJENI and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
Saved GOOGL → /Users/sanjangadde/bootcamp_sanjan_gadde/project/notebooks/../data/raw/GOOGL_daily.csv



  df = yf.download(symbol, period='3mo', interval='1d').reset_index()[['Date', 'Close']]
[*********************100%***********************]  1 of 1 completed

{'Information': 'We have detected your API key as HCRXXKWCVDCGJENI and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
Saved AMZN → /Users/sanjangadde/bootcamp_sanjan_gadde/project/notebooks/../data/raw/AMZN_daily.csv



  df = yf.download(symbol, period='3mo', interval='1d').reset_index()[['Date', 'Close']]
[*********************100%***********************]  1 of 1 completed

{'Information': 'We have detected your API key as HCRXXKWCVDCGJENI and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
Saved META → /Users/sanjangadde/bootcamp_sanjan_gadde/project/notebooks/../data/raw/META_daily.csv



