In [2]:
# Import necessary modules
import os
from urllib.parse import quote

import pandas as pd
import requests
from dotenv import load_dotenv  # To load environment variables from the .env file
from sqlalchemy import create_engine

In [3]:
# Load environment variables from .env so the os.getenv() lookups in the
# following cells (API key, database settings) can find them.
load_dotenv()

True

In [4]:
# Read the Tiingo API key from the environment variables.
# An empty value (e.g. a bare `TIINGO_KEY=` line in .env) is rejected as well as
# a missing one, so a misconfiguration fails here instead of surfacing later as
# an authentication error from the API.
TIINGO_KEY = os.getenv('TIINGO_KEY')
if not TIINGO_KEY:
    raise Exception("TIINGO_KEY not set in the .env file.")

In [5]:
def get_eod_prices(symbol, start="2019-01-01", end=None, resample_freq="daily"):
    """
    Fetch historical end-of-day (EOD) price data for one symbol from the Tiingo API.

    The API token is sent in the ``Authorization`` header rather than in the query
    string, so it is not part of the request URL that ``requests`` embeds in its
    HTTP error messages (which this function re-raises and callers may print).

    Parameters:
        symbol (str): The stock symbol to fetch data for (e.g., 'AAPL').
        start (str): The start date for historical data in YYYY-MM-DD format.
        end (str): The end date for historical data in YYYY-MM-DD format. If None,
            no ``endDate`` is sent and the API's own default applies.
        resample_freq (str): Value passed through as ``resampleFreq``
            (e.g. "daily", "weekly", "monthly").

    Returns:
        pd.DataFrame: One row per returned price record, sorted by ascending date,
        with whichever of the columns date, open, high, low, close, volume the API
        returned, plus a ``symbol`` column.

    Raises:
        RuntimeError: If the API answers with an HTTP error status.
        ValueError: If the response is empty, is not a list of records, or the
            records carry no ``date`` field.
    """
    # Construct the URL for Tiingo's historical price endpoint.
    # Reference documentation: https://api.tiingo.com/documentation/end-of-day
    base_url = f"https://api.tiingo.com/tiingo/daily/{symbol}/prices"

    params = {
        'startDate': start,
        'format': 'json',
        'resampleFreq': resample_freq,
    }

    # Include the endDate parameter if provided
    if end is not None:
        params['endDate'] = end

    # Authenticate via header so the secret never ends up in a URL.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Token {TIINGO_KEY}',
    }

    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    try:
        response.raise_for_status()  # Will raise an HTTPError for a bad response (e.g., 403)
    except requests.exceptions.HTTPError as http_err:
        # Provide a more detailed error message
        raise RuntimeError(
            f"HTTP error occurred for {symbol}: {http_err}\nResponse text: {response.text}"
        ) from http_err

    data = response.json()
    if not data:
        raise ValueError(f"No data returned from Tiingo for symbol: {symbol}")
    if not isinstance(data, list):
        # e.g. a JSON object carrying an error detail instead of price records
        raise ValueError(f"Unexpected response from Tiingo for symbol {symbol}: {data!r}")

    # Convert the JSON list of dictionaries into a DataFrame
    df = pd.DataFrame(data)
    if 'date' not in df.columns:
        raise ValueError(f"Tiingo response for symbol {symbol} has no 'date' field.")

    # Keep only the expected columns that are actually present. The copy makes the
    # column assignments below operate on an independent frame.
    columns_to_keep = ['date', 'open', 'high', 'low', 'close', 'volume']
    df = df[[col for col in columns_to_keep if col in df.columns]].copy()

    # Convert data types: date to datetime, every present price/volume column to
    # numeric (unparseable values become NaN).
    df['date'] = pd.to_datetime(df['date'])
    for col in df.columns.drop('date'):
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Tag every row with the symbol it belongs to
    df['symbol'] = symbol

    # Sort the DataFrame by date in ascending order
    return df.sort_values('date').reset_index(drop=True)

In [6]:
# --- Development Mode ---
# Leave this flag True while developing: only one symbol is fetched, which keeps
# API usage low.
development_mode = True

# Symbols to fetch: a single ticker in development mode, otherwise the example
# list (extend it as needed, e.g. to the S&P 500 or sector ETFs).
symbols = ["AAPL"] if development_mode else ["AAPL", "MSFT", "GOOG"]

In [7]:
# Read the database connection settings from the environment (.env file)
DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME = map(
    os.getenv, ('DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT', 'DB_NAME')
)

# Stop right away if any of the settings is absent
if any(setting is None for setting in (DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME)):
    raise Exception("Database credentials are not fully set in the .env file.")

In [8]:
# Build the SQLAlchemy engine for MySQL (PyMySQL driver).
# User name and password are percent-encoded before being placed in the URL:
# characters such as '@', ':', '/' or '%' in a raw credential would otherwise be
# read as URL delimiters and yield a wrong host or password.
engine = create_engine(
    f"mysql+pymysql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

In [9]:
# Loop over the symbols and fetch data, then load it into the MySQL table 'raw_tiingo_prices'.
# NOTE: this cell is not idempotent. Every run calls get_eod_prices(symbol) with its
# default start date and appends the result, so re-running the cell writes the same
# rows again (unless the table enforces uniqueness, which is not visible here -- TODO confirm).
for symbol in symbols:
    try:
        print(f"Fetching data for {symbol}...")
        df = get_eod_prices(symbol)
        print(f"Sample data for {symbol}:")
        print(df.head())
        
        # Load the data into the MySQL table.
        # Using if_exists='append' to accumulate data for multiple symbols.
        # index=False keeps the DataFrame's positional index out of the table.
        df.to_sql(name='raw_tiingo_prices', con=engine, if_exists='append', index=False)
        print(f"Data for {symbol} loaded successfully into 'raw_tiingo_prices' table.\n")
    except Exception as e:
        # Best-effort: a failure for one symbol is reported and the loop moves on
        # to the next symbol instead of aborting the whole run.
        print(f"Error fetching or loading data for {symbol}: {e}")

Fetching data for AAPL...
Sample data for AAPL:
                       date    open    high     low   close    volume symbol
0 2019-01-02 00:00:00+00:00  154.89  158.85  154.23  157.92  37039737   AAPL
1 2019-01-03 00:00:00+00:00  143.98  145.72  142.00  142.19  91312195   AAPL
2 2019-01-04 00:00:00+00:00  144.53  148.55  143.80  148.26  58607070   AAPL
3 2019-01-07 00:00:00+00:00  148.70  148.83  145.90  147.93  54777764   AAPL
4 2019-01-08 00:00:00+00:00  149.56  151.82  148.52  150.75  41025314   AAPL
Data for AAPL loaded successfully into 'raw_tiingo_prices' table.

