In [None]:
#| default_exp data.yahoo

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

# Yahoo data download

Simple code that downloads data from Yahoo Finance to illustrate a backtest. Normally the data should be stored in a database, or in an infrastructure that allows the ETL to be controlled by custom code. Note that a ticker is probably not the best unique ID (tickers can change or be reused over time). Again, this is a demo to get free data.

In [None]:
#| export
import yfinance as yf
import pandas as pd

In [None]:
#| export
class yahoo:
    """Fetch price history for a ticker from Yahoo Finance.

    The downloaded frame is cached on the instance in ``self.data``.
    ``raw_data`` returns it as downloaded; ``sim_data`` returns a
    separate, adjusted copy and never modifies the cache.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start date passed to ``yf.download`` (``start=``).
    """

    def __init__(self, ticker, start_date="1900-01-01"):
        self.ticker = ticker
        self.start_date = start_date
        # Cached download; stays None until a download succeeds.
        self.data = None

    def _read_yahoo(self):
        """Download the history and store it in ``self.data``.

        Returns nothing. If the download raises, the error is printed and
        ``self.data`` keeps whatever value it had before the call.
        """
        try:
            # auto_adjust=False is requested explicitly: `_adj_yho` needs both
            # 'Close' and 'Adj Close', and recent yfinance releases drop
            # 'Adj Close' when auto-adjustment is left at its default.
            self.data = yf.download(
                self.ticker, start=self.start_date, auto_adjust=False
            )
        except Exception as e:
            print(f"Error reading data from Yahoo Finance: {e}")

    def _adj_yho(self):
        """Return an adjusted copy of ``self.data``.

        Open, High and Low are multiplied by ``Adj Close / Close``; Volume is
        divided by the same factor and truncated to an integer. 'Close' and
        'Adj Close' are returned unchanged (Close is kept for debugging).

        Returns:
            A new DataFrame, or None when no data has been loaded.
        """
        if self.data is None:
            return None

        # Work on a copy: writing into self.data would make a second call
        # apply the adjustment factor on top of already-adjusted prices.
        frame = self.data.copy()

        # Some yfinance versions return (field, ticker) column pairs even for
        # a single ticker; collapse them so plain field names can be used.
        columns = frame.columns
        if (
            isinstance(columns, pd.MultiIndex)
            and columns.nlevels == 2
            and columns.get_level_values(1).nunique() == 1
        ):
            frame = frame.droplevel(1, axis=1)

        adj_factor = frame['Adj Close'] / frame['Close']
        for column in ('Open', 'High', 'Low'):
            frame[column] = frame[column] * adj_factor
        # Vol increases if price decreases to keep V*P = cnst
        frame['Volume'] = (frame['Volume'] / adj_factor).astype('int64')
        return frame

    def raw_data(self):
        """Download (again, on every call) and return the unadjusted frame."""
        self._read_yahoo()
        return self.data

    def sim_data(self):
        """Return the adjusted frame, downloading first only if nothing is cached."""
        if self.data is None:
            self._read_yahoo()
        return self._adj_yho()
        

## Example

In [None]:
#| eval: false
from backtest_sample.data.yahoo import yahoo

# Download SPY's price history from Yahoo Finance and show it as returned (no adjustment applied).
yahoo('SPY').raw_data()

[*********************100%***********************]  1 of 1 completed




Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993-01-29,43.968750,43.968750,43.750000,43.937500,24.684105,1003200
1993-02-01,43.968750,44.250000,43.968750,44.250000,24.859669,480500
1993-02-02,44.218750,44.375000,44.125000,44.343750,24.912336,201300
1993-02-03,44.406250,44.843750,44.375000,44.812500,25.175676,529400
1993-02-04,44.968750,45.093750,44.468750,45.000000,25.281004,531500
...,...,...,...,...,...,...
2024-09-09,544.650024,547.710022,542.679993,546.409973,546.409973,40445800
2024-09-10,548.359985,549.150024,543.380005,548.789978,548.789978,36394600
2024-09-11,548.700012,555.359985,539.960022,554.419983,554.419983,75248600
2024-09-12,555.010010,559.400024,552.739990,559.090027,559.090027,51892700


In [None]:
# Same download, but Open/High/Low are scaled by Adj Close / Close and Volume by the inverse; Close is left as downloaded.
yahoo('SPY').sim_data()

[*********************100%***********************]  1 of 1 completed




Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993-01-29,24.701665,24.701665,24.578771,43.937500,24.684109,1785687
1993-02-01,24.701664,24.859671,24.701664,44.250000,24.859671,855285
1993-02-02,24.842106,24.929887,24.789437,44.343750,24.912331,358312
1993-02-03,24.947454,25.193242,24.929898,44.812500,25.175686,942327
1993-02-04,25.263448,25.333673,24.982548,45.000000,25.281004,946066
...,...,...,...,...,...,...
2024-09-09,544.650024,547.710022,542.679993,546.409973,546.409973,40445800
2024-09-10,548.359985,549.150024,543.380005,548.789978,548.789978,36394600
2024-09-11,548.700012,555.359985,539.960022,554.419983,554.419983,75248600
2024-09-12,555.010010,559.400024,552.739990,559.090027,559.090027,51892700
