<a href="https://colab.research.google.com/github/maberf/colabs/blob/main/yahoofinance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import datetime as dt

In [None]:
# Yahoo Finance historical price series download function
def yfSeries(list, period, pricetype):
    """
    Download Yahoo Finance historical prices and load them into a pandas dataframe.

    The resulting columns are renamed ('.SA' removed, '^BVSP' -> 'IBOV',
    '^GSPC' -> 'SP500', 'USDBRL=X' -> 'USDBRL') and reordered so that the
    benchmark columns (IBOV, IFIX, SP500, USRT, USDBRL) come first, followed
    by the remaining columns in their downloaded order.

    args:
      list: asset tickers to download - [type]: [list]
        (the name shadows the builtin; kept so existing keyword callers work)
      period: look-back window such as '1y' or '2y' - [type]: [str]
      pricetype: price field to keep, 'Close' or 'Adj Close' - [type]: [str]
        'Close' is downloaded with auto-adjustment on; 'Adj Close' is
        downloaded with auto-adjustment off.
    returns:
      one column per asset, timezone-naive index - [type]: [pandas.core.frame.DataFrame]
    """
    # yfinance omits the 'Adj Close' column when auto_adjust=True, so the
    # adjustment is only switched off when that column is explicitly requested
    auto_adjust = pricetype != 'Adj Close'

    # series reading
    df = yf.download(list, period=period, auto_adjust=auto_adjust)[pricetype]

    # a flat-column download (single ticker) yields a Series here; promote it
    # to a one-column frame named after the ticker so the code below applies
    if isinstance(df, pd.Series):
        ticker = list if isinstance(list, str) else next(iter(list))
        df = df.to_frame(name=ticker)

    # remove timezone from index
    df.index = pd.to_datetime(df.index).tz_localize(None)

    # excluding .SA, renaming ^BVSP to IBOV, ^GSPC to SP500, USDBRL=X to USDBRL
    # (substring replacements, applied to every column in this order)
    replacements = (
        ('.SA', ''),
        ('^BVSP', 'IBOV'),
        ('^GSPC', 'SP500'),
        ('USDBRL=X', 'USDBRL'),
    )
    renamed = []
    for col in df.columns:
        for old, new in replacements:
            col = col.replace(old, new)
        renamed.append(col)
    df.columns = renamed

    # dataframe organization: prioritized columns first, then all the others
    columns_priorized = ['IBOV', 'IFIX', 'SP500', 'USRT', 'USDBRL']
    priorized_existing = [col for col in columns_priorized if col in df.columns]
    columns_others = [col for col in df.columns if col not in priorized_existing]
    df = df[priorized_existing + columns_others]

    # return dataframe
    return df

In [None]:
# US STOCKS YAHOO FINANCE ROUTINE
#
# Example: tickersus = ['^GSPC','USDBRL=X','AAPL','AIG','BAC','RIO','DHI','EXC','KMB','KO','LOPE','LYB','MGA','MSFT','MSTR','NUE','NVDA','TGT','TMUS','UPS','UNH','XOM']
#
def yfStockUsData(tickersus):
    """
    Collect the Yahoo Finance `.info` fields of each ticker into a pandas dataframe.

    One row is produced per distinct ticker (a repeated ticker keeps a single
    row). A ticker whose lookup fails is still listed, with only the 'Ticker'
    field filled; its other fields are left missing. In the 'Ticker' column
    '^GSPC' is renamed to 'SP500' and 'USDBRL=X' to 'USDBRL'.

    args:
      tickersus: assets tickers to look up - [type]: [list]
    returns:
      'Ticker' column first, then every info key found, sorted alphabetically;
      numeric index (0, 1, 2, ...) - [type]: [pandas.core.frame.DataFrame]
    """
    # local import so the function carries its own logging dependency
    import logging

    logger = logging.getLogger(__name__)

    # one dict per ticker, keyed by ticker so repeated tickers collapse
    yfstockusdata = {}
    # every info key found across all tickers
    all_keys = set()

    for t in tickersus:
        try:
            # dict(...) stays inside the try so a malformed .info payload is
            # treated like any other lookup failure
            info = dict(yf.Ticker(t).info or {})
        except Exception as exc:
            # best effort: keep the ticker with only the 'Ticker' field, but
            # report the failure instead of hiding it
            logger.warning("Could not read Yahoo Finance info for %s: %s", t, exc)
            info = {}
        # the ticker is always present as a field; info values are added after it
        info_row = {'Ticker': t}
        info_row.update(info)
        all_keys.update(info)
        yfstockusdata[t] = info_row

    # 'Ticker' first, remaining keys in alphabetical order
    cols_final = ['Ticker'] + sorted(all_keys - {'Ticker'})

    # Passing the columns explicitly fixes the column order and guarantees the
    # 'Ticker' column exists even when no ticker was supplied.
    yfstockus = pd.DataFrame(list(yfstockusdata.values()), columns=cols_final)

    # Renaming ^GSPC to SP500 and USDBRL=X to USDBRL
    yfstockus['Ticker'] = yfstockus['Ticker'].str.replace('^GSPC', 'SP500', regex=False)
    yfstockus['Ticker'] = yfstockus['Ticker'].str.replace('USDBRL=X', 'USDBRL', regex=False)
    return yfstockus

In [None]:
# info values sanitization function
def yfSanitizeInfos(yfstockusinfos):
    """
    Sanitize every value of an info dataframe, returning a sanitized copy.

    The input dataframe is not modified. Each cell is converted as follows:
    None and non-finite floats (NaN/Inf) become '', booleans stay booleans,
    integers and finite floats become plain Python numbers, strings are
    truncated to 500 characters, and any other object is replaced by its
    string representation truncated to 500 characters.

    args:
      yfstockusinfos: dataframe with tickers infos - [type]: [pandas.core.frame.DataFrame]
    returns:
      [type]: [pandas.core.frame.DataFrame]
    """

    def sanitize_value(v, maxlen=500):
        """
        Sanitize a single cell value.

        args:
          v: the cell value - [type]: [any]
          maxlen: maximum length kept for strings - [type]: [int]
        returns:
          '', bool, int, float or str
        """
        if v is None:
            return ''
        # booleans must be tested before integers: bool is a subclass of int,
        # so testing int first would turn True/False into 1/0
        if isinstance(v, (bool, np.bool_)):
            return bool(v)
        # floats (Python and NumPy): reject NaN/Inf
        if isinstance(v, (float, np.floating)):
            return float(v) if np.isfinite(v) else ''
        if isinstance(v, (int, np.integer)):
            return int(v)
        # strings are kept, truncated if too long
        if isinstance(v, str):
            return v[:maxlen]
        # all other types (dict, list, Timestamp, ndarray, Decimal, etc.)
        # are converted to a truncated string
        try:
            return str(v)[:maxlen]
        except Exception:
            return ''

    # work on a copy and sanitize column by column using Series.map
    yfstockusinfos_sanitized = yfstockusinfos.copy()

    for col in yfstockusinfos_sanitized.columns:
        yfstockusinfos_sanitized[col] = yfstockusinfos_sanitized[col].map(sanitize_value)

    return yfstockusinfos_sanitized