# App Overview

In [172]:
# 🛠 Imports and logging setup
import logging
import datetime
import pandas as pd
import yfinance as yf
import requests
from bs4 import BeautifulSoup
import datetime
import json
from yfinance import Ticker
from typing import List
from pandas import DataFrame
from datetime import datetime, timezone
from typing import List
from yfinance import Ticker
from tqdm import tqdm
from tqdm.auto import tqdm as with_progress

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Configure logging for the notebook: INFO level, each line rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

def log(msg, emoji="ℹ️"):
    """Write `msg` to the log at INFO level, prefixed with `emoji` and a space.

    Args:
        msg: Text (or any object usable in an f-string) to log.
        emoji: Prefix placed in front of the message; defaults to "ℹ️".
    """
    logging.log(logging.INFO, f"{emoji} {msg}")

In [None]:
# 🌍 OBSOLETE: Define sectors & scrape tickers (placeholder: extend over time)
# Builds a small hard-coded sample universe (three sectors, five tickers each)
# and exposes it as `pruned_tickers` (list of symbols) and `universe_df`
# (columns "Ticker" and "Sector").
#
# `datetime` is the class in this notebook (the imports cell ends with
# `from datetime import datetime, timezone`), so the date is taken from
# datetime.today(); `datetime.date.today()` raises AttributeError there.
todays_date = datetime.today().strftime("%Y-%m-%d")
log(f"Fetching stock lists for {todays_date}", "📅")

# For demo: hardcode sample tickers (can extend by scraping)
tech_tickers = ["AAPL", "MSFT", "NVDA", "GOOGL", "AMZN"]
renewable_tickers = ["NEE", "ENPH", "PLUG", "FSLR", "SEDG"]
manufacturing_tickers = ["GE", "CAT", "DE", "BA", "HON"]

pruned_tickers = tech_tickers + renewable_tickers + manufacturing_tickers
log(f"Collected {len(pruned_tickers)} tickers (sample universe)", "📊")

# The sector labels are repeated so they line up 1:1 with the concatenated ticker list.
universe_df = pd.DataFrame({
    "Ticker": pruned_tickers,
    "Sector": ["Tech"]*len(tech_tickers) + ["Renewable"]*len(renewable_tickers) + ["Manufacturing"]*len(manufacturing_tickers)
})
universe_df


In [185]:
# Simple in-memory cache of yfinance Ticker objects, keyed first by ISO date
# and then by ticker symbol: { "YYYY-MM-DD": { "TICKER": TickerObj } }.
# get_stock() reads and fills it; re-running this cell rebinds the name to an
# empty dict and therefore drops everything cached so far.
stock_mem_cache: dict[str, dict[str, yf.Ticker]] = {}

In [199]:
def get_stock(ticker_symbol:str)->Ticker:
    """Return the cached yfinance Ticker for `ticker_symbol`, creating it on first use.

    Entries live in the module-level `stock_mem_cache`, grouped under today's
    ISO date (YYYY-MM-DD), so a symbol is instantiated at most once per day key.

    Args:
        ticker_symbol: Symbol passed to `yf.Ticker` and used as the cache key.

    Returns:
        The Ticker object stored in the cache for today's date.
    """
    day_key = datetime.today().date().isoformat()  # YYYY-MM-DD
    todays_entries = stock_mem_cache.setdefault(day_key, {})
    if ticker_symbol in todays_entries:
        return todays_entries[ticker_symbol]
    ticker = yf.Ticker(ticker_symbol)
    todays_entries[ticker_symbol] = ticker
    return ticker


In [205]:
# 🌍 Fetch top 50 live high dividend tickers (via screener + yield filter)
# 1. Pull `gross_ticker_count` quotes from the Yahoo predefined screener `scrId`.
# 2. Keep the symbols whose `dividendYield` exceeds `yield_floor`.
# 3. Publish the first `selection_limit` of them as `pruned_tickers` / `universe_df`.
today_str = datetime.today().strftime("%Y-%m-%d")
log(f"Fetching live high dividend stocks for {today_str}", "📅")

gross_ticker_count = 200
scrId = "most_actives"  # wide enough universe
selection_limit = 50    # maximum number of tickers handed to the following cells
api_url = (
    f"https://query2.finance.yahoo.com/v1/finance/screener/predefined/saved?"
    f"formatted=true&scrIds={scrId}&count={gross_ticker_count}&start=0"
)

# Bounded wait plus an explicit status check: an HTTP error or a hanging
# connection now fails here instead of surfacing later as a confusing
# JSON/parsing problem.
resp = requests.get(api_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
resp.raise_for_status()
data = resp.json()
# "finance"/"result"/"quotes" may be missing, null or (for "result") an empty
# list; every step falls back to an empty container so indexing [0] is safe.
quotes = (
    ((data.get("finance") or {}).get("result") or [{}])[0]
    .get("quotes")
    or []
)

log(f"Collected {len(quotes)} tickers from Yahoo screener '{scrId}'", "📊")

# ✅ Filter tickers by dividend yield
high_dividend_tickers = []
yield_floor = 3.0
low_yield_stock_cnt = 0
for quote in with_progress(quotes, desc=f"Searching for high dividend stocks above {yield_floor}%"):
    ticker_symbol = quote.get("symbol")
    if not ticker_symbol:
        continue
    stock = get_stock(ticker_symbol)
    # `info` is guarded the same way as in stocks_to_pd / print_stock_info.
    stock_info = stock.info or {}
    # The yield is compared directly against yield_floor, i.e. treated as a percentage.
    dy = stock_info.get("dividendYield")
    if dy and dy > yield_floor:  # only >3% dividend yield
        high_dividend_tickers.append(ticker_symbol)
    else:
        # Also counts tickers that report no dividend yield at all.
        low_yield_stock_cnt += 1

log(f"Found {len(high_dividend_tickers)} stocks above {yield_floor}%.", "🎯")
log(f"Skipped {low_yield_stock_cnt} stocks as they were below the yield floor of {yield_floor}%.", "⏭️")
pruned_tickers = high_dividend_tickers[:selection_limit]  # 🔑 <-- your original flow preserved
log(f"Final selection: {len(pruned_tickers)} high dividend tickers", "📉")

universe_df = pd.DataFrame({
    "Ticker": pruned_tickers,
    "Sector": "High Dividend"
})


2025-09-18 13:16:48,127 - INFO - 📅 Fetching live high dividend stocks for 2025-09-18
2025-09-18 13:16:48,750 - INFO - 📊 Collected 200 tickers from Yahoo screener 'most_actives'
Searching for high dividend stocks above 3.0%: 100%|██████████| 200/200 [00:00<?, ?it/s]
2025-09-18 13:16:48,767 - INFO - 🎯 Found 45 stocks above 3.0%.
2025-09-18 13:16:48,768 - INFO - ⏭️ Skipped 155 stocks as they were below the yield floor of 3.0%.
2025-09-18 13:16:48,768 - INFO - 📉 Final selection: 45 high dividend tickers


In [None]:
# Display the current `universe_df` (several cells assign it; the one run last wins).
universe_df

In [None]:
# TODO: Seems to be obsolete
# 🌍 Fetch tickers from Yahoo screener (generic)
# Overwrites `high_dividend_tickers` (here: a list of {"Ticker", "Name"} dicts)
# and `universe_df` with the raw result of the chosen predefined screener.
#
# `datetime` is the class in this notebook (see the imports cell), so
# datetime.today() is used; `datetime.date.today()` raises AttributeError.
todays_date = datetime.today().strftime("%Y-%m-%d")
log(f"Fetching screener results for {todays_date}", "📅")

# You can change scrId here: "day_gainers", "most_actives", "day_losers", etc.
scrId = "day_gainers"

api_url = (
    f"https://query2.finance.yahoo.com/v1/finance/screener/predefined/saved?"
    f"formatted=true&scrIds={scrId}&count=50&start=0"
)

# Bounded wait so a stalled connection cannot hang the notebook.
resp = requests.get(api_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
log(f"HTTP status code: {resp.status_code}", "🌐")
# Stop on HTTP errors (after logging the status) instead of parsing an error body.
resp.raise_for_status()

data = resp.json()
# "finance"/"result"/"quotes" may be missing, null or (for "result") an empty
# list; every step falls back to an empty container so indexing [0] is safe.
quotes = (
    ((data.get("finance") or {}).get("result") or [{}])[0]
    .get("quotes")
    or []
)

log(f"Found {len(quotes)} quotes under screener '{scrId}'", "📑")

high_dividend_tickers = [
    {"Ticker": q.get("symbol"), "Name": q.get("longName", q.get("shortName"))}
    for q in quotes if q.get("symbol")
]

# Explicit columns keep the frame well-formed even when no quote came back.
universe_df = pd.DataFrame(high_dividend_tickers, columns=["Ticker", "Name"])
universe_df["Sector"] = scrId
log(f"Collected {len(universe_df)} tickers from Yahoo screener '{scrId}'", "📊")

universe_df


In [None]:
# Inspect whatever is currently bound to `data` (the parsed screener JSON when a
# screener cell ran last; other cells rebind the same name to a list).
data


In [158]:
def calc_current_div_yield(stock: Ticker)->float:
    """Return the forward dividend yield of `stock` in percent.

    Computed as ``dividendRate / currentPrice * 100`` from ``stock.info`` and
    rounded to two decimals.

    Args:
        stock: Object exposing an ``info`` mapping (a yfinance Ticker).

    Returns:
        The yield in percent, or NaN when ``info`` is empty, ``dividendRate``
        is missing, or ``currentPrice`` is missing or zero. NaN (rather than
        an exception) keeps DataFrame construction, sorting and ``:.2f``
        formatting working for non-dividend-paying tickers.
    """
    stock_info = stock.info or {}
    dividend_rate = stock_info.get("dividendRate")
    current_price = stock_info.get("currentPrice")
    # `not current_price` covers both a missing price and a price of 0.
    if dividend_rate is None or not current_price:
        return float("nan")
    return round((dividend_rate / current_price) * 100, 2)

In [154]:
def stocks_to_pd(stocks: List[Ticker])->DataFrame:
    """Flatten a list of tickers into one DataFrame row per stock.

    Each row is taken from ``stock.info``: the long name (column "🔍"), a set
    of values copied as-is, the yield computed by `calc_current_div_yield`,
    and three dividend timestamps converted with `to_date`.

    Args:
        stocks: Tickers whose ``info`` mapping should be tabulated.

    Returns:
        DataFrame with one row per entry of `stocks`, in input order.
    """
    # Keys copied verbatim from `info`; the tuples are split so the computed
    # and converted columns land at their original positions.
    leading_keys = (
        "displayName", "symbol", "website", "exchange", "country",
        "fiveYearAvgDividendYield", "dividendRate", "dividendYield",
    )
    date_keys = ("lastDividendDate", "dividendDate", "exDividendDate")
    trailing_keys = (
        "lastDividendValue", "currentPrice", "quoteType", "industry",
        "sharesOutstanding", "currency", "ask", "askSize", "previousClose",
        "market", "marketCap", "fiftyDayAverage", "fiftyTwoWeekHigh",
        "fiftyTwoWeekLow", "boardRisk",
    )

    records = []
    for ticker in tqdm(stocks, desc="Converting stock list to DataFrame"):
        info = ticker.info or {}
        record = {"🔍": info.get("longName")}
        record.update({key: info.get(key) for key in leading_keys})
        record["currentDividendYield"] = calc_current_div_yield(ticker)
        record.update({key: to_date(info.get(key)) for key in date_keys})
        record.update({key: info.get(key) for key in trailing_keys})
        records.append(record)

    return pd.DataFrame(records)

In [None]:
def from_unix_datetime(ts: int):
    """Turn a UNIX timestamp in seconds into a timezone-aware UTC datetime.

    Args:
        ts: Seconds since the epoch; anything `int()` accepts, or None.

    Returns:
        The matching UTC datetime, or None when `ts` is None or cannot be
        converted.
    """
    if ts is not None:
        try:
            seconds = int(ts)
            return datetime.fromtimestamp(seconds, tz=timezone.utc)
        except Exception:
            # Unparseable or out-of-range input is reported as "no value".
            pass
    return None
    
def to_date(unix_ts: int):
    """Turn a UNIX timestamp in seconds into the corresponding UTC calendar date.

    Args:
        unix_ts: Seconds since the epoch; anything `int()` accepts, or None.

    Returns:
        A `date` (the day the timestamp falls on in UTC), or None when
        `unix_ts` is None or cannot be converted.
    """
    if unix_ts is not None:
        try:
            moment = datetime.fromtimestamp(int(unix_ts), tz=timezone.utc)
            return moment.date()
        except Exception:
            # Unparseable or out-of-range input is reported as "no value".
            pass
    return None



In [None]:
def print_stock_info(stock:Ticker):
    """Print the full ``info`` payload of `stock` followed by a dividend-focused summary.

    The header line shows the symbol of the stock that was passed in (taken
    from ``info["symbol"]``, falling back to the object's ``ticker``
    attribute when present) instead of whatever the notebook-global
    ``ticker_symbol`` happens to hold.

    Args:
        stock: Object exposing an ``info`` mapping (a yfinance Ticker).
    """
    stock_info_json = stock.info or {}
    symbol = stock_info_json.get("symbol") or getattr(stock, "ticker", None)

    print(pretty_print_json(stock_info_json))
    print("=======================")
    print(f"\n🔍 {symbol} — {stock_info_json.get('longName')}")

    # Values printed exactly as delivered in `info`.
    # dividendRate: expected (forward) rate; dividendYield: based on price between previous.
    for key in (
        "displayName", "symbol", "website", "exchange", "country",
        "fiveYearAvgDividendYield", "dividendRate", "dividendYield",
    ):
        print(f"{key}:", stock_info_json.get(key))

    # Forward Dividend Rate ÷ Current Share Price × 100 %.
    print(f"currentDividendYield:{calc_current_div_yield(stock):.2f}")

    # Dividend timestamps are UNIX seconds; show them as calendar dates.
    for key in ("lastDividendDate", "dividendDate", "exDividendDate"):
        print(f"{key}:", to_date(stock_info_json.get(key)))

    for key in ("lastDividendValue", "currentPrice", "quoteType", "industry"):
        print(f"{key}:", stock_info_json.get(key))

    # Further `info` keys that are available but currently not printed:
    # sharesOutstanding, currency, ask, askSize, previousClose, market,
    # marketCap, fiftyDayAverage, fiftyTwoWeekHigh, fiftyTwoWeekLow, boardRisk.

In [56]:
def to_pct(raw):
    """Normalise a ratio-or-percent value to a percentage rounded to 2 decimals.

    Values whose magnitude is at most 1 are taken to be fractions and are
    multiplied by 100; anything larger is assumed to be a percentage already.
    The magnitude (not the signed value) decides, so a negative percentage
    such as -5 stays -5 instead of becoming -500.

    Known limitation of the heuristic: a genuine percentage with magnitude
    <= 1 (e.g. 0.5 meaning 0.5 %) cannot be told apart from a fraction and is
    scaled to 50.

    Args:
        raw: Number to normalise, or None.

    Returns:
        The percentage, or None when `raw` is None.
    """
    if raw is None:
        return None
    if abs(raw) <= 1:
        return round(raw * 100, 2)
    return round(raw, 2)

In [209]:
# 💰 Download dividend + yield + totals (non-breaking, robust)
# Resolves every symbol in `pruned_tickers` to a Ticker via get_stock(), turns
# the collection into `stocks_pd` and orders it by currentDividendYield
# (highest first).
log("Downloading dividend, yields, and sharesOutstanding…", "⬇️")

data = []  # reset here; later cells read `data`
stocks: List[Ticker] = []
for ticker_symbol in with_progress(pruned_tickers, desc="Fetching stock details"):
    # The try sits inside the loop so one failing symbol is logged and
    # skipped instead of silently dropping every ticker after it.
    try:
        stock = get_stock(ticker_symbol)
        stocks.append(stock)
    except Exception as e:
        log(f"Failed to download stock information for ticker symbol '{ticker_symbol}': {e}", "❌")

log("Converting stock list to dataframe...")
stocks_pd = stocks_to_pd(stocks)
log("Sorting dataframe...")
# Rebinding instead of inplace=True keeps the cell free of in-place mutation.
stocks_pd = stocks_pd.sort_values("currentDividendYield", ascending=False)

    


2025-09-18 13:20:16,309 - INFO - ⬇️ Downloading dividend, yields, and sharesOutstanding…
Fetching stock details: 100%|██████████| 45/45 [00:00<00:00, 71629.48it/s]
2025-09-18 13:20:16,315 - INFO - ℹ️ Converting stock list to dataframe...
Converting stock list to DataFrame: 100%|██████████| 45/45 [00:00<?, ?it/s]
2025-09-18 13:20:16,326 - INFO - ℹ️ Sorting dataframe...


In [210]:
# Display the stock overview table (`stocks_pd` as assigned by the most recently run cell).
stocks_pd

Unnamed: 0,🔍,displayName,symbol,website,exchange,country,fiveYearAvgDividendYield,dividendRate,dividendYield,currentDividendYield,...,currency,ask,askSize,previousClose,market,marketCap,fiftyDayAverage,fiftyTwoWeekHigh,fiftyTwoWeekLow,boardRisk
15,AGNC Investment Corp.,AGNC Inv,AGNC,https://agnc.com,NMS,United States,13.11,1.44,14.09,14.1,...,USD,10.26,35,10.13,us_market,10754422784,9.6972,10.85,7.85,4.0
11,Petróleo Brasileiro S.A. - Petrobras,,PBR,https://petrobras.com.br,NYQ,Brazil,21.69,1.82,14.1,14.09,...,USD,12.95,292,12.94,us_market,80121782272,12.4602,15.34,11.03,
43,The Western Union Company,,WU,https://www.westernunion.com,NYQ,United States,6.77,0.94,11.27,11.27,...,USD,8.44,152,8.2,us_market,2693547264,8.387,12.4,7.89,1.0
5,Vale S.A.,Vale,VALE,https://vale.com,NYQ,Brazil,9.08,1.18,10.86,10.85,...,USD,11.0,262,10.91,us_market,46444314624,10.1106,12.05,8.06,
13,Stellantis N.V.,Stellantis,STLA,https://www.stellantis.com,NYQ,Netherlands,,0.77,7.95,7.95,...,USD,9.96,256,9.72,us_market,27962849280,9.5012,16.29,8.39,4.0
3,Ambev S.A.,Ambev,ABEV,https://www.ambev.com.br,NYQ,Brazil,4.4,0.18,7.45,7.63,...,USD,2.37,1420,2.36,us_market,37264105472,2.2726,2.64,1.76,
39,Energy Transfer LP,,ET,https://energytransfer.com,NYQ,United States,8.59,1.32,7.53,7.53,...,USD,0.0,75,17.58,us_market,60174909440,17.5394,21.45,14.6,
2,Pfizer Inc.,Pfizer,PFE,https://www.pfizer.com,NYQ,United States,4.69,1.72,7.15,7.15,...,USD,24.21,52,23.9,us_market,136737488896,24.7184,30.43,20.92,6.0
30,"Medical Properties Trust, Inc.",Medical Properties Trust,MPW,https://www.medicalpropertiestrust.com,NYQ,United States,9.97,0.32,6.45,6.45,...,USD,0.0,202,4.85,us_market,2981456128,4.3112,6.34,3.51,5.0
32,Verizon Communications Inc.,Verizon Communications,VZ,https://www.verizon.com,NYQ,United States,5.86,2.76,6.24,6.24,...,USD,44.23,33,43.74,us_market,186403717120,43.2548,47.36,37.59,6.0


In [None]:
# Inspect whatever is currently bound to `data`; the download cell resets it to
# an empty list, so this may legitimately show [].
data

In [145]:
# 📊 Create overview DataFrame
# Wraps the current content of `data` in a DataFrame named `df` and displays it.
# NOTE(review): presumably `data` is expected to be the list of per-ticker dicts
# built by the dividend download loop — confirm which cell must run before this one.
df = pd.DataFrame(data)
df

In [None]:
# 🔄 Pivot examples for analysis
# Sums the "Annual Net (M)" column per "Sector" and orders the sectors by that
# total, largest first.
# NOTE(review): no visible cell creates a column named "Annual Net (M)" (another
# cell writes "Annual Net (USD M)") — confirm the column name before running.
pivot_sector = df.pivot_table(
    values="Annual Net (M)",
    index="Sector",
    aggfunc="sum"
).sort_values("Annual Net (M)", ascending=False)

pivot_sector


In [None]:
# TODO: Remove obsolete
# 💰 Download dividend & yield info from Yahoo Finance
# Builds `data`: one dict per ticker with name, sector, currency, yields and
# the dividends paid during the trailing twelve months.
log("Downloading dividend info from Yahoo Finance...", "⬇️")
high_dividend_tickers = pruned_tickers[:50]
data = []
for ticker_symbol in with_progress(high_dividend_tickers, desc="Downloading stock information"):
    try:
        stock = get_stock(ticker_symbol)
        stock_info_json = stock.info or {}
        dividends = stock.dividends

        dividend_yield = stock_info_json.get("dividendYield") or 0
        five_year_yield = stock_info_json.get("fiveYearAvgDividendYield") or 0
        currency = stock_info_json.get("currency") or "USD"

        # Sum the payouts of the last year by date. Slicing the last 252
        # *entries* would add up 252 payouts (decades of history), not 252
        # trading days.
        # Assumes `dividends` is a date-indexed Series with one entry per
        # payout — TODO confirm against the installed yfinance version.
        if dividends.empty:
            annual_div = 0
        else:
            one_year_ago = pd.Timestamp.now(tz=dividends.index.tz) - pd.DateOffset(years=1)
            annual_div = dividends[dividends.index >= one_year_ago].sum()

        data.append({
            "Ticker": ticker_symbol,
            "Name": stock_info_json.get("longName"),
            "Sector": universe_df.loc[universe_df["Ticker"]==ticker_symbol, "Sector"].values[0],
            "Currency": currency,
            # dividendYield is already a percentage in this notebook (the yield
            # filter compares it with 3.0 and the overview table shows values
            # such as 14.09), so it is only rounded, not multiplied by 100.
            "Dividend Yield %": round(dividend_yield, 2),
            "5Y Avg Yield %": round(five_year_yield, 2),
            "Annual Dividend": round(annual_div, 2)
        })
    except Exception as e:
        # Best effort: a failing ticker is reported and skipped.
        log(f"Failed for {ticker_symbol}: {e}", "❌")


In [None]:
# TODO: Throwaway code
# Experiment: take the universe from the holdings of the VYM ETF instead of the
# screener; overwrites `universe_df` with the first 50 holdings.
# NOTE(review): the `funds_holdings` attribute and the "symbol"/"holdingName"
# columns are assumed here — confirm they exist in the installed yfinance version.
vym = yf.Ticker("VYM")
vym_holdings = vym.funds_holdings
top50 = vym_holdings.head(50)

universe_df = pd.DataFrame({
    "Ticker": top50["symbol"],
    "Name": top50["holdingName"],
    "Sector": "High Dividend"
})
log(f"Collected {len(universe_df)} tickers from VYM ETF", "📊")
universe_df


In [None]:
# TODO: Throwaway code
# Smoke test: request 5 entries of the "day_gainers" screener and print the
# parsed JSON response as-is (no status check, no timeout).
test_url = "https://query2.finance.yahoo.com/v1/finance/screener/predefined/saved?scrIds=day_gainers&count=5"
print(requests.get(test_url, headers={"User-Agent": "Mozilla/5.0"}).json())

In [None]:
# TODO: Possibly throwaway code
# 📈 Add yield % if present
if "dividendYield" in df.columns:
    df["Dividend Yield %"] = (df["dividendYield"] * 100).round(2)

if "fiveYearAvgDividendYield" in df.columns:
    df["5Y Avg Yield %"] = (df["fiveYearAvgDividendYield"] * 100).round(2)

# 🧮 Per-share dividend
if "dividendRate" in df.columns:
    df["Annual Dividend (per share)"] = df["dividendRate"].round(2)

# 🏢 Company-wide totals (USD M)
if "dividendRate" in df.columns and "sharesOutstanding" in df.columns:
    df["Annual Gross (USD M)"] = (
        df["dividendRate"] * df["sharesOutstanding"] / 1_000_000
    ).round(2)
    df["Annual Net (USD M)"] = (df["Annual Gross (USD M)"] * 0.7).round(2)

log("Final dividend overview with explicit units ready", "📈")
df


# 🐞 Troubleshooting

In [71]:
def pretty_print_json(data: dict) -> str:
    """Serialise `data` to a human-readable JSON string.

    Keys are sorted, nesting is indented by three spaces, and values JSON
    cannot encode natively are rendered through `str`.
    """
    dump_options = {
        "indent": 3,
        "sort_keys": True,
        "default": str,
    }
    return json.dumps(data, **dump_options)

from IPython.display import JSON

def pretty_print_json_new(data: dict):
    """Render `data` as an IPython JSON display object in the notebook.

    The value is first dumped to JSON text (sorted keys, `str` fallback for
    values JSON cannot encode) and parsed back, so the displayed object only
    contains plain JSON types.
    """
    serialised = json.dumps(data, indent=2, sort_keys=True, default=str)
    plain = json.loads(serialised)
    display(JSON(plain))  # still collapsible & colored


In [None]:
# 🐞 Troubleshoot WMT, GOOG, AG
# Runs a fixed list of symbols through get_stock(), prints the details of each
# one and tabulates them with stocks_to_pd().
# Note: this rebinds the notebook-level `stocks` and `stocks_pd`, replacing the
# values produced by the download cell.
test_tickers = ["WMT", "GOOG", "AG", "BBD", "NOK", "NDA-DK.CO", "NDA-FI.HE"]

stocks: List[Ticker] = []
for ticker_symbol in test_tickers:
    stock = get_stock(ticker_symbol)
    stocks.append(stock)
    print_stock_info(stock)
stocks_pd = stocks_to_pd(stocks)
stocks_pd


[yfinance.Ticker object <WMT>, yfinance.Ticker object <GOOG>, yfinance.Ticker object <AG>, yfinance.Ticker object <BBD>, yfinance.Ticker object <NOK>, yfinance.Ticker object <NDA-DK.CO>, yfinance.Ticker object <NDA-FI.HE>]


Converting stock list to DataFrame: 100%|██████████| 7/7 [00:00<?, ?it/s]


In [95]:
# Scratch calculation: the ratio 0.17 / 3.28 (≈ 0.0518, i.e. about 5.18 %).
0.17/3.28


0.05182926829268293