In [2]:
# getting all of the stocks in the US
import investpy
import yfinance as yf
import pandas as pd
import pytz

# Get list of all stocks in the US
stocks = investpy.stocks.get_stocks(country='united states')
stocks.head()

Unnamed: 0,country,name,full_name,isin,currency,symbol
0,united states,Boeing,Boeing Co,US0970231058,USD,BA
1,united states,General Motors,General Motors Company,US37045V1008,USD,GM
2,united states,Chevron,Chevron Corp,US1667641005,USD,CVX
3,united states,Citigroup,Citigroup Inc,US1729674242,USD,C
4,united states,Bank of America,Bank of America Corp,US0605051046,USD,BAC


In [5]:
russel = pd.read_csv("russel1000.csv")
russel_tick = russel['Ticker'].to_list()


In [8]:
# getting the dividend data for the above stocks


def get_dividend_data(tickers):
    """
    Fetches dividend data and calculates dividend yield for a list of tickers over the past 5 years.

    Parameters:
    tickers (list): List of stock tickers.

    Returns:
    pd.DataFrame: DataFrame with columns 'Date', 'Ticker', 'Dividend', 'Stock Price', and 'Dividend Yield (%)'.
    """
    all_dividends = []

    for ticker in tickers:
        print(f"Fetching data for {ticker}")
        try:
            stock = yf.Ticker(ticker)
            dividends = stock.dividends
            stock_prices = stock.history(period="5y")['Close']  # Get past 5 years of stock price data
        except:
            print(f"Failed to fetch data for {ticker}")
            continue

        if dividends.empty:
            print(f"No dividend data for {ticker}")
            continue

        # Create a dataframe for dividends
        df = pd.DataFrame({
            'Date': dividends.index,
            'Ticker': ticker,
            'Dividend': dividends.values
        })

        # Fetch stock price at the closest available date before or on dividend date
        df['Stock Price'] = df['Date'].apply(lambda x: stock_prices.loc[:x].iloc[-2] if len(stock_prices.loc[:x]) > 1 else None)

        # Calculate Dividend Yield (%)
        df['Dividend Yield (%)'] = (df['Dividend'] / df['Stock Price']) * 100

        all_dividends.append(df)

    if all_dividends:
        result = pd.concat(all_dividends, ignore_index=True)
        result.sort_values(by='Date', inplace=True)
        return result.reset_index(drop=True)
    else:
        print("No dividends found for any ticker.")
        return pd.DataFrame(columns=['Date', 'Ticker', 'Dividend', 'Stock Price', 'Dividend Yield (%)'])

# Example usage:
#tickers = stocks['symbol'].to_list()
df = get_dividend_data(russel_tick)

df.head()


Fetching data for AAPL
Fetching data for NVDA
Fetching data for MSFT
Fetching data for AMZN
No dividend data for AMZN
Fetching data for META
Fetching data for GOOGL
Fetching data for AVGO
Fetching data for TSLA
No dividend data for TSLA
Fetching data for GOOG
Fetching data for BRKB


$BRKB: possibly delisted; no timezone found
$BRKB: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


No dividend data for BRKB
Fetching data for JPM
Fetching data for LLY
Fetching data for V
Fetching data for XOM
Fetching data for UNH
Fetching data for COST
Fetching data for MA
Fetching data for NFLX
No dividend data for NFLX
Fetching data for WMT
Fetching data for HD
Fetching data for PG
Fetching data for JNJ
Fetching data for ABBV
Fetching data for BAC
Fetching data for CRM
Fetching data for ORCL
Fetching data for CVX
Fetching data for KO
Fetching data for WFC
Fetching data for CSCO
Fetching data for ACN
Fetching data for PLTR
No dividend data for PLTR
Fetching data for IBM
Fetching data for PM
Fetching data for ABT
Fetching data for GE
Fetching data for MCD
Fetching data for LIN
Fetching data for ISRG
No dividend data for ISRG
Fetching data for MRK
Fetching data for ADBE
Fetching data for TMO
Fetching data for GS
Fetching data for NOW
No dividend data for NOW
Fetching data for DIS
Fetching data for PEP
Fetching data for QCOM
Fetching data for T
Fetching data for AMD
No dividend dat

$XTSLA: possibly delisted; no timezone found
$XTSLA: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


No dividend data for XTSLA
Fetching data for PNC
Fetching data for MCK
Fetching data for COF
Fetching data for ORLY
No dividend data for ORLY
Fetching data for AON
Fetching data for ITW
Fetching data for DASH
No dividend data for DASH
Fetching data for CRH
Fetching data for USB
Fetching data for EOG
Fetching data for MSI
Fetching data for ZTS
Fetching data for TDG
Fetching data for FTNT
No dividend data for FTNT
Fetching data for CTAS
Fetching data for EMR
Fetching data for REGN
No dividend data for REGN
Fetching data for ABNB
No dividend data for ABNB
Fetching data for APD
Fetching data for CL
Fetching data for WMB
Fetching data for ECL
Fetching data for GD
Fetching data for ADSK
Fetching data for BDX
Fetching data for MAR
Fetching data for BK
Fetching data for CSX
Fetching data for HLT
Fetching data for TFC
Fetching data for RCL
Fetching data for ROP
Fetching data for FDX
Fetching data for NOC
Fetching data for SPG
Fetching data for JCI
Fetching data for TGT
Fetching data for SNOW
No

Could not get exchangeTimezoneName for ticker 'ETR' reason: 'chart'
$ETR: possibly delisted; no timezone found
$ETR: possibly delisted; no price data found  (period=5y)


No dividend data for ETR
Fetching data for RBLX
No dividend data for RBLX
Fetching data for IQV
No dividend data for IQV
Fetching data for DXCM
No dividend data for DXCM
Fetching data for DD
Fetching data for EA
Fetching data for ARES
Fetching data for IR
Fetching data for VEEV
No dividend data for VEEV
Fetching data for UAL
Fetching data for RMD
Fetching data for CHTR
No dividend data for CHTR
Fetching data for ROK
Fetching data for EBAY
Fetching data for NDAQ
Fetching data for MTB
Fetching data for ALNY
No dividend data for ALNY
Fetching data for ED
Fetching data for NUE
Fetching data for GRMN
Fetching data for HPQ
Fetching data for GIS
Fetching data for OXY
Fetching data for HIG
Fetching data for WTW
Fetching data for PCG
Fetching data for WEC
Fetching data for EXR
Fetching data for MLM
Fetching data for KEYS
No dividend data for KEYS
Fetching data for MPWR
Fetching data for WAB
Fetching data for VICI
Fetching data for ACGL
Fetching data for USD
Fetching data for EQT
Fetching data f

Could not get exchangeTimezoneName for ticker 'LEN' reason: 'chart'
$LEN: possibly delisted; no timezone found
$LEN: possibly delisted; no price data found  (period=5y)


No dividend data for LEN
Fetching data for MCHP
Fetching data for IP
Fetching data for RJF
Fetching data for CPNG
No dividend data for CPNG
Fetching data for STT
Fetching data for CNC
No dividend data for CNC
Fetching data for FTV
Fetching data for HPE
Fetching data for BR
Fetching data for VTR
Fetching data for IRM
Fetching data for SW
Fetching data for LPLA
Fetching data for DOV
Fetching data for PPG
Fetching data for WSM
Fetching data for GPN
Fetching data for DOW
Fetching data for TYL
Fetching data for CCL
Fetching data for EQR
Fetching data for MTD
No dividend data for MTD
Fetching data for DTE
Fetching data for BRO
Fetching data for AEE
Fetching data for STZ
Fetching data for GDDY
No dividend data for GDDY
Fetching data for TPL
Fetching data for CPAY
No dividend data for CPAY
Fetching data for CHD
Fetching data for KHC
Fetching data for SYF
Fetching data for EXPE
Fetching data for FCNCA
Fetching data for CDW
Fetching data for WBD
No dividend data for WBD
Fetching data for PPL
Fet

$HEIA: possibly delisted; no timezone found
$HEIA: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


No dividend data for HEIA
Fetching data for EG
Fetching data for CF
Fetching data for GGG
Fetching data for ROL
Fetching data for UDR
Fetching data for BJ
No dividend data for BJ
Fetching data for KMX
No dividend data for KMX
Fetching data for JBHT
Fetching data for RVTY
Fetching data for TKO
Fetching data for EWBC
Fetching data for FIX
Fetching data for ACM
Fetching data for JLL
Fetching data for DOC
Fetching data for DKS
Fetching data for SWK
Fetching data for TXT
Fetching data for PFGC
No dividend data for PFGC
Fetching data for REG
Fetching data for ITCI
No dividend data for ITCI
Fetching data for RGA
Fetching data for VTRS
Fetching data for UNM
Fetching data for CIEN
No dividend data for CIEN
Fetching data for FTI
Fetching data for POOL
Fetching data for WPC
Fetching data for SAIA
No dividend data for SAIA
Fetching data for Z
No dividend data for Z
Fetching data for FOXA
Fetching data for RPRX
Fetching data for THC
Fetching data for ROKU
No dividend data for ROKU
Fetching data for

Could not get exchangeTimezoneName for ticker 'CBSH' reason: 'chart'
$CBSH: possibly delisted; no timezone found
$CBSH: possibly delisted; no price data found  (period=5y)


No dividend data for CBSH
Fetching data for ZION
Fetching data for INGR
Fetching data for BERY
Fetching data for BRX
Fetching data for AXTA
No dividend data for AXTA
Fetching data for TTEK
Fetching data for AOS
Fetching data for TFX
Fetching data for DVA
No dividend data for DVA
Fetching data for GPK
Fetching data for DBX
No dividend data for DBX
Fetching data for SNV
Fetching data for FOUR
No dividend data for FOUR
Fetching data for MASI
Fetching data for CRL
No dividend data for CRL
Fetching data for AAON
Fetching data for VOYA
Fetching data for ADC
Fetching data for VNO
Fetching data for ATI
Fetching data for CPB
Fetching data for NNN
Fetching data for MAT
Fetching data for CACI
No dividend data for CACI
Fetching data for CE
Fetching data for SSD
Fetching data for TREX
No dividend data for TREX
Fetching data for FR
Fetching data for BFAM
No dividend data for BFAM
Fetching data for QRVO
No dividend data for QRVO
Fetching data for ESAB
Fetching data for HRB
Fetching data for SN
Fetchi

$UHALB: possibly delisted; no timezone found
$UHALB: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


No dividend data for UHALB
Fetching data for ETSY
No dividend data for ETSY
Fetching data for SLM
Fetching data for BSY
Fetching data for OMF
Fetching data for OLED
Fetching data for FCN
No dividend data for FCN
Fetching data for AWI
Fetching data for CROX
No dividend data for CROX
Fetching data for PARA
Fetching data for WEX
No dividend data for WEX
Fetching data for STAG
Fetching data for OLLI
No dividend data for OLLI
Fetching data for FAF
Fetching data for DAR
No dividend data for DAR
Fetching data for NFG
Fetching data for KEX
Fetching data for RLI
Fetching data for STWD
Fetching data for VNOM
Fetching data for MHK
No dividend data for MHK
Fetching data for MTN
Fetching data for U
No dividend data for U
Fetching data for H
Fetching data for VMI
Fetching data for HII
Fetching data for AGCO
Fetching data for NOV
Fetching data for RHI
Fetching data for HR
Fetching data for RITM
Fetching data for DINO
Fetching data for BILL
No dividend data for BILL
Fetching data for COLD
Fetching dat

$SGAFT: possibly delisted; no timezone found
$SGAFT: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


No dividend data for SGAFT
Fetching data for LOPE
No dividend data for LOPE
Fetching data for BFB


$BFB: possibly delisted; no timezone found
$BFB: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


No dividend data for BFB
Fetching data for LEA
Fetching data for BYD
Fetching data for GTES
No dividend data for GTES
Fetching data for BIO
No dividend data for BIO
Fetching data for LITE
No dividend data for LITE
Fetching data for SAIC
Fetching data for VVV
Fetching data for WFRD
Fetching data for AL
Fetching data for APPF
No dividend data for APPF
Fetching data for CIVI
Fetching data for SEE
Fetching data for TPG
Fetching data for NXST
Fetching data for AN
Fetching data for HCP
No dividend data for HCP
Fetching data for FIVE
No dividend data for FIVE
Fetching data for SON
Fetching data for IONS
No dividend data for IONS
Fetching data for DLB
Fetching data for CCCS
No dividend data for CCCS
Fetching data for W
No dividend data for W
Fetching data for LYFT
No dividend data for LYFT
Fetching data for WTM
Fetching data for SLGN
Fetching data for EEFT
No dividend data for EEFT
Fetching data for AGO
Fetching data for BC
Fetching data for CUZ
Fetching data for GXO
No dividend data for GXO
F

Could not get exchangeTimezoneName for ticker 'MDU' reason: 'chart'
$MDU: possibly delisted; no timezone found
$MDU: possibly delisted; no price data found  (period=5y)


No dividend data for MDU
Fetching data for FHB
Fetching data for IAC
Fetching data for FLO
Fetching data for PSN
No dividend data for PSN
Fetching data for PRGO
Fetching data for LCID
No dividend data for LCID
Fetching data for YETI
No dividend data for YETI
Fetching data for ASH
Fetching data for HOG
Fetching data for DNB
Fetching data for VKTX
No dividend data for VKTX
Fetching data for CACC
No dividend data for CACC
Fetching data for OLN
Fetching data for ZI
No dividend data for ZI
Fetching data for ALGM
No dividend data for ALGM
Fetching data for HIW
Fetching data for MP
No dividend data for MP
Fetching data for VAC
Fetching data for VIRT
Fetching data for PAG
Fetching data for DV
No dividend data for DV
Fetching data for WU
Fetching data for MRP
No dividend data for MRP
Fetching data for COLM
Fetching data for NWL
Fetching data for AMED
No dividend data for AMED
Fetching data for SMG
Fetching data for CXT
Fetching data for MPW
Fetching data for RNG
No dividend data for RNG
Fetchin

  result = pd.concat(all_dividends, ignore_index=True)


Unnamed: 0,Date,Ticker,Dividend,Stock Price,Dividend Yield (%)
0,1962-01-16 00:00:00-05:00,PG,0.005859,,
1,1962-01-16 00:00:00-05:00,CAT,0.010417,,
2,1962-01-23 00:00:00-05:00,KR,0.008594,,
3,1962-01-30 00:00:00-05:00,ED,0.09375,,
4,1962-01-31 00:00:00-05:00,CVX,0.029762,,


In [9]:
df.to_csv('russel1000_dividend_last5years.csv')
df.dropna(inplace=True)
df.sort_values(by='Date', ascending=False, inplace=True)
df

Unnamed: 0,Date,Ticker,Dividend,Stock Price,Dividend Yield (%)
69160,2025-02-13 00:00:00-05:00,RMD,0.530000,236.070007,0.224510
69159,2025-02-13 00:00:00-05:00,DCI,0.270000,69.550003,0.388210
69158,2025-02-13 00:00:00-05:00,TJX,0.375000,124.035004,0.302334
69157,2025-02-13 00:00:00-05:00,ZION,0.430000,55.040001,0.781250
69152,2025-02-12 00:00:00-05:00,NXST,1.860000,151.199997,1.230159
...,...,...,...,...,...
56653,2020-02-19 00:00:00-05:00,EXC,0.273181,30.052973,0.908998
56651,2020-02-19 00:00:00-05:00,LFUS,0.480000,177.092285,0.271045
56655,2020-02-19 00:00:00-05:00,CNP,0.290000,23.579014,1.229907
56656,2020-02-19 00:00:00-05:00,DAL,0.403000,57.414345,0.701915


In [11]:
# computing the last year dividend
df["Date"] = pd.to_datetime(df["Date"])

# Define the date range for filtering
start_date = "2024-02-17"
end_date = "2025-02-17"

# Filter data for the last year
df_filtered = df[(df["Date"] >= start_date) & (df["Date"] <= end_date)]

# Group by Ticker and sum Dividend Yield (%) for the last year
df_grouped = df_filtered.groupby("Ticker", as_index=False).agg(
    {"Dividend Yield (%)": "sum"}
)

# Rename the column to "Last year Dividend Yield (%)"
df_grouped.rename(columns={"Dividend Yield (%)": "Last year Dividend Yield (%)"}, inplace=True)

# Merge back with the original df to retain all tickers
df = df.merge(df_grouped, on="Ticker", how="left")

# Fill NaN with 0 for tickers with no dividends in the date range
df["Last year Dividend Yield (%)"].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Last year Dividend Yield (%)"].fillna(0, inplace=True)


In [55]:
# sorted list of the last year dividend
import heapq
Threshold = 5
stack = []
heapq.heapify(stack)
df.sort_values(by='Last year Dividend Yield (%)', ascending=False, inplace=True)
for gr, gr_df in df.groupby("Ticker"):
    gr_df.reset_index(drop=True, inplace=True)
    gr_df.sort_values(by="Date", ascending=True, inplace=True)
    dividend = gr_df.loc[0, 'Last year Dividend Yield (%)']
    if dividend > Threshold:
        heapq.heappush(stack, (-dividend, gr, gr_df))

In [56]:
len(stack)

56

In [62]:
# balance sheet extractor
# pip install edgartools
import sys
from edgar import *
from edgar.financials import Financials
import pandas as pd

set_identity("sarashs arash.sheikh65@gmail.com")

from edgar import Company

def fetch_10K_and_10Q_filings(ticker: str, start_date: str, end_date: str, form: list = ['10-K']):
    """
    Fetches the 10-K and 10-Q filings for the given ticker within the specified date range.

    Note:
      - Make sure you have set your EDGAR identity (using set_identity) before calling this function.
      - The date filter should be in the form "YYYY-MM-DD:YYYY-MM-DD".

    Parameters:
        ticker (str): The stock ticker (e.g., "AAPL").
        start_date (str): The start date in "YYYY-MM-DD" format.
        end_date (str): The end date in "YYYY-MM-DD" format.

    Returns:
        list: A list-like object of filing objects (or an empty list if no filings are found).
    """
    try:
        # Create a Company object for the given ticker
        company = Company(ticker)
        # Retrieve both 10-K and 10-Q filings for the company
        filings = company.get_filings(form=form) #"10-K",
        # Filter the filings based on the provided date range
        # The filter date string uses the format "start_date:end_date"
        filtered_filings = filings.filter(date=f"{start_date}:{end_date}")
        
        if not filtered_filings:
            print(f"No 10-K or 10-Q filings found for {ticker} between {start_date} and {end_date}.")
            return []
            
        return filtered_filings

    except Exception as e:
        print(f"An error occurred while fetching filings for {ticker}: {e}")
        return []


def extract_financials(filings):
    """
    Extracts financial statements from a list of filings.
    
    For each filing, the function:
      - Calls filing.obj() to get the data object (e.g. TenK/TenQ).
      - Checks that the object has a 'financials' attribute.
      - Extracts the balance sheet, income statement, and cashflow statement using:
            financials.get_balance_sheet()
            financials.get_income_statement()
            financials.get_cash_flow_statement()
    
    Parameters:
        filings (list): A list-like object of filing objects (e.g. from Company.get_filings()).
    
    Returns:
        tuple: Three lists containing the extracted financial statements:
               (balance_sheets, income_statements, cashflow_statements).
               Filings that do not have a data object or the requested financial statement(s) are skipped.
    """
    balance_sheets = []
    income_statements = []
    cashflow_statements = []
    
    for filing in filings:
        try:
            # Convert the filing to its data object (e.g., TenK or TenQ)
            data_obj = filing.obj()
            if data_obj is None:
                print("Filing has no data object. Skipping...")
                continue
            
            # Check that the data object contains financials
            if not hasattr(data_obj, "financials") or data_obj.financials is None:
                print("Filing has no financials. Skipping...")
                continue

            financials = data_obj.financials
            
            # Extract the individual financial statements.
            # If any of these methods are unavailable or return None, skip that particular statement.
            balance_sheet = financials.get_balance_sheet() if hasattr(financials, "get_balance_sheet") else None
            income_statement = financials.get_income_statement() if hasattr(financials, "get_income_statement") else None
            cashflow_statement = financials.get_cash_flow_statement() if hasattr(financials, "get_cash_flow_statement") else None
            
            if balance_sheet is not None:
                balance_sheets.append(balance_sheet)
            if income_statement is not None:
                income_statements.append(income_statement)
            if cashflow_statement is not None:
                cashflow_statements.append(cashflow_statement)
        
        except Exception as e:
            print(f"Error extracting financials from filing: {e}")
            continue

        # convert to string to be digested by OpenAI model
        balance_sheets_str = ''
        income_statements_str = ''
        cashflow_statements_str = ''
        balance_sheets_str = '\n'.join([item.to_dataframe().to_string() for item in balance_sheets])
        income_statements_str = '\n'.join([item.to_dataframe().to_string() for item in income_statements])
        cashflow_statements_str = '\n'.join([item.to_dataframe().to_string() for item in cashflow_statements])
    
    return balance_sheets, income_statements, cashflow_statements, balance_sheets_str, income_statements_str, cashflow_statements_str


filings_list = fetch_10K_and_10Q_filings("AGNC", "2023-01-01", "2025-2-15",form=["10-Q", "10-K"])
balance_sheets, income_statements, cashflow_statements, balance_sheets_str, income_statements_str, cashflow_statements_str = extract_financials(filings_list)#

In [58]:
# Open ai financial health assessment

from pydantic import BaseModel
from openai import OpenAI

client = OpenAI()

class Step(BaseModel):
    justification: str

class Assessment(BaseModel):
    steps: list[Step]
    trends: str
    approve: bool
    
def assess(income_statements_str, cashflow_statements_str, balance_sheets_str):
    completion = client.beta.chat.completions.parse(
        model="o3-mini",
        reasoning_effort= "high",
        messages=[
            {"role": "system", "content": """You are a financial advisor. You are provided the last two years worth of financial data based on 10-K and 10-K documnent for a company.
            The goal is to buy the most promising dividend stock. Whether or not a stock is a dividend stock is not for you to extract but you are looking at the financial health of the company and your primary goal is to avoid risk.
            You must provide me with the trends over the past two years, your step by step justification of your assessment and whether or not you approve this purchase considering the potential risks to this company."""},
            {"role": "user", "content": f"""Here are the balance sheets, income statements, and cashflow statements for the past three years.\n\n
            Income statements:\n{income_statements_str}\n\nCashflow statements:\n{cashflow_statements_str}\n\nBalance sheets:\n{balance_sheets_str}
            """},
        ],
        response_format=Assessment,
    )
    
    assessment = completion.choices[0].message.parsed
    return assessment

In [57]:
from pprint import pp
pp(assessment.approve)

False


In [63]:
# processing the balance sheets
from tqdm import tqdm
balance_sheet_approved = {}
for i in tqdm(range(len(stack))):
    try:
        dividend, gr, gr_df = stack[i]
        filings_list = fetch_10K_and_10Q_filings(gr, "2023-01-01", "2025-2-15",form=["10-Q", "10-K"])
        _, _, _, balance_sheets_str, income_statements_str, cashflow_statements_str = extract_financials(filings_list)
        assessment = assess(income_statements_str, cashflow_statements_str, balance_sheets_str)
        if assessment.approve:
            balance_sheet_approved[gr] = (dividend, assessment.trends, gr_df)
    except:
        pass
    

 36%|████████████████████▎                                    | 20/56 [18:50<33:08, 55.23s/it]

Error extracting financials from filing: The read operation timed out
Error extracting financials from filing: [Errno -3] Temporary failure in name resolution


 52%|██████████████████████████▉                         | 29/56 [1:17:49<1:01:11, 135.99s/it]

No 10-K or 10-Q filings found for BEPC between 2023-01-01 and 2025-2-15.


100%|██████████████████████████████████████████████████████| 56/56 [1:44:58<00:00, 112.47s/it]


In [65]:
print(balance_sheet_approved)

{'OHI': (np.float64(-7.645732593987214), 'Over the past two years, the company has experienced consistent revenue and rental income growth, improved operating cash flow, and a strengthening balance sheet with increased cash and equity. Despite some net income volatility caused by non‐recurring expense items, dividend payouts have remained robust, underscoring its commitment to shareholder returns.',                         Date Ticker  Dividend  Stock Price  \
18 2020-04-29 00:00:00-04:00    OHI      0.67    18.344259   
1  2020-07-30 00:00:00-04:00    OHI      0.67    22.318399   
6  2020-10-30 00:00:00-04:00    OHI      0.67    20.389904   
11 2021-02-05 00:00:00-05:00    OHI      0.67    26.105255   
5  2021-04-30 00:00:00-04:00    OHI      0.67    27.656843   
9  2021-07-30 00:00:00-04:00    OHI      0.67    27.191164   
2  2021-11-04 00:00:00-04:00    OHI      0.67    22.138201   
17 2022-02-04 00:00:00-05:00    OHI      0.67    21.524303   
19 2022-04-29 00:00:00-04:00    OHI    

In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

# Load Russell 3000 tickers
# You would typically get an updated list from an official source or file
# Here is a placeholder list for demonstration purposes
tickers = ['ASML', 'MSFT']  # Replace with the full Russell 3000 list

# Initialize an empty DataFrame to store dividend data
all_dividends = pd.DataFrame()
all_prices = pd.DataFrame()

for ticker in tickers:
    stock = yf.Ticker(ticker)
    
    # Get dividend history
    dividends = stock.dividends.reset_index()
    dividends['Ticker'] = ticker
    
    # Get historical price data
    prices = stock.history(period="max").reset_index()[['Date', 'Close']]
    prices['Ticker'] = ticker

    # Append to the main DataFrames
    all_dividends = pd.concat([all_dividends, dividends], ignore_index=True)
    all_prices = pd.concat([all_prices, prices], ignore_index=True)

# Set the date as the index
all_dividends.set_index('Date', inplace=True)
all_prices.set_index('Date', inplace=True)

# Plotting dividends and prices for a specific ticker (e.g., 'AAPL')
ticker_to_plot = 'ASML'

div_data = all_dividends[all_dividends['Ticker'] == ticker_to_plot]
price_data = all_prices[all_prices['Ticker'] == ticker_to_plot]

# Aligning the indices to ensure proper plotting
combined_data = pd.merge(price_data, div_data[['Dividends']], left_index=True, right_index=True, how='outer')

fig, ax1 = plt.subplots(figsize=(12, 6))

# Plot dividends as stem plot for better visibility
ax1.stem(combined_data.index, combined_data['Dividends'].fillna(0), linefmt='blue', markerfmt='bo', basefmt=" ", label='Dividends')
ax1.set_xlabel('Date')
ax1.set_ylabel('Dividends', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Plot price on the same graph with a different y-axis
ax2 = ax1.twinx()
ax2.plot(combined_data.index, combined_data['Close'], color='red', label='Close Price')
ax2.set_ylabel('Close Price', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Add title and show the plot
plt.title(f'{ticker_to_plot} Dividends and Close Price Over Time')
fig.tight_layout()
plt.show()


# Save to CSV (optional)
# all_dividends.to_csv('russell3000_dividends.csv', index=False)
