In [None]:
# getting all of the stocks in the US
import investpy

# Ask investpy for its listing of stocks for the country 'united states'
# and keep it in `stocks` for later cells.
stocks = investpy.stocks.get_stocks(country='united states')
# Last expression of the cell: the notebook displays the head of the listing.
stocks.head()

In [None]:
# getting the dividend data for the above stocks
import yfinance as yf
import pandas as pd
import pytz

def get_dividend_data(tickers, start_date):
    """
    Fetches dividend data for a list of tickers starting from the specified date.

    Progress and the first raw rows of every ticker are printed as a side effect.

    Parameters:
    tickers (iterable): Stock tickers to query through yfinance.
    start_date (str): The start date in 'YYYY-MM-DD' format. A value without a
        UTC offset is interpreted as America/New_York time; a value that already
        carries an offset is used as given.

    Returns:
    pd.DataFrame: DataFrame with columns 'Date', 'Ticker', and 'Dividend', sorted by date.
        Empty (but with the same columns) when no ticker has dividends on or
        after start_date.
    """
    all_dividends = []

    # Build the cutoff in two flavours so it can be compared against either a
    # tz-aware or a tz-naive dividend index (pandas refuses to mix the two).
    start_ts = pd.to_datetime(start_date)
    if start_ts.tzinfo is None:
        start_date_tz = start_ts.tz_localize('America/New_York')
    else:
        # Already tz-aware: tz_localize would raise, so keep it untouched.
        start_date_tz = start_ts
    start_date_naive = start_date_tz.tz_localize(None)

    for ticker in tickers:
        print(f"Fetching data for {ticker}")
        dividends = yf.Ticker(ticker).dividends

        if dividends.empty:
            print(f"No dividend data for {ticker}")
            continue

        print(f"Raw dividend data for {ticker}:")
        print(dividends.head())

        # Pick the cutoff whose tz-awareness matches the index.
        index_tz = getattr(dividends.index, 'tz', None)
        cutoff = start_date_naive if index_tz is None else start_date_tz
        dividends = dividends[dividends.index >= cutoff]

        if dividends.empty:
            print(f"No dividends for {ticker} after {start_date}")
            continue

        all_dividends.append(pd.DataFrame({
            'Date': dividends.index,
            'Ticker': ticker,
            'Dividend': dividends.values,
        }))

    if not all_dividends:
        print("No dividends found for any ticker.")
        return pd.DataFrame(columns=['Date', 'Ticker', 'Dividend'])

    result = pd.concat(all_dividends, ignore_index=True)
    return result.sort_values(by='Date').reset_index(drop=True)

# Example usage: collect dividends paid since the start of 2020 for three
# tickers and preview the combined, date-sorted table.
tickers = ['ASML', 'MSFT', 'KO']
start_date = '2020-01-01'
df = get_dividend_data(tickers, start_date)
# Last expression of the cell: the notebook displays the head of the result.
df.head()


In [141]:
# balance sheet extractor
# pip install edgartools
import sys
from edgar import *
from edgar.financials import Financials
import pandas as pd

# Set the EDGAR identity once, before any filings are requested
# (fetch_10K_and_10Q_filings below expects this to have been done).
set_identity("sarashs arash.sheikh65@gmail.com")

from edgar import Company

def fetch_10K_and_10Q_filings(ticker: str, start_date: str, end_date: str, form: "list[str] | str | None" = None):
    """
    Fetches the filings of the requested form type(s) for the given ticker
    within the specified date range.

    Note:
      - Make sure you have set your EDGAR identity (using set_identity) before calling this function.
      - The date filter is sent in the form "YYYY-MM-DD:YYYY-MM-DD"; dates given
        without zero padding (e.g. "2025-2-15") are normalised first.

    Parameters:
        ticker (str): The stock ticker (e.g., "AAPL").
        start_date (str): The start date in "YYYY-MM-DD" format.
        end_date (str): The end date in "YYYY-MM-DD" format.
        form (list | str | None): Form type(s) to retrieve, e.g. ["10-Q", "10-K"].
            Defaults to ["10-K"] when omitted.

    Returns:
        list: A list-like object of filing objects (or an empty list if no filings are found).
              Any error raised while fetching is printed and also results in an empty list.
    """
    from datetime import datetime  # local import: keeps the block self-contained

    def _iso(day):
        # Re-emit string dates as zero-padded YYYY-MM-DD; leave empty strings
        # and non-string values exactly as the caller supplied them.
        if isinstance(day, str) and day:
            return datetime.strptime(day, "%Y-%m-%d").date().isoformat()
        return day

    # A fresh list per call instead of a shared mutable default argument.
    if form is None:
        forms = ['10-K']
    elif isinstance(form, str):
        forms = [form]
    else:
        forms = list(form)

    try:
        # Inside the try so that a malformed date is reported like any other failure.
        date_range = f"{_iso(start_date)}:{_iso(end_date)}"

        # Create a Company object for the given ticker
        company = Company(ticker)
        # Retrieve the filings of the requested form type(s)
        filings = company.get_filings(form=forms)
        # Filter the filings based on the provided date range
        filtered_filings = filings.filter(date=date_range)

        if not filtered_filings:
            print(f"No {' or '.join(forms)} filings found for {ticker} between {start_date} and {end_date}.")
            return []

        return filtered_filings

    except Exception as e:
        # Best-effort boundary: report the problem and hand back an empty result.
        print(f"An error occurred while fetching filings for {ticker}: {e}")
        return []


def _statements_to_text(statements):
    """Render each statement's DataFrame as text and join the pieces with newlines."""
    return '\n'.join(item.to_dataframe().to_string() for item in statements)


def extract_financials(filings):
    """
    Extracts financial statements from a list of filings.

    For each filing, the function:
      - Calls filing.obj() to get the data object (e.g. TenK/TenQ).
      - Checks that the object has a 'financials' attribute.
      - Extracts the balance sheet, income statement, and cashflow statement using:
            financials.get_balance_sheet()
            financials.get_income_statement()
            financials.get_cash_flow_statement()

    Parameters:
        filings (list): A list-like object of filing objects (e.g. from Company.get_filings()).

    Returns:
        tuple: Six items, in this order:
               (balance_sheets, income_statements, cashflow_statements,
                balance_sheets_str, income_statements_str, cashflow_statements_str).
               The first three are lists of the extracted statement objects; the
               last three are the same statements rendered as text (one
               DataFrame dump per statement, newline-joined) for use in a prompt.
               Filings that do not have a data object or the requested financial
               statement(s) are skipped. With no usable filings the lists are
               empty and the strings are ''.
    """
    balance_sheets = []
    income_statements = []
    cashflow_statements = []

    for filing in filings:
        try:
            # Convert the filing to its data object (e.g., TenK or TenQ)
            data_obj = filing.obj()
            if data_obj is None:
                print("Filing has no data object. Skipping...")
                continue

            # Check that the data object contains financials
            financials = getattr(data_obj, "financials", None)
            if financials is None:
                print("Filing has no financials. Skipping...")
                continue

            # Extract the individual financial statements.
            # If any of these methods are unavailable or return None, skip that particular statement.
            balance_sheet = financials.get_balance_sheet() if hasattr(financials, "get_balance_sheet") else None
            income_statement = financials.get_income_statement() if hasattr(financials, "get_income_statement") else None
            cashflow_statement = financials.get_cash_flow_statement() if hasattr(financials, "get_cash_flow_statement") else None

            if balance_sheet is not None:
                balance_sheets.append(balance_sheet)
            if income_statement is not None:
                income_statements.append(income_statement)
            if cashflow_statement is not None:
                cashflow_statements.append(cashflow_statement)

        except Exception as e:
            # One bad filing must not abort the whole extraction.
            print(f"Error extracting financials from filing: {e}")
            continue

    # Convert to strings once, after the loop, so the names are always bound
    # (even for an empty input) and match what is returned below.
    balance_sheets_str = _statements_to_text(balance_sheets)
    income_statements_str = _statements_to_text(income_statements)
    cashflow_statements_str = _statements_to_text(cashflow_statements)

    return balance_sheets, income_statements, cashflow_statements, balance_sheets_str, income_statements_str, cashflow_statements_str


# Pull LUMN's 10-Q and 10-K filings for the window and unpack both the
# statement objects and their text renderings (the *_str values feed the
# prompt in the assessment cell). Dates use zero-padded YYYY-MM-DD, the
# format fetch_10K_and_10Q_filings documents.
filings_list = fetch_10K_and_10Q_filings("LUMN", "2023-01-01", "2025-02-15", form=["10-Q", "10-K"])
(
    balance_sheets,
    income_statements,
    cashflow_statements,
    balance_sheets_str,
    income_statements_str,
    cashflow_statements_str,
) = extract_financials(filings_list)

In [143]:
# Open ai financial health assessment

from pydantic import BaseModel
from openai import OpenAI

# Shared OpenAI client for this notebook. No credentials are passed here, so
# they are presumably picked up from the environment — confirm before running.
client = OpenAI()

# One entry in the step-by-step reasoning the model is asked to return.
class Step(BaseModel):
    justification: str  # free-text justification for this step

# Structured answer requested from the model; passed as `response_format`
# to the completion call so the reply is parsed into this shape.
class Assessment(BaseModel):
    steps: list[Step]  # ordered step-by-step justification
    trends: str  # narrative summary of the trends the prompt asks for
    approve: bool  # whether the model approves the purchase
    

# Ask the model for a structured financial-health assessment of the company
# whose statements were rendered to text in the previous cell. The reply is
# parsed into an `Assessment` instance.
completion = client.beta.chat.completions.parse(
    model="o3-mini",
    reasoning_effort="high",
    messages=[
        # System message: role, objective and the shape of the expected answer.
        {"role": "system", "content": """You are a financial advisor. You are provided the last two years worth of financial data based on 10-K and 10-Q documents for a company.
        The goal is to buy the most promising dividend stock. Whether or not a stock is a dividend stock is not for you to extract but you are looking at the financial health of the company and your primary goal is to avoid risk.
        You must provide me with the trends over the past two years, your step by step justification of your assessment and whether or not you approve this purchase considering the potential risks to this company."""},
        # User message: the statement text itself; the time span matches the
        # two years named in the system message.
        {"role": "user", "content": f"""Here are the balance sheets, income statements, and cashflow statements for the past two years.\n\n
        Income statements:\n{income_statements_str}\n\nCashflow statements:\n{cashflow_statements_str}\n\nBalance sheets:\n{balance_sheets_str}
        """},
    ],
    response_format=Assessment,
)

# The parsed `Assessment` for the first (only) choice.
assessment = completion.choices[0].message.parsed

In [146]:
from pprint import pp
# Pretty-print the trend summary from the parsed assessment.
pp(assessment.trends)

('Over the past two years the company has seen improved liquidity with '
 'increasing cash balances; however, it has experienced a marked decline in '
 'total assets and a dramatic erosion of stockholders’ equity, largely due to '
 'significant goodwill impairments. High long-term debt persists, leading to a '
 'highly leveraged and unstable financial profile.')


In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

# Load Russell 3000 tickers
# You would typically get an updated list from an official source or file
# Here is a placeholder list for demonstration purposes
tickers = ['ASML', 'MSFT']  # Replace with the full Russell 3000 list

# Collect one frame per ticker and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies everything gathered so
# far on every iteration.
dividend_frames = []
price_frames = []

for ticker in tickers:
    stock = yf.Ticker(ticker)

    # Get dividend history (index becomes a regular column)
    dividends = stock.dividends.reset_index()
    dividends['Ticker'] = ticker

    # Get historical price data, keeping only the date and closing price
    prices = stock.history(period="max").reset_index()[['Date', 'Close']]
    prices['Ticker'] = ticker

    dividend_frames.append(dividends)
    price_frames.append(prices)

# Combined long-format tables; an empty ticker list yields empty DataFrames.
all_dividends = pd.concat(dividend_frames, ignore_index=True) if dividend_frames else pd.DataFrame()
all_prices = pd.concat(price_frames, ignore_index=True) if price_frames else pd.DataFrame()

# Re-index both long tables by their 'Date' column
all_dividends = all_dividends.set_index('Date')
all_prices = all_prices.set_index('Date')

# Ticker whose dividends and prices are charted below
ticker_to_plot = 'ASML'

# Keep only the rows belonging to that ticker
div_data = all_dividends.loc[all_dividends['Ticker'] == ticker_to_plot]
price_data = all_prices.loc[all_prices['Ticker'] == ticker_to_plot]

# Outer-join prices with the dividend amounts on the date index so that
# every date present in either table gets a row
combined_data = price_data.merge(
    div_data[['Dividends']],
    how='outer',
    left_index=True,
    right_index=True,
)

# One figure with two y-axes: dividends on the left axis (ax1), closing
# price on the right axis (ax2), both against the shared date index.
fig, ax1 = plt.subplots(figsize=(12, 6))

# Plot dividends as stem plot for better visibility.
# Rows without a dividend (introduced by the outer merge) are filled with 0,
# so they are drawn as zero-height stems.
ax1.stem(combined_data.index, combined_data['Dividends'].fillna(0), linefmt='blue', markerfmt='bo', basefmt=" ", label='Dividends')
ax1.set_xlabel('Date')
ax1.set_ylabel('Dividends', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Plot price on the same graph with a different y-axis
ax2 = ax1.twinx()
ax2.plot(combined_data.index, combined_data['Close'], color='red', label='Close Price')
ax2.set_ylabel('Close Price', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Add title and show the plot.
# NOTE(review): both series are given a `label`, but no legend is requested
# anywhere in this cell.
plt.title(f'{ticker_to_plot} Dividends and Close Price Over Time')
fig.tight_layout()
plt.show()


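# Save to CSV (optional)
# Note: 'Date' has been moved into the index above, so writing with
# index=False would leave the dates out of the file.
# all_dividends.to_csv('russell3000_dividends.csv', index=False)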
