In [2]:
import os

import pandas as pd
import yfinance as yf

from utils import get_sp500_tickers

In [3]:
# Small hand-picked ticker list for quick experiments (could be read from a file instead).
TICKERS = [
    'AAPL',
    'MSFT',
    'GOOGL',
    'AMZN',
    'TSLA',
    'META',
    'NVDA',
]

In [15]:
# Upper bound on how many tickers are screened; named so the size of the run
# can be tuned in one place instead of editing a bare slice.
MAX_TICKERS = 100

# Rebinds TICKERS to the first MAX_TICKERS entries returned by
# get_sp500_tickers(), replacing any list assigned to TICKERS earlier.
TICKERS = get_sp500_tickers()[:MAX_TICKERS]

In [16]:
# Fundamentals to collect: each pair is (key looked up in the yfinance
# `info` dict, column label used for it in the resulting table).
FIELDS = dict([
    ('trailingPE', 'P/E'),
    ('priceToBook', 'P/B'),
    ('returnOnEquity', 'ROE'),
    ('debtToEquity', 'D/E'),
    ('earningsGrowth', 'EPS Growth'),
    ('marketCap', 'Market Cap'),
])

In [17]:
def fetch_fundamentals(tickers):
    """Build a one-row-per-ticker table of fundamentals from yfinance.

    For each symbol this reads ``yf.Ticker(symbol).info`` and copies out the
    ``longName`` entry plus every key listed in ``FIELDS``. Keys missing from
    ``info`` are stored as ``None`` (``'N/A'`` for the company name).

    Args:
        tickers: Iterable of ticker symbols to look up.

    Returns:
        pandas.DataFrame with the columns ``Ticker``, ``Company Name`` and one
        column per ``FIELDS`` label, in that order. The columns are present
        even when ``tickers`` is empty or every lookup failed, so the frame
        can still be written to CSV and read back with a usable header.
    """
    # Fix the schema up front: pd.DataFrame([]) would otherwise have no
    # columns at all, which breaks consumers that select columns by name.
    columns = ['Ticker', 'Company Name', *FIELDS.values()]

    all_data = []
    for ticker in tickers:
        try:
            info = yf.Ticker(ticker).info
            row = {
                'Ticker': ticker,
                'Company Name': info.get('longName', 'N/A'),
            }
            for source_key, label in FIELDS.items():
                row[label] = info.get(source_key)
            all_data.append(row)
        except Exception as e:
            # Best effort: report the failing ticker and keep going so one bad
            # symbol does not abort the whole run.
            print(f"Error fetching {ticker}: {e}")
    return pd.DataFrame(all_data, columns=columns)


In [18]:
if __name__ == "__main__":
    # Fetch fundamentals for every symbol in TICKERS and preview the result.
    df = fetch_fundamentals(TICKERS)
    print(df.head())
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure data/ exists before writing (no-op when it is already there).
    os.makedirs("data", exist_ok=True)
    df.to_csv("data/fundamentals_snapshot.csv", index=False)

  Ticker             Company Name        P/E         P/B      ROE       D/E  \
0    MMM               3M Company  18.415941   17.827608  0.93824   312.492   
1    AOS  A. O. Smith Corporation  19.092178    5.235943  0.27970    15.945   
2    ABT      Abbott Laboratories  16.696104    4.582427  0.30649    27.087   
3   ABBV              AbbVie Inc.  76.154510  220.696520  0.88400  4789.603   
4    ACN            Accenture plc  26.414675    6.863085  0.26965    26.706   

   EPS Growth    Market Cap  
0       0.221   79586205696  
1      -0.050    9713148928  
2       0.086  223673827328  
3      -0.063  313430016000  
4       0.072  200584986624  


In [12]:
def screen_stocks(df, max_pe=25, min_roe=0.10, max_de=100.0):
    """Filter a fundamentals table and rank the survivors by a factor score.

    Steps:
      1. Drop rows missing any of P/E, P/B, ROE, D/E or EPS Growth.
      2. Keep rows with ``P/E < max_pe``, ``ROE > min_roe`` and
         ``D/E < max_de``.
      3. Min-max scale P/E, P/B, ROE and EPS Growth into ``<col>_score``
         columns (0 = smallest surviving value, 1 = largest).
      4. Sum the scores into ``Factor Score``, inverting P/E and P/B so that
         cheaper is better, and sort descending.

    Args:
        df: DataFrame with at least the columns ``P/E``, ``P/B``, ``ROE``,
            ``D/E`` and ``EPS Growth``. It is not modified.
        max_pe: Exclusive upper bound on P/E.
        min_roe: Exclusive lower bound on ROE.
        max_de: Exclusive upper bound on D/E.

    Returns:
        A new DataFrame containing the rows that passed the filters, with the
        four ``*_score`` columns and ``Factor Score`` added, sorted by
        ``Factor Score`` from highest to lowest.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    # Drop rows with missing data
    df = df.dropna(subset=['P/E', 'P/B', 'ROE', 'D/E', 'EPS Growth'])

    # Hard filters (thresholds are parameters so callers can tune them)
    print("Before filter", df.shape)
    passes = (
        (df['P/E'] < max_pe) &
        (df['ROE'] > min_roe) &
        (df['D/E'] < max_de)
    )
    # Explicit copy: columns are added below, and assigning onto a filtered
    # slice raises SettingWithCopyWarning in pandas without copy-on-write.
    df = df[passes].copy()

    # Min-max normalise each factor across the surviving rows
    for col in ['P/E', 'P/B', 'ROE', 'EPS Growth']:
        lowest = df[col].min()
        spread = df[col].max() - lowest
        if spread > 0:
            df[col + '_score'] = (df[col] - lowest) / spread
        else:
            # Constant column, single row, or empty frame: the ratio would be
            # 0/0 (NaN) and poison every Factor Score. All rows tie on this
            # factor, so give them the same score; ranking is unaffected.
            df[col + '_score'] = 0.0

    # Scoring: higher ROE and EPS growth is better; lower P/E and P/B is better
    df['Factor Score'] = (
        (1 - df['P/E_score']) +
        (1 - df['P/B_score']) +
        df['ROE_score'] +
        df['EPS Growth_score']
    )

    return df.sort_values(by='Factor Score', ascending=False)


In [13]:
if __name__ == "__main__":
    # Reload the saved snapshot from disk rather than reusing in-memory state.
    df = pd.read_csv("data/fundamentals_snapshot.csv")
    df_clean = screen_stocks(df)
    # Preview the ten best-ranked rows, limited to the headline columns.
    print(
        df_clean.loc[
            :, ['Ticker', 'P/E', 'ROE', 'EPS Growth', 'Factor Score']
        ].head(n=10)
    )
    # Persist the complete screened table, dropping the index column.
    df_clean.to_csv("data/screened_stocks.csv", index=False)

Before filter (80, 7)
   Ticker        P/E      ROE  EPS Growth  Factor Score
37    APA   6.477061  0.26176       1.181      3.204524
95     CF  11.268519  0.20794       0.796      2.443561
93    CNC   8.809734  0.12527       0.218      2.353086
42   ACGL   9.393846  0.18415      -0.493      2.048835
55    BKR  12.926621  0.18036      -0.111      1.949430
19  GOOGL  18.453733  0.34789       0.488      1.854200
20   GOOG  18.623884  0.34789       0.488      1.839032
2     ABT  16.696754  0.30649       0.086      1.821490
60  BRK-B  13.446428  0.13187      -0.637      1.814359
74   BF-B  16.612442  0.27985      -0.050      1.739746
