<a href="https://colab.research.google.com/github/ksanjay/ksanjay/blob/main/G_SCORE_Growth_Stock_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# G_SCORE Growth Stock Analyzer
#
# This script implements Mohanram's G_SCORE methodology to identify
# potentially undervalued growth stocks (low book-to-market firms)
# by analyzing financial fundamentals.

# 1. Setup Environment
# Make sure to install the required libraries before running:
# pip install yfinance pandas numpy

import yfinance as yf
import pandas as pd
import numpy as np

# 2. Data Fetching Functions
def get_financial_data(ticker_symbol):
    """Fetches financial statements for a given stock ticker."""
    stock = yf.Ticker(ticker_symbol)
    income_stmt = stock.income_stmt
    balance_sheet = stock.balance_sheet
    cash_flow = stock.cashflow
    return income_stmt, balance_sheet, cash_flow, stock

def get_industry_peers(stock):
    """Fetches industry peers for comparison (placeholder function)."""
    # Note: yfinance does not provide a reliable way to get SIC codes or a full list of industry peers.
    # This is a placeholder. In a real application, you would use a dedicated financial data provider
    # to get a list of tickers in the same 2-digit SIC code industry.
    print(f"Industry for {stock.info.get('symbol', '')}: {stock.info.get('industry', 'N/A')}")
    print("Peer comparison is a placeholder. You would need a commercial data source for SIC code peers.")
    # For demonstration, we will use a small, fixed list of comps for a tech company.
    return ['MSFT', 'GOOGL', 'AMZN']

# 3. G_SCORE Component Functions

# Category 1: Profitability Signals
def calculate_g1_roa(income_stmt, balance_sheet, industry_median_roa):
    """G1: Industry-Adjusted Return on Assets (ROA)."""
    try:
        net_income = income_stmt.loc['Net Income'].iloc[0]
        # Use total assets from the beginning of the period (end of prior period)
        beginning_total_assets = balance_sheet.loc['Total Assets'].iloc[1]
        roa = net_income / beginning_total_assets
        return 1 if roa > industry_median_roa else 0, roa
    except (KeyError, IndexError):
        return 0, np.nan

def calculate_g2_cfroa(cash_flow, balance_sheet, industry_median_cfroa):
    """G2: Industry-Adjusted Cash Flow ROA."""
    try:
        cfo = cash_flow.loc['Operating Cash Flow'].iloc[0]
        beginning_total_assets = balance_sheet.loc['Total Assets'].iloc[1]
        cfroa = cfo / beginning_total_assets
        return 1 if cfroa > industry_median_cfroa else 0, cfroa
    except (KeyError, IndexError):
        return 0, np.nan

def calculate_g3_earnings_quality(cash_flow, income_stmt):
    """G3: Cash Flow vs. Earnings Quality."""
    try:
        cfo = cash_flow.loc['Operating Cash Flow'].iloc[0]
        net_income = income_stmt.loc['Net Income'].iloc[0]
        return 1 if cfo > net_income else 0, cfo > net_income
    except (KeyError, IndexError):
        return 0, False

# Category 2: Stability Signals
def calculate_g4_earnings_stability(income_stmt, balance_sheet, industry_median_roa_variance):
    """G4: Earnings Stability (Variance of ROA)."""
    try:
        net_incomes = income_stmt.loc['Net Income'].iloc[:5]
        # Need beginning assets for each year, so we need 6 years of balance sheet data for 5 years of ROA
        assets = balance_sheet.loc['Total Assets'].iloc[1:6]

        # Ensure we have the same number of periods for both metrics to avoid shape mismatch
        common_len = min(len(net_incomes), len(assets))
        if common_len < 3:
            return 0, np.nan # Not enough data

        # Slice both series to the common length before calculation
        net_incomes = net_incomes.iloc[:common_len]
        assets = assets.iloc[:common_len]

        roas = net_incomes.values / assets.values
        roa_variance = np.var(roas)
        return 1 if roa_variance < industry_median_roa_variance else 0, roa_variance
    except (KeyError, IndexError):
        return 0, np.nan

def calculate_g5_sales_stability(income_stmt, industry_median_sales_growth_variance):
    """G5: Sales Growth Stability."""
    try:
        sales = income_stmt.loc['Total Revenue'].iloc[:5]
        if len(sales) < 3:
            return 0, np.nan # Not enough data
        sales_growth = sales.pct_change().dropna()
        sales_growth_variance = np.var(sales_growth)
        return 1 if sales_growth_variance < industry_median_sales_growth_variance else 0, sales_growth_variance
    except (KeyError, IndexError):
        return 0, np.nan

# Category 3: Conservative Accounting Signals
def calculate_g6_rd_intensity(income_stmt, balance_sheet, industry_median_rd_intensity):
    """G6: R&D Intensity."""
    try:
        rd_expense = income_stmt.loc['Research And Development'].iloc[0]
        beginning_total_assets = balance_sheet.loc['Total Assets'].iloc[1]
        rd_intensity = rd_expense / beginning_total_assets
        return 1 if rd_intensity > industry_median_rd_intensity else 0, rd_intensity
    except (KeyError, IndexError):
        # R&D is often not reported, treat as 0
        return 1 if 0 > industry_median_rd_intensity else 0, 0

def calculate_g7_capex_intensity(cash_flow, balance_sheet, industry_median_capex_intensity):
    """G7: Capital Expenditure Intensity."""
    try:
        capex = cash_flow.loc['Capital Expenditure'].iloc[0]
        beginning_total_assets = balance_sheet.loc['Total Assets'].iloc[1]
        # Capex is negative, so we take the absolute value
        capex_intensity = abs(capex) / beginning_total_assets
        return 1 if capex_intensity > industry_median_capex_intensity else 0, capex_intensity
    except (KeyError, IndexError):
        return 0, np.nan

def calculate_g8_advertising_intensity(income_stmt, balance_sheet, industry_median_adv_intensity):
    """G8: Advertising Intensity."""
    # Note: Advertising expense is rarely broken out. Often included in SG&A.
    # This will likely return 0 for most companies from Yahoo Finance data.
    try:
        # yfinance does not typically provide 'Advertising Expense'
        adv_expense = income_stmt.loc['Advertising Expense'].iloc[0]
        beginning_total_assets = balance_sheet.loc['Total Assets'].iloc[1]
        adv_intensity = adv_expense / beginning_total_assets
        return 1 if adv_intensity > industry_median_adv_intensity else 0, adv_intensity
    except (KeyError, IndexError):
        return 1 if 0 > industry_median_adv_intensity else 0, 0

# 4. Industry Median Calculation
def calculate_industry_medians(peer_tickers):
    """Calculates median financial ratios for a list of peer tickers."""
    peer_data = {
        'roa': [], 'cfroa': [], 'roa_variance': [], 'sales_growth_variance': [],
        'rd_intensity': [], 'capex_intensity': [], 'adv_intensity': []
    }

    for ticker in peer_tickers:
        try:
            inc, bal, cf, _ = get_financial_data(ticker)

            # Note: For simplicity, we are not comparing against industry medians for these calculations
            # themselves, which is a slight simplification.
            _, roa = calculate_g1_roa(inc, bal, 0)
            _, cfroa = calculate_g2_cfroa(cf, bal, 0)
            _, roa_var = calculate_g4_earnings_stability(inc, bal, 0)
            _, sgv = calculate_g5_sales_stability(inc, 0)
            _, rd_int = calculate_g6_rd_intensity(inc, bal, 0)
            _, capex_int = calculate_g7_capex_intensity(cf, bal, 0)
            _, adv_int = calculate_g8_advertising_intensity(inc, bal, 0)

            peer_data['roa'].append(roa)
            peer_data['cfroa'].append(cfroa)
            peer_data['roa_variance'].append(roa_var)
            peer_data['sales_growth_variance'].append(sgv)
            peer_data['rd_intensity'].append(rd_int)
            peer_data['capex_intensity'].append(capex_int)
            peer_data['adv_intensity'].append(adv_int)
        except Exception as e:
            print(f"Could not process peer {ticker}: {e}")

    medians = {key: np.nanmedian(values) for key, values in peer_data.items()}
    return medians

# 5. Main G_SCORE Analyzer Function
def analyze_g_score(ticker_symbol):
    """Performs a full G_SCORE analysis for a given stock."""
    print(f"--- Analyzing {ticker_symbol} ---")
    try:
        income_stmt, balance_sheet, cash_flow, stock = get_financial_data(ticker_symbol)
    except Exception as e:
        print(f"Could not fetch data for {ticker_symbol}. Error: {e}")
        return None, None

    # Check Book-to-Market Ratio
    try:
        # Note: yfinance info can be unreliable for bookValue and marketCap
        b_to_m = stock.info.get('bookValue', 0) / stock.info.get('marketCap', 1)
        print(f"Book-to-Market Ratio: {b_to_m:.4f}")
        if b_to_m > 0.5: # Example threshold
            print("Warning: Book-to-Market ratio may be high for a typical growth stock.")
    except (KeyError, ZeroDivisionError, TypeError):
        print("Could not calculate Book-to-Market ratio.")

    # Get industry peers and calculate medians
    peers = get_industry_peers(stock)
    print(f"\nCalculating medians based on peers: {peers}...")
    industry_medians = calculate_industry_medians(peers)
    print(f"Industry Medians: {industry_medians}\n")

    # Calculate G_SCORE components
    g1, roa = calculate_g1_roa(income_stmt, balance_sheet, industry_medians['roa'])
    g2, cfroa = calculate_g2_cfroa(cash_flow, balance_sheet, industry_medians['cfroa'])
    g3, eq = calculate_g3_earnings_quality(cash_flow, income_stmt)
    g4, roa_var = calculate_g4_earnings_stability(income_stmt, balance_sheet, industry_medians['roa_variance'])
    g5, sgv = calculate_g5_sales_stability(income_stmt, industry_medians['sales_growth_variance'])
    g6, rd_int = calculate_g6_rd_intensity(income_stmt, balance_sheet, industry_medians['rd_intensity'])
    g7, capex_int = calculate_g7_capex_intensity(cash_flow, balance_sheet, industry_medians['capex_intensity'])
    g8, adv_int = calculate_g8_advertising_intensity(income_stmt, balance_sheet, industry_medians['adv_intensity'])

    g_score = g1 + g2 + g3 + g4 + g5 + g6 + g7 + g8

    # Display Results in a Dashboard
    results = {
        'Metric': ['G1: ROA', 'G2: CFROA', 'G3: Earnings Quality',
                   'G4: Earnings Stability', 'G5: Sales Stability', 'G6: R&D Intensity',
                   'G7: CapEx Intensity', 'G8: Ad Intensity'],
        'Score': [g1, g2, g3, g4, g5, g6, g7, g8],
        'Company Value': [f"{roa:.4f}", f"{cfroa:.4f}", str(eq),
                          f"{roa_var:.4f}", f"{sgv:.4f}", f"{rd_int:.4f}",
                          f"{capex_int:.4f}", f"{adv_int:.4f}"],
        'Industry Median': [f"{industry_medians['roa']:.4f}", f"{industry_medians['cfroa']:.4f}", 'N/A',
                            f"{industry_medians['roa_variance']:.4f}", f"{industry_medians['sales_growth_variance']:.4f}",
                            f"{industry_medians['rd_intensity']:.4f}", f"{industry_medians['capex_intensity']:.4f}",
                            f"{industry_medians['adv_intensity']:.4f}"],
        'Signal (Pass=1)': ['Company > Median', 'Company > Median', 'CFO > NI',
                            'Company < Median', 'Company < Median', 'Company > Median',
                            'Company > Median', 'Company > Median']
    }

    results_df = pd.DataFrame(results)
    print("--- G_SCORE Breakdown ---")
    print(results_df.to_string())
    print("\n---------------------------")
    print(f"Final G_SCORE: {g_score} / 8")
    print("---------------------------")

    return g_score, results_df

# 6. Stock Screening and Portfolio Construction
def screen_stocks(stock_list):
    """Screens a list of stocks and returns their G-Scores."""
    screened_results = {}
    print(f"\n--- Screening {len(stock_list)} stocks ---")
    for stock in stock_list:
        try:
            # analyze_g_score prints its own detailed breakdown
            score, _ = analyze_g_score(stock)
            if score is not None:
                screened_results[stock] = score
            else:
                screened_results[stock] = 'Error'
        except Exception as e:
            print(f"An error occurred while screening {stock}: {e}")
            screened_results[stock] = 'Error'
    return screened_results

def construct_portfolios(screened_results):
    """Constructs high and low G_SCORE portfolios from screened results."""
    high_g_score_portfolio = {k: v for k, v in screened_results.items() if isinstance(v, (int, float)) and not np.isnan(v) and v >= 6}
    low_g_score_portfolio = {k: v for k, v in screened_results.items() if isinstance(v, (int, float)) and not np.isnan(v) and v <= 1}

    print("\n\n--- Portfolio Construction Summary ---")
    print("High G_SCORE Portfolio (Score >= 6):", list(high_g_score_portfolio.keys()))
    print("Low G_SCORE Portfolio (Score <= 1):", list(low_g_score_portfolio.keys()))
    print("\nLong/Short Strategy Recommendation: LONG High G_SCORE, SHORT Low G_SCORE stocks.")
    return high_g_score_portfolio, low_g_score_portfolio

# Main execution block
if __name__ == "__main__":
    # Get user input for a list of stocks
    user_input = input("Enter a list of stock tickers separated by commas (e.g., AAPL, NVDA, TSLA): ")

    # Parse the input string into a list of tickers
    # This removes whitespace and converts to uppercase for consistency
    stock_tickers = [ticker.strip().upper() for ticker in user_input.split(',') if ticker.strip()]

    if stock_tickers:
        # Run the screener on the user-provided list
        screened_scores = screen_stocks(stock_tickers)

        print("\n\n--- Screener Results Summary ---")
        for ticker, score in screened_scores.items():
            print(f"{ticker}: {score}")

        # Construct portfolios based on the results
        construct_portfolios(screened_scores)
    else:
        print("No valid stock tickers entered. Exiting.")

Enter a list of stock tickers separated by commas (e.g., AAPL, NVDA, TSLA): tdup

--- Screening 1 stocks ---
--- Analyzing TDUP ---
Book-to-Market Ratio: 0.0000
Industry for TDUP: Internet Retail
Peer comparison is a placeholder. You would need a commercial data source for SIC code peers.

Calculating medians based on peers: ['MSFT', 'GOOGL', 'AMZN']...
Industry Medians: {'roa': np.float64(0.19882732645661635), 'cfroa': np.float64(0.2658567682554187), 'roa_variance': np.float64(nan), 'sales_growth_variance': np.float64(0.0010419995771217107), 'rd_intensity': np.float64(0.09300744520589976), 'capex_intensity': np.float64(0.13055677051233622), 'adv_intensity': np.float64(0.0)}

--- G_SCORE Breakdown ---
                   Metric  Score Company Value Industry Median   Signal (Pass=1)
0                 G1: ROA      0       -0.3080          0.1988  Company > Median
1               G2: CFROA      0        0.0196          0.2659  Company > Median
2    G3: Earnings Quality      1          True

  sales_growth = sales.pct_change().dropna()
  medians = {key: np.nanmedian(values) for key, values in peer_data.items()}
