In [None]:
#
# Equity Research and Scoring Engine
#
# This script performs the first major part of our project: identifying and ranking
# top investment opportunities from the NIFTY 50 based on a hybrid model.
#
# Workflow:
# 1. Fetches fundamental data for NIFTY 50 stocks.
# 2. Screens stocks based on strong fundamental criteria (P/E, ROE, ROA).
# 3. Calculates key technical indicators (RSI, MACD) for the filtered stocks.
# 4. Applies a weighted scoring system to rank the top recommendations.
#

# etup the Environment
# This command installs the necessary libraries for our analysis.
# The '-q' flag ensures a quiet installation with less output.
!pip install -q pandas yfinance ta

# Step 1: Import Libraries
import pandas as pd
import yfinance as yf
from ta import momentum, trend
import numpy as np
import warnings

# Suppress common warnings for a cleaner output.
warnings.filterwarnings('ignore')

# Define the Stock Universe
# We will analyze the stocks from India's NIFTY 50 index.
tickers = [
    "RELIANCE.NS", "TCS.NS", "HDFCBANK.NS", "ICICIBANK.NS", "INFY.NS",
    "BHARTIARTL.NS", "HINDUNILVR.NS", "ITC.NS", "SBIN.NS", "LICI.NS",
    "BAJFINANCE.NS", "HCLTECH.NS", "KOTAKBANK.NS", "MARUTI.NS", "LT.NS",
    "ASIANPAINT.NS", "AXISBANK.NS", "SUNPHARMA.NS", "NTPC.NS", "TATAMOTORS.NS",
    "TATASTEEL.NS", "BAJAJFINSV.NS", "ADANIENT.NS", "WIPRO.NS", "COALINDIA.NS",
    "ULTRACEMCO.NS", "ONGC.NS", "POWERGRID.NS", "ADANIPORTS.NS", "NESTLEIND.NS",
    "M&M.NS", "GRASIM.NS", "INDUSINDBK.NS", "JSWSTEEL.NS", "HINDALCO.NS",
    "ADANIGREEN.NS", "BRITANNIA.NS", "CIPLA.NS", "DRREDDY.NS", "EICHERMOT.NS",
    "HEROMOTOCO.NS", "DIVISLAB.NS", "APOLLOHOSP.NS", "BAJAJ-AUTO.NS",
    "BPCL.NS", "HDFCLIFE.NS", "SBILIFE.NS", "SHREECEM.NS", "TATACONSUM.NS", "TECHM.NS"
]

# Step 3: Fundamental Analysis
print("Starting Part 1: Fundamental Screening")

def get_fundamentals(ticker):
    try:
        stock = yf.Ticker(ticker)
        info = stock.info
        # We define the essential metrics needed for our analysis.
        required_keys = ['trailingPE', 'returnOnEquity', 'returnOnAssets', 'revenueGrowth']
        # If any key metric is missing, we skip the stock.
        if not all(key in info and info[key] is not None for key in required_keys):
            return None
        # We return a clean dictionary with our desired metrics.
        return {
            "Ticker": ticker,
            "PE": info.get("trailingPE"),
            "ROE": info.get("returnOnEquity") * 100,  # Convert to percentage
            "ROA": info.get("returnOnAssets") * 100,  # Convert to percentage
            "CAGR": info.get("revenueGrowth") * 100, # Represents revenue growth
        }
    except Exception:
        # Catch any other errors during the API call.
        return None

# Fetch fundamental data for all tickers in our list.
fundamental_data = [get_fundamentals(t) for t in tickers]
# Clean the list by removing any 'None' results from failed fetches.
fundamental_data_clean = [item for item in fundamental_data if item is not None]

# Proceed only if we have valid fundamental data.
if fundamental_data_clean:
    df_fund = pd.DataFrame(fundamental_data_clean)
    # Apply our strict filtering criteria as discussed in the presentation.
    filtered_stocks = df_fund[
        (df_fund["PE"] < 40) &
        (df_fund["ROE"] > 15) &
        (df_fund["ROA"] > 5)
    ].copy()
    print(f"Fundamental screening complete. {len(filtered_stocks)} stocks passed the initial filter.")
else:
    print("Could not fetch sufficient fundamental data to proceed.")
    filtered_stocks = pd.DataFrame() # Create an empty DataFrame to prevent errors.

# Step 4: Technical Analysis and Scoring

if not filtered_stocks.empty:
    print("\nStarting Part 2: Technical Analysis and Scoring")

    def get_technicals(ticker):
        try:
            # We use auto_adjust=False to manually handle 'Adj Close'.
            df = yf.download(ticker, period="14mo", auto_adjust=False, progress=False, timeout=10)
            if df.empty: return None

            # Handle cases where yfinance returns multi-level column headers.
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)

            # Use 'Adj Close' for accurate calculations that account for splits and dividends.
            df['Close'] = df['Adj Close']
            df.dropna(inplace=True)

            # Ensure we have enough data for a 200-day moving average.
            if len(df) < 200: return None

            close_series = df["Close"]

            # Calculate the indicators.
            rsi = momentum.RSIIndicator(close_series).rsi().iloc[-1]
            macd_line = trend.MACD(close_series).macd().iloc[-1]
            
            # Final check to ensure all calculated values are valid numbers.
            if any(pd.isna([rsi, macd_line])):
                return None

            return {"Ticker": ticker, "RSI": rsi, "MACD": macd_line}
        except Exception:
            return None

    # Fetch technical data only for our fundamentally strong stocks.
    techs_results = [get_technicals(t) for t in filtered_stocks["Ticker"]]
    techs_clean = [t for t in techs_results if t is not None]

    if techs_clean:
        df_tech = pd.DataFrame(techs_clean)
        # Merge our fundamental and technical data into a final DataFrame.
        final_df = pd.merge(filtered_stocks, df_tech, on="Ticker", how="inner")

        # Step 5: The Scoring Engine
        def normalize(series, inverse=False):
            """Normalizes a pandas series to a 0-100 scale for fair comparison."""
            min_val, max_val = series.min(), series.max()
            if max_val == min_val: return pd.Series(100, index=series.index)
            # The 'inverse' flag is for metrics where lower is better (like P/E).
            if inverse:
                return 100 * (max_val - series) / (max_val - min_val)
            else:
                return 100 * (series - min_val) / (max_val - min_val)

        # Apply normalization to create scores for each metric.
        final_df["score_pe"] = normalize(final_df["PE"], inverse=True)
        final_df["score_roe"] = normalize(final_df["ROE"])
        final_df["score_roa"] = normalize(final_df["ROA"])
        final_df["score_cagr"] = normalize(final_df["CAGR"])
        # For RSI, a lower value (oversold) is generally better for buying opportunities.
        final_df["score_rsi"] = normalize(final_df["RSI"], inverse=True)

        # Calculate the final, weighted Total Score.
        final_df["Total_Score"] = (
            final_df["score_pe"] * 0.25 +
            final_df["score_roe"] * 0.25 +
            final_df["score_roa"] * 0.20 +
            final_df["score_cagr"] * 0.20 +
            final_df["score_rsi"] * 0.10
        )

        # Sort by the final score to get our top recommendations.
        recommendations = final_df.sort_values("Total_Score", ascending=False)

        print(f"\nScoring complete. {len(recommendations)} stocks were successfully analyzed.")
        print("Top 5 Recommended Stocks")
        print(recommendations.head(5)[["Ticker", "PE", "ROE", "RSI", "Total_Score"]])

    else:
        print("\nCould not fetch valid technical data for any of the filtered stocks.")
else:
    print("\nProject finished. No stocks passed the fundamental screening.")




[notice] A new release of pip is available: 24.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Starting Part 1: Fundamental Screening


In [4]:
#Backtesting the Strategy

# Proceed to backtesting only if we have recommendations.
if not recommendations.empty:
    print("\nStarting Part 3: Backtesting the Strategy")
    
    # Select the top 5 stocks for the backtest
    top_tickers = recommendations['Ticker'].head(5).tolist()
    
    # Backtesting parameters
    initial_cash = 100000.0
    stop_loss_pct = 0.05  # 5%
    take_profit_pct = 0.08 # 8%
    trailing_stop_pct = 0.03 # 3%
    
    processed = {}
    print(f"Preparing backtest data for: {top_tickers}")

    for ticker in top_tickers:
        try:
            # Fetch 5 years of data for a comprehensive backtest.
            df = yf.download(ticker, start='2020-04-01', end='2025-04-01', auto_adjust=False, progress=False)
            if df.empty: continue

            # Data Cleaning and Indicator Calculation
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)
            df['Close'] = df['Adj Close']
            
            lookback = 15
            df['UpperBand'] = df['High'].shift(1).rolling(window=lookback).max()
            df['LowerBand'] = df['Low'].shift(1).rolling(window=lookback).min()
            df['RSI'] = momentum.RSIIndicator(df['Close']).rsi()
            macd = trend.MACD(df['Close'])
            df['MACD'] = macd.macd()
            df['MACD_Signal'] = macd.macd_signal()
            
            # Define Buy/Sell Signals based on the reference logic
            entry_cond = (
                (df['Close'] > df['UpperBand'] * 1.005) &
                (df['RSI'] < 75) &
                (df['MACD'] > df['MACD_Signal'])
            )
            retest_cond = (
                (df['Close'] > df['UpperBand']) &
                (df['Volume'] > df['Volume'].rolling(30).mean())
            )
            df['Signal'] = 0
            df.loc[(entry_cond) | (df['RSI'] < 14) | (retest_cond), 'Signal'] = 1
            
            exit_cond = (
                (df['Close'] < df['LowerBand'] * 0.995) |
                (df['MACD'] < df['MACD_Signal'] * 0.995)
            )
            df.loc[exit_cond, 'Signal'] = -1
            
            processed[ticker] = df.dropna()
        except Exception as e:
            print(f"Error processing backtest data for {ticker}: {e}")

    if not any(not df.empty for df in processed.values()):
        print("Backtest could not be run as no stocks were successfully processed.")
    else:
        cash = initial_cash
        positions = {} # Using a dictionary to hold multiple positions
        portfolio_log = []
        
        # Create a master timeline of all trading days
        all_dates = pd.concat(processed.values()).index.unique().sort_values()

        for date in all_dates:
            # SELL LOGIC AND TRAILING STOP-LOSS
            for ticker in list(positions.keys()):
                if date in processed[ticker].index:
                    row = processed[ticker].loc[date]
                    entry_price, shares, trailing_stop = positions[ticker]
                    current_price = row['Close']
                    
                    # Update trailing stop-loss
                    new_trailing_stop = current_price * (1 - trailing_stop_pct)
                    trailing_stop = max(trailing_stop, new_trailing_stop)
                    positions[ticker] = (entry_price, shares, trailing_stop)

                    # Check for exit conditions
                    if (current_price <= trailing_stop or 
                        current_price <= entry_price * (1 - stop_loss_pct) or
                        current_price >= entry_price * (1 + take_profit_pct) or
                        row['Signal'] == -1):
                        cash += shares * current_price
                        print(f"Sell {ticker} on {date.date()} at {current_price:.2f}")
                        del positions[ticker]

            # --- BUY LOGIC WITH DYNAMIC CASH ALLOCATION ---
            # Find all buy signals for today
            buy_signals_today = []
            for ticker in top_tickers:
                if ticker not in positions and date in processed[ticker].index:
                    if processed[ticker].loc[date]['Signal'] == 1:
                        buy_signals_today.append(ticker)
            
            # Allocate available cash to new positions
            if buy_signals_today:
                cash_per_trade = cash / (len(buy_signals_today) + len(positions))
                for ticker in buy_signals_today:
                    if cash_per_trade > 0:
                        price = processed[ticker].loc[date]['Close']
                        shares = cash_per_trade // price
                        if shares > 0:
                            cost = shares * price
                            cash -= cost
                            trailing_stop = price * (1 - trailing_stop_pct)
                            positions[ticker] = (price, shares, trailing_stop)
                            print(f"Buy {ticker} on {date.date()} at {price:.2f}")
            
            # --- LOG PORTFOLIO VALUE ---
            current_portfolio_value = cash
            for ticker, (entry_price, shares, trailing_stop) in positions.items():
                if date in processed[ticker].index:
                    current_portfolio_value += shares * processed[ticker].loc[date]['Close']
            
            portfolio_log.append({'Date': date, 'Value': current_portfolio_value})

        # --- CALCULATE FINAL PERFORMANCE ---
        if portfolio_log:
            final_value = portfolio_log[-1]['Value']
            start_date = portfolio_log[0]['Date']
            end_date = portfolio_log[-1]['Date']
            years = (end_date - start_date).days / 365.25 if end_date > start_date else 0

            def cagr(initial, final, time_in_years):
                if time_in_years <= 0 or initial <= 0: return 0
                return (((final / initial) ** (1 / time_in_years)) - 1) * 100
            cagr_result = cagr(initial_cash, final_value, years)

            print("\n--- Backtest Results ---")
            print(f"Initial Portfolio Value: {initial_cash:,.2f}")
            print(f"Final Portfolio Value:   {final_value:,.2f}")
            print(f"Backtest Period:         {years:.2f} years")
            print(f"CAGR:                    {cagr_result:.2f}%")
        else:
            print("Backtest completed, but no trades were made.")
else:
    print("\nProject finished. No recommendations to backtest.")


--- Starting Part 3: Backtesting the Strategy ---
Preparing backtest data for: ['TCS.NS', 'INFY.NS', 'HCLTECH.NS', 'DRREDDY.NS', 'TATAMOTORS.NS']
Buy TATAMOTORS.NS on 2020-06-02 at 95.09
Sell TATAMOTORS.NS on 2020-06-05 at 109.13
Buy TATAMOTORS.NS on 2020-06-05 at 109.13
Sell TATAMOTORS.NS on 2020-06-09 at 109.82
Buy DRREDDY.NS on 2020-07-30 at 875.37
Buy TATAMOTORS.NS on 2020-08-10 at 122.04
Sell DRREDDY.NS on 2020-08-12 at 875.67
Sell TATAMOTORS.NS on 2020-08-14 at 122.78
Buy TATAMOTORS.NS on 2020-08-26 at 135.88
Sell TATAMOTORS.NS on 2020-09-02 at 148.10
Buy DRREDDY.NS on 2020-09-17 at 935.58
Sell DRREDDY.NS on 2020-09-18 at 1033.66
Buy DRREDDY.NS on 2020-09-18 at 1033.66
Sell DRREDDY.NS on 2020-09-21 at 996.58
Buy TATAMOTORS.NS on 2020-11-10 at 143.86
Sell TATAMOTORS.NS on 2020-11-13 at 144.21
Buy TATAMOTORS.NS on 2020-11-17 at 155.69
Sell TATAMOTORS.NS on 2020-11-18 at 170.96
Buy TATAMOTORS.NS on 2020-11-18 at 170.96
Sell TATAMOTORS.NS on 2020-11-19 at 165.49
Buy TATAMOTORS.NS on