<a href="https://colab.research.google.com/github/kakoriak-cloud/SA-Public/blob/main/5_5_stock_pick_via_RF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import warnings

# Install an ignore-all filter: every warning issued through the `warnings`
# module after this point is suppressed for the rest of the process.
# NOTE(review): this also hides deprecation warnings from the libraries
# imported above — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## ------------------------------------------------------------------ ##
##                      PART 1: GETTING THE STOCKS                    ##
## ------------------------------------------------------------------ ##
def get_nifty100_tickers():
    """Fetch index constituents from Wikipedia as ``.NS``-suffixed tickers.

    NOTE: despite the function name (kept so existing callers keep working),
    the page that is scraped is the NIFTY 50 article, so the returned list
    is the NIFTY 50 constituents, not the NIFTY 100.

    The constituents table is located by looking for a ``Symbol`` column
    instead of a fixed table position, so the function keeps working when
    tables are added to or removed from the page.

    Returns:
        list[str]: one ticker per constituent, each with ``.NS`` appended
        (the form this file passes to ``yf.download``). Slice the result
        (e.g. ``[:10]``) for a quicker test run.

    Raises:
        ValueError: if no table on the page has a ``Symbol`` column.
    """
    url = "https://en.wikipedia.org/wiki/NIFTY_50"
    tables = pd.read_html(url)

    constituents = next(
        (table for table in tables if 'Symbol' in table.columns),
        None,
    )
    if constituents is None:
        raise ValueError(f"No table with a 'Symbol' column found at {url}")

    # Drop blank cells and stray whitespace so every entry is a clean symbol.
    symbols = constituents['Symbol'].dropna().astype(str).str.strip()
    return [symbol + ".NS" for symbol in symbols if symbol]

## ------------------------------------------------------------------ ##
##             PART 2: PREDICTION FUNCTION FOR ONE STOCK              ##
## ------------------------------------------------------------------ ##
def predict_stock_price_rf(ticker, start="2020-01-01", end="2025-08-15"):
    """Train a Random Forest on one stock and predict its next-day growth.

    Each training row pairs features known at the close of day ``t`` with
    the close of day ``t + 1`` as the target. The forecast is then made from
    the features of the most recent day, whose next close is not yet known.
    (Training on the same-day close would only re-predict a row the model
    has already seen, and the moving averages would leak the answer.)

    Args:
        ticker: Symbol passed to ``yf.download`` (e.g. ``"INFY.NS"``).
        start: First date of the price history to download.
        end: End date of the price history to download.

    Returns:
        A one-element ``pandas.Series`` indexed by ``ticker`` holding the
        predicted percentage change versus the last known close (read it
        with ``.iloc[0]``), or ``None`` when there is too little data or
        anything fails along the way.
    """
    try:
        # 1. Fetch data
        stock_data = yf.download(ticker, start=start, end=end, progress=False)
        if len(stock_data) < 100:  # Need enough data for features
            return None

        # yfinance may return MultiIndex columns, in which case
        # stock_data['Close'] is a one-column DataFrame instead of a Series.
        close = stock_data['Close']
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        close = close.dropna()
        if len(close) < 100:
            return None

        # 2. Feature engineering. Names are relative to the day being
        #    predicted: "lag_1" is the close one day before the target.
        features = pd.DataFrame({
            'lag_1': close,
            'lag_3': close.shift(2),
            'ma_7': close.rolling(window=7).mean(),
            'ma_30': close.rolling(window=30).mean(),
        })

        # 3. Target: the following day's close (NaN on the final row).
        target = close.shift(-1)
        trainable = features.notna().all(axis=1) & target.notna()

        # 4. Train on every row whose next-day close is known.
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(features[trainable], target[trainable])

        # 5. Predict from the most recent day's features.
        latest_features = features.iloc[[-1]]
        predicted_price = model.predict(latest_features)[0]

        # 6. Express the prediction as growth over the last known close.
        last_known_price = close.iloc[-1]
        growth_percent = ((predicted_price - last_known_price) / last_known_price) * 100
        return pd.Series([growth_percent], index=[ticker])
    except Exception as exc:
        # Best-effort: one bad ticker must not abort the whole scan, but
        # say why it was skipped instead of failing silently.
        print(f"Skipping {ticker}: {exc}")
        return None

## ------------------------------------------------------------------ ##
##                  PART 3: MAIN EXECUTION & RANKING                  ##
## ------------------------------------------------------------------ ##
# Script entry point: score every ticker, then print the five largest and
# five smallest predicted growth values.
if __name__ == "__main__":
    stock_list = get_nifty100_tickers()
    predictions = {}

    print(f"--- Starting Analysis for {len(stock_list)} Stocks with Random Forest ---")
    print("(This is much faster than the LSTM and will take about a minute...)")

    # Loop through all stocks, get prediction, and store it
    for position, ticker in enumerate(stock_list, start=1):
        print(f"Analyzing {position}/{len(stock_list)}: {ticker}...")
        growth = predict_stock_price_rf(ticker)
        if growth is None:
            continue

        # The predictor may hand back a plain scalar or a one-element
        # Series depending on the yfinance column layout; flatten both to
        # a float instead of assuming `.iloc` exists.
        values = np.asarray(growth, dtype=float).ravel()
        if values.size == 0 or np.isnan(values[0]):
            # NaN would make the ranking below order-dependent garbage.
            continue
        predictions[ticker] = float(values[0])

    print("\n--- Analysis Complete ---")

    if predictions:
        # Sort for the Top 5 Gainers (highest growth)
        top_gainers = sorted(predictions.items(), key=lambda item: item[1], reverse=True)

        print("\n--- 🏆 Top 5 High-Growth Stock Predictions ---")
        for rank, (ticker, growth) in enumerate(top_gainers[:5], start=1):
            print(f"{rank}. {ticker}: Predicted Growth of {growth:.2f}%")

        # Sort for the Top 5 Losers (most negative growth)
        top_losers = sorted(predictions.items(), key=lambda item: item[1])

        print("\n--- 📉 Top 5 High-Reduction Stock Predictions ---")
        for rank, (ticker, growth) in enumerate(top_losers[:5], start=1):
            print(f"{rank}. {ticker}: Predicted Reduction of {growth:.2f}%")
    else:
        print("Could not retrieve predictions for any stocks.")

--- Starting Analysis for 50 Stocks with Random Forest ---
(This is much faster than the LSTM and will take about a minute...)
Analyzing 1/50: ADANIENT.NS...
Analyzing 2/50: ADANIPORTS.NS...
Analyzing 3/50: APOLLOHOSP.NS...
Analyzing 4/50: ASIANPAINT.NS...
Analyzing 5/50: AXISBANK.NS...
Analyzing 6/50: BAJAJ-AUTO.NS...
Analyzing 7/50: BAJFINANCE.NS...
Analyzing 8/50: BAJAJFINSV.NS...
Analyzing 9/50: BEL.NS...
Analyzing 10/50: BHARTIARTL.NS...
Analyzing 11/50: CIPLA.NS...
Analyzing 12/50: COALINDIA.NS...
Analyzing 13/50: DRREDDY.NS...
Analyzing 14/50: EICHERMOT.NS...
Analyzing 15/50: ETERNAL.NS...
Analyzing 16/50: GRASIM.NS...
Analyzing 17/50: HCLTECH.NS...
Analyzing 18/50: HDFCBANK.NS...
Analyzing 19/50: HDFCLIFE.NS...
Analyzing 20/50: HEROMOTOCO.NS...
Analyzing 21/50: HINDALCO.NS...
Analyzing 22/50: HINDUNILVR.NS...
Analyzing 23/50: ICICIBANK.NS...
Analyzing 24/50: INDUSINDBK.NS...
Analyzing 25/50: INFY.NS...
Analyzing 26/50: ITC.NS...
Analyzing 27/50: JIOFIN.NS...
Analyzing 28/50: JS

In [2]:

import streamlit as st
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import warnings

# Install an ignore-all filter: every warning issued through the `warnings`
# module after this point is suppressed for the rest of the process.
# NOTE(review): this also hides deprecation warnings from the libraries
# imported above — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Use st.cache_data to prevent re-running the analysis on every interaction
# Cache for an hour: long enough that button clicks do not retrain every
# model, short enough that "today's" predictions are not served forever.
@st.cache_data(ttl=3600)
def get_predictions():
    """Run the full scan and return the top predicted gainers and losers.

    Downloads the NIFTY 50 constituents, trains one Random Forest per stock
    and ranks the stocks by predicted next-day growth. A Streamlit progress
    bar is advanced after each ticker.

    Returns:
        tuple[list, list]: ``(top_gainers, top_losers)``, each a list of up
        to five ``(ticker, growth_percent)`` tuples with ``growth_percent``
        a plain float; gainers are sorted descending, losers ascending.
        Both lists are empty when no prediction could be produced.
    """
    # --- Helper Function to Get Tickers ---
    def get_nifty50_tickers():
        """Return ``.NS``-suffixed symbols, or ``[]`` if no table matches."""
        url = "https://en.wikipedia.org/wiki/NIFTY_50"
        # Find the table by its 'Symbol' column; its position on the page
        # is not stable.
        for table in pd.read_html(url):
            if 'Symbol' in table.columns:
                symbols = table['Symbol'].dropna().astype(str).str.strip()
                return [symbol + ".NS" for symbol in symbols if symbol]
        return []

    # --- Helper Function to Predict a Single Stock ---
    def predict_stock_price_rf(ticker):
        """Return predicted next-day growth in percent as a float, or None."""
        try:
            stock_data = yf.download(ticker, start="2020-01-01", progress=False)
            if len(stock_data) < 100:
                return None

            # yfinance may return MultiIndex columns, in which case
            # stock_data['Close'] is a one-column DataFrame.
            close = stock_data['Close']
            if isinstance(close, pd.DataFrame):
                close = close.iloc[:, 0]
            close = close.dropna()
            if len(close) < 100:
                return None

            # Features as of day t; the target is the close of day t + 1,
            # so the model learns a genuine one-step-ahead mapping.
            features = pd.DataFrame({
                'lag_1': close,
                'ma_7': close.rolling(window=7).mean(),
            })
            target = close.shift(-1)
            trainable = features.notna().all(axis=1) & target.notna()

            model = RandomForestRegressor(n_estimators=100, random_state=42)
            model.fit(features[trainable], target[trainable])

            # The last row has no known next close: that is the forecast.
            predicted_price = model.predict(features.iloc[[-1]])[0]
            last_known_price = close.iloc[-1]
            growth_percent = float(
                ((predicted_price - last_known_price) / last_known_price) * 100
            )
            # NaN cannot be ranked; treat it as "no prediction".
            return None if pd.isna(growth_percent) else growth_percent
        except Exception:
            # Best-effort: skip a ticker that cannot be scored.
            return None

    # --- Main Logic ---
    stock_list = get_nifty50_tickers()
    predictions = {}

    # Use a progress bar in the app
    progress_bar = st.progress(0)

    for i, ticker in enumerate(stock_list):
        growth = predict_stock_price_rf(ticker)
        if growth is not None:
            predictions[ticker] = growth
        # Update the progress bar
        progress_bar.progress((i + 1) / len(stock_list))

    if not predictions:
        return [], []

    # Sort and return the results
    top_gainers = sorted(predictions.items(), key=lambda item: item[1], reverse=True)
    top_losers = sorted(predictions.items(), key=lambda item: item[1])

    return top_gainers[:5], top_losers[:5]


# --- Page header ---
st.title('📈 NIFTY 50 Stock Prediction App')
st.write("This app uses a Random Forest model to predict the next day's price movement for NIFTY 50 stocks.")

# Nothing is computed until the user presses the button.
if st.button('🚀 Get Today\'s Top 5 Predictions'):
    gainers, losers = get_predictions()

    if not gainers:
        # An empty gainers list is how get_predictions reports "no results".
        st.error("Could not retrieve predictions. There might be an issue with the data source.")
    else:
        # (heading, rows, name of the value column) for each result table,
        # rendered in this order.
        sections = (
            ('🏆 Top 5 Predicted Gainers', gainers, 'Predicted Growth %'),
            ('📉 Top 5 Predicted Losers', losers, 'Predicted Reduction %'),
        )
        for heading, rows, value_column in sections:
            st.subheader(heading)
            table = pd.DataFrame(rows, columns=['Ticker', value_column])
            st.dataframe(table, use_container_width=True)

2025-08-16 17:14:44.511 No runtime found, using MemoryCacheStorageManager
