In [None]:
!pip install yfinance ta

In [13]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
import ta

In [11]:

# Ticker for Reliance Industries; the ".NS" suffix selects the National Stock Exchange (India)
symbol = "RELIANCE.NS"

# Look-back window: the 100 calendar days ending now
end_date = datetime.now()
start_date = end_date - timedelta(days=100)

# Download the price history, turn the index into a regular 'Date' column,
# and keep only the date plus the OHLCV columns
stock = yf.Ticker(symbol)
df = stock.history(start=start_date, end=end_date).reset_index()[
    ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
]

# Per-row derived columns:
#   Date          -> plain calendar date (time component dropped)
#   Daily_Return  -> close-to-close percentage change
#   VWAP          -> running volume-weighted average of the close over the whole window
df = df.assign(
    Date=df['Date'].dt.date,
    Daily_Return=df['Close'].pct_change(),
    VWAP=(df['Close'] * df['Volume']).cumsum() / df['Volume'].cumsum(),
)

# Whole-window statistics used as thresholds below
volume_mean = df['Volume'].mean()
volume_std = df['Volume'].std()
return_mean = df['Daily_Return'].mean()
return_std = df['Daily_Return'].std()

# Unusually high volume: more than 2 standard deviations above the mean volume
df['High_Volume'] = df['Volume'].gt(volume_mean + 2 * volume_std)

# Significant price move: daily return more than 2 standard deviations away from the mean return
df['Significant_Price_Move'] = (df['Daily_Return'] - return_mean).abs().gt(2 * return_std)

# Days on which both conditions hold at once
df['Volume_Spike_With_Price_Move'] = df['High_Volume'] & df['Significant_Price_Move']

# Preview the first rows of the result
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Daily_Return,VWAP,High_Volume,Significant_Price_Move,Volume_Spike_With_Price_Move
0,2024-03-18,2840.0,2883.449951,2833.050049,2878.949951,4584696,,2878.949951,False,False,False
1,2024-03-19,2857.5,2875.199951,2834.5,2850.5,4137882,-0.009882,2865.45365,False,False,False
2,2024-03-20,2855.899902,2890.0,2848.050049,2887.5,4244403,0.01298,2872.669948,False,False,False
3,2024-03-21,2905.050049,2915.800049,2889.350098,2901.949951,6503468,0.005004,2882.449977,False,False,False
4,2024-03-22,2899.949951,2920.0,2894.699951,2910.050049,9763804,0.002791,2891.667989,False,False,False


In [14]:
def fetch_stock_data(symbol, start_date, end_date):
    """Fetch price history for ``symbol`` through yfinance.

    Args:
        symbol: Ticker string passed to ``yf.Ticker``.
        start_date: Start of the requested range, forwarded as ``start``.
        end_date: End of the requested range, forwarded as ``end``.

    Returns:
        The frame returned by ``Ticker.history`` with its index reset, so the
        former index becomes an ordinary column.
    """
    history = yf.Ticker(symbol).history(start=start_date, end=end_date)
    return history.reset_index()

def add_technical_indicators(df):
    """Append trend, momentum, volatility and volume indicators to ``df``.

    The indicators are computed with the ``ta`` library using its default
    parameters, except where a ``window`` is given explicitly.

    Args:
        df: DataFrame with 'High', 'Low', 'Close' and 'Volume' columns.
            It is modified in place (new columns are added to it).

    Returns:
        The same DataFrame object, with the indicator columns added.
    """
    # Trend Indicators
    # SMA: Simple Moving Average, helps identify trend direction
    # ML use: Can be used to create buy/sell signals or as a feature for trend prediction
    for window in (20, 50, 100, 200):
        df[f'SMA_{window}'] = ta.trend.sma_indicator(df['Close'], window=window)

    # EMA: Exponential Moving Average, gives more weight to recent prices
    # ML use: Similar to SMA, but may react faster to recent price changes
    for window in (20, 50, 63, 100, 200):
        df[f'EMA_{window}'] = ta.trend.ema_indicator(df['Close'], window=window)

    # MACD: Moving Average Convergence Divergence, helps identify trend changes
    # NOTE: macd_diff is the MACD histogram (MACD line minus its signal line),
    # not the raw MACD line; the column name is kept for downstream compatibility.
    # ML use: Can be used to predict trend reversals or as a feature for buy/sell decisions
    df['MACD'] = ta.trend.macd_diff(df['Close'])

    # ADX: Average Directional Index, measures trend strength
    # ML use: Can help in identifying strong trends, useful for trend-following strategies
    df['ADX'] = ta.trend.adx(df['High'], df['Low'], df['Close'])

    # Momentum Indicators
    # RSI: Relative Strength Index, measures the speed and change of price movements
    # ML use: Can help predict overbought or oversold conditions
    df['RSI'] = ta.momentum.rsi(df['Close'])

    # Stochastic Oscillator: Compares a closing price to its price range over time
    # ML use: Another indicator for overbought/oversold conditions and potential reversals
    df['Stoch_Osc'] = ta.momentum.stoch(df['High'], df['Low'], df['Close'])

    # Williams %R: Measures overbought and oversold levels
    # ML use: Similar to RSI and Stochastic, can be used to predict potential price reversals
    df['Williams_R'] = ta.momentum.williams_r(df['High'], df['Low'], df['Close'])

    # Volatility Indicators
    # Bollinger Bands: Measure market volatility and overbought/oversold conditions
    # ML use: Can be used to predict potential breakouts or mean reversion
    # Use the band *values* (lband / mavg / hband). The *_indicator variants
    # return 0/1 breakout flags rather than price levels, and the lower band
    # belongs in BBlow, the upper band in BBupp.
    df['BBlow'] = ta.volatility.bollinger_lband(df['Close'])
    df['BBmid'] = ta.volatility.bollinger_mavg(df['Close'])
    df['BBupp'] = ta.volatility.bollinger_hband(df['Close'])

    # ATR: Average True Range, measures market volatility
    # ML use: Can be used to set stop-loss levels or as a feature for volatility prediction
    df['ATR'] = ta.volatility.average_true_range(df['High'], df['Low'], df['Close'])

    # Volume Indicators
    # OBV: On-Balance Volume, relates volume to price change
    # ML use: Can be used to confirm price trends or predict potential reversals
    df['OBV'] = ta.volume.on_balance_volume(df['Close'], df['Volume'])

    # CMF: Chaikin Money Flow, measures buying and selling pressure
    # ML use: Can help in predicting potential trend reversals or continuation
    df['CMF'] = ta.volume.chaikin_money_flow(df['High'], df['Low'], df['Close'], df['Volume'])

    return df


In [15]:
def add_derived_features(df, lags=(1, 2, 3, 5, 10), roll_windows=(5, 10, 20),
                         anomaly_window=20, n_std=2):
    """Append price/volume features derived from 'Date', 'Close' and 'Volume'.

    With the default arguments the added columns and their values are the same
    as before the parameters were introduced.

    Args:
        df: DataFrame with 'Date', 'Close' and 'Volume' columns. 'Date' may be
            a datetime column or anything ``pd.to_datetime`` can convert
            (e.g. plain ``datetime.date`` objects). Modified in place.
        lags: Row offsets for the ``Close_Lag_{n}`` / ``Volume_Lag_{n}`` columns.
        roll_windows: Window sizes for the ``Close_Roll_Mean_{w}``,
            ``Close_Roll_Std_{w}`` and ``Volume_Roll_Mean_{w}`` columns.
        anomaly_window: Rolling window used for 'Relative_Volume',
            'High_Volume' and 'Significant_Price_Move'.
        n_std: Number of rolling standard deviations that counts as unusual.

    Returns:
        The same DataFrame object, with the feature columns added.
    """
    close = df['Close']
    volume = df['Volume']

    # Price change: Absolute change in price
    # ML use: Direct indicator of price movement, useful for regression models
    df['Price_Change'] = close.diff()

    # Percentage change: Relative change in price
    # ML use: Normalized price change, useful for comparing across different price scales
    returns = close.pct_change()
    df['Pct_Change'] = returns

    # Lagged features: Past values of close price and volume
    # ML use: Allows the model to capture time-dependent patterns and trends
    for lag in lags:
        df[f'Close_Lag_{lag}'] = close.shift(lag)
        df[f'Volume_Lag_{lag}'] = volume.shift(lag)

    # Rolling statistics: Moving averages and standard deviations
    # ML use: Captures recent trends and volatility, can help in predicting future movements
    for window in roll_windows:
        close_roll = close.rolling(window=window)
        df[f'Close_Roll_Mean_{window}'] = close_roll.mean()
        df[f'Close_Roll_Std_{window}'] = close_roll.std()
        df[f'Volume_Roll_Mean_{window}'] = volume.rolling(window=window).mean()

    # Rolling volume statistics, shared by Relative_Volume and High_Volume below
    volume_roll = volume.rolling(window=anomaly_window)
    volume_mean = volume_roll.mean()
    volume_std = volume_roll.std()

    # Relative volume: Current volume compared to recent average
    # ML use: Identifies unusual trading activity, which might precede significant price moves
    df['Relative_Volume'] = volume / volume_mean

    # Coerce once so that a 'Date' column of plain date objects also works with .dt
    dates = pd.to_datetime(df['Date'])

    # Day of week: Captures potential day-of-week effects
    # ML use: Some stocks might have patterns related to the day of the week
    df['Day_of_Week'] = dates.dt.dayofweek

    # Is month end: 1 when the row's date is the last calendar day of its month
    # ML use: Some stocks might have patterns related to the end of the month (e.g., due to rebalancing)
    df['Is_Month_End'] = dates.dt.is_month_end.astype(int)

    # VWAP: running volume-weighted average of the close, accumulated from the first row
    # ML use: Provides a benchmark for intraday trades, can be used to identify trend strength
    df['VWAP'] = (close * volume).cumsum() / volume.cumsum()

    # High Volume: Identifies days with unusually high trading volume
    # ML use: Can indicate important events or significant market interest
    df['High_Volume'] = (volume > (volume_mean + n_std * volume_std)).astype(int)

    # Significant Price Move: Identifies days with unusually large price changes
    # ML use: Can indicate important events or significant shifts in market sentiment
    returns_roll = returns.rolling(window=anomaly_window)
    deviation = (returns - returns_roll.mean()).abs()
    df['Significant_Price_Move'] = (deviation > (n_std * returns_roll.std())).astype(int)

    # Volume Spike with Price Move: Identifies high volume days with significant price changes
    # ML use: Can indicate particularly important market events or major shifts in supply/demand
    df['Volume_Spike_With_Price_Move'] = (df['High_Volume'] & df['Significant_Price_Move']).astype(int)

    return df

In [26]:
# Pipeline inputs: the ticker and a look-back window that ends now
symbol = "RELIANCE.NS"
lookback = timedelta(days=1000)  # more history than the exploratory cell, for better feature calculation
end_date = datetime.now()
start_date = end_date - lookback

In [27]:
# Download the history, keep the date + OHLCV columns, then run both feature
# stages in order: technical indicators first, derived features second.
ohlcv_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
df = (
    fetch_stock_data(symbol, start_date, end_date)[ohlcv_columns]
    .pipe(add_technical_indicators)
    .pipe(add_derived_features)
)


In [28]:
# Show the size of the feature frame as (rows, columns)
df.shape

(675, 54)

In [29]:
# List every column now present in the feature frame
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'SMA_20', 'SMA_50',
       'SMA_100', 'SMA_200', 'EMA_20', 'EMA_50', 'EMA_63', 'EMA_100',
       'EMA_200', 'MACD', 'ADX', 'RSI', 'Stoch_Osc', 'Williams_R', 'BBlow',
       'BBmid', 'BBupp', 'ATR', 'OBV', 'CMF', 'Price_Change', 'Pct_Change',
       'Close_Lag_1', 'Volume_Lag_1', 'Close_Lag_2', 'Volume_Lag_2',
       'Close_Lag_3', 'Volume_Lag_3', 'Close_Lag_5', 'Volume_Lag_5',
       'Close_Lag_10', 'Volume_Lag_10', 'Close_Roll_Mean_5',
       'Close_Roll_Std_5', 'Volume_Roll_Mean_5', 'Close_Roll_Mean_10',
       'Close_Roll_Std_10', 'Volume_Roll_Mean_10', 'Close_Roll_Mean_20',
       'Close_Roll_Std_20', 'Volume_Roll_Mean_20', 'Relative_Volume',
       'Day_of_Week', 'Is_Month_End', 'VWAP', 'High_Volume',
       'Significant_Price_Move', 'Volume_Spike_With_Price_Move'],
      dtype='object')

In [25]:
# Write the feature frame to CSV without the row index.
# NOTE(review): no NaN handling is visible before this export, so rows inside
# the lag / rolling-window warm-up are written with missing values — confirm
# that downstream consumers expect that.
output_path = 'reliance_stock_data_ml_ready.csv'
df.to_csv(output_path, index=False)