In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler



In [None]:
# Function to fetch stock data using yfinance
def fetch_stock_data(symbol, interval='1d'):
    """Download price history for ``symbol`` at the given candle ``interval``.

    The download period is chosen from the interval:
    1m/2m/5m -> 5d, 15m/30m/60m/90m -> 1mo, 1d/5d/1wk -> 10y, 1mo -> max.

    Returns:
        The history frame with its index made timezone-naive, or ``None``
        (after printing a notice) when the frame is empty or has fewer than
        60 rows.

    Raises:
        ValueError: if ``interval`` is not one of the intervals listed above.
    """
    # Each entry pairs a group of intervals with the period requested for them.
    period_by_interval_group = (
        (('1m', '2m', '5m'), '5d'),
        (('15m', '30m', '60m', '90m'), '1mo'),
        (('1d', '5d', '1wk'), '10y'),
        (('1mo',), 'max'),
    )
    for interval_group, lookback in period_by_interval_group:
        if interval in interval_group:
            period = lookback
            break
    else:
        raise ValueError("Unsupported interval")

    hist = yf.Ticker(symbol).history(period=period, interval=interval)

    too_short = len(hist) < 60
    if hist.empty or too_short:
        print(f"No sufficient data available for {symbol} at {interval} interval")
        return None

    # Strip timezone information from the index.
    hist.index = hist.index.tz_localize(None)
    return hist



In [None]:
# Function to process stock data and compute technical indicators
def process_stock_data(stock_data, previous_candles, future_candles):
    """Add technical-indicator and calendar columns to ``stock_data`` in place.

    The index is converted to datetimes and sorted, then one column is added
    per indicator, plus ``Intraday_Volatility``, an EMA column named
    ``f'{previous_candles}_period_EMA'``, and ``Hour`` / ``DayOfWeek`` taken
    from the index.

    Args:
        stock_data: frame with at least ``High``, ``Low``, ``Close`` and
            ``Volume`` columns; it is modified in place.
        previous_candles: span of the extra EMA column.
        future_candles: accepted but not used by this function.

    Returns:
        The same ``stock_data`` object, with the new columns attached.
    """
    stock_data.index = pd.to_datetime(stock_data.index)
    stock_data.sort_index(inplace=True)

    def _attach(specs):
        # Each spec is (column name, indicator function, extra positional args).
        for column, indicator, extra_args in specs:
            stock_data[column] = indicator(stock_data, *extra_args)

    _attach([
        ('SMA_10', calculate_sma, (10,)),
        ('SMA_50', calculate_sma, (50,)),
        ('EMA_10', calculate_ema, (10,)),
        ('EMA_50', calculate_ema, (50,)),
        ('RSI', calculate_rsi, (14,)),
    ])

    # Indicators that return two series each.
    macd_line, macd_signal = calculate_macd(stock_data)
    stock_data['MACD'] = macd_line
    stock_data['MACD_signal'] = macd_signal

    band_upper, band_lower = calculate_bbands(stock_data)
    stock_data['BB_upper'] = band_upper
    stock_data['BB_lower'] = band_lower

    _attach([
        ('Stoch', calculate_stoch, ()),
        ('ATR', calculate_atr, ()),
        ('OBV', calculate_obv, ()),
        ('Williams_%R', calculate_williams_r, ()),
        ('ADX', calculate_adx, (14,)),
        ('CCI', calculate_cci, (20,)),
        ('Momentum', calculate_momentum, (10,)),
        ('Super_Trend', calculate_super_trend, ()),
        ('VWAP', calculate_vwap, ()),
    ])

    candle_range = stock_data['High'] - stock_data['Low']
    stock_data['Intraday_Volatility'] = candle_range / stock_data['Close']

    ema_column = f'{previous_candles}_period_EMA'
    stock_data[ema_column] = calculate_ema(stock_data, previous_candles)

    timestamps = stock_data.index
    stock_data['Hour'] = timestamps.hour
    stock_data['DayOfWeek'] = timestamps.dayofweek

    return stock_data



In [None]:
# Function to combine data and prepare for model training
def combined_data(symbol, interval='1d', previous_candles=30, future_candles=30):
    """Build the modelling table (features, target, EMA, timestamp) for one symbol.

    The target ``Price_Difference`` is the ``previous_candles``-span EMA taken
    ``future_candles`` rows ahead minus the EMA at the current row. Rows with
    any missing value are dropped, which removes the indicator warm-up rows and
    the last ``future_candles`` rows (they have no future EMA yet).

    Args:
        symbol: ticker passed to ``fetch_stock_data``.
        interval: candle interval passed to ``fetch_stock_data``.
        previous_candles: span of the EMA the target is built from.
        future_candles: how many rows ahead the target looks.

    Returns:
        A new DataFrame with the indicator columns, ``Price_Difference``, the
        EMA column and a ``Datetime`` column; or ``None`` when no data could be
        fetched or no complete row remains after dropping missing values.
    """
    stock_data = fetch_stock_data(symbol, interval)

    if stock_data is None:
        return None

    stock_data = process_stock_data(stock_data, previous_candles, future_candles)

    ema_column = f'{previous_candles}_period_EMA'
    future_ema = stock_data[ema_column].shift(-future_candles)
    stock_data['Price_Difference'] = future_ema - stock_data[ema_column]

    stock_data.dropna(inplace=True)

    # A long look-ahead or an indicator that is NaN on every row (e.g. a
    # volume-based one when volume is all zero) can leave nothing to train on.
    # Report it the same way as a failed fetch instead of handing an empty
    # frame to the caller.
    if stock_data.empty:
        print(f"No complete rows left for {symbol} at {interval} interval after dropping missing values")
        return None

    current_tech_indicators = stock_data[[
        'SMA_10', 'SMA_50', 'EMA_10', 'EMA_50', 'RSI', 'MACD', 'MACD_signal',
        'BB_upper', 'BB_lower', 'Stoch', 'ATR', 'OBV', 'Williams_%R', 'ADX',
        'CCI', 'Momentum', 'Super_Trend', 'VWAP', 'Intraday_Volatility',
        'Hour', 'DayOfWeek',
        'Price_Difference', ema_column
    ]].copy()

    current_tech_indicators['Datetime'] = stock_data.index

    return current_tech_indicators



In [None]:
# Technical indicators calculation functions
def calculate_sma(data, window):
    """Simple moving average of the ``Close`` column over ``window`` rows."""
    closing_prices = data['Close']
    return closing_prices.rolling(window=window).mean()

def calculate_ema(data, window):
    """Exponential moving average of ``Close`` with span ``window`` (``adjust=False``)."""
    smoother = data['Close'].ewm(span=window, adjust=False)
    return smoother.mean()

def calculate_rsi(data, window):
    """Relative Strength Index of ``Close`` using simple rolling means.

    Gains and losses are averaged with a plain ``window``-row rolling mean;
    the index is ``100 - 100 / (1 + avg_gain / avg_loss)``.
    """
    change = data['Close'].diff()

    # Split each change into its upward and downward part; everything that
    # does not match the condition (including the leading NaN) becomes 0.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)

    avg_gain = upward.rolling(window=window).mean()
    avg_loss = downward.rolling(window=window).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def calculate_macd(data, fastperiod=12, slowperiod=26, signalperiod=9):
    """Return the MACD line and its signal line for ``Close``.

    The MACD line is the fast EMA minus the slow EMA; the signal line is an
    EMA of the MACD line. All EMAs use ``adjust=False``.

    Returns:
        Tuple ``(macd, signal)`` of Series.
    """
    def _ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    close = data['Close']
    macd_line = _ema(close, fastperiod) - _ema(close, slowperiod)
    signal_line = _ema(macd_line, signalperiod)
    return macd_line, signal_line

def calculate_bbands(data, window=20):
    """Return the upper and lower Bollinger Bands of ``Close``.

    The bands sit two rolling standard deviations above and below the
    ``window``-row rolling mean.

    Returns:
        Tuple ``(upper_band, lower_band)`` of Series.
    """
    rolling_close = data['Close'].rolling(window=window)
    centre = rolling_close.mean()
    half_width = rolling_close.std() * 2
    return centre + half_width, centre - half_width

def calculate_stoch(data, window=14):
    """Stochastic %K: position of ``Close`` within the rolling Low/High range, times 100."""
    lowest_low = data['Low'].rolling(window=window).min()
    highest_high = data['High'].rolling(window=window).max()
    price_range = highest_high - lowest_low
    return 100 * (data['Close'] - lowest_low) / price_range

def calculate_atr(data, window=14):
    """Average True Range: ``window``-row rolling mean of the true range.

    The true range of a row is the largest of High-Low, |High - previous
    Close| and |Low - previous Close|.
    """
    prev_close = data['Close'].shift()
    range_candidates = (
        data['High'] - data['Low'],
        (data['High'] - prev_close).abs(),
        (data['Low'] - prev_close).abs(),
    )

    # Element-wise maximum via the built-in max: when the right-hand value is
    # NaN (first row, no previous close) the left-hand value is kept.
    true_range = range_candidates[0]
    for candidate in range_candidates[1:]:
        true_range = true_range.combine(candidate, max)

    return true_range.rolling(window=window).mean()

def calculate_obv(data):
    """On-Balance Volume: running total of ``Volume`` signed by the direction of the ``Close`` change."""
    direction = np.sign(data['Close'].diff())
    # The first row has no previous close; it contributes 0.
    signed_volume = (direction * data['Volume']).fillna(0)
    return signed_volume.cumsum()

def calculate_williams_r(data, window=14):
    """Williams %R: distance of ``Close`` below the rolling high, as a share of the rolling range, times -100."""
    period_high = data['High'].rolling(window=window).max()
    period_low = data['Low'].rolling(window=window).min()
    distance_from_high = period_high - data['Close']
    return distance_from_high / (period_high - period_low) * -100

def calculate_adx(data, window=14):
    """Average Directional Index built from ``High``, ``Low`` and ``Close``.

    Steps, as implemented here:
      1. true range, averaged with a ``window``-row rolling mean;
      2. directional movement (up / down), smoothed with an EMA of span
         ``window`` (``adjust=False``);
      3. directional indicators = 100 * smoothed movement / average true range;
      4. DX = 100 * |+DI - -DI| / (+DI + -DI);
      5. ADX = ``window``-row rolling mean of DX.
    """
    high, low = data['High'], data['Low']
    prev_close = data['Close'].shift()

    # True range: element-wise maximum of the three candidate ranges.
    bar_range = high - low
    gap_from_high = np.abs(high - prev_close)
    gap_from_low = np.abs(low - prev_close)
    true_range = bar_range.combine(gap_from_high, max).combine(gap_from_low, max)
    avg_true_range = true_range.rolling(window=window).mean()

    up_move = high - high.shift()
    down_move = low.shift() - low

    def _smoothed_movement(is_dominant, move):
        # Keep the move only where it dominates (mask as 0/1), then smooth.
        masked = is_dominant.fillna(0).astype(int) * move
        return masked.ewm(span=window, adjust=False).mean()

    plus_dm = _smoothed_movement((up_move > down_move) & (up_move > 0), up_move)
    minus_dm = _smoothed_movement((down_move > up_move) & (down_move > 0), down_move)

    plus_di = 100 * (plus_dm / avg_true_range)
    minus_di = 100 * (minus_dm / avg_true_range)

    directional_index = 100 * np.abs((plus_di - minus_di) / (plus_di + minus_di))
    return directional_index.rolling(window=window).mean()

def calculate_cci(data, window=20):
    """Commodity Channel Index of the typical price over ``window`` rows.

    ``CCI = (TP - SMA(TP)) / (0.015 * mean deviation)`` where ``TP`` is the
    mean of High, Low and Close, and the mean deviation is the average
    absolute distance of the window's typical prices from that window's own
    mean.

    Args:
        data: frame with ``High``, ``Low`` and ``Close`` columns.
        window: number of rows in the rolling window.

    Returns:
        Series of CCI values; the first ``window - 1`` rows are NaN.
    """
    tp = (data['High'] + data['Low'] + data['Close']) / 3
    tp_window = tp.rolling(window=window)
    sma_tp = tp_window.mean()

    # The deviation must be measured against the current window's mean for
    # every value inside the window. Averaging each bar's distance from its
    # own trailing mean gives a different number and needs a second warm-up
    # period before producing values.
    mean_deviation = tp_window.apply(
        lambda values: np.abs(values - values.mean()).mean(), raw=True
    )

    return (tp - sma_tp) / (0.015 * mean_deviation)

def calculate_momentum(data, window=10):
    """Change in ``Close`` relative to the value ``window - 1`` rows earlier."""
    close = data['Close']
    lookback = window - 1
    return close - close.shift(lookback)

def calculate_super_trend(data, period=10, multiplier=3):
    """Compute the notebook's Super Trend line from ATR-based bands.

    Bands are ``(High + Low) / 2 +/- multiplier * ATR(period)``. A row takes
    the lower band when its close is above the upper band, the upper band when
    its close is below the lower band, and otherwise repeats the previous
    row's value. The first row has no previous row, so in that case it keeps
    its own close.

    NOTE(review): the band and flip rules here differ from the commonly
    published SuperTrend definition; they are kept as written - confirm intent.

    Args:
        data: frame with ``High``, ``Low`` and ``Close`` columns.
        period: ATR window.
        multiplier: band width in ATR units.

    Returns:
        Float Series aligned to ``data.index``.
    """
    atr = calculate_atr(data, period)
    hl2 = (data['High'] + data['Low']) / 2

    # Work on plain arrays: the loops below are sequential and per-element
    # pandas indexing is needlessly slow.
    close = data['Close'].to_numpy(dtype=float)
    basic_upper_band = (hl2 + (multiplier * atr)).to_numpy(dtype=float)
    basic_lower_band = (hl2 - (multiplier * atr)).to_numpy(dtype=float)

    # The final bands start as the basic bands and are only overridden when
    # the previous close was outside the previous final band.
    final_upper_band = basic_upper_band.copy()
    final_lower_band = basic_lower_band.copy()
    for i in range(1, len(close)):
        if close[i - 1] > final_upper_band[i - 1]:
            final_upper_band[i] = max(basic_upper_band[i], final_upper_band[i - 1])
        if close[i - 1] < final_lower_band[i - 1]:
            final_lower_band[i] = min(basic_lower_band[i], final_lower_band[i - 1])

    super_trend = close.copy()
    for i in range(len(close)):
        if close[i] > final_upper_band[i]:
            super_trend[i] = final_lower_band[i]
        elif close[i] < final_lower_band[i]:
            super_trend[i] = final_upper_band[i]
        elif i > 0:
            # Carry the previous value forward. Row 0 is excluded on purpose:
            # index -1 would read the last element of the series and leak the
            # final close into the start of the line.
            super_trend[i] = super_trend[i - 1]

    return pd.Series(super_trend, index=data.index, name=data['Close'].name)

def calculate_vwap(data):
    """Cumulative volume-weighted average of ``Close`` from the first row onward."""
    volume = data['Volume']
    traded_value = data['Close'] * volume
    return traded_value.cumsum() / volume.cumsum()



In [None]:
# Function to perform hyperparameter tuning for XGBoost model
def hyperparameter_tuning(X_train, y_train):
    """Grid-search an ``XGBRegressor`` and return the best fitted estimator.

    Every combination of ``n_estimators``, ``max_depth`` and ``learning_rate``
    below is evaluated with 5-fold cross-validation scored by R^2.
    """
    search_space = dict(
        n_estimators=[50, 100, 150],
        max_depth=[3, 6, 9],
        learning_rate=[0.01, 0.1, 0.2],
    )
    tuner = GridSearchCV(
        estimator=XGBRegressor(random_state=0),
        param_grid=search_space,
        cv=5,
        scoring='r2',
    )
    tuner.fit(X_train, y_train)
    return tuner.best_estimator_



In [None]:
# Main function to fetch and process data, and train the model
def fetch_data():
    """Interactively train one model per index and rank the indices by predicted EMA change.

    Prompts on stdin for the candle interval, the EMA span
    (``previous_candles``) and the look-ahead (``future_candles``). For each
    symbol it builds the feature table, holds out the most recent 20% of rows
    as a test set, fits scaling + PCA + a tuned XGBoost regressor on the older
    80%, and predicts the EMA change from the newest available row.

    Printed columns: the value labelled "Accuracy" is the test-set R^2 times
    100, and "Initial Price" / "Final Price" are the current and predicted EMA.

    NOTE: ``combined_data`` drops rows that have no future EMA yet, so the
    newest row used for the prediction is ``future_candles`` candles older
    than the latest downloaded candle.

    Raises:
        ValueError: if an answer is not an integer, the interval choice is
            outside the menu, or a candle count is not positive.
    """
    companies = ['^NSEI', '^NSEBANK']
    intervals = ['1m', '2m', '5m', '15m', '30m', '60m', '1d', '1wk', '1mo']
    feature_columns = [
        'SMA_10', 'SMA_50', 'EMA_10', 'EMA_50', 'RSI', 'MACD', 'MACD_signal',
        'BB_upper', 'BB_lower', 'Stoch', 'ATR', 'OBV', 'Williams_%R', 'ADX',
        'CCI', 'Momentum', 'Super_Trend', 'VWAP', 'Intraday_Volatility',
        'Hour', 'DayOfWeek',
    ]

    print("Select an interval for the candles:")
    for i, option in enumerate(intervals, 1):
        print(f"{i}. {option}")

    interval_choice = int(input("Enter the number corresponding to your choice: "))
    # Without this check, 0 or a negative number would wrap around through
    # negative indexing and silently select an interval from the end of the menu.
    if not 1 <= interval_choice <= len(intervals):
        raise ValueError(
            f"Interval choice must be between 1 and {len(intervals)}, got {interval_choice}"
        )
    interval = intervals[interval_choice - 1]

    previous_candles = int(input("Enter the number of candles to be considered for previous EMA: "))
    future_candles = int(input("Enter the number of candles to be considered for future EMA: "))
    if previous_candles < 1 or future_candles < 1:
        raise ValueError("The number of previous and future candles must both be at least 1")

    ema_column = f'{previous_candles}_period_EMA'
    results = []

    for symbol in companies:
        data = combined_data(symbol, interval, previous_candles, future_candles)
        if data is None:
            print(f"No data available for {symbol}. Skipping...")
            continue

        X = data[feature_columns]
        y = data['Price_Difference']

        # Rows are in time order and neighbouring targets overlap, so a
        # shuffled split would put near-duplicates of the test rows into the
        # training set. Hold out the most recent 20% instead.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

        # Fit the scaler and PCA on the training rows only; fitting them on
        # all rows would let test-set statistics leak into training.
        scaler = StandardScaler()
        pca = PCA()
        X_train_pca = pca.fit_transform(scaler.fit_transform(X_train))
        X_test_pca = pca.transform(scaler.transform(X_test))

        # Hyperparameter tuning for XGBoost
        best_xgb = hyperparameter_tuning(X_train_pca, y_train)

        # R-squared on the held-out rows, reported as a percentage
        y_pred = best_xgb.predict(X_test_pca)
        r2 = r2_score(y_test, y_pred)
        accuracy = r2 * 100

        # Predict the EMA change from the newest available row
        latest_features = X.iloc[[-1]]
        latest_pca = pca.transform(scaler.transform(latest_features))
        predicted_price_diff = best_xgb.predict(latest_pca)[0]

        final_ema = data[ema_column].iloc[-1]
        predicted_ema = final_ema + predicted_price_diff
        percentage_change = (predicted_price_diff / final_ema) * 100

        results.append((symbol, percentage_change, accuracy, final_ema, predicted_ema))

    results.sort(key=lambda x: x[1], reverse=True)

    # Displaying results
    print("\nCompanies sorted by predicted percentage change:")
    for symbol, change, acc, initial_price, final_price in results:
        print(f"{symbol}: {change:.2f}%, Accuracy: {acc:.2f}%, Initial Price: {initial_price:.2f}, Final Price: {final_price:.2f}")


In [None]:
# Run the interactive workflow; fetch_data() waits on input() prompts for the interval and candle counts.
fetch_data()