In [None]:
import talib
# Function to get the technical indicators dataframe
def get_technical_indicators(hist_data):
    """
    Assemble a dataframe of technical indicator series for a market index.

    Parameters
    ----------
    hist_data : pandas.DataFrame
        Price history with 'Open', 'High', 'Low', 'Adj Close' and 'Volume'
        columns. The frame is NOT modified: rows with missing values are
        dropped on a copy.

    Returns
    -------
    pandas.DataFrame
        Indexed like the cleaned input. Column 'Ret' is the target: the
        5-row rolling mean of the next-row change in 'Adj Close'. The other
        columns are features (HLS, COS, EMA, DEMA, Bollinger bands, MACD,
        RSI, ADX, OBV, WILLR). Rows may contain NaN (e.g. rolling windows
        without enough history, or the last row whose next close is
        unknown); callers are expected to drop them.
    """
    # dropna() without inplace returns a new frame, so the caller's data is
    # left untouched.
    clean = hist_data.dropna()
    op = clean['Open']
    hi = clean['High']
    lo = clean['Low']
    cl = clean['Adj Close']
    vo = clean['Volume']

    df = pd.DataFrame(index=clean.index)
    # Define Target variable: next-row close minus current close, smoothed
    # over a 5-row window.
    df['Ret'] = (cl.shift(-1) - cl).rolling(5).mean()

    # Define features
    df['HLS'] = hi - lo  # high-low spread
    df['COS'] = cl - op  # close-open spread
    df['EMA'] = talib.EMA(cl, timeperiod=20)
    df['DEMA'] = talib.DEMA(cl, timeperiod=20)
    df['BBANDS_upper'], df['BBANDS_middle'], df['BBANDS_lower'] = talib.BBANDS(
        cl, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
    )
    df['MACD_macd'], df['MACD_macdsignal'], df['MACD_macdhist'] = talib.MACD(
        cl, fastperiod=12, slowperiod=26, signalperiod=9
    )
    df['RSI'] = talib.RSI(cl, timeperiod=20)
    df['ADX'] = talib.ADX(hi, lo, cl, timeperiod=20)
    df['OBV'] = talib.OBV(cl, vo)
    df['WILLR'] = talib.WILLR(hi, lo, cl, timeperiod=30)

    return df

In [None]:
# Import Basic Libraries
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import yfinance as yf

from sklearn.metrics import *
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

In [None]:
# Download NIFTY 50 daily history and move the date index into a regular
# 'Date' column so the remaining columns can be scaled on their own.
nse_data = yf.download("^NSEI", start='2014-01-01', end='2023-02-28').reset_index()
nse_data.head()

In [None]:
# Fraction of the most recent rows held out for testing.
split_size = 0.2
# Positional boundary in `nse_data`: rows before it are training data, rows
# from it onward are test data. Derived from `split_size` so the two cannot
# drift apart.
split_index = int(nse_data.shape[0] * (1 - split_size))

In [None]:
# Chronological split of the numeric columns ('Date' is excluded because it
# is not scaled).
train_set = nse_data.iloc[:split_index].drop(columns='Date')
test_set = nse_data.iloc[split_index:].drop(columns='Date')

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max from the training rows only, then apply the same scaling
# to both partitions. Rebuilding the frames from the returned arrays resets
# each index to 0..n-1.
scaler = MinMaxScaler()
scaler.fit(train_set)
train_set = pd.DataFrame(scaler.transform(train_set), columns=train_set.columns)
test_set = pd.DataFrame(scaler.transform(test_set), columns=test_set.columns)

In [None]:
# Stitch the scaled partitions back together in chronological order and
# restore the trading dates as the index.
data = pd.concat([train_set, test_set], axis=0).set_index(nse_data.Date)

In [None]:
# Build target + indicator columns, then discard every row that still has a
# missing value in any column.
features = get_technical_indicators(data).dropna(axis=0)

In [None]:
# Split on the calendar date that marked the train/test boundary when the
# scaler was fitted. `split_index` is a row position in `nse_data`, but
# `features` has had its NaN rows dropped, so reusing the position here would
# shift the boundary and place rows that were scaled as test data into the
# training set.
split_date = nse_data['Date'].iloc[split_index]
is_train = features.index < split_date

X_train = features.loc[is_train].drop('Ret', axis=1)
X_test = features.loc[~is_train].drop('Ret', axis=1)
y_train = features.loc[is_train, 'Ret']
y_test = features.loc[~is_train, 'Ret']

In [None]:
# Univariate feature selection followed by a ridge regressor.
pipe = Pipeline(
    steps=[
        ('feat_sel', SelectKBest(f_regression)),
        ('model', Ridge(alpha=0.1)),
    ]
)

# Five forward-chaining folds, each validated on 10 rows.
cv = TimeSeriesSplit(n_splits=5, test_size=10)

# `features` still contains the 'Ret' target column, so this range runs from
# 1 up to the number of predictor columns.
parameters = {'feat_sel__k': list(range(1, len(features.columns)))}

clf = GridSearchCV(
    pipe,
    parameters,
    scoring='neg_root_mean_squared_error',
    cv=cv,
    verbose=1,
)
clf.fit(X_train, y_train)

In [None]:
# Show the number of features (`feat_sel__k`) chosen by the grid search.
clf.best_params_

In [None]:
# Re-create the selector with the tuned `k`, fit it on the training data
# only, and project both partitions onto the selected columns.
best_k = clf.best_params_['feat_sel__k']
feat_sel = SelectKBest(f_regression, k=best_k)
feat_sel.fit(X_train, y_train)
X_train_rev = feat_sel.transform(X_train)
X_test_rev = feat_sel.transform(X_test)
feat_sel.get_feature_names_out()

In [None]:
# Final ridge regressor trained on the selected training features.
model = Ridge(alpha=0.1)
model.fit(X=X_train_rev, y=y_train)

In [None]:
# In-sample and out-of-sample predictions from the fitted model.
train_pred, test_pred = (
    model.predict(X_train_rev),
    model.predict(X_test_rev),
)

In [None]:
# Predicted vs. actual target on the training rows; plot the first 500 only.
train_result = pd.DataFrame(
    {'train_pred': train_pred, 'y_train': y_train},
    index=y_train.index,
)
train_result.head(500).plot()

In [None]:
# Training-set fit quality. The error value is the square root of the MSE,
# so it is reported under an RMSE key rather than an MSE one.
score = {
    'train_r2': r2_score(y_train, train_pred),
    'train_rmse': np.sqrt(mean_squared_error(y_train, train_pred)),
}
score

In [None]:
# Predicted vs. actual target on the held-out rows.
test_result = pd.DataFrame(
    {'test_pred': test_pred, 'y_test': y_test},
    index=y_test.index,
)
test_result.plot()

In [None]:
# Held-out fit quality. The error value is the square root of the MSE, so it
# is reported under an RMSE key rather than an MSE one.
score = {
    'test_r2': r2_score(y_test, test_pred),
    'test_rmse': np.sqrt(mean_squared_error(y_test, test_pred)),
}
score

In [None]:
# Stack the train and test result frames row-wise and plot every column on
# one chart.
vis = pd.concat((train_result, test_result))
vis.plot(figsize=(10, 6))