# Import Required Libraries
Import necessary libraries such as pandas, numpy, sklearn, tensorflow, and matplotlib.

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
import logging
import tensorflow as tf
from tensorflow.python.client import device_lib

# Configure logging.
# force=True replaces any handlers already attached to the root logger (hosted
# notebook runtimes often install their own). Without it basicConfig() is a
# silent no-op in that situation and the INFO messages below never appear.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', force=True)

# Configure Logging
Set up logging configuration to display information during execution.

In [14]:
# Configure logging (this cell repeats the call made in the import cell).
# force=True makes the call take effect even though the root logger already
# has handlers by this point; a plain basicConfig() would silently do nothing.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', force=True)

# Define Functions for Technical Indicators
Define functions to calculate technical indicators like SMA, EMA, RSI, ATR, Bollinger Bands, Momentum, ROC, Log Returns, High-Low Spread, and Open-Close Spread.

In [15]:
# Define Functions for Technical Indicators

def calculate_indicators(df):
    """Add technical-indicator columns derived from OHLC prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'open', 'high', 'low' and 'close' columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus SMA_10, EMA_10, RSI,
        ATR, Bollinger_Upper, Bollinger_Lower, Momentum, ROC, Log_Returns,
        High_Low_Spread and Open_Close_Spread, with every row that contains
        a NaN removed (this includes the leading rows where the rolling
        windows are not yet full). The frame passed in is left untouched.
    """
    logging.info("Calculating technical indicators...")

    # Work on a copy: assigning columns directly would silently modify the
    # caller's frame, while the returned (dropna'd) frame is a different object.
    df = df.copy()
    close = df['close']
    close_lag_10 = close.shift(10)  # shared by Momentum and ROC

    df['SMA_10'] = close.rolling(window=10).mean()
    df['EMA_10'] = close.ewm(span=10, adjust=False).mean()
    df['RSI'] = calculate_rsi(close)
    df['ATR'] = calculate_atr(df)
    df['Bollinger_Upper'], df['Bollinger_Lower'] = calculate_bollinger_bands(close)
    df['Momentum'] = close - close_lag_10
    df['ROC'] = (close - close_lag_10) / close_lag_10
    df['Log_Returns'] = np.log(close / close.shift(1))
    df['High_Low_Spread'] = df['high'] - df['low']
    df['Open_Close_Spread'] = df['open'] - close
    return df.dropna()

def calculate_rsi(series, window=14):
    """Relative Strength Index based on simple rolling means.

    Parameters
    ----------
    series : pandas.Series
        Price series.
    window : int, default 14
        Number of rows averaged for the gain and loss components.

    Returns
    -------
    pandas.Series
        100 - 100 / (1 + average_gain / average_loss); NaN until the rolling
        window is full.
    """
    change = series.diff()

    # Split row-to-row changes into an "up" part and a (positive) "down" part;
    # anything not matching the condition, including the leading NaN, becomes 0.
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)

    average_gain = ups.rolling(window=window).mean()
    average_loss = downs.rolling(window=window).mean()

    relative_strength = average_gain / average_loss
    return 100 - (100 / (1 + relative_strength))

def calculate_atr(df, window=14):
    """Average True Range as a simple rolling mean of the true range.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'high', 'low' and 'close' columns.
    window : int, default 14
        Number of rows averaged.

    Returns
    -------
    pandas.Series
        Rolling mean of the true range; NaN until the window is full.
    """
    previous_close = df['close'].shift()

    # The true range is the largest of the three candidate ranges per row.
    # The row-wise max skips NaN, so the first row falls back to high - low.
    candidates = pd.DataFrame({
        'high_low': df['high'] - df['low'],
        'high_prev_close': (df['high'] - previous_close).abs(),
        'low_prev_close': (df['low'] - previous_close).abs(),
    })
    true_range = candidates.max(axis=1)
    return true_range.rolling(window=window).mean()

def calculate_bollinger_bands(series, window=20, num_sd=2):
    """Upper and lower Bollinger Bands around a rolling mean.

    Parameters
    ----------
    series : pandas.Series
        Price series.
    window : int, default 20
        Rolling window length for both the mean and the standard deviation.
    num_sd : int or float, default 2
        Band half-width expressed in rolling standard deviations.

    Returns
    -------
    tuple of pandas.Series
        (upper_band, lower_band); NaN until the window is full.
    """
    rolling_window = series.rolling(window=window)
    middle_band = rolling_window.mean()
    half_width = num_sd * rolling_window.std()
    return middle_band + half_width, middle_band - half_width

# Define Functions for Data Preparation and Plotting
Define functions to prepare LSTM input data, plot training loss, and plot predictions vs actual prices.

In [16]:
# Define Functions for Data Preparation and Plotting

def prepare_lstm_data(X, y, time_steps):
    """Slice X into overlapping windows paired with the value of y that follows each.

    Window k is X[k : k + time_steps] and its target is y[k + time_steps],
    so len(X) - time_steps samples are produced (none when that is <= 0).

    Parameters
    ----------
    X : sequence supporting len() and slicing
        Feature rows.
    y : sequence supporting integer indexing
        Targets, indexed by the same positions as X.
    time_steps : int
        Number of consecutive rows per window.

    Returns
    -------
    tuple of numpy.ndarray
        (windows, targets).
    """
    logging.info("Preparing LSTM input data...")
    sample_count = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(sample_count)]
    targets = [y[start + time_steps] for start in range(sample_count)]
    return np.array(windows), np.array(targets)

def plot_training_loss(history):
    """Plot the training and validation loss curves of a fitted model.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with 'loss' and 'val_loss' entries
        (one value per epoch), as read below.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    losses = history.history
    ax.plot(losses['loss'], label='Training Loss')
    ax.plot(losses['val_loss'], label='Validation Loss')
    ax.set_title('Training and Validation Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    plt.show()

def plot_predictions(y_actual, y_pred):
    """Overlay actual and predicted values on a single line chart.

    Parameters
    ----------
    y_actual : array-like
        Observed values, plotted against their position.
    y_pred : array-like
        Predicted values, plotted against their position.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(y_actual, label='Actual Prices')
    ax.plot(y_pred, label='Predicted Prices')
    ax.set_title('Actual vs Predicted Prices')
    ax.set_xlabel('Time Steps')
    ax.set_ylabel('Price')
    ax.legend()
    plt.show()

# Load and Preprocess Dataset
Load the dataset, convert date column to datetime, and set it as the index.

In [17]:
# Read the raw CSV
df = pd.read_csv("/content/sample_data/NIFTY_100_minute.csv")

# Parse the 'date' column as datetimes and promote it to the index
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

# Preview the first rows
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-09 09:15:00,8300.6,8309.75,8300.6,8308.35,0
2015-01-09 09:16:00,8308.35,8308.65,8304.05,8304.2,0
2015-01-09 09:17:00,8304.2,8308.2,8304.0,8308.2,0
2015-01-09 09:18:00,8308.2,8315.3,8308.2,8315.3,0
2015-01-09 09:19:00,8315.3,8316.1,8314.85,8316.1,0


# Feature Engineering
Calculate technical indicators and add them as features to the dataset.

In [18]:
# Append the technical-indicator columns; calculate_indicators also drops
# every row that contains a NaN, so the frame gets shorter here.
df = df.pipe(calculate_indicators)

# Preview the enriched frame
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,SMA_10,EMA_10,RSI,ATR,Bollinger_Upper,Bollinger_Lower,Momentum,ROC,Log_Returns,High_Low_Spread,Open_Close_Spread
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015-01-09 09:34:00,8297.1,8302.95,8296.75,8302.95,0,8303.52,8303.33786,39.450355,4.978571,8319.768393,8295.651607,-13.25,-0.001593,0.000626,6.2,-5.85
2015-01-09 09:35:00,8302.95,8303.3,8297.8,8297.8,0,8302.365,8302.330976,33.726813,5.2,8320.020817,8294.344183,-11.55,-0.00139,-0.00062,5.5,5.15
2015-01-09 09:36:00,8297.8,8300.35,8297.8,8299.05,0,8301.795,8301.734435,39.461467,4.903571,8320.213875,8293.636125,-5.7,-0.000686,0.000151,2.55,-1.25
2015-01-09 09:37:00,8298.0,8299.05,8291.6,8292.3,0,8300.295,8300.019083,36.324786,5.267857,8320.915818,8291.344182,-15.0,-0.001806,-0.000814,7.45,5.7
2015-01-09 09:38:00,8292.3,8295.65,8292.3,8295.6,0,8299.395,8299.215613,30.890538,4.942857,8319.983318,8290.306682,-9.0,-0.001084,0.000398,3.35,-3.3


# Feature Selection
Use RandomForestRegressor and RFECV to select important features.

In [None]:
# Define features and target
X = df.drop(columns=['close'])
y = df['close']

# Recursive feature elimination with cross-validation, ranked by a random forest.
# random_state pins the forest so the selected feature set is reproducible
# across runs (42 is an arbitrary seed).
rf = RandomForestRegressor(random_state=42)
rfecv = RFECV(estimator=rf, step=1, cv=5, scoring='neg_mean_squared_error')
print(f'rfecv: {rfecv}')

# Fit exactly once. fit() returns the selector itself, so printing `rfecv`
# afterwards shows the same thing as printing the return value of a second
# fit() call, without repeating the whole cross-validated elimination.
rfecv.fit(X, y)
print(f'rfecv.fit(X, y): {rfecv}')

# Keep only the columns flagged in the boolean support mask
X_selected = X.iloc[:, rfecv.support_]

# Display selected features
print(f'X_selected: {X_selected.head()}')

rfecv: RFECV(cv=5, estimator=RandomForestRegressor(), scoring='neg_mean_squared_error')


# Train-Test Split
Split the dataset chronologically (no shuffling) into training and testing sets, holding out the last 20% of rows for testing.

In [None]:
# Chronological hold-out: shuffle=False keeps row order, so the final 20%
# of rows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected,
    y,
    test_size=0.2,
    shuffle=False,
)

# Shapes of the four partitions
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Data Scaling
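Scale the features to the [0, 1] range using MinMaxScaler. The scaler is fit on the training set only and then applied to the test set; the target (`close`) is left unscaled.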

In [None]:
# Learn the per-feature min/max from the training rows only
scaler = MinMaxScaler().fit(X_train)

# Apply that same training-derived scaling to both partitions
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Preview the scaled training data with its original column labels
pd.DataFrame(X_train_scaled, columns=X_train.columns).head()

# Prepare Data for LSTM
Prepare the scaled data for LSTM model input.

In [None]:
# Prepare Data for LSTM

# Number of consecutive rows in each input window
time_steps = 10

# Window the scaled features; each window is paired with the (unscaled)
# target that follows it
X_train_lstm, y_train_lstm = prepare_lstm_data(
    X_train_scaled, y_train.to_numpy(), time_steps
)
X_test_lstm, y_test_lstm = prepare_lstm_data(
    X_test_scaled, y_test.to_numpy(), time_steps
)

# Shapes of the windowed arrays
X_train_lstm.shape, y_train_lstm.shape, X_test_lstm.shape, y_test_lstm.shape

# Build and Train LSTM Model
Build and train the LSTM model using the prepared data, then plot the loss curves and evaluate the predictions on the test set.

In [None]:
# Build and Train LSTM Model

# Two stacked LSTM layers with dropout, followed by a single-unit regression head.
# The input shape is (time_steps, n_features), read off the windowed training array.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the LSTM model (the test windows double as the validation set)
history = model.fit(X_train_lstm, y_train_lstm, epochs=50, batch_size=32, validation_data=(X_test_lstm, y_test_lstm), verbose=1)

# Plot Training Loss
plot_training_loss(history)

# Evaluate the model
y_pred = model.predict(X_test_lstm)
y_test_actual = y_test_lstm
# The model was trained on unscaled targets, so its output is already in price
# units. `scaler` was fit on the feature matrix, not on the target, so passing
# predictions through scaler.inverse_transform would either fail on a
# feature-count mismatch or distort the values. Just flatten (n, 1) -> (n,).
y_pred_actual = y_pred.flatten()

# Calculate Metrics
mae = mean_absolute_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual))
logging.info(f"Mean Absolute Error (MAE): {mae}")
logging.info(f"Root Mean Squared Error (RMSE): {rmse}")

# Plot Predictions
plot_predictions(y_test_actual, y_pred_actual)

# Evaluate Model
Evaluate the model's performance using metrics like MAE and RMSE.

In [None]:
# Evaluate the model
y_pred = model.predict(X_test_lstm)
y_test_actual = y_test_lstm
# Predictions are already in price units (the targets were never scaled).
# `scaler` was fit on the feature matrix, so inverse-transforming a single
# prediction column with it would fail or distort the values; flatten instead.
y_pred_actual = y_pred.flatten()

# Calculate Metrics
mae = mean_absolute_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual))
logging.info(f"Mean Absolute Error (MAE): {mae}")
logging.info(f"Root Mean Squared Error (RMSE): {rmse}")

# Plot Predictions
plot_predictions(y_test_actual, y_pred_actual)

# Plot Results
Plot the training loss and predictions vs actual prices.

In [None]:
# Plot Training Loss
plot_training_loss(history)

# Evaluate the model
y_pred = model.predict(X_test_lstm)
y_test_actual = y_test_lstm
# Predictions are already in price units (the targets were never scaled).
# `scaler` was fit on the feature matrix, so inverse-transforming a single
# prediction column with it would fail or distort the values; flatten instead.
y_pred_actual = y_pred.flatten()

# Calculate Metrics
mae = mean_absolute_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual))
logging.info(f"Mean Absolute Error (MAE): {mae}")
logging.info(f"Root Mean Squared Error (RMSE): {rmse}")

# Plot Predictions
plot_predictions(y_test_actual, y_pred_actual)