# Getting the list of all available expiry dates for NIFTY using the given API

In [69]:
import requests

# Define the API endpoint
api_url = "https://live.markethound.in/api/history/expiries?index=NIFTY"

# Send GET request to the API
response = requests.get(api_url)

# Check for successful response
if response.status_code == 200:
  # Parse the JSON response
  data = response.json()

  # Access the expiry dates
  expiry_dates = data["result"]

  # Print the expiry dates
  print("Expiry Dates for NIFTY:")
  for expiry in expiry_dates:
    print(expiry)
else:
  print(f"Error: API request failed with status code {response.status_code}")

Expiry Dates for NIFTY:
2024-03-14T00:00:00.000Z
2024-03-07T00:00:00.000Z
2024-02-29T00:00:00.000Z
2024-02-22T00:00:00.000Z
2024-02-15T00:00:00.000Z
2024-02-08T00:00:00.000Z
2024-02-01T00:00:00.000Z
2024-01-25T00:00:00.000Z
2024-01-18T00:00:00.000Z
2024-01-11T00:00:00.000Z
2024-01-04T00:00:00.000Z
2023-12-28T00:00:00.000Z
2023-12-21T00:00:00.000Z
2023-12-14T00:00:00.000Z
2023-12-07T00:00:00.000Z
2023-11-30T00:00:00.000Z
2023-11-23T00:00:00.000Z
2023-11-16T00:00:00.000Z
2023-11-09T00:00:00.000Z
2023-11-02T00:00:00.000Z
2023-10-26T00:00:00.000Z
2023-10-19T00:00:00.000Z
2023-10-12T00:00:00.000Z
2023-10-05T00:00:00.000Z
2023-09-28T00:00:00.000Z
2023-09-21T00:00:00.000Z
2023-09-14T00:00:00.000Z
2023-09-07T00:00:00.000Z
2023-08-31T00:00:00.000Z
2023-08-24T00:00:00.000Z
2023-08-17T00:00:00.000Z
2023-08-10T00:00:00.000Z
2023-08-03T00:00:00.000Z
2023-07-27T00:00:00.000Z
2023-07-20T00:00:00.000Z


## **Preparing the DataSet**
## Creating the dataframe of historic straddle and index datas retrieved from the given API

In [3]:
import pandas as pd

data_list1 = []

# iterating over all expiry dates
for expiry_date in expiry_dates:
  # collecting date from 0 to 4 only
  for dte in range(0,5):
   # Define the API endpoint
   api_url = f"https://live.markethound.in/api/history/decay?name=NIFTY&expiry={expiry_date}&dte={dte}"

   # Send GET request to the API
   response = requests.get(api_url)

   # Check for successful response
   if response.status_code == 200:
     # Parse the JSON response
     data = response.json()

     # Access the expiry dates
     results = data["result"]

     if results:
       # Create a list of dictionaries to store data
       # appending 1st element only because all other elements are of other expiry dates so they will be repeated if we take all of results
       data_list1.append(results[0])

   else:
     print(f"Error: API request failed with status code {response.status_code}")

# Create the DataFrame
df = pd.DataFrame(data_list1)

df

Unnamed: 0,_id,daysToExpiry,expiry,index,close,date,high,index_close,index_high,index_low,index_open,intradayMovement,intradayTotal,low,open,overnightExpected,overnightGap
0,65f2e9b5301dfda3b6b521fd,0,2024-03-14T00:00:00.000Z,NIFTY,3.40,2024-03-14T00:00:00.000Z,156.80,22144.95,22201.90,21928.65,21956.00,273.25,141.20,2.70,144.60,0,0
1,65f2e9b5301dfda3b6b5220a,1,2024-03-14T00:00:00.000Z,NIFTY,151.35,2024-03-13T00:00:00.000Z,224.55,21984.20,22411.65,21909.55,22403.05,502.10,36.30,151.35,187.65,6.75,28.20
2,65f2e9b5301dfda3b6b5221c,2,2024-03-14T00:00:00.000Z,NIFTY,198.55,2024-03-12T00:00:00.000Z,233.15,22335.65,22450.70,22261.40,22341.65,189.30,22.15,195.25,220.70,10.90,-67.40
3,65f2e9b5301dfda3b6b5222f,3,2024-03-14T00:00:00.000Z,NIFTY,230.55,2024-03-11T00:00:00.000Z,260.85,22325.40,22511.00,22311.20,22486.95,199.80,30.30,230.55,260.85,9.85,-16.25
4,65f2e9b5301dfda3b6b5223d,4,2024-03-14T00:00:00.000Z,NIFTY,254.45,2024-03-07T00:00:00.000Z,314.10,22483.60,22519.60,22439.20,22498.05,80.40,59.65,247.95,314.10,-6.40,-3.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,6500482a50132f5a00c71af7,3,2023-07-27T00:00:00.000Z,NIFTY,173.05,2023-07-24T00:00:00.000Z,212.20,19663.65,19781.45,19663.65,19729.35,117.80,39.15,173.05,212.20,2.40,-34.10
169,6500482a50132f5a00c71b23,4,2023-07-27T00:00:00.000Z,NIFTY,215.80,2023-07-21T00:00:00.000Z,237.80,19772.00,19885.90,19709.10,19797.10,176.80,13.20,211.45,229.00,3.60,42.65
170,6500482450132f5a00c711cb,0,2023-07-20T00:00:00.000Z,NIFTY,20.10,2023-07-20T00:00:00.000Z,95.10,19967.70,19989.90,19759.65,19831.10,230.25,74.90,15.80,95.00,0,0
171,6500482450132f5a00c711fb,1,2023-07-20T00:00:00.000Z,NIFTY,94.65,2023-07-19T00:00:00.000Z,127.50,19839.55,19841.60,19730.65,19837.75,110.95,32.25,94.30,126.90,-0.35,8.45


In [4]:
# check if any null values are present
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   _id                173 non-null    object
 1   daysToExpiry       173 non-null    int64 
 2   expiry             173 non-null    object
 3   index              173 non-null    object
 4   close              173 non-null    object
 5   date               173 non-null    object
 6   high               173 non-null    object
 7   index_close        173 non-null    object
 8   index_high         173 non-null    object
 9   index_low          173 non-null    object
 10  index_open         173 non-null    object
 11  intradayMovement   173 non-null    object
 12  intradayTotal      173 non-null    object
 13  low                173 non-null    object
 14  open               173 non-null    object
 15  overnightExpected  173 non-null    object
 16  overnightGap       173 non-null    object
dt

### Dropping unnecessary columns

In [102]:
# dropping the following columns, they are not relavant
df_dropped = df.drop(columns=['_id', 'index', 'expiry'])
df_dropped

Unnamed: 0,daysToExpiry,close,date,high,index_close,index_high,index_low,index_open,intradayMovement,intradayTotal,low,open,overnightExpected,overnightGap
0,0,3.40,2024-03-14T00:00:00.000Z,156.80,22144.95,22201.90,21928.65,21956.00,273.25,141.20,2.70,144.60,0,0
1,1,151.35,2024-03-13T00:00:00.000Z,224.55,21984.20,22411.65,21909.55,22403.05,502.10,36.30,151.35,187.65,6.75,28.20
2,2,198.55,2024-03-12T00:00:00.000Z,233.15,22335.65,22450.70,22261.40,22341.65,189.30,22.15,195.25,220.70,10.90,-67.40
3,3,230.55,2024-03-11T00:00:00.000Z,260.85,22325.40,22511.00,22311.20,22486.95,199.80,30.30,230.55,260.85,9.85,-16.25
4,4,254.45,2024-03-07T00:00:00.000Z,314.10,22483.60,22519.60,22439.20,22498.05,80.40,59.65,247.95,314.10,-6.40,-3.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,3,173.05,2023-07-24T00:00:00.000Z,212.20,19663.65,19781.45,19663.65,19729.35,117.80,39.15,173.05,212.20,2.40,-34.10
169,4,215.80,2023-07-21T00:00:00.000Z,237.80,19772.00,19885.90,19709.10,19797.10,176.80,13.20,211.45,229.00,3.60,42.65
170,0,20.10,2023-07-20T00:00:00.000Z,95.10,19967.70,19989.90,19759.65,19831.10,230.25,74.90,15.80,95.00,0,0
171,1,94.65,2023-07-19T00:00:00.000Z,127.50,19839.55,19841.60,19730.65,19837.75,110.95,32.25,94.30,126.90,-0.35,8.45


## We will keep data till 12th march, because we have to predict for 12th march


In [130]:
# save the target column, i.e., 12th march's column
target_column = df_dropped.iloc[2]

# drop the excess columns
df_train = df_dropped.drop(df.index[:2])
df_train.reset_index(drop=True)
df_train

Unnamed: 0,daysToExpiry,close,date,high,index_close,index_high,index_low,index_open,intradayMovement,intradayTotal,low,open,overnightExpected,overnightGap
2,2,198.55,2024-03-12T00:00:00.000Z,233.15,22335.65,22450.70,22261.40,22341.65,189.30,22.15,195.25,220.70,10.90,-67.40
3,3,230.55,2024-03-11T00:00:00.000Z,260.85,22325.40,22511.00,22311.20,22486.95,199.80,30.30,230.55,260.85,9.85,-16.25
4,4,254.45,2024-03-07T00:00:00.000Z,314.10,22483.60,22519.60,22439.20,22498.05,80.40,59.65,247.95,314.10,-6.40,-3.35
5,0,6.55,2024-03-07T00:00:00.000Z,118.00,22483.60,22519.60,22439.20,22498.05,80.40,111.45,4.65,118.00,0,0
6,1,125.55,2024-03-06T00:00:00.000Z,190.60,22484.55,22486.55,22227.55,22304.55,259.00,59.80,123.10,185.35,7.55,-13.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,3,173.05,2023-07-24T00:00:00.000Z,212.20,19663.65,19781.45,19663.65,19729.35,117.80,39.15,173.05,212.20,2.40,-34.10
169,4,215.80,2023-07-21T00:00:00.000Z,237.80,19772.00,19885.90,19709.10,19797.10,176.80,13.20,211.45,229.00,3.60,42.65
170,0,20.10,2023-07-20T00:00:00.000Z,95.10,19967.70,19989.90,19759.65,19831.10,230.25,74.90,15.80,95.00,0,0
171,1,94.65,2023-07-19T00:00:00.000Z,127.50,19839.55,19841.60,19730.65,19837.75,110.95,32.25,94.30,126.90,-0.35,8.45


# Performing Feature Engineering or Feature Extraction

In [131]:
# Feature engineering on straddle OHLC

for column in df_train.columns:
  if column != "date":
    df_train[column] = df_train[column].astype(float)

# 1. Moving Averages
df_train['SMA_5'] = df_train['close'].rolling(window=5).mean()
df_train['EMA_5'] = df_train['close'].ewm(span=5, min_periods=5).mean()

# 2. Log Returns
df_train['close'] = df_train['close'].astype(float)
df_train['log_return'] = np.log( df_train['close'] / df_train['close'].shift(1))

# 3) technical indicators
def add_technical_indicators(df):
  """
  Calculates and adds technical indicators to the DataFrame.

  Args:
      df (pd.DataFrame): DataFrame containing index and straddle price data.

  Returns:
      pd.DataFrame: DataFrame with added technical indicators.
  """
  # Stochastic Oscillator (%K, %D)
  df['high_window'] = df['high'].rolling(window=14).max()
  df['low_window'] = df['low'].rolling(window=14).min()
  df['K'] = 100 * (df['close'] - df['low_window']) / (df['high_window'] - df['low_window'])
  df['D'] = df['K'].ewm(span=3, min_periods=3).mean()

  # Average True Range (ATR)
  df['true_range'] = df[['high', 'low', 'close']].max(axis=1) - df[['high', 'low', 'close']].min(axis=1)
  df['average_gain'] = df['true_range'].where(df['close'] > df['open'], other=0).ewm(alpha=1/14, min_periods=14).mean()
  df['average_loss'] = -df['true_range'].where(df['close'] < df['open'], other=0).ewm(alpha=1/14, min_periods=14).mean()
  df['ATR'] = df[['average_gain', 'average_loss']].max(axis=1)

  # Relative Strength Index (RSI)
  df['delta'] = df['close'].diff()
  df['up'] = df['delta'].clip(lower=0)
  df['down'] = -df['delta'].clip(upper=0)
  df['ema_up'] = df['up'].ewm(alpha=1/14, min_periods=14).mean()
  df['ema_down'] = df['down'].ewm(alpha=1/14, min_periods=14).mean()
  df['RSI'] = 100 * df['ema_up'] / (df['ema_up'] + df['ema_down'])

  # Drop temporary columns
  df.drop(columns=['high_window', 'low_window', 'true_range', 'average_gain', 'average_loss', 'delta', 'up', 'down', 'ema_up', 'ema_down'], inplace=True)

  return df

# Add technical indicators
df_train = add_technical_indicators(df_train.copy())

df_train

Unnamed: 0,daysToExpiry,close,date,high,index_close,index_high,index_low,index_open,intradayMovement,intradayTotal,...,open,overnightExpected,overnightGap,SMA_5,EMA_5,log_return,K,D,ATR,RSI
2,2.0,198.55,2024-03-12T00:00:00.000Z,233.15,22335.65,22450.70,22261.40,22341.65,189.30,22.15,...,220.70,10.90,-67.40,,,,,,,
3,3.0,230.55,2024-03-11T00:00:00.000Z,260.85,22325.40,22511.00,22311.20,22486.95,199.80,30.30,...,260.85,9.85,-16.25,,,0.149427,,,,
4,4.0,254.45,2024-03-07T00:00:00.000Z,314.10,22483.60,22519.60,22439.20,22498.05,80.40,59.65,...,314.10,-6.40,-3.35,,,0.098637,,,,
5,0.0,6.55,2024-03-07T00:00:00.000Z,118.00,22483.60,22519.60,22439.20,22498.05,80.40,111.45,...,118.00,0.00,0.00,,,-3.659639,,,,
6,1.0,125.55,2024-03-06T00:00:00.000Z,190.60,22484.55,22486.55,22227.55,22304.55,259.00,59.80,...,185.35,7.55,-13.50,163.13,134.566114,2.953239,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,3.0,173.05,2023-07-24T00:00:00.000Z,212.20,19663.65,19781.45,19663.65,19729.35,117.80,39.15,...,212.20,2.40,-34.10,124.62,134.606491,0.240926,76.629936,63.911273,0.186814,52.949372
169,4.0,215.80,2023-07-21T00:00:00.000Z,237.80,19772.00,19885.90,19709.10,19797.10,176.80,13.20,...,229.00,3.60,42.65,129.55,161.670994,0.220771,90.502914,77.207094,0.173470,55.052942
170,0.0,20.10,2023-07-20T00:00:00.000Z,95.10,19967.70,19989.90,19759.65,19831.10,230.25,74.90,...,95.00,0.00,0.00,131.64,114.480663,-2.373632,6.022016,41.614555,0.161079,45.110211
171,1.0,94.65,2023-07-19T00:00:00.000Z,127.50,19839.55,19841.60,19730.65,19837.75,110.95,32.25,...,126.90,-0.35,8.45,127.92,107.870442,1.549466,38.204187,39.909371,0.149573,48.896529


## filling the NaN values using Iterative Imputer

In [132]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Create an IterativeImputer object
imputer = IterativeImputer(random_state=42, max_iter=1000)

# Fit the imputer on the DataFrame (excluding target variable if applicable)
imputer.fit(df_train[['SMA_5', 'EMA_5','log_return', 'K', 'D', 'ATR', 'RSI']])

# Transform the DataFrame using the fitted imputer
filled_columns = imputer.transform(df_train[['SMA_5', 'EMA_5', 'log_return', 'K', 'D', 'ATR', 'RSI']])
df_train[['SMA_5', 'EMA_5', 'log_return', 'K', 'D', 'ATR', 'RSI']] = filled_columns

# Print the DataFrame with imputed values
df_train

Unnamed: 0,daysToExpiry,close,date,high,index_close,index_high,index_low,index_open,intradayMovement,intradayTotal,...,open,overnightExpected,overnightGap,SMA_5,EMA_5,log_return,K,D,ATR,RSI
2,2.0,198.55,2024-03-12T00:00:00.000Z,233.15,22335.65,22450.70,22261.40,22341.65,189.30,22.15,...,220.70,10.90,-67.40,141.373271,141.298127,-0.002339,49.036791,49.025560,1.349798,49.933097
3,3.0,230.55,2024-03-11T00:00:00.000Z,260.85,22325.40,22511.00,22311.20,22486.95,199.80,30.30,...,260.85,9.85,-16.25,141.261549,140.855643,0.149427,49.626503,48.894788,1.352490,50.005663
4,4.0,254.45,2024-03-07T00:00:00.000Z,314.10,22483.60,22519.60,22439.20,22498.05,80.40,59.65,...,314.10,-6.40,-3.35,141.298938,141.003725,0.098637,49.429150,48.938553,1.351589,49.981378
5,0.0,6.55,2024-03-07T00:00:00.000Z,118.00,22483.60,22519.60,22439.20,22498.05,80.40,111.45,...,118.00,0.00,0.00,144.065566,151.961240,-3.659639,34.825761,52.176930,1.284944,48.184382
6,1.0,125.55,2024-03-06T00:00:00.000Z,190.60,22484.55,22486.55,22227.55,22304.55,259.00,59.80,...,185.35,7.55,-13.50,163.130000,134.566114,2.953239,42.805676,36.811819,1.344852,49.239242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,3.0,173.05,2023-07-24T00:00:00.000Z,212.20,19663.65,19781.45,19663.65,19729.35,117.80,39.15,...,212.20,2.40,-34.10,124.620000,134.606491,0.240926,76.629936,63.911273,0.186814,52.949372
169,4.0,215.80,2023-07-21T00:00:00.000Z,237.80,19772.00,19885.90,19709.10,19797.10,176.80,13.20,...,229.00,3.60,42.65,129.550000,161.670994,0.220771,90.502914,77.207094,0.173470,55.052942
170,0.0,20.10,2023-07-20T00:00:00.000Z,95.10,19967.70,19989.90,19759.65,19831.10,230.25,74.90,...,95.00,0.00,0.00,131.640000,114.480663,-2.373632,6.022016,41.614555,0.161079,45.110211
171,1.0,94.65,2023-07-19T00:00:00.000Z,127.50,19839.55,19841.60,19730.65,19837.75,110.95,32.25,...,126.90,-0.35,8.45,127.920000,107.870442,1.549466,38.204187,39.909371,0.149573,48.896529


# **Performing Prediction**
## We are using LSTM model for our prediction process

In [134]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

# Define lookback window
look_back = 1


def prepare_data(df_train, features):

  X_train, y_train = [], []

  for i in range(look_back, len(df_train)):
    past_days = df_train[features].iloc[i-look_back:i]
    # Convert features to numpy array
    past_days = past_days.to_numpy()
    # Reshape for LSTM input
    past_days = past_days.reshape(1, look_back, len(features))
    # Target OHLC values (Open, High, Low, Close)
    target = df_train[['open', 'high', 'low', 'close']].iloc[i].to_numpy()

    X_train.append(past_days)
    y_train.append(target)

    X_train_ = np.array(X_train)
    y_train_ = np.array(y_train)
    X_train_ = X_train_.astype(np.float32)
    y_train_ = y_train_.astype(np.float32)

    # Reshape X_train to remove the extra dimension
    X_train_ = X_train_.reshape(-1, look_back, len(features))  # -1 infers the first dimension

  return X_train_, y_train_


# Select features
features = df_train.columns.tolist()
features.remove('date')

# Convert data to numpy arrays
X_train, y_train = prepare_data(df_train.iloc[1:], features)  # Skip the first row for testing


# Define the LSTM model
model = Sequential()
model.add(LSTM(units=170, activation='relu', return_sequences=True, input_shape=(look_back, len(features))))
model.add(LSTM(units=154, activation='relu', return_sequences=True))
model.add(LSTM(units=137, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01)))
model.add(LSTM(units=121, activation='relu', return_sequences=True))
model.add(LSTM(units=104, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01)))
model.add(LSTM(units=88, activation='relu', return_sequences=True))
model.add(LSTM(units=71, activation='relu', return_sequences=True, kernel_regularizer=l2(0.1)))
model.add(LSTM(units=55, activation='relu', return_sequences=True))
model.add(LSTM(units=38, activation='relu', return_sequences=True, kernel_regularizer=l2(0.1)))
model.add(LSTM(units=22, activation='relu', return_sequences=True))
model.add(LSTM(units=8, activation='relu'))
model.add(Dense(units=4, activation='linear'))  # Output layer with 4 units for open, high, low, close


# Define optimizer with learning rate scheduler
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001, decay=1e-5)

# Compile the model
model.compile(loss='mae', optimizer=optimizer, metrics=['accuracy'])

# Monitor loss for 10 epochs
early_stopping = EarlyStopping(monitor='loss', patience=10)

# Train the model (adjust epochs based on your needs)
model.fit(X_train, y_train, epochs=100, batch_size=6, callbacks=[early_stopping])

# Prepare testing data (first row)
test_data = df_train[features].iloc[0].to_numpy()
test_data_ = test_data.astype(np.float32)
test_data_ = test_data_.reshape(1, look_back, len(features))

# Predict OHLC for the test data
predicted_ohlc = model.predict(test_data_)

# Print predicted OHLC values
print("Predicted OHLC:", predicted_ohlc[0])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Predicted OHLC: [224.91643 234.4111  189.33315 191.52945]


In [135]:
o = float(target_column['open'])
h = float(target_column['high'])
l = float(target_column['low'])
c = float(target_column['close'])

actual_ohlc = [o,h,l,c]

# Comparing the predicted results from the actual data

In [136]:
print("For NIFTY50 and March 12th 2024, these are the results for the straddle OHLC prediction:")
print("Predicted OHLC:", predicted_ohlc[0])
print("Actual OHLC:", actual_ohlc)

For NIFTY50 and March 12th 2024, these are the results for the straddle OHLC prediction:
Predicted OHLC: [224.91643 234.4111  189.33315 191.52945]
Actual OHLC: [220.7, 233.15, 195.25, 198.55]
