# Import libraries

In [73]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Read the merged dataset from its CSV file on disk
merged_csv = r"C:\Users\Radhika\OneDrive\Desktop\Radhika_klypto\data\nifty merged 5min.csv"
df = pd.read_csv(merged_csv)

# Print the first rows as a quick check that the load worked
print(df.head())

  Index Name        Date  Open_spot  High_spot  Low_spot  Close_spot Symbol  \
0   NIFTY 50  2025-01-14   23165.90   23264.95  23134.15    23176.05    NaN   
1   NIFTY 50  2025-01-15   23250.45   23293.65  23146.45    23213.20    NaN   
2   NIFTY 50  2025-01-16   23377.25   23391.65  23272.05    23311.80    NaN   
3   NIFTY 50  2025-01-17   23277.10   23292.10  23100.35    23203.20    NaN   
4   NIFTY 50  2025-01-20   23290.40   23391.10  23170.65    23344.75    NaN   

  Expiry  Open_fut  High_fut  ...  DaysToExpiry   TradeDate  STRIKE  LTP_y  \
0    NaN       NaN       NaN  ...           NaN  2025-01-14     NaN    NaN   
1    NaN       NaN       NaN  ...           NaN  2025-01-15     NaN    NaN   
2    NaN       NaN       NaN  ...           NaN  2025-01-16     NaN    NaN   
3    NaN       NaN       NaN  ...           NaN  2025-01-17     NaN    NaN   
4    NaN       NaN       NaN  ...           NaN  2025-01-20     NaN    NaN   

   IV  VOLUME  OI  CHNG IN OI  OptionType  ATM  
0 NaN  

In [74]:
# Convert the Date column to pandas datetimes so it is ordered as dates, not text
df['Date'] = pd.to_datetime(df['Date'])

# Sort chronologically and rebuild a clean 0..n-1 index.
# A stable algorithm (mergesort) is requested explicitly: the default sort does
# not guarantee the relative order of rows that share the same Date, and later
# cells use shift(1), whose result depends on row order.
df = df.sort_values('Date', kind='mergesort').reset_index(drop=True)

# EMA INDICATORS

In [75]:
# Exponential moving averages of the spot close, keyed as output column -> span.
# EMA_5 is the fast average, EMA_15 the slow one.
ema_spans = {'EMA_5': 5, 'EMA_15': 15}
for ema_col, ema_span in ema_spans.items():
    df[ema_col] = df['Close_spot'].ewm(span=ema_span, adjust=False).mean()


# RETURNS

In [76]:
# Spot log return: ln(close on this row / close on the previous row)
previous_spot_close = df['Close_spot'].shift(1)
df['Spot_Return'] = np.log(df['Close_spot'].div(previous_spot_close))

# Futures log return, same formula (will be NaN until futures data exists)
previous_fut_close = df['Close_fut'].shift(1)
df['Futures_Return'] = np.log(df['Close_fut'].div(previous_fut_close))


# FUTURES BASIS

In [77]:
# Futures basis: gap between futures close and spot close, as a fraction of spot
spot_close = df['Close_spot']
df['Futures_Basis'] = df['Close_fut'].sub(spot_close).div(spot_close)

# Task 2.1

Inputs needed to compute the Black-Scholes Greeks:

Spot price

Strike

Time to expiry (in years)

IV

Risk-free rate = 6.5%

In [78]:
pip install pandas numpy py_vollib

Note: you may need to restart the kernel to use updated packages.


In [79]:
from py_vollib.black_scholes.greeks.analytical import delta, gamma, theta, vega, rho

# Risk-free rate (6.5%) passed to every Greek calculation below
RISK_FREE_RATE = 0.065

# The Greek cells read df['Time_to_Expiry'] (time to expiry in years), but no
# earlier cell in this notebook creates that column. Derive it from DaysToExpiry
# when the loaded data does not already provide it.
# NOTE(review): assumes DaysToExpiry counts calendar days, hence the 365-day
# year - confirm against how the merged dataset was built.
# NOTE(review): py_vollib takes volatility as a decimal (0.15 = 15%); confirm
# the IV column is not stored in percent.
DAYS_PER_YEAR = 365.0
if 'Time_to_Expiry' not in df.columns:
    df['Time_to_Expiry'] = df['DaysToExpiry'] / DAYS_PER_YEAR

## Delta

In [80]:
def _delta_for_row(row):
    """Return the Black-Scholes delta for one row, or NaN when inputs are unusable.

    The option is priced as a call when OptionType is 'CE' and as a put
    otherwise. NaN is returned when IV is missing or non-positive, or when
    spot, strike or time to expiry is missing or non-positive: the d1 term
    divides by sigma * sqrt(T), so those rows have no defined Greek.
    """
    iv = row['IV']
    # IV is checked first so rows without option data never read the other columns
    if pd.isna(iv) or iv <= 0:
        return np.nan

    spot, strike, years = row['Close_spot'], row['STRIKE'], row['Time_to_Expiry']
    if pd.isna(spot) or pd.isna(strike) or pd.isna(years):
        return np.nan
    if spot <= 0 or strike <= 0 or years <= 0:
        return np.nan

    flag = 'c' if row['OptionType'] == 'CE' else 'p'
    return delta(flag, spot, strike, years, RISK_FREE_RATE, iv)


df['Delta'] = df.apply(_delta_for_row, axis=1)

## Gamma

In [81]:
def _gamma_for_row(row):
    """Return the Black-Scholes gamma for one row, or NaN when inputs are unusable.

    The option is priced as a call when OptionType is 'CE' and as a put
    otherwise. NaN is returned when IV is missing or non-positive, or when
    spot, strike or time to expiry is missing or non-positive: the d1 term
    divides by sigma * sqrt(T), so those rows have no defined Greek.
    """
    iv = row['IV']
    # IV is checked first so rows without option data never read the other columns
    if pd.isna(iv) or iv <= 0:
        return np.nan

    spot, strike, years = row['Close_spot'], row['STRIKE'], row['Time_to_Expiry']
    if pd.isna(spot) or pd.isna(strike) or pd.isna(years):
        return np.nan
    if spot <= 0 or strike <= 0 or years <= 0:
        return np.nan

    flag = 'c' if row['OptionType'] == 'CE' else 'p'
    return gamma(flag, spot, strike, years, RISK_FREE_RATE, iv)


df['Gamma'] = df.apply(_gamma_for_row, axis=1)

## Theta

In [82]:
def _theta_for_row(row):
    """Return the Black-Scholes theta for one row, or NaN when inputs are unusable.

    The option is priced as a call when OptionType is 'CE' and as a put
    otherwise. NaN is returned when IV is missing or non-positive, or when
    spot, strike or time to expiry is missing or non-positive: the d1 term
    divides by sigma * sqrt(T), so those rows have no defined Greek.
    """
    iv = row['IV']
    # IV is checked first so rows without option data never read the other columns
    if pd.isna(iv) or iv <= 0:
        return np.nan

    spot, strike, years = row['Close_spot'], row['STRIKE'], row['Time_to_Expiry']
    if pd.isna(spot) or pd.isna(strike) or pd.isna(years):
        return np.nan
    if spot <= 0 or strike <= 0 or years <= 0:
        return np.nan

    flag = 'c' if row['OptionType'] == 'CE' else 'p'
    return theta(flag, spot, strike, years, RISK_FREE_RATE, iv)


df['Theta'] = df.apply(_theta_for_row, axis=1)

## Vega

In [83]:
def _vega_for_row(row):
    """Return the Black-Scholes vega for one row, or NaN when inputs are unusable.

    The option is priced as a call when OptionType is 'CE' and as a put
    otherwise. NaN is returned when IV is missing or non-positive, or when
    spot, strike or time to expiry is missing or non-positive: the d1 term
    divides by sigma * sqrt(T), so those rows have no defined Greek.
    """
    iv = row['IV']
    # IV is checked first so rows without option data never read the other columns
    if pd.isna(iv) or iv <= 0:
        return np.nan

    spot, strike, years = row['Close_spot'], row['STRIKE'], row['Time_to_Expiry']
    if pd.isna(spot) or pd.isna(strike) or pd.isna(years):
        return np.nan
    if spot <= 0 or strike <= 0 or years <= 0:
        return np.nan

    flag = 'c' if row['OptionType'] == 'CE' else 'p'
    return vega(flag, spot, strike, years, RISK_FREE_RATE, iv)


df['Vega'] = df.apply(_vega_for_row, axis=1)

## Rho

In [84]:
def _rho_for_row(row):
    """Return the Black-Scholes rho for one row, or NaN when inputs are unusable.

    The option is priced as a call when OptionType is 'CE' and as a put
    otherwise. NaN is returned when IV is missing or non-positive, or when
    spot, strike or time to expiry is missing or non-positive: the d1 term
    divides by sigma * sqrt(T), so those rows have no defined Greek.
    """
    iv = row['IV']
    # IV is checked first so rows without option data never read the other columns
    if pd.isna(iv) or iv <= 0:
        return np.nan

    spot, strike, years = row['Close_spot'], row['STRIKE'], row['Time_to_Expiry']
    if pd.isna(spot) or pd.isna(strike) or pd.isna(years):
        return np.nan
    if spot <= 0 or strike <= 0 or years <= 0:
        return np.nan

    flag = 'c' if row['OptionType'] == 'CE' else 'p'
    return rho(flag, spot, strike, years, RISK_FREE_RATE, iv)


df['Rho'] = df.apply(_rho_for_row, axis=1)

## PCR_OI & PCR_VOLUME

In [85]:
# Put/call ratios per Date: the total over 'PE' rows divided by the total over
# 'CE' rows, broadcast back to every row that shares the Date.
#
# The option-type masks are built once and applied before grouping, instead of
# re-evaluating a full-frame mask inside a lambda for every Date group.
is_put = df['OptionType'].eq('PE')
is_call = df['OptionType'].eq('CE')

for source_col, ratio_col in (('OI', 'PCR_OI'), ('VOLUME', 'PCR_VOLUME')):
    put_total = df[source_col].where(is_put).groupby(df['Date']).transform('sum')
    call_total = df[source_col].where(is_call).groupby(df['Date']).transform('sum')
    # A Date with no call total has no defined ratio: emit NaN rather than inf
    df[ratio_col] = put_total / call_total.replace(0, np.nan)

## Gamma_Exposure

In [86]:
# Gamma exposure per row: spot close x Gamma x OI
spot_times_gamma = df['Close_spot'].mul(df['Gamma'])
df['Gamma_Exposure'] = spot_times_gamma.mul(df['OI'])

## Call_IV, Put_IV, Average_IV, IV_Spread

In [87]:
# Row-level IV split by option type: each row carries its IV in exactly one of
# these two columns and NaN in the other.
df['Call_IV'] = df['IV'].where(df['OptionType'] == 'CE')
df['Put_IV'] = df['IV'].where(df['OptionType'] == 'PE')

# Because Call_IV and Put_IV are never both populated on the same row, combining
# them row by row gives NaN everywhere. Pair calls with puts by averaging each
# side over the rows that share a Date, then broadcast the result to those rows.
# NOTE(review): Date is the same grouping key the PCR cell uses; if a Date holds
# several strikes or expiries this averages across them - confirm that is intended.
call_iv_by_date = df['Call_IV'].groupby(df['Date']).transform('mean')
put_iv_by_date = df['Put_IV'].groupby(df['Date']).transform('mean')

df['Average_IV'] = (call_iv_by_date + put_iv_by_date) / 2

df['IV_Spread'] = call_iv_by_date - put_iv_by_date

## Futures_Basis

In [88]:
# Futures basis: (futures close - spot close) relative to the spot close
fut_minus_spot = df['Close_fut'] - df['Close_spot']
df['Futures_Basis'] = fut_minus_spot / df['Close_spot']

## Spot_Return & Futures_Return

In [89]:
# Log returns of the spot and futures closes, keyed as output column -> close column
return_sources = {'Spot_Return': 'Close_spot', 'Futures_Return': 'Close_fut'}
for return_col, close_col in return_sources.items():
    close = df[close_col]
    df[return_col] = np.log(close / close.shift(1))

## Call_Delta, Put_Delta, Delta_Neutral_Ratio

In [90]:
# Row-level Delta split by option type: each row carries its Delta in exactly
# one of these two columns and NaN in the other.
df['Call_Delta'] = df['Delta'].where(df['OptionType'] == 'CE')
df['Put_Delta'] = df['Delta'].where(df['OptionType'] == 'PE')

# Call_Delta and Put_Delta are never both populated on the same row, so a
# row-wise ratio is NaN everywhere. Average the absolute deltas of each side
# over the rows that share a Date, then take the ratio of those averages.
# NOTE(review): Date is the same grouping key the PCR cell uses; if a Date holds
# several strikes or expiries this averages across them - confirm that is intended.
call_abs_delta = df['Call_Delta'].abs().groupby(df['Date']).transform('mean')
put_abs_delta = df['Put_Delta'].abs().groupby(df['Date']).transform('mean')

# A zero put delta has no defined ratio: emit NaN rather than inf
df['Delta_Neutral_Ratio'] = call_abs_delta / put_abs_delta.replace(0, np.nan)

In [91]:
# Take an independent copy of the engineered frame and write it to CSV,
# leaving the row index out of the file.
features_csv = r"C:\Users\Radhika\OneDrive\Desktop\Radhika_klypto\data\nifty_features_5min.csv"
final_features = df.copy()
final_features.to_csv(features_csv, index=False)