In [1]:
# Import libraries

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import glob
import os
import datetime
from datetime import timedelta

# Import plotly express for EF plot
# import plotly.express as px
# import plotly.graph_objects as go
# px.defaults.width, px.defaults.height = 1000,600
# Set precision
pd.set_option('display.precision', 4)

In [2]:
# Columns needed from the raw option-quote CSV files.
columns_to_import = ['quote_datetime', 'strike', 'option_type', 'bid', 'ask', 'underlying_bid', 'underlying_ask', 'DTE']

# Directory containing the option-quote CSV files.
directory = r'min_dte1'

# Sort the matches: glob returns files in an arbitrary, OS-dependent order, and
# the row order of df_option (which the later aggfunc='first' pivot relies on)
# would otherwise differ between machines.
file_paths = sorted(glob.glob(os.path.join(directory, '*.csv')))

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not file_paths:
    raise FileNotFoundError(f"No CSV files found in directory '{directory}'")

# Read every file, keeping only the columns of interest.
dataframes = []
for file_path in file_paths:
    df = pd.read_csv(file_path, skipinitialspace=True, usecols=columns_to_import)
    dataframes.append(df)

# Stack all files into a single frame with a fresh RangeIndex.
df_option = pd.concat(dataframes, ignore_index=True)

In [3]:
# df_option.to_csv('colab_option.csv')

In [4]:
# Convert 'quote_datetime' to datetime
df_option['quote_datetime'] = pd.to_datetime(df_option['quote_datetime'])

In [6]:
# # Calculate the number of unique days
# num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# # Display the result
# print(num_unique_days)

In [7]:
# Columns needed from the 1-minute spot file.
columns_to_import = ['Date_plus_1min', 'Open', 'High', 'Low', 'Close', 'prev_close', 'PDL', 'PDH', 'gap_new931', 'prev_day_neg', 'range', 'realized_volatility_post30min', 'realized_volatility_full_day', 'sma_50', 'sma_21', 'intraday_sma_11', 'below_sma_50', 'day_close', 'intraday_sma_20']

# Numeric columns that may contain thousands separators and must end up as float.
numeric_columns = ['Close', 'prev_close', 'PDL', 'PDH', 'gap_new931', 'range', 'Open', 'High', 'Low', 'realized_volatility_post30min', 'realized_volatility_full_day', 'sma_50', 'sma_21', 'intraday_sma_11', 'day_close', 'intraday_sma_20']

# Raw string: '\s' is not a valid escape sequence, so the plain literal only
# worked by accident and triggers an invalid-escape warning on current Python.
# The path value itself is unchanged.
df_spot = pd.read_csv(r'E:\spot_file123.csv', usecols=columns_to_import, parse_dates=['Date_plus_1min'])

# Ensure the timestamp column is datetime even if parse_dates could not convert it.
df_spot['Date_plus_1min'] = pd.to_datetime(df_spot['Date_plus_1min'])

# Chronological order is assumed by the per-day "first occurrence" logic below.
df_spot = df_spot.sort_values(by='Date_plus_1min')

# Strip commas from string values and convert the numeric columns to float.
df_spot[numeric_columns] = df_spot[numeric_columns].replace({',': ''}, regex=True).astype(float)

In [8]:
df_spot.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,range,realized_volatility_post30min,realized_volatility_full_day,sma_50,sma_21,below_sma_50,gap_new931,intraday_sma_11,intraday_sma_20
0,2683.73,2686.18,2683.73,2685.58,2018-01-02 09:31:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2685.58,2685.58
1,2685.6,2685.6,2684.3,2684.3,2018-01-02 09:32:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2684.94,2684.94
2,2684.32,2685.74,2684.23,2685.3,2018-01-02 09:33:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2685.06,2685.06
3,2685.18,2685.18,2683.73,2683.73,2018-01-02 09:34:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2684.7275,2684.7275
4,2683.8,2684.18,2683.05,2683.07,2018-01-02 09:35:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2684.396,2684.396


In [9]:
df_spot['gap_new931931'] = df_spot['gap_new931'].where(df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time())
df_spot['gap_new931931'] = df_spot['gap_new931931'].ffill() 

In [10]:
df_spot['date1'] = df_spot['Date_plus_1min'].dt.date 

In [11]:
# Copy the 09:31 bar's High/Low/Close/Open onto every following row.
# The values are forward-filled over the whole column, so a day that lacks a
# 09:31 bar inherits the previous day's values.
is_open_bar = df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time()

for target, source in (('high931', 'High'), ('low931', 'Low'), ('close931', 'Close'), ('open931', 'Open')):
    df_spot[target] = df_spot[source].where(is_open_bar).ffill()

In [12]:
df_option = df_option[df_option['DTE']==0]
del df_option['DTE']

In [13]:
# Filter rows where the time falls between 9:31 AM and 4:00 PM (both inclusive)
start_time = pd.to_datetime('09:31:00').time()
end_time = pd.to_datetime('16:00:00').time()

# Apply the filter based on the time component of 'quote_datetime'
df_option = df_option[(df_option['quote_datetime'].dt.time >= start_time) & (df_option['quote_datetime'].dt.time <= end_time)]

In [14]:
# Keep only the days that have quotes for exactly 390 distinct minutes.
quote_day = df_option['quote_datetime'].dt.date
df_option = df_option.groupby(quote_day).filter(
    lambda day_rows: day_rows['quote_datetime'].dt.time.nunique() == 390
)

In [15]:
# # Calculate the number of unique days
# num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# # Display the result
# print(num_unique_days)

In [16]:
# Pivot the DataFrame based on 'option_type'
df_option = df_option.pivot_table(index=['quote_datetime', 'strike', 'underlying_bid', 'underlying_ask'], 
                          columns='option_type', 
                          values=['bid', 'ask'], 
                          aggfunc='first')

# Flatten the column MultiIndex
df_option.columns = [f'{col[0]}_{col[1]}' for col in df_option.columns]

# Reset the index to turn it back into a standard DataFrame
df_option = df_option.reset_index()

In [17]:
df_option.columns

Index(['quote_datetime', 'strike', 'underlying_bid', 'underlying_ask', 'ask_C',
       'ask_P', 'bid_C', 'bid_P'],
      dtype='object')

In [18]:
# df1 = df.copy()

In [19]:
df_option = df_option.sort_values(by='quote_datetime')

In [20]:
# Extract date and time components for filtering
df_spot['date'] = df_spot['Date_plus_1min'].dt.date
df_spot['time'] = df_spot['Date_plus_1min'].dt.time

In [21]:
def find_first_high_after_noon(df, morning_start='09:32', breakout_start='10:32', breakout_end='14:30'):
    """Attach, per day, the time of the first bar that breaks the morning high.

    For each ``date`` the reference level is the maximum ``High`` of the bars
    with ``morning_start <= time < breakout_start``.  The earliest bar with
    ``breakout_start <= time <= breakout_end`` whose ``High`` is strictly above
    that level supplies the day's ``first_higher_high_time``.

    Despite the function name, the default windows are 09:32-10:32 (reference)
    and 10:32-14:30 (breakout search); they are not tied to noon.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'date'``, ``'time'`` (``datetime.time`` values) and
        ``'High'`` columns.
    morning_start, breakout_start, breakout_end : str or datetime.time
        Window boundaries; the defaults reproduce the original hard-coded values.

    Returns
    -------
    pandas.DataFrame
        A new frame (left merge of ``df`` with the per-day result, so the index
        is a fresh RangeIndex) with a ``'first_higher_high_time'`` column.  Days
        without reference bars or without a breakout get NaN.
    """
    def _as_time(value):
        # Accept both 'HH:MM' strings and datetime.time objects.
        return pd.Timestamp(str(value)).time()

    reference_from = _as_time(morning_start)
    search_from = _as_time(breakout_start)
    search_until = _as_time(breakout_end)

    # Drop a previous result so that re-running the cell does not leave
    # 'first_higher_high_time_x' / '_y' columns behind after the merge.
    if 'first_higher_high_time' in df.columns:
        df = df.drop(columns='first_higher_high_time')

    results = []
    for date, group in df.groupby('date'):
        # Bars that define the morning reference high.
        morning = group[(group['time'] >= reference_from) & (group['time'] < search_from)]
        if morning.empty:
            continue
        morning_high = morning['High'].max()

        # Bars in which a breakout above the reference high may occur.
        candidates = group[(group['time'] >= search_from) & (group['time'] <= search_until)]
        breakout_times = candidates.loc[candidates['High'] > morning_high, 'time']

        if not breakout_times.empty:
            # min() rather than "first row" so the answer does not depend on
            # the rows of the group already being in chronological order.
            results.append({'date': date, 'first_higher_high_time': min(breakout_times)})

    # Explicit columns keep the merge key present even when no day qualified;
    # without them an empty result raised KeyError('date') in the merge.
    result_df = pd.DataFrame(results, columns=['date', 'first_higher_high_time'])

    return df.merge(result_df, on='date', how='left')

In [22]:
#Example usage
df_spot = find_first_high_after_noon(df_spot)

In [23]:
def calculate_day_low_till_entry(df):
    """Add 'day_low_till_entry': each day's lowest 'Low' before its breakout time.

    Only rows whose 'time' is strictly earlier than that row's
    'first_higher_high_time' contribute to the minimum; the result is written
    to every row of the same 'date'.  Days with no such rows (for example when
    'first_higher_high_time' is missing) are left as NaN.

    The column is added to ``df`` in place and ``df`` itself is returned.
    """
    # Mask of the bars that precede the day's breakout bar.
    precedes_entry = df['time'] < df['first_higher_high_time']

    # Blank out the other bars, then broadcast each day's minimum to all its rows.
    lows_before_entry = df['Low'].where(precedes_entry)
    df['day_low_till_entry'] = lows_before_entry.groupby(df['date']).transform('min')

    return df

In [24]:
df_spot = calculate_day_low_till_entry(df_spot)

In [25]:
df_spot.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,gap_new931931,date1,high931,low931,close931,open931,date,time,first_higher_high_time,day_low_till_entry
0,2683.73,2686.18,2683.73,2685.58,2018-01-02 09:31:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:31:00,10:37:00,2682.36
1,2685.6,2685.6,2684.3,2684.3,2018-01-02 09:32:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:32:00,10:37:00,2682.36
2,2684.32,2685.74,2684.23,2685.3,2018-01-02 09:33:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:33:00,10:37:00,2682.36
3,2685.18,2685.18,2683.73,2683.73,2018-01-02 09:34:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:34:00,10:37:00,2682.36
4,2683.8,2684.18,2683.05,2683.07,2018-01-02 09:35:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:35:00,10:37:00,2682.36


In [26]:
# Intraday Bollinger inputs: 20-bar rolling statistics of Close, restarted on
# every calendar day (min_periods=1, so the first bar of a day has a mean but
# no standard deviation).
closes_by_day = df_spot.groupby(df_spot['Date_plus_1min'].dt.date)['Close']

df_spot['Rolling_MA'] = closes_by_day.transform(
    lambda closes: closes.rolling(window=20, min_periods=1).mean()
)
df_spot['Rolling_STD'] = closes_by_day.transform(
    lambda closes: closes.rolling(window=20, min_periods=1).std()
)

# Bands around the rolling mean: +2 std above, -1.7 std below.
# NOTE(review): the band widths are asymmetric - confirm 1.7 is intentional.
df_spot['Upper_Band'] = df_spot['Rolling_MA'] + 2 * df_spot['Rolling_STD']
df_spot['Lower_Band'] = df_spot['Rolling_MA'] - 1.7 * df_spot['Rolling_STD']

In [27]:
def low_find(df):
    """Flag days where price closes below the breakout bar's low soon afterwards.

    For each 'date' with a non-missing 'first_higher_high_time', the 'Low' of
    the bar stamped exactly at that time is compared with the smallest 'Close'
    among the bars of the following 10 minutes (breakout time excluded, the
    10th minute included).  'low_find' is True for every row of the day when
    the breakout bar's low is strictly greater than that smallest close, and
    False otherwise, including days with no breakout time, no breakout bar, or
    no bars inside the window.

    The column is written to ``df`` in place and ``df`` itself is returned.
    """
    df['low_find'] = False
    follow_up_window = pd.Timedelta(minutes=10)

    for day, day_rows in df.groupby('date'):
        breakout_time = day_rows['first_higher_high_time'].iloc[0]
        if pd.isna(breakout_time):
            continue

        # Combine the calendar date and the breakout time into one timestamp.
        breakout_ts = pd.to_datetime(str(day) + ' ' + str(breakout_time))
        stamps = day_rows['Date_plus_1min']

        breakout_low = day_rows.loc[stamps == breakout_ts, 'Low']
        follow_up = day_rows[(stamps > breakout_ts) & (stamps <= breakout_ts + follow_up_window)]
        if breakout_low.empty or follow_up.empty:
            continue

        lowest_close_after = follow_up['Close'].min()
        df.loc[day_rows.index, 'low_find'] = breakout_low.values[0] > lowest_close_after

    return df

In [28]:
df_spot = low_find(df_spot)

In [29]:
df_spot.head()   

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,open931,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find
0,2683.73,2686.18,2683.73,2685.58,2018-01-02 09:31:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,True
1,2685.6,2685.6,2684.3,2684.3,2018-01-02 09:32:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,09:32:00,10:37:00,2682.36,2684.94,0.9051,2686.7502,2683.4013,True
2,2684.32,2685.74,2684.23,2685.3,2018-01-02 09:33:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,09:33:00,10:37:00,2682.36,2685.06,0.6729,2686.4058,2683.9161,True
3,2685.18,2685.18,2683.73,2683.73,2018-01-02 09:34:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,09:34:00,10:37:00,2682.36,2684.7275,0.8626,2686.4527,2683.2611,True
4,2683.8,2684.18,2683.05,2683.07,2018-01-02 09:35:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,09:35:00,10:37:00,2682.36,2684.396,1.0524,2686.5008,2682.6069,True


In [30]:
df_spot1 = df_spot.copy()

In [31]:
df_spot1 = df_spot1[df_spot1['first_higher_high_time'].notna()]

In [32]:
df_spot1 = df_spot1[df_spot1['time'] >= df_spot1['first_higher_high_time']]

In [33]:
df_spot1.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,open931,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find
66,2692.09,2692.39,2692.06,2692.33,2018-01-02 10:37:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:37:00,10:37:00,2682.36,2691.0175,0.6846,2692.3868,2689.8536,True
67,2692.35,2692.35,2690.16,2690.16,2018-01-02 10:38:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:38:00,10:37:00,2682.36,2691.016,0.6866,2692.3891,2689.8488,True
68,2690.07,2690.07,2688.18,2688.76,2018-01-02 10:39:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:39:00,10:37:00,2682.36,2690.9205,0.8505,2692.6215,2689.4747,True
69,2688.77,2689.92,2688.75,2689.84,2018-01-02 10:40:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:40:00,10:37:00,2682.36,2690.8935,0.8767,2692.6469,2689.4031,True
70,2689.82,2690.07,2689.47,2690.07,2018-01-02 10:41:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:41:00,10:37:00,2682.36,2690.876,0.8901,2692.6561,2689.3629,True


In [34]:
# Calculate the number of unique days
num_unique_days = df_spot1['Date_plus_1min'].dt.date.nunique()

# Display the result
print(num_unique_days)

1085


In [35]:
def find_lbb(df):
    """Keep each day's rows from the first lower-band touch onwards.

    Within every 'date' (sorted by 'Date_plus_1min') the first row whose
    'Close' is not strictly above 'Lower_Band' marks the signal; that row and
    all later rows of the day are kept.

    Days are skipped entirely when
    * 'Close' never touches the band (no signal), or
    * 'Close' is never above the band on any row (guard kept from the original
      implementation).

    Previously a day without a band touch either raised UnboundLocalError or
    silently re-appended the previous day's rows; such days are now dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'date', 'Date_plus_1min', 'Close' and 'Lower_Band'.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the kept rows with a fresh RangeIndex, or an empty
        frame with the same columns as ``df`` when no day qualifies.
    """
    kept = []

    for _, group in df.groupby('date'):
        group = group.sort_values(by='Date_plus_1min')

        above_band = group['Close'] > group['Lower_Band']
        if not above_band.any():
            continue

        touched_band = ~above_band
        if not touched_band.any():
            # No band touch on this day: nothing to keep.
            continue

        # Position of the first touching row; positional slicing is immune to
        # duplicate index labels.
        first_touch = touched_band.to_numpy().argmax()
        kept.append(group.iloc[first_touch:])

    if not kept:
        # pd.concat([]) raises; hand back an empty frame with the input schema.
        return df.iloc[0:0].copy()

    return pd.concat(kept, ignore_index=True)

In [36]:
df_spot1 = find_lbb(df_spot1)

In [37]:
df_spot1.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,open931,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find
0,2690.07,2690.07,2688.18,2688.76,2018-01-02 10:39:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:39:00,10:37:00,2682.36,2690.9205,0.8505,2692.6215,2689.4747,True
1,2688.77,2689.92,2688.75,2689.84,2018-01-02 10:40:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:40:00,10:37:00,2682.36,2690.8935,0.8767,2692.6469,2689.4031,True
2,2689.82,2690.07,2689.47,2690.07,2018-01-02 10:41:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:41:00,10:37:00,2682.36,2690.876,0.8901,2692.6561,2689.3629,True
3,2690.06,2690.75,2689.98,2690.28,2018-01-02 10:42:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:42:00,10:37:00,2682.36,2690.88,0.887,2692.6541,2689.372,True
4,2690.28,2690.46,2689.85,2689.94,2018-01-02 10:43:00,2695.81,2673.61,2673.61,2692.12,yes,...,2683.73,2018-01-02,10:43:00,10:37:00,2682.36,2690.8515,0.9085,2692.6684,2689.3071,True


In [38]:
def find_close_high(df):
    """Keep each day's rows from the first rising close onwards.

    For every 'date' the rows are sorted by 'Date_plus_1min' and restricted to
    those at or after the day's 'first_higher_high_time' (taken from the first
    sorted row).  The first remaining row whose 'Close' is strictly greater
    than the previous remaining row's 'Close' starts the kept slice; days
    without such a row contribute nothing.

    Returns the concatenated slices with a fresh RangeIndex, or an empty
    DataFrame when no day qualifies.
    """
    kept = []

    for _, day_rows in df.groupby('date'):
        day_rows = day_rows.sort_values(by='Date_plus_1min')

        earliest = day_rows['first_higher_high_time'].iloc[0]
        day_rows = day_rows[day_rows['Date_plus_1min'].dt.time >= earliest]

        # True where a close exceeds the close of the row just before it; the
        # first row has no predecessor and is therefore never an uptick.
        closes = day_rows['Close']
        upticks = (closes > closes.shift()).to_numpy()

        if upticks.any():
            kept.append(day_rows.iloc[upticks.argmax():])

    if not kept:
        return pd.DataFrame()
    return pd.concat(kept, ignore_index=True)

In [39]:
df_spot1 = find_close_high(df_spot1)

In [40]:
df_spot1['first_time'] = df_spot1.groupby('date')['time'].transform('first').ffill()

In [41]:
df_spot1.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find,first_time
0,2688.77,2689.92,2688.75,2689.84,2018-01-02 10:40:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:40:00,10:37:00,2682.36,2690.8935,0.8767,2692.6469,2689.4031,True,10:40:00
1,2689.82,2690.07,2689.47,2690.07,2018-01-02 10:41:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:41:00,10:37:00,2682.36,2690.876,0.8901,2692.6561,2689.3629,True,10:40:00
2,2690.06,2690.75,2689.98,2690.28,2018-01-02 10:42:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:42:00,10:37:00,2682.36,2690.88,0.887,2692.6541,2689.372,True,10:40:00
3,2690.28,2690.46,2689.85,2689.94,2018-01-02 10:43:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:43:00,10:37:00,2682.36,2690.8515,0.9085,2692.6684,2689.3071,True,10:40:00
4,2689.94,2690.14,2689.89,2690.12,2018-01-02 10:44:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:44:00,10:37:00,2682.36,2690.8385,0.9174,2692.6732,2689.279,True,10:40:00


In [42]:
# df_spot1[df_spot1['date'] == pd.to_datetime('2022-04-04').date()]

In [43]:
df_option = pd.merge(df_option, df_spot, left_on = 'quote_datetime', right_on = 'Date_plus_1min', how = 'left')

In [44]:
df_option = df_option[df_option['first_higher_high_time'].notna()]

In [45]:
# Calculate the number of unique days
num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

820


In [46]:
# Sanity check: the spot 'Close' must lie between 99% of the option feed's
# 'underlying_bid' and 101% of its 'underlying_ask' (both bounds inclusive).
mask = (df_option['Close'] >= 0.99 * df_option['underlying_bid']) & (df_option['Close'] <= 1.01 * df_option['underlying_ask'])

# Record the check result as a column (it is deleted again in a later cell).
df_option['in_range'] = mask

# Keep only the rows that pass the check, so 'in_range' is True on every remaining row.
df_option = df_option[mask]

In [47]:
df_option[df_option['in_range']==False]

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find,in_range


In [48]:
# Group by date and calculate the number of unique times per date
df_option = df_option.groupby(df_option['quote_datetime'].dt.date).filter(
    lambda day_rows: day_rows['quote_datetime'].dt.time.nunique() == 390
)

In [49]:
# df_option = df_option[df_option['low_find'] == False]

In [50]:
# Calculate the number of unique days
num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

819


In [51]:
# df2 = df.copy()

In [52]:
# # Calculate the number of unique days
# num_unique_days = df['quote_datetime'].dt.date.nunique()

# # Display the result
# print(num_unique_days)

In [53]:
# Filter the DataFrame for '2022-08-30'
filtered_df = df_option[df_option['quote_datetime'].dt.date == pd.to_datetime('2022-08-30').date()]

# Extract the time part and get the unique times
unique_times = filtered_df['quote_datetime'].dt.time.unique()

# Display the unique times
print(len(unique_times))

0


In [54]:
del df_option['in_range']

In [55]:
df_option.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,open931,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find
0,2018-01-02 09:31:00,2635.0,2683.49,2687.85,52.3,0.05,48.2,0.0,2683.73,2686.18,...,2683.73,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,True
1,2018-01-02 09:31:00,2765.0,2683.49,2687.85,0.05,82.5,0.0,76.2,2683.73,2686.18,...,2683.73,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,True
2,2018-01-02 09:31:00,2760.0,2683.49,2687.85,0.05,77.5,0.0,71.2,2683.73,2686.18,...,2683.73,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,True
3,2018-01-02 09:31:00,2755.0,2683.49,2687.85,0.05,72.5,0.0,66.2,2683.73,2686.18,...,2683.73,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,True
4,2018-01-02 09:31:00,2750.0,2683.49,2687.85,0.05,67.5,0.0,61.2,2683.73,2686.18,...,2683.73,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,True


In [56]:
df_option['date'] = df_option['Date_plus_1min'].dt.date 

In [57]:
# #Read the CSV file
# dates = pd.read_excel('E:\marker34date.xlsx')

# #Extract the 'Date' column (replace 'Date' with the actual column name in your CSV)
# dates_to_plot = dates['date'].astype(str).tolist()

In [58]:
# dates_to_plot = pd.to_datetime(dates_to_plot).date
# # Filter the DataFrame for the specified dates
# df_option = df_option[df_option['date'].isin(dates_to_plot)]

In [59]:
# # df2 = df2[df2['date'] >= pd.to_datetime('2021-01-01').date()]
del df_option['date']

In [60]:
df_option = df_option[df_option['quote_datetime'].dt.date != pd.to_datetime('2020-03-09').date()]

In [61]:
df_option = df_option[df_option['quote_datetime'].dt.date != pd.to_datetime('2020-11-09').date()]

In [62]:
# Calculate the number of unique days
num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

818


In [63]:
df_option = df_option.reset_index(drop = True)

In [64]:
# df_option['first_higher_high_time_10'] = (pd.to_datetime(df_option['first_higher_high_time'].astype(str)) 
#                               + pd.Timedelta(minutes=10)).dt.time

In [65]:
df_option = df_option[df_option['low_find'] == False]

In [66]:
df_option.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,close931,open931,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find
21060,2018-01-05 09:31:00,2775.0,2730.88,2735.48,0.05,43.7,0.0,40.6,2731.33,2733.28,...,2733.28,2731.33,09:31:00,11:19:00,2727.92,2733.28,,,,False
21061,2018-01-05 09:31:00,2780.0,2730.88,2735.48,0.05,49.1,0.0,45.2,2731.33,2733.28,...,2733.28,2731.33,09:31:00,11:19:00,2727.92,2733.28,,,,False
21062,2018-01-05 09:31:00,2765.0,2730.88,2735.48,0.05,33.3,0.0,31.0,2731.33,2733.28,...,2733.28,2731.33,09:31:00,11:19:00,2727.92,2733.28,,,,False
21063,2018-01-05 09:31:00,2770.0,2730.88,2735.48,0.05,38.7,0.0,35.6,2731.33,2733.28,...,2733.28,2731.33,09:31:00,11:19:00,2727.92,2733.28,,,,False
21064,2018-01-05 09:31:00,2785.0,2730.88,2735.48,0.05,54.1,0.0,50.2,2731.33,2733.28,...,2733.28,2731.33,09:31:00,11:19:00,2727.92,2733.28,,,,False


In [67]:
# Calculate the number of unique days
num_unique_days = df_spot1['Date_plus_1min'].dt.date.nunique()

# Display the result
print(num_unique_days)

1084


In [68]:
df_option = pd.merge(df_option, df_spot1[['Date_plus_1min', 'first_time']], left_on = 'quote_datetime', right_on = 'Date_plus_1min', how = 'left')

In [69]:
# Keep, for each row, only quotes from its entry time ('first_time') up to and
# including 4:00 PM.
end_time = pd.to_datetime('16:00:00').time()

# .copy() makes df3 an independent frame. The following cells add columns to
# df3; without the copy those assignments target a slice of df_option (chained
# assignment), which pandas flags with SettingWithCopyWarning - a warning this
# notebook hides via warnings.filterwarnings('ignore').
df3 = df_option[(df_option['time'] >= df_option['first_time']) & (df_option['quote_datetime'].dt.time <= end_time)].copy()

In [70]:
df3.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find,Date_plus_1min_y,first_time
3645,2018-01-05 11:46:00,2775.0,2733.22,2734.04,0.05,44.8,0.0,38.8,2733.44,2733.64,...,11:46:00,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00
3646,2018-01-05 11:46:00,2760.0,2733.22,2734.04,0.05,29.8,0.0,23.9,2733.44,2733.64,...,11:46:00,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00
3647,2018-01-05 11:46:00,2765.0,2733.22,2734.04,0.05,34.8,0.0,28.8,2733.44,2733.64,...,11:46:00,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00
3648,2018-01-05 11:46:00,2770.0,2733.22,2734.04,0.05,39.8,0.0,33.8,2733.44,2733.64,...,11:46:00,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00
3649,2018-01-05 11:46:00,2780.0,2733.22,2734.04,0.05,49.8,0.0,43.8,2733.44,2733.64,...,11:46:00,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00


In [71]:
df3['atm'] = (df3['strike'] + df3['bid_C'] - df3['ask_P']).apply(lambda x: 5 * round(x / 5) if not np.isnan(x) else np.nan)

In [72]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

398


In [73]:
# df3[df3['date'] == pd.to_datetime('2022-04-04').date()]

In [74]:
df3.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,low_find,Date_plus_1min_y,first_time,atm
3645,2018-01-05 11:46:00,2775.0,2733.22,2734.04,0.05,44.8,0.0,38.8,2733.44,2733.64,...,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00,2730
3646,2018-01-05 11:46:00,2760.0,2733.22,2734.04,0.05,29.8,0.0,23.9,2733.44,2733.64,...,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00,2730
3647,2018-01-05 11:46:00,2765.0,2733.22,2734.04,0.05,34.8,0.0,28.8,2733.44,2733.64,...,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00,2730
3648,2018-01-05 11:46:00,2770.0,2733.22,2734.04,0.05,39.8,0.0,33.8,2733.44,2733.64,...,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00,2730
3649,2018-01-05 11:46:00,2780.0,2733.22,2734.04,0.05,49.8,0.0,43.8,2733.44,2733.64,...,11:19:00,2727.92,2734.051,0.3327,2734.7165,2733.4853,False,2018-01-05 11:46:00,11:46:00,2730


In [75]:
df3['straddle_exit'] = (df3['bid_C'] + df3['ask_C'] + df3['bid_P'] + df3['ask_P'])/2

In [76]:
df3['abs_diff'] = abs((df3['bid_C'] + df3['ask_C'] - df3['bid_P'] - df3['ask_P'])/2)

In [77]:
df3['call_price'] = (df3['bid_C'] + df3['ask_C'])/2
df3['put_price'] = (df3['bid_P'] + df3['ask_P'])/2

In [78]:
#df3 = df3[df3['below_sma_50'] == 'No']

In [79]:
# df3 = df3[df3['quote_datetime'].dt.date == pd.to_datetime('2024-12-04').date()] 

In [80]:
# def process_group(group):
#     # Filter rows for 15:30
#     group_1530 = group[group['quote_datetime'].dt.time == pd.to_datetime('15:30:00').time()]
    
#     if group_1530.empty:
#         return group  # No 15:30 rows, return original group

#     # Get unique ATM values at 15:30
#     unique_atms = group_1530['atm'].unique()

#     # Define the range based on 'Close' at 15:30
#     close_value = group_1530['Close'].iloc[0]
#     lower_bound = close_value - 15
#     upper_bound = close_value + 15

#     # Filter unique_atms within range
#     filtered_atms = [atm for atm in unique_atms if lower_bound <= atm <= upper_bound]

#     # Filter 15:30 rows by strike and straddle_exit
#     matching_rows = group_1530[(group_1530['strike'].isin(filtered_atms)) & (group_1530['straddle_exit'] > 1)]
    
#     if not matching_rows.empty:
#         # Sort by abs_diff and select the row with the smallest value
#         selected_row = matching_rows.loc[matching_rows['abs_diff'].idxmin()]
#     else:
#         # Adjust the first 15:30 row's values if no match
#         first_row = group_1530.iloc[0]
#         group.loc[first_row.name, 'straddle_exit'] -= abs(first_row['strike'] - first_row['atm'])
#         group.loc[first_row.name, 'strike'] = first_row['atm']
#         selected_row = group.loc[first_row.name]

#     # Remove all 15:30 rows from the group
#     group = group.drop(group_1530.index)

#     # Append the selected row
#     group = pd.concat([group, pd.DataFrame([selected_row])], ignore_index=True)

#     return group

In [81]:
def process_group(group, atm_tolerance=15):
    """Collapse one day's entry-time rows into a single row.

    "Entry-time rows" are the rows whose ``quote_datetime`` time equals the
    row's ``first_time``. Among them, the candidates are rows whose ``strike``
    is one of the ``atm`` values lying within ``atm_tolerance`` of the entry
    ``Close`` and whose ``straddle_exit`` is above 1; the candidate with the
    smallest ``abs_diff`` is kept.

    When there is no candidate, the first entry-time row is re-based onto its
    own ``atm`` strike: ``straddle_exit`` is reduced by ``|strike - atm|``,
    ``call_price`` is shifted by ``strike - atm``, ``put_price`` by
    ``atm - strike`` and ``strike`` is set to ``atm``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single day. Needs the columns ``quote_datetime``,
        ``first_time``, ``atm``, ``Close``, ``strike``, ``straddle_exit``,
        ``abs_diff``, ``call_price`` and ``put_price``.
    atm_tolerance : float, default 15
        Half-width of the band around the entry ``Close`` in which an ``atm``
        value is accepted.

    Returns
    -------
    pandas.DataFrame
        All non-entry rows followed by the single selected entry row, with a
        fresh 0..n-1 index. The input frame is left unmodified.
    """
    # Work on a copy so the frame handed in by groupby.apply is never mutated.
    group = group.copy()

    entry_rows = group[group['quote_datetime'].dt.time == group['first_time']]

    # No entry-time row for this day: nothing to collapse.
    if entry_rows.empty:
        return group.reset_index(drop=True)

    # ATM strikes that sit close enough to the spot close at entry time.
    close_value = entry_rows['Close'].iloc[0]
    lower_bound = close_value - atm_tolerance
    upper_bound = close_value + atm_tolerance
    unique_atms = [
        atm for atm in entry_rows['atm'].unique()
        if lower_bound <= atm <= upper_bound
    ]

    matching_rows = entry_rows[
        entry_rows['strike'].isin(unique_atms) & (entry_rows['straddle_exit'] > 1)
    ]

    if not matching_rows.empty:
        # Most balanced straddle (call mid closest to put mid) wins.
        selected_rows = matching_rows.sort_values('abs_diff').head(1)
    else:
        # Re-base the first entry-time row onto its ATM strike. Every
        # adjustment targets that same row and reads that row's own values.
        first_row_idx = entry_rows.index[0]
        quoted_strike = entry_rows['strike'].iloc[0]
        atm_strike = entry_rows['atm'].iloc[0]
        group.loc[first_row_idx, 'straddle_exit'] -= abs(quoted_strike - atm_strike)
        group.loc[first_row_idx, 'call_price'] += quoted_strike - atm_strike
        group.loc[first_row_idx, 'put_price'] += atm_strike - quoted_strike
        group.loc[first_row_idx, 'strike'] = atm_strike
        selected_rows = group.loc[[first_row_idx]]

    # Replace all entry-time rows by the selected one (appended at the end).
    group = group.drop(entry_rows.index)
    return pd.concat([group, selected_rows], ignore_index=True)

In [82]:
# Apply process_group to each calendar date of quote_datetime.
# group_keys=False keeps the date out of the result index; process_group
# rebuilds each day's index from 0 (ignore_index=True in its final concat), so
# index labels repeat across days after this cell.
df3 = df3.groupby(df3['quote_datetime'].dt.date, group_keys=False).apply(process_group)

In [83]:
# df3 = df3.sort_values(by='quote_datetime').reset_index(drop=True)

# chunks = []
# chunk_size = 10**6  # Adjust chunk size based on available memory
# for i in range(0, len(df3), chunk_size):
#     chunk = df3.iloc[i:i+chunk_size].sort_values(by='quote_datetime')
#     chunks.append(chunk)

# df3_sorted = pd.concat(chunks).sort_values(by='quote_datetime').reset_index(drop=True)

In [84]:
df3.tail()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,Upper_Band,Lower_Band,low_find,Date_plus_1min_y,first_time,atm,straddle_exit,abs_diff,call_price,put_price
17734,2024-12-30 16:00:00,5830.0,5906.36,5908.38,80.8,0.05,75.6,0.0,5911.98,5912.29,...,5931.882,5911.7105,False,2024-12-30 16:00:00,12:59:00,5905,78.225,78.175,78.2,0.025
17735,2024-12-30 16:00:00,5835.0,5906.36,5908.38,75.8,0.05,70.6,0.0,5911.98,5912.29,...,5931.882,5911.7105,False,2024-12-30 16:00:00,12:59:00,5905,73.225,73.175,73.2,0.025
17736,2024-12-30 16:00:00,5840.0,5906.36,5908.38,70.8,0.05,65.6,0.0,5911.98,5912.29,...,5931.882,5911.7105,False,2024-12-30 16:00:00,12:59:00,5905,68.225,68.175,68.2,0.025
17737,2024-12-30 16:00:00,5825.0,5906.36,5908.38,85.8,0.05,80.6,0.0,5911.98,5912.29,...,5931.882,5911.7105,False,2024-12-30 16:00:00,12:59:00,5905,83.225,83.175,83.2,0.025
17738,2024-12-30 12:59:00,5930.0,5927.35,5931.03,11.4,11.6,11.3,11.5,5924.52,5929.15,...,5939.77,5924.6009,False,2024-12-30 12:59:00,12:59:00,5930,22.9,0.2,11.35,11.55


In [85]:
# Take the strike of each day's entry row (time == first_time), NaN elsewhere,
# then back-fill it onto the preceding rows.
# NOTE(review): bfill only gives every row its own day's strike because
# process_group appends the entry row at the END of each day's block (see the
# tail() output above) -- confirm every day actually has an entry row.
df3['sell_strike'] = df3['strike'].where(df3['quote_datetime'].dt.time == df3['first_time'])
df3['sell_strike'] = df3['sell_strike'].bfill()  

In [86]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

398


In [87]:
# def update_straddle_and_filter(group):
#     # Check if there are matching rows
#     sell_strike = group['sell_strike'].iloc[0]
#     matching_rows = group[group['strike'] == sell_strike]

#     if matching_rows.empty:
#         # Directly update the first row
#         group.iloc[0, group.columns.get_loc('straddle_exit')] -= abs(group['strike'].iloc[0] - sell_strike)
#         group.iloc[0, group.columns.get_loc('call_price')] += group['strike'].iloc[0] - sell_strike
#         group.iloc[0, group.columns.get_loc('put_price')] += sell_strike - group['strike'].iloc[0]
#         group.iloc[0, group.columns.get_loc('strike')] = sell_strike
#         # Keep only the updated first row
#         return group.iloc[[0]]
#     else:
#         # Return only the matching rows
#         return matching_rows

In [88]:
# This aligns the strikes of the later (post-entry) rows with the sold strike
def update_straddle_and_filter(group):
    """Reduce one timestamp's rows to the row(s) quoted at the sold strike.

    The target strike is the ``sell_strike`` of the group's first row. Rows
    whose ``strike`` equals it are returned as they are. If there are none,
    the group's first row is re-based onto the target strike
    (``straddle_exit`` reduced by the absolute strike gap, ``call_price``
    shifted by ``strike - sell_strike``, ``put_price`` by the opposite amount,
    ``strike`` overwritten) and returned as a one-row frame.
    """
    target_strike = group['sell_strike'].iloc[0]
    at_target = group[group['strike'] == target_strike]

    # Guard clause: quotes exist at the sold strike, nothing to synthesise.
    if not at_target.empty:
        return at_target

    head_label = group.index[0]
    strike_gap = group['strike'].iloc[0] - target_strike

    group.at[head_label, 'straddle_exit'] -= abs(strike_gap)
    group.at[head_label, 'call_price'] += strike_gap
    group.at[head_label, 'put_price'] -= strike_gap
    group.at[head_label, 'strike'] = target_strike

    # Only the re-based first row survives.
    return group.loc[[head_label]]

# # Apply the function to each group of the same datetime and reset the index
# df3 = df3.groupby('quote_datetime').apply(update_straddle_and_filter).reset_index(drop=True)

In [89]:
# # Get the unique dates
# unique_dates = df3['quote_datetime'].dt.date.unique()

# # Initialize a list to store processed results
# processed_chunks = []

# # Process data for each unique date
# for date in unique_dates:
#     # Filter rows for the current date
#     daily_data = df3[df3['quote_datetime'].dt.date == date]
    
#     # Group by 'quote_datetime' (time within the day) and apply the function
#     processed_chunk = daily_data.groupby('quote_datetime', group_keys=False).apply(update_straddle_and_filter)
#     processed_chunk = processed_chunk.sort_values(by='quote_datetime').reset_index(drop=True)
    
#     # Append the processed data for the current date
#     processed_chunks.append(processed_chunk)

# # Concatenate all processed chunks
# df3 = pd.concat(processed_chunks, ignore_index=True)

In [90]:
# df3 = df3.groupby('quote_datetime', group_keys=False).apply(update_straddle_and_filter)

In [91]:
# For every timestamp keep only the row(s) at that row group's sell_strike
# (update_straddle_and_filter re-bases the first row when none is quoted);
# reset_index(drop=True) discards the group keys and renumbers the rows.
df3 = df3.groupby('quote_datetime').apply(update_straddle_and_filter).reset_index(drop=True)

In [92]:
columns_to_drop = ['underlying_bid', 'underlying_ask','abs_diff', 'Date_plus_1min_x', 'Date_plus_1min_y', 'atm']
df3 = df3.drop(columns=columns_to_drop)

In [93]:
df3.isna().any().any()

True

In [94]:
df3['date'] = df3['quote_datetime'].dt.date

In [95]:
# df3.to_csv('options_data_eod_fianl1.csv')

In [96]:
# df3.head(50)

In [97]:
df3 = df3.sort_values(by='quote_datetime').reset_index(drop=True)

In [98]:
# df3 = df3.reset_index(drop=True)

In [99]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

398


In [100]:
# Boolean Series indexed by date: True when every `straddle_exit` of that date
# is >= 0, False otherwise. The index lists ALL dates, valid or not.
valid_dates = df3.groupby('date')['straddle_exit'].apply(lambda x: (x >= 0).all())

In [101]:
# Keep only rows of `df3` whose date passed the check above. `valid_dates` is a
# boolean Series whose index holds every date, so the True entries have to be
# selected first -- matching against the whole index would keep every row.
df3 = df3[df3['date'].isin(valid_dates[valid_dates].index)]

In [102]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

398


In [103]:
df3['time'] = df3['quote_datetime'].dt.time

In [104]:
# Call mid at the entry row (time == first_time), carried forward to later rows.
# NOTE(review): ffill assumes df3 is sorted by quote_datetime (done a few cells
# above) and that the entry row is the first row of each day; otherwise rows
# before it would inherit the previous day's value -- confirm.
df3['sell_call'] = df3['call_price'].where(df3['quote_datetime'].dt.time == df3['first_time'])
df3['sell_call'] = df3['sell_call'].ffill()   

In [105]:
# Put mid at the entry row (time == first_time), carried forward to later rows.
# NOTE(review): same ordering assumption as any forward fill -- rows must be in
# time order with the entry row first within each day; confirm.
df3['sell_put'] = df3['put_price'].where(df3['quote_datetime'].dt.time == df3['first_time'])
df3['sell_put'] = df3['sell_put'].ffill()   

In [106]:
df3 = df3[df3['sell_call'] > 0]

In [107]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

398


In [108]:
def fill_zero_prices(prices):
    """Replace zero quotes in one day's price series using neighbouring rows.

    Zeros are visited in row order, repeatedly, until none is left or a pass
    makes no progress. For each zero, with ``prev``/``next`` being the
    adjacent rows (already-filled values count, a missing neighbour counts as
    zero):

    * both non-zero -> their average
    * only ``prev`` non-zero -> ``prev``
    * only ``next`` non-zero -> ``next``
    * otherwise the zero is left for a later pass

    Parameters
    ----------
    prices : pandas.Series
        Prices of a single day, in row order.

    Returns
    -------
    pandas.Series
        Filled prices on the same index as ``prices``.
    """
    filled = np.array(prices, dtype=float)
    last_pos = len(filled) - 1

    while (filled == 0).any():
        progressed = False
        for pos in np.flatnonzero(filled == 0):
            prev_price = filled[pos - 1] if pos > 0 else 0.0
            next_price = filled[pos + 1] if pos < last_pos else 0.0

            if prev_price != 0 and next_price != 0:
                replacement = (prev_price + next_price) / 2
            elif prev_price != 0:
                replacement = prev_price
            elif next_price != 0:
                replacement = next_price
            else:
                continue

            filled[pos] = replacement
            # A replacement that is itself 0 is not progress.
            if replacement != 0:
                progressed = True

        # Stop when a whole pass changed nothing (e.g. the day is all zeros).
        if not progressed:
            break

    return pd.Series(filled, index=prices.index)


# Fill zero call prices day by day
df3['call_price'] = df3.groupby('date')['call_price'].transform(fill_zero_prices)

In [109]:
def fill_zero_prices(prices):
    """Replace zero quotes in one day's price series using neighbouring rows.

    Zeros are visited in row order, repeatedly, until none is left or a pass
    makes no progress. For each zero, with ``prev``/``next`` being the
    adjacent rows (already-filled values count, a missing neighbour counts as
    zero):

    * both non-zero -> their average
    * only ``prev`` non-zero -> ``prev``
    * only ``next`` non-zero -> ``next``
    * otherwise the zero is left for a later pass

    Parameters
    ----------
    prices : pandas.Series
        Prices of a single day, in row order.

    Returns
    -------
    pandas.Series
        Filled prices on the same index as ``prices``.
    """
    filled = np.array(prices, dtype=float)
    last_pos = len(filled) - 1

    while (filled == 0).any():
        progressed = False
        for pos in np.flatnonzero(filled == 0):
            prev_price = filled[pos - 1] if pos > 0 else 0.0
            next_price = filled[pos + 1] if pos < last_pos else 0.0

            if prev_price != 0 and next_price != 0:
                replacement = (prev_price + next_price) / 2
            elif prev_price != 0:
                replacement = prev_price
            elif next_price != 0:
                replacement = next_price
            else:
                continue

            filled[pos] = replacement
            # A replacement that is itself 0 is not progress.
            if replacement != 0:
                progressed = True

        # Stop when a whole pass changed nothing (e.g. the day is all zeros).
        if not progressed:
            break

    return pd.Series(filled, index=prices.index)


# Fill zero put prices day by day
df3['put_price'] = df3.groupby('date')['put_price'].transform(fill_zero_prices)

In [110]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

398


In [111]:
df3.tail()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,Lower_Band,low_find,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put
93806,2024-12-30 15:56:00,5930.0,0.3,9.2,0.2,8.7,5920.95,5922.12,5918.94,5921.21,...,5916.447,False,12:59:00,9.2,0.25,8.95,5930.0,2024-12-30,11.35,11.55
93807,2024-12-30 15:57:00,5930.0,0.1,16.3,0.05,15.2,5921.01,5921.01,5914.01,5914.38,...,5915.8395,False,12:59:00,15.825,0.075,15.75,5930.0,2024-12-30,11.35,11.55
93808,2024-12-30 15:58:00,5930.0,0.05,15.9,0.0,14.9,5914.28,5915.57,5914.18,5914.72,...,5915.1766,False,12:59:00,15.425,0.025,15.4,5930.0,2024-12-30,11.35,11.55
93809,2024-12-30 15:59:00,5930.0,0.05,19.3,0.0,17.7,5914.51,5915.01,5911.5,5911.5,...,5913.9542,False,12:59:00,18.525,0.025,18.5,5930.0,2024-12-30,11.35,11.55
93810,2024-12-30 16:00:00,5930.0,0.05,24.4,0.0,21.0,5911.98,5912.29,5905.8,5907.43,...,5911.7105,False,12:59:00,22.725,0.025,22.7,5930.0,2024-12-30,11.35,11.55


In [112]:
# df_atm[df_atm['quote_datetime'].dt.date == pd.to_datetime('2024-09-09').date()].to_csv('ch.csv')

In [113]:
# df3[df3['quote_datetime'].dt.date == pd.to_datetime('2024-09-09').date()].to_csv('ch1.csv')

In [114]:
# df3 = df_atm2.copy()

In [115]:
df3.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'low_find', 'first_time',
       'straddle_exit', 'call_price', 'put_price', 'sell_strike', 'date',
       'sell_call', 'sell_put'],
      dtype='object')

In [116]:
# df3['put_price'] /= 2
# df3['sell_put'] /= 2

In [117]:
# Rebuild straddle_exit as the sum of the two leg prices (after the zero-price
# filling above). This overwrites the earlier straddle_exit values, including
# the strike-gap adjustments made by process_group / update_straddle_and_filter.
df3['straddle_exit'] = df3['call_price'] + df3['put_price']

In [118]:
df3['sell_straddle'] = df3['straddle_exit'].where(df3['quote_datetime'].dt.time == df3['first_time'])
df3['sell_straddle'] = df3['sell_straddle'].ffill()    

In [119]:
df3['sell_spot'] = df3['Close'].where(df3['quote_datetime'].dt.time == df3['first_time'])
df3['sell_spot'] = df3['sell_spot'].ffill() 

In [120]:
df3.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'low_find', 'first_time',
       'straddle_exit', 'call_price', 'put_price', 'sell_strike', 'date',
       'sell_call', 'sell_put', 'sell_straddle', 'sell_spot'],
      dtype='object')

In [121]:
# df3 = df3[df3['close931'] > df3['sell_spot']]

In [122]:
# def exit_strategy(df):
#     filtered_df_list = []
    
#     # Group by each date
#     for date, group in df.groupby('date'):
#         # Step 1: Get the first 'Close' value for each date (first_close)
#         #first_close = group['Close'].iloc[0]
#         first_put = group['call_price'].iloc[0]
        
#         # Step 2: Calculate the condition and filter the rows accordingly
#         condition = group['call_price'] < 1.7 * first_put
        
#         # Step 3: Find the first row where the condition fails and filter accordingly
#         if condition.any():
#             first_fail_idx = condition[~condition].index[0] if (~condition).any() else None
            
#             # Keep rows before the first failure (inclusive of the row where it fails)
#             if first_fail_idx:
#                 filtered_group = group.loc[:first_fail_idx+1]
#             else:
#                 filtered_group = group  # Keep the entire group if the condition never fails
            
#             filtered_df_list.append(filtered_group)
    
#     return pd.concat(filtered_df_list, ignore_index=True)

In [123]:
# # Step 5: Apply the exit strategy
# df3 = exit_strategy(df3)

In [124]:
# def exit_strategy1(df):
#     filtered_df_list = []
    
#     # Group by each date
#     for date, group in df.groupby('date'):
#         # Step 1: Get the first 'put_price' and 'call_price' for the date
#         first_put = group['put_price'].iloc[0]
#         first_put = group['call_price'].iloc[0]
        
#         # Step 2: Define the stop-loss level
#         stop_loss_level = 1.7 * first_put
#         condition_level = 1.5 * first_put
        
#         # Step 3: Apply the stop-loss logic
#         stop_loss_triggered = False  # Track if the stop-loss condition has been hit
        
#         group['put_stop1_price'] = np.nan
#         group['put_stoploss_time1'] = np.nan
        
#         for i in range(len(group)):
#             # Stop-loss condition
#             if not stop_loss_triggered and group['put_price'].iloc[i] >= stop_loss_level:
#                 stop_loss_triggered = True
#                 stop_row_index = i
#                 group['put_stop1_price'] = stop_loss_level
#                 group['put_stoploss_time1'] = group['time'].iloc[i]
#                 #group = group.iloc[:stop_row_index + 1]  # Keep rows up to the stop row
#                 #break  # Exit loop once stop-loss is triggered
            
#             # Condition logic: No slicing here
#             elif group['call_price'].iloc[i] >= condition_level:
#                 group['put_stop1_price'] = group['put_price'].iloc[i]
#                 group['put_stoploss_time1'] = group['time'].iloc[i]
        
#         # Step 4: Add the adjusted group to the filtered list
#         filtered_df_list.append(group)
    
#     # Combine all the filtered groups
#     return pd.concat(filtered_df_list, ignore_index=True)

In [125]:
# # Sort the DataFrame to ensure proper order
# df3 = df3.sort_values(by=['date', 'time'])

# # Define a function to calculate the max of the first 15 rows for 'call_price'
# def calculate_call_max_15(group):
#     group['call_max_15'] = group['call_price'].iloc[:15].max()
#     return group

# # Apply the function to each group based on 'date'
# df3 = df3.groupby('date').apply(calculate_call_max_15).reset_index(drop=True)

# # Forward-fill any missing values in 'call_max_15'
# df3['call_max_15'] = df3['call_max_15'].ffill()

In [126]:
df3.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
0,2018-01-05 11:46:00,2735.0,1.55,3.0,1.35,2.8,2733.44,2733.64,2733.43,2733.64,...,11:46:00,4.35,1.45,2.9,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
1,2018-01-05 11:47:00,2735.0,1.55,3.1,1.4,2.9,2733.65,2733.79,2733.48,2733.52,...,11:46:00,4.475,1.475,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
2,2018-01-05 11:48:00,2735.0,1.55,3.0,1.35,2.85,2733.51,2733.61,2733.5,2733.57,...,11:46:00,4.375,1.45,2.925,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
3,2018-01-05 11:49:00,2735.0,1.5,3.1,1.35,2.9,2733.57,2733.61,2733.51,2733.54,...,11:46:00,4.425,1.425,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
4,2018-01-05 11:50:00,2735.0,1.4,3.2,1.25,2.95,2733.56,2733.63,2733.23,2733.23,...,11:46:00,4.4,1.325,3.075,2735.0,2018-01-05,1.45,2.9,4.35,2733.64


In [127]:
# def calculate_day_lbb(df):
#     df['lbb_cross_time'] = np.nan
#     df['lbb3_cross_time'] = np.nan    
    
#     for date, group in df.groupby('date'):
#         # Filter rows where 'Close' is less than 'Lower_Band' and 'Lower_Band3'
#         before_group1 = group[group['Close'] < group['Lower_Band']]
#         before_group2 = group[group['Close'] < group['Lower_Band3']]
        
#         # Check if before_group1 is not empty and set 'lbb_cross_time'
#         if not before_group1.empty:
#             df.loc[group.index, 'lbb_cross_time'] = before_group1['time'].iloc[0]
        
#         # Check if before_group2 is not empty and set 'lbb3_cross_time'
#         if not before_group2.empty:
#             df.loc[group.index, 'lbb3_cross_time'] = before_group2['time'].iloc[0]
    
#     return df

In [128]:
# df3 = calculate_day_lbb(df3)

In [129]:
# df3 = exit_strategy(df3)

In [130]:
df3.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
0,2018-01-05 11:46:00,2735.0,1.55,3.0,1.35,2.8,2733.44,2733.64,2733.43,2733.64,...,11:46:00,4.35,1.45,2.9,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
1,2018-01-05 11:47:00,2735.0,1.55,3.1,1.4,2.9,2733.65,2733.79,2733.48,2733.52,...,11:46:00,4.475,1.475,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
2,2018-01-05 11:48:00,2735.0,1.55,3.0,1.35,2.85,2733.51,2733.61,2733.5,2733.57,...,11:46:00,4.375,1.45,2.925,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
3,2018-01-05 11:49:00,2735.0,1.5,3.1,1.35,2.9,2733.57,2733.61,2733.51,2733.54,...,11:46:00,4.425,1.425,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64
4,2018-01-05 11:50:00,2735.0,1.4,3.2,1.25,2.95,2733.56,2733.63,2733.23,2733.23,...,11:46:00,4.4,1.325,3.075,2735.0,2018-01-05,1.45,2.9,4.35,2733.64


In [131]:
df3.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'low_find', 'first_time',
       'straddle_exit', 'call_price', 'put_price', 'sell_strike', 'date',
       'sell_call', 'sell_put', 'sell_straddle', 'sell_spot'],
      dtype='object')

In [132]:
def exit_strategy12(df, window_minutes=30):
    """Cut each day short when spot falls to/below its rolling mean late in the day.

    For every ``date`` the rows are sorted by ``quote_datetime`` and split at
    ``last timestamp - window_minutes``. Rows before the split are always
    kept. Inside the final window, rows are kept up to and including the first
    one where ``Close > Rolling_MA`` does NOT hold (a NaN on either side also
    counts as not holding); if it holds everywhere the whole day is kept.

    A day on which the condition never holds inside the window exits on the
    first window row -- it is no longer dropped from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``date``, ``quote_datetime``, ``Close`` and
        ``Rolling_MA``.
    window_minutes : int, default 30
        Length of the end-of-day window in which the exit rule is active.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the truncated days with a fresh index, or an empty
        DataFrame when ``df`` contains no groups.
    """
    filtered_df_list = []

    for date, group in df.groupby('date'):
        group = group.sort_values(by='quote_datetime')

        # Split the day into "before the window" and "inside the window".
        window_start = group['quote_datetime'].max() - pd.Timedelta(minutes=window_minutes)
        group_before = group[group['quote_datetime'] < window_start]
        group_window = group[group['quote_datetime'] >= window_start]

        # Stay in the trade while spot is above its rolling mean.
        failed = ~(group_window['Close'] > group_window['Rolling_MA']).to_numpy()

        # Positional slicing: keep everything up to and including the first failure.
        if failed.any():
            group_window = group_window.iloc[:int(failed.argmax()) + 1]

        filtered_df_list.append(pd.concat([group_before, group_window]))

    return pd.concat(filtered_df_list, ignore_index=True) if filtered_df_list else pd.DataFrame()

In [133]:
df6 = exit_strategy12(df3)
# df6 = df3.copy()

In [134]:
# df6[df6['date'] == pd.to_datetime('2022-04-04').date()]

In [135]:
df6.tail()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
82441,2024-12-30 15:26:00,5930.0,5.7,6.0,5.6,5.8,5932.16,5932.89,5930.09,5930.09,...,12:59:00,11.55,5.65,5.9,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
82442,2024-12-30 15:27:00,5930.0,5.0,6.7,4.9,6.5,5929.96,5930.75,5927.88,5927.93,...,12:59:00,11.55,4.95,6.6,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
82443,2024-12-30 15:28:00,5930.0,4.1,8.2,4.0,8.0,5928.09,5928.47,5925.3,5925.53,...,12:59:00,12.15,4.05,8.1,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
82444,2024-12-30 15:29:00,5930.0,3.3,10.4,3.2,10.3,5925.51,5925.78,5921.39,5922.39,...,12:59:00,13.6,3.25,10.35,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
82445,2024-12-30 15:30:00,5930.0,2.7,12.6,2.6,12.3,5922.66,5922.66,5920.19,5920.22,...,12:59:00,15.1,2.65,12.45,5930.0,2024-12-30,11.35,11.55,22.9,5929.15


In [136]:
# df31 = df6[df6['low_find'] == False]
# df32 = df6[df6['low_find'] == True]

In [137]:
# df31 = df6[df6['low_find'] == False]

In [138]:
# def calculate_check(df):
#     check_list = []

#     # Group by 'date'
#     for date, group in df.groupby('date'):
#         try:
#             # Extract the put_price at first_higher_high_time
#             put_price_high = group.loc[group['quote_datetime'].dt.time == group['first_higher_high_time'].iloc[0], 'put_price'].iloc[0]
            
#             # Extract the put_price at first_higher_high_time_10
#             put_price_high_10 = group.loc[group['quote_datetime'].dt.time == group['first_higher_high_time_10'].iloc[0], 'put_price'].iloc[0]
            
#             # Calculate the condition
#             check_value = 1.25 * put_price_high < put_price_high_10
#         except (IndexError, KeyError):  # Handle cases where the required rows are missing
#             check_value = False

#         # Assign the result to all rows of the group
#         group['check'] = check_value
#         check_list.append(group)
    
#     # Concatenate all groups back into a single DataFrame
#     return pd.concat(check_list, ignore_index=True)

In [139]:
# # Apply the function to df31
# df31 = calculate_check(df31)

In [140]:
# df31['put_at_high'] = df31['put_price'].where(df31['quote_datetime'].dt.time == df31['first_higher_high_time'])
# df31['put_at_high'] = df31['put_at_high'].ffill() 

In [141]:
# df31.head()

In [142]:
# df31 = df31[df31['quote_datetime'].dt.time >= df31['first_higher_high_time_10']]

In [143]:
# df31.head()

In [144]:
def exit_strategy1(df, stop_multiplier=1.5):
    """Apply a put-price stop to each day.

    The reference price is the ``put_price`` of the day's first row. Rows are
    kept up to and including the first row whose ``put_price`` is NOT below
    ``stop_multiplier * reference``; if no row breaches, the whole day is kept.

    NOTE: a day on which no row at all is below the stop level is skipped
    entirely (this mirrors the original behaviour).

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``date`` and ``put_price``; rows of a day are
        expected in time order.
    stop_multiplier : float, default 1.5
        Stop level as a multiple of the day's first put price.

    Returns
    -------
    pandas.DataFrame
        Truncated days concatenated with a fresh index; an empty DataFrame if
        every day was skipped.
    """
    filtered_df_list = []

    for date, group in df.groupby('date'):
        first_put = group['put_price'].iloc[0]

        # True while the put is still below the stop level.
        condition = group['put_price'] < stop_multiplier * first_put

        if not condition.any():
            continue

        breaches = condition[~condition]
        if breaches.empty:
            filtered_group = group  # stop never hit
        else:
            # Explicit emptiness test: an index label of 0 is a valid breach row.
            filtered_group = group.loc[:breaches.index[0]]

        filtered_df_list.append(filtered_group)

    return pd.concat(filtered_df_list, ignore_index=True) if filtered_df_list else pd.DataFrame()

In [145]:
# df31

In [146]:
# df32

In [147]:
df31 = exit_strategy1(df6)

In [148]:
df31[df31['date'] == pd.to_datetime('2022-04-04').date()]

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot


In [149]:
df31.tail()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
56872,2024-12-30 15:26:00,5930.0,5.7,6.0,5.6,5.8,5932.16,5932.89,5930.09,5930.09,...,12:59:00,11.55,5.65,5.9,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
56873,2024-12-30 15:27:00,5930.0,5.0,6.7,4.9,6.5,5929.96,5930.75,5927.88,5927.93,...,12:59:00,11.55,4.95,6.6,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
56874,2024-12-30 15:28:00,5930.0,4.1,8.2,4.0,8.0,5928.09,5928.47,5925.3,5925.53,...,12:59:00,12.15,4.05,8.1,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
56875,2024-12-30 15:29:00,5930.0,3.3,10.4,3.2,10.3,5925.51,5925.78,5921.39,5922.39,...,12:59:00,13.6,3.25,10.35,5930.0,2024-12-30,11.35,11.55,22.9,5929.15
56876,2024-12-30 15:30:00,5930.0,2.7,12.6,2.6,12.3,5922.66,5922.66,5920.19,5920.22,...,12:59:00,15.1,2.65,12.45,5930.0,2024-12-30,11.35,11.55,22.9,5929.15


In [150]:
# df5 = pd.concat([df31, df32], ignore_index=True)

In [151]:
# df32 = exit_strategy1(df32)

In [152]:
df5 = df31.copy()

In [153]:
# Sort by 'quote_datetime'
df5 = df5.sort_values(by='quote_datetime').reset_index(drop=True)

In [154]:
df5

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
0,2018-01-05 11:46:00,2735.0,1.55,3.0,1.35,2.80,2733.44,2733.64,2733.43,2733.64,...,11:46:00,4.350,1.450,2.900,2735.0,2018-01-05,1.45,2.90,4.35,2733.64
1,2018-01-05 11:47:00,2735.0,1.55,3.1,1.40,2.90,2733.65,2733.79,2733.48,2733.52,...,11:46:00,4.475,1.475,3.000,2735.0,2018-01-05,1.45,2.90,4.35,2733.64
2,2018-01-05 11:48:00,2735.0,1.55,3.0,1.35,2.85,2733.51,2733.61,2733.50,2733.57,...,11:46:00,4.375,1.450,2.925,2735.0,2018-01-05,1.45,2.90,4.35,2733.64
3,2018-01-05 11:49:00,2735.0,1.50,3.1,1.35,2.90,2733.57,2733.61,2733.51,2733.54,...,11:46:00,4.425,1.425,3.000,2735.0,2018-01-05,1.45,2.90,4.35,2733.64
4,2018-01-05 11:50:00,2735.0,1.40,3.2,1.25,2.95,2733.56,2733.63,2733.23,2733.23,...,11:46:00,4.400,1.325,3.075,2735.0,2018-01-05,1.45,2.90,4.35,2733.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56872,2024-12-30 15:26:00,5930.0,5.70,6.0,5.60,5.80,5932.16,5932.89,5930.09,5930.09,...,12:59:00,11.550,5.650,5.900,5930.0,2024-12-30,11.35,11.55,22.90,5929.15
56873,2024-12-30 15:27:00,5930.0,5.00,6.7,4.90,6.50,5929.96,5930.75,5927.88,5927.93,...,12:59:00,11.550,4.950,6.600,5930.0,2024-12-30,11.35,11.55,22.90,5929.15
56874,2024-12-30 15:28:00,5930.0,4.10,8.2,4.00,8.00,5928.09,5928.47,5925.30,5925.53,...,12:59:00,12.150,4.050,8.100,5930.0,2024-12-30,11.35,11.55,22.90,5929.15
56875,2024-12-30 15:29:00,5930.0,3.30,10.4,3.20,10.30,5925.51,5925.78,5921.39,5922.39,...,12:59:00,13.600,3.250,10.350,5930.0,2024-12-30,11.35,11.55,22.90,5929.15


In [155]:
# Calculate the number of unique days
num_unique_days = df5['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

391


In [156]:
def exit_strategy4(df):
    """Truncate each date's rows at the first breach of a put-price stop level.

    For every group of rows sharing the same 'date' (rows are processed in the
    order they appear in ``df``):

    * ``first_put`` is the 'put_price' of the group's first row.
    * The stop level ('adjusted_threshold') starts at ``1.5 * first_put``.
    * Once 'put_price' has fallen to ``0.5 * first_put`` or lower, every row
      *after* that row uses a stop level of ``first_put`` instead.
    * The group is cut after the first row whose 'put_price' is not strictly
      below its stop level (that row is kept). If no row breaches, the whole
      group is kept.
    * A group in which no row at all is below its stop level is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'date' and 'put_price' columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The kept rows of all groups, concatenated in group order with a fresh
        0..n-1 index and an extra float 'adjusted_threshold' column. An empty
        frame (with that column) is returned when nothing is kept.
    """
    kept_groups = []

    for _, group in df.groupby('date'):
        # Work on a private copy so the caller's frame is never written to
        group = group.copy()
        put_prices = group['put_price'].to_numpy(dtype=float, na_value=np.nan)
        first_put = put_prices[0]

        # Default stop level for the whole day
        thresholds = np.full(len(group), 1.5 * first_put, dtype=float)

        # Tighten the stop on the rows after the price first halves.
        # Positions (not index labels) are used so the result does not depend
        # on the frame carrying a sorted 0..n-1 integer index.
        halved_positions = np.flatnonzero(put_prices <= 0.5 * first_put)
        if halved_positions.size:
            thresholds[halved_positions[0] + 1:] = first_put
        group['adjusted_threshold'] = thresholds

        below_stop = put_prices < thresholds
        if not below_stop.any():
            # No row is below its stop level: the day is dropped entirely
            continue

        breach_positions = np.flatnonzero(~below_stop)
        if breach_positions.size:
            # Keep everything up to and including the first breaching row
            group = group.iloc[:breach_positions[0] + 1]

        kept_groups.append(group)

    if not kept_groups:
        # pd.concat([]) raises; hand back an empty frame with the same schema
        empty = df.iloc[0:0].copy()
        empty['adjusted_threshold'] = np.array([], dtype=float)
        return empty.reset_index(drop=True)

    return pd.concat(kept_groups, ignore_index=True)

In [157]:
df4 = exit_strategy4(df5)

In [158]:
# df4 = df5.copy()

In [159]:
df4.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold
0,2018-01-05 11:46:00,2735.0,1.55,3.0,1.35,2.8,2733.44,2733.64,2733.43,2733.64,...,4.35,1.45,2.9,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
1,2018-01-05 11:47:00,2735.0,1.55,3.1,1.4,2.9,2733.65,2733.79,2733.48,2733.52,...,4.475,1.475,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
2,2018-01-05 11:48:00,2735.0,1.55,3.0,1.35,2.85,2733.51,2733.61,2733.5,2733.57,...,4.375,1.45,2.925,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
3,2018-01-05 11:49:00,2735.0,1.5,3.1,1.35,2.9,2733.57,2733.61,2733.51,2733.54,...,4.425,1.425,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
4,2018-01-05 11:50:00,2735.0,1.4,3.2,1.25,2.95,2733.56,2733.63,2733.23,2733.23,...,4.4,1.325,3.075,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35


In [160]:
# df4

In [161]:
# def close_lbb(df):
#     df['Close_at_LBB'] = np.nan  # Initialize the column with NaN values
    
#     # Group by each date
#     for date, group in df.groupby('date'):
#         # Step 1: Get the first put price for the date
#         first_put = group['put_price'].iloc[0]
        
#         # Step 2: Find the timestamp where 'Close' drops to the Lower_Band
#         drop_idx = group[group['Close'] <= group['Lower_Band']].index.min()
        
#         if pd.notna(drop_idx):
#             close_value = group.loc[drop_idx, 'Close']
#             df.loc[group.index, 'Close_at_LBB'] = close_value
    
#     return df

In [162]:
# df4 = close_lbb(df4)

In [163]:
# df4[df4['Close_at_LBB'].notna()]

In [164]:
# df4 = calculate_day_lbb(df4)

In [165]:
df4.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold
0,2018-01-05 11:46:00,2735.0,1.55,3.0,1.35,2.8,2733.44,2733.64,2733.43,2733.64,...,4.35,1.45,2.9,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
1,2018-01-05 11:47:00,2735.0,1.55,3.1,1.4,2.9,2733.65,2733.79,2733.48,2733.52,...,4.475,1.475,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
2,2018-01-05 11:48:00,2735.0,1.55,3.0,1.35,2.85,2733.51,2733.61,2733.5,2733.57,...,4.375,1.45,2.925,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
3,2018-01-05 11:49:00,2735.0,1.5,3.1,1.35,2.9,2733.57,2733.61,2733.51,2733.54,...,4.425,1.425,3.0,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35
4,2018-01-05 11:50:00,2735.0,1.4,3.2,1.25,2.95,2733.56,2733.63,2733.23,2733.23,...,4.4,1.325,3.075,2735.0,2018-01-05,1.45,2.9,4.35,2733.64,4.35


In [166]:
df4.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'low_find', 'first_time',
       'straddle_exit', 'call_price', 'put_price', 'sell_strike', 'date',
       'sell_call', 'sell_put', 'sell_straddle', 'sell_spot',
       'adjusted_threshold'],
      dtype='object')

In [167]:
# df4['putlbb'] = df4['put_price'].where(df4['quote_datetime'].dt.time == df4['lbb_cross_time'])
# df4['putlbb3'] = df4['put_price'].where(df4['quote_datetime'].dt.time == df4['lbb3_cross_time'])

In [168]:
# Rows whose clock time equals the row's 'first_time'
at_first_time = df4['quote_datetime'].dt.time == df4['first_time']

# Keep put / call / spot close only on those rows; every other row gets NaN
for source_col, snapshot_col in (('put_price', 'put10'), ('call_price', 'call10'), ('Close', 'spot10')):
    df4[snapshot_col] = df4[source_col].where(at_first_time)

In [169]:
# df4[df4['putlbb'].notna()].head()

In [170]:
# Flag, for every date, the row holding that date's latest quote_datetime
is_last_row = df4.groupby('date')['quote_datetime'].transform('idxmax') == df4.index

# Rows whose clock time equals 'first_time', keyed by date
first_time_rows = df4[df4['quote_datetime'].dt.time == df4['first_time']].set_index('date')

# Per-date put10 / call10 / spot10 values taken from those rows
lbb_values = first_time_rows['put10']
lbb3_values = first_time_rows['call10']
lbb3_values1 = first_time_rows['spot10']

# Copy each date's values onto that date's last row
for target_col, per_date_values in (('put10', lbb_values), ('call10', lbb3_values), ('spot10', lbb3_values1)):
    df4.loc[is_last_row, target_col] = df4['date'].map(per_date_values)

In [171]:
df4

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10
0,2018-01-05 11:46:00,2735.0,1.55,3.0,1.35,2.80,2733.44,2733.64,2733.43,2733.64,...,2735.0,2018-01-05,1.45,2.90,4.35,2733.64,4.35,2.90,1.45,2733.64
1,2018-01-05 11:47:00,2735.0,1.55,3.1,1.40,2.90,2733.65,2733.79,2733.48,2733.52,...,2735.0,2018-01-05,1.45,2.90,4.35,2733.64,4.35,,,
2,2018-01-05 11:48:00,2735.0,1.55,3.0,1.35,2.85,2733.51,2733.61,2733.50,2733.57,...,2735.0,2018-01-05,1.45,2.90,4.35,2733.64,4.35,,,
3,2018-01-05 11:49:00,2735.0,1.50,3.1,1.35,2.90,2733.57,2733.61,2733.51,2733.54,...,2735.0,2018-01-05,1.45,2.90,4.35,2733.64,4.35,,,
4,2018-01-05 11:50:00,2735.0,1.40,3.2,1.25,2.95,2733.56,2733.63,2733.23,2733.23,...,2735.0,2018-01-05,1.45,2.90,4.35,2733.64,4.35,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54280,2024-12-30 15:26:00,5930.0,5.70,6.0,5.60,5.80,5932.16,5932.89,5930.09,5930.09,...,5930.0,2024-12-30,11.35,11.55,22.90,5929.15,11.55,,,
54281,2024-12-30 15:27:00,5930.0,5.00,6.7,4.90,6.50,5929.96,5930.75,5927.88,5927.93,...,5930.0,2024-12-30,11.35,11.55,22.90,5929.15,11.55,,,
54282,2024-12-30 15:28:00,5930.0,4.10,8.2,4.00,8.00,5928.09,5928.47,5925.30,5925.53,...,5930.0,2024-12-30,11.35,11.55,22.90,5929.15,11.55,,,
54283,2024-12-30 15:29:00,5930.0,3.30,10.4,3.20,10.30,5925.51,5925.78,5921.39,5922.39,...,5930.0,2024-12-30,11.35,11.55,22.90,5929.15,11.55,,,


In [172]:
# One row per date: the last row df4 holds for that date, renumbered from 0
df_trade2 = df4.groupby('date').tail(1).reset_index(drop=True)

In [173]:
# Count the distinct calendar days present in df_trade2
quote_days = df_trade2['quote_datetime'].dt.normalize()
num_unique_days = quote_days.nunique()

# Show the count
print(num_unique_days)

391


In [174]:
df_trade2[df_trade2['day_close'].isna()]

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10


In [175]:
df_trade2

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10
0,2018-01-05 15:44:00,2735.0,4.30,0.25,3.00,0.15,2739.31,2739.37,2738.82,2738.82,...,2735.0,2018-01-05,1.450,2.900,4.350,2733.64,2.9000,2.900,1.450,2733.64
1,2018-01-12 11:15:00,2780.0,2.15,3.50,2.05,3.30,2779.25,2779.31,2778.67,2778.70,...,2780.0,2018-01-12,3.550,2.225,5.775,2781.29,3.3375,2.225,3.550,2781.29
2,2018-01-17 15:30:00,2805.0,0.70,2.60,0.65,2.40,2804.02,2804.03,2802.72,2802.87,...,2805.0,2018-01-17,1.150,1.750,2.900,2804.11,2.6250,1.750,1.150,2804.11
3,2018-01-22 15:39:00,2825.0,1.35,0.90,1.30,0.70,2825.59,2825.60,2825.32,2825.32,...,2825.0,2018-01-22,2.175,2.850,5.025,2823.96,2.8500,2.850,2.175,2823.96
4,2018-01-26 15:31:00,2850.0,17.80,0.05,13.00,0.00,2865.81,2865.93,2865.26,2865.30,...,2850.0,2018-01-26,5.100,2.150,7.250,2852.76,2.1500,2.150,5.100,2852.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2024-12-16 15:30:00,6075.0,8.40,0.20,8.10,0.15,6083.82,6083.90,6082.99,6083.07,...,6075.0,2024-12-16,5.600,5.850,11.450,6074.19,5.8500,5.850,5.600,6074.19
387,2024-12-18 11:22:00,6065.0,8.70,19.70,8.50,19.60,6054.86,6054.96,6053.41,6053.47,...,6065.0,2024-12-18,14.850,12.550,27.400,6066.78,18.8250,12.550,14.850,6066.78
388,2024-12-20 13:16:00,5970.0,6.60,19.10,6.50,18.80,5962.12,5962.67,5957.33,5957.33,...,5970.0,2024-12-20,12.050,12.300,24.350,5968.86,18.4500,12.300,12.050,5968.86
389,2024-12-23 16:00:00,5945.0,31.50,0.05,21.50,0.00,5974.56,5975.68,5971.45,5972.23,...,5945.0,2024-12-23,9.550,10.500,20.050,5943.87,10.5000,10.500,9.550,5943.87


In [176]:
# On rows whose 'time' is 16:00 and whose 'day_close' is present, replace the
# call price with max(day_close - strike, 0); all other rows keep their call_price
settles_at_close = (df_trade2['time'] == pd.Timestamp('16:00').time()) & df_trade2['day_close'].notna()
call_intrinsic = np.maximum(df_trade2['day_close'] - df_trade2['strike'], 0)
df_trade2['call_price'] = np.where(settles_at_close, call_intrinsic, df_trade2['call_price'])

In [177]:
# On rows whose 'time' is 16:00 and whose 'day_close' is present, replace the
# put price with max(strike - day_close, 0); all other rows keep their put_price
settles_at_close = (df_trade2['time'] == pd.Timestamp('16:00').time()) & df_trade2['day_close'].notna()
put_intrinsic = np.maximum(df_trade2['strike'] - df_trade2['day_close'], 0)
df_trade2['put_price'] = np.where(settles_at_close, put_intrinsic, df_trade2['put_price'])

In [178]:
# Per-leg P&L: the value captured at 'first_time' (call10 / put10) minus the current price column
df_trade2['call_pnl'] = df_trade2['call10'].sub(df_trade2['call_price'])
df_trade2['put_pnl'] = df_trade2['put10'].sub(df_trade2['put_price'])
# Combined P&L of the two legs
df_trade2['straddle_pnl'] = df_trade2['put_pnl'].add(df_trade2['call_pnl'])

In [179]:
df_trade2

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10,call_pnl,put_pnl,straddle_pnl
0,2018-01-05 15:44:00,2735.0,4.30,0.25,3.00,0.15,2739.31,2739.37,2738.82,2738.82,...,2.900,4.350,2733.64,2.9000,2.900,1.450,2733.64,-2.200,2.700,0.500
1,2018-01-12 11:15:00,2780.0,2.15,3.50,2.05,3.30,2779.25,2779.31,2778.67,2778.70,...,2.225,5.775,2781.29,3.3375,2.225,3.550,2781.29,1.450,-1.175,0.275
2,2018-01-17 15:30:00,2805.0,0.70,2.60,0.65,2.40,2804.02,2804.03,2802.72,2802.87,...,1.750,2.900,2804.11,2.6250,1.750,1.150,2804.11,0.475,-0.750,-0.275
3,2018-01-22 15:39:00,2825.0,1.35,0.90,1.30,0.70,2825.59,2825.60,2825.32,2825.32,...,2.850,5.025,2823.96,2.8500,2.850,2.175,2823.96,0.850,2.050,2.900
4,2018-01-26 15:31:00,2850.0,17.80,0.05,13.00,0.00,2865.81,2865.93,2865.26,2865.30,...,2.150,7.250,2852.76,2.1500,2.150,5.100,2852.76,-10.300,2.125,-8.175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2024-12-16 15:30:00,6075.0,8.40,0.20,8.10,0.15,6083.82,6083.90,6082.99,6083.07,...,5.850,11.450,6074.19,5.8500,5.850,5.600,6074.19,-2.650,5.675,3.025
387,2024-12-18 11:22:00,6065.0,8.70,19.70,8.50,19.60,6054.86,6054.96,6053.41,6053.47,...,12.550,27.400,6066.78,18.8250,12.550,14.850,6066.78,6.250,-7.100,-0.850
388,2024-12-20 13:16:00,5970.0,6.60,19.10,6.50,18.80,5962.12,5962.67,5957.33,5957.33,...,12.300,24.350,5968.86,18.4500,12.300,12.050,5968.86,5.500,-6.650,-1.150
389,2024-12-23 16:00:00,5945.0,31.50,0.05,21.50,0.00,5974.56,5975.68,5971.45,5972.23,...,10.500,20.050,5943.87,10.5000,10.500,9.550,5943.87,-19.520,10.500,-9.020


In [180]:
# df_trade2['call_price_eod'] = (df_trade2['day_close', 'intraday_sma_20'] - df_trade2['strike']).clip(lower=0)

In [181]:
# Express the call leg with the opposite sign (call_price minus call10).
# Written as an explicit formula instead of `*= -1` so that re-running this
# cell yields the same values rather than flipping the sign back.
# NOTE(review): 'straddle_pnl' was computed before this flip and is not
# recomputed here - confirm that is intended.
df_trade2['call_pnl'] = df_trade2['call_price'] - df_trade2['call10']

In [182]:
# final_df = pd.concat([df3_isna1, df3_notna_call_above1, df3_notna_call_below1], ignore_index=True)

In [183]:
# Order the per-date rows chronologically and renumber them 0..n-1
df_trade2 = df_trade2.sort_values('quote_datetime', ignore_index=True)

In [184]:
df_trade2

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10,call_pnl,put_pnl,straddle_pnl
0,2018-01-05 15:44:00,2735.0,4.30,0.25,3.00,0.15,2739.31,2739.37,2738.82,2738.82,...,2.900,4.350,2733.64,2.9000,2.900,1.450,2733.64,2.200,2.700,0.500
1,2018-01-12 11:15:00,2780.0,2.15,3.50,2.05,3.30,2779.25,2779.31,2778.67,2778.70,...,2.225,5.775,2781.29,3.3375,2.225,3.550,2781.29,-1.450,-1.175,0.275
2,2018-01-17 15:30:00,2805.0,0.70,2.60,0.65,2.40,2804.02,2804.03,2802.72,2802.87,...,1.750,2.900,2804.11,2.6250,1.750,1.150,2804.11,-0.475,-0.750,-0.275
3,2018-01-22 15:39:00,2825.0,1.35,0.90,1.30,0.70,2825.59,2825.60,2825.32,2825.32,...,2.850,5.025,2823.96,2.8500,2.850,2.175,2823.96,-0.850,2.050,2.900
4,2018-01-26 15:31:00,2850.0,17.80,0.05,13.00,0.00,2865.81,2865.93,2865.26,2865.30,...,2.150,7.250,2852.76,2.1500,2.150,5.100,2852.76,10.300,2.125,-8.175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2024-12-16 15:30:00,6075.0,8.40,0.20,8.10,0.15,6083.82,6083.90,6082.99,6083.07,...,5.850,11.450,6074.19,5.8500,5.850,5.600,6074.19,2.650,5.675,3.025
387,2024-12-18 11:22:00,6065.0,8.70,19.70,8.50,19.60,6054.86,6054.96,6053.41,6053.47,...,12.550,27.400,6066.78,18.8250,12.550,14.850,6066.78,-6.250,-7.100,-0.850
388,2024-12-20 13:16:00,5970.0,6.60,19.10,6.50,18.80,5962.12,5962.67,5957.33,5957.33,...,12.300,24.350,5968.86,18.4500,12.300,12.050,5968.86,-5.500,-6.650,-1.150
389,2024-12-23 16:00:00,5945.0,31.50,0.05,21.50,0.00,5974.56,5975.68,5971.45,5972.23,...,10.500,20.050,5943.87,10.5000,10.500,9.550,5943.87,19.520,10.500,-9.020


In [185]:
# Keep rows with a non-negative call price (rows with a missing call price fail the test and are dropped)
df_trade2 = df_trade2.loc[df_trade2['call_price'].ge(0)]

In [186]:
# Keep rows with a non-negative put price (rows with a missing put price are dropped).
# .copy() makes the result an independent frame, so adding columns to df_trade2
# afterwards is not an assignment into a filtered slice.
df_trade2 = df_trade2[df_trade2['put_price'] >= 0].copy()

In [187]:
# df_trade2.head(6).to_csv('che.csv')

In [188]:
# df_filter.heed1['time'] = df_filtered1['quote_datetime'].dt.time

In [189]:
# df_trade2['straddle_pnl_bps'] =  (df_trade2['straddle_pnl']/ df_trade2['sell_spot'])*10000
# Leg P&L divided by 'sell_spot' and scaled by 10,000
df_trade2 = df_trade2.assign(
    put_pnl_bps=df_trade2['put_pnl'].div(df_trade2['sell_spot']).mul(10000),
    call_pnl_bps=df_trade2['call_pnl'].div(df_trade2['sell_spot']).mul(10000),
)

In [190]:
df_trade2.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'low_find', 'first_time',
       'straddle_exit', 'call_price', 'put_price', 'sell_strike', 'date',
       'sell_call', 'sell_put', 'sell_straddle', 'sell_spot',
       'adjusted_threshold', 'put10', 'call10', 'spot10', 'call_pnl',
       'put_pnl', 'straddle_pnl', 'put_pnl_bps', 'call_pnl_bps'],
      dtype='object')

In [191]:
# Columns carried into the reporting frame. Each label is listed once: the
# earlier list repeated 'gap_new931' and 'first_time', which produced duplicate
# column labels (selecting such a label returns a DataFrame, not a Series).
report_columns = [
    'quote_datetime', 'date', 'time', 'sell_strike',
    'low931', 'high931', 'open931', 'close931', 'Close', 'sell_spot',
    'gap_new931', 'realized_volatility_post30min', 'prev_close', 'PDL', 'PDH',
    'prev_day_neg', 'range', 'realized_volatility_full_day',
    'sma_50', 'sma_21', 'intraday_sma_11', 'below_sma_50',
    'day_close', 'intraday_sma_20',
    'sell_straddle', 'straddle_exit', 'straddle_pnl',
    'first_higher_high_time', 'first_time',
    'sell_put', 'put_price', 'put_pnl', 'put_pnl_bps',
    'sell_call', 'call_price', 'call_pnl', 'call_pnl_bps',
    'day_low_till_entry', 'low_find', 'put10', 'call10', 'spot10',
]

# .copy() gives df5 its own data so adding columns to it later is not an
# assignment into a slice of df_trade2
df5 = df_trade2[report_columns].copy()

In [192]:
# Count the distinct calendar days present in the reporting frame
num_unique_days = df5['quote_datetime'].dt.floor('D').nunique()

# Show the count
print(num_unique_days)

391


In [193]:
# Calendar year of each row's quote timestamp
df5 = df5.assign(year=df5['quote_datetime'].dt.year)

In [194]:
# df5[df5['Close_at_LBB'].isna()]

In [195]:
# df5 = df5[df5['below_sma_50'] == 'Yes']

In [196]:
# Keep only rows where close931 minus PDL is strictly positive
df5 = df5.loc[(df5['close931'] - df5['PDL']).gt(0)]

In [197]:
# Count the distinct calendar days left after the close931 / PDL filter
remaining_days = df5['quote_datetime'].dt.normalize()
num_unique_days = remaining_days.nunique()

# Show the count
print(num_unique_days)

329


In [198]:
# Keep a row when full-day realized volatility is above 0.1, or when it is
# at most 0.1 and 'prev_day_neg' equals "no"
rv_full_day = df5['realized_volatility_full_day']
rv_above_cutoff = rv_full_day > 0.1
rv_at_or_below_and_prev_not_neg = (rv_full_day <= 0.1) & (df5['prev_day_neg'] == "no")
filtered_df = df5[rv_above_cutoff | rv_at_or_below_and_prev_not_neg]

In [199]:
# Count the distinct calendar days present in filtered_df
num_unique_days = filtered_df['quote_datetime'].dt.normalize().nunique()

# Show the count
print(num_unique_days)

274


In [200]:
# One {'year', 'max_drawdown'} record per calendar year
yearly_drawdowns = []

for year, group in filtered_df.groupby('year'):
    # Running total of put P&L (bps) within the year, in row order
    cumulative_pnl = group['put_pnl_bps'].cumsum()

    # Drawdown: how far the running total sits below its highest value so far
    drawdown = cumulative_pnl.cummax() - cumulative_pnl

    # Largest drawdown observed in the year
    max_drawdown = drawdown.max()

    yearly_drawdowns.append({'year': year, 'max_drawdown': max_drawdown})

# Tabulate the per-year results
yearly_drawdowns_df = pd.DataFrame(yearly_drawdowns)

print(yearly_drawdowns_df)

   year  max_drawdown
0  2018       35.3942
1  2019       16.9882
2  2020       37.7834
3  2021       19.0788
4  2022      128.8162
5  2023       25.6276
6  2024       41.2156


In [201]:
# Write the filtered rows to an Excel workbook in the current working directory
filtered_df.to_excel('breakout_false_50_time230_17.xlsx')

In [201]:
# 279: low break occurred (translated note)

In [202]:
# 266

In [203]:
# rv< 0.1 then pdp, rv> 0.1