In [1]:
# Import libraries

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import glob
import os
import datetime
from datetime import timedelta

# Import plotly express for EF plot
# import plotly.express as px
# import plotly.graph_objects as go
# px.defaults.width, px.defaults.height = 1000,600
# Set precision
pd.set_option('display.precision', 4)

In [2]:
# Specify the columns you want to import
columns_to_import = ['quote_datetime', 'strike', 'option_type', 'bid', 'ask', 'underlying_bid', 'underlying_ask', 'DTE']

# Define the directory containing the CSV files
directory = r'min_dte1'

# Use glob to get all the CSV files in the directory
file_paths = glob.glob(os.path.join(directory, '*.csv'))

# Initialize a list to store DataFrames
dataframes = []

# Loop through the files
for file_path in file_paths:
    # Read the CSV file
    df = pd.read_csv(file_path, skipinitialspace=True, usecols=columns_to_import)
    dataframes.append(df)
    

# Concatenate all DataFrames into one
df_option = pd.concat(dataframes, ignore_index=True)

In [3]:
# df_option.to_csv('colab_option.csv')

In [4]:
# Convert 'quote_datetime' to datetime
df_option['quote_datetime'] = pd.to_datetime(df_option['quote_datetime'])

In [5]:
# # Calculate the number of unique days
# num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# # Display the result
# print(num_unique_days)

In [6]:
# Specify the columns you want to import
columns_to_import = ['Date_plus_1min', 'Open', 'High', 'Low', 'Close', 'prev_close', 'PDL', 'PDH', 'gap_new931', 'prev_day_neg', 'range', 'realized_volatility_post30min', 'realized_volatility_full_day', 'sma_50', 'sma_21', 'intraday_sma_11', 'below_sma_50', 'day_close', 'intraday_sma_20']

# Import only the OHLC columns
df_spot = pd.read_csv('E:\spot_file12.csv', usecols=columns_to_import, parse_dates=['Date_plus_1min'])

# Ensure the 'Date' column is in datetime format
df_spot['Date_plus_1min'] = pd.to_datetime(df_spot['Date_plus_1min'])

# Sort the DataFrame by the 'Date' column
df_spot = df_spot.sort_values(by='Date_plus_1min')

# Replace commas and convert columns to float
df_spot[['Close', 'prev_close', 'PDL', 'PDH', 'gap_new931', 'range', 'Open', 'High', 'Low', 'realized_volatility_post30min', 'realized_volatility_full_day', 'sma_50', 'sma_21', 'intraday_sma_11', 'day_close', 'intraday_sma_20']] = df_spot[['Close', 'prev_close', 'PDL', 'PDH', 'gap_new931', 'range', 'Open', 'High', 'Low', 'realized_volatility_post30min', 'realized_volatility_full_day', 'sma_50', 'sma_21', 'intraday_sma_11', 'day_close', 'intraday_sma_20']].replace(
    {',': ''}, regex=True).astype(float)

In [7]:
df_spot.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,range,realized_volatility_post30min,realized_volatility_full_day,sma_50,sma_21,below_sma_50,gap_new931,intraday_sma_11,intraday_sma_20
0,2683.73,2686.18,2683.73,2685.58,2018-01-02 09:31:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2685.58,2685.58
1,2685.6,2685.6,2684.3,2684.3,2018-01-02 09:32:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2684.94,2684.94
2,2684.32,2685.74,2684.23,2685.3,2018-01-02 09:33:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2685.06,2685.06
3,2685.18,2685.18,2683.73,2683.73,2018-01-02 09:34:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2684.7275,2684.7275
4,2683.8,2684.18,2683.05,2683.07,2018-01-02 09:35:00,2695.81,2673.61,2673.61,2692.12,yes,18.51,0.0378,0.0401,2620.0874,2665.839,No,11.97,2684.396,2684.396


In [8]:
df_spot['gap_new931931'] = df_spot['gap_new931'].where(df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time())
df_spot['gap_new931931'] = df_spot['gap_new931931'].ffill() 

In [9]:
df_spot['date1'] = df_spot['Date_plus_1min'].dt.date 

In [10]:
df_spot['high931'] = df_spot['High'].where(df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time())
df_spot['high931'] = df_spot['high931'].ffill() 

df_spot['low931'] = df_spot['Low'].where(df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time())
df_spot['low931'] = df_spot['low931'].ffill() 

df_spot['close931'] = df_spot['Close'].where(df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time())
df_spot['close931'] = df_spot['close931'].ffill() 

df_spot['open931'] = df_spot['Open'].where(df_spot['Date_plus_1min'].dt.time == pd.to_datetime('09:31:00').time())
df_spot['open931'] = df_spot['open931'].ffill()

In [11]:
df_option = df_option[df_option['DTE']==0]
del df_option['DTE']

In [12]:
# Filter rows where the time falls between 9:35 AM and 4:10 PM
start_time = pd.to_datetime('09:31:00').time()
end_time = pd.to_datetime('16:00:00').time()

# Apply the filter based on the time component of 'quote_datetime'
df_option = df_option[(df_option['quote_datetime'].dt.time >= start_time) & (df_option['quote_datetime'].dt.time <= end_time)]

In [13]:
# Group by date and calculate the number of unique times per date
df_option = df_option.groupby(df_option['quote_datetime'].dt.date).filter(
    lambda x: len(x['quote_datetime'].dt.time.unique()) == 390
)

In [14]:
# # Calculate the number of unique days
# num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# # Display the result
# print(num_unique_days)

In [15]:
# Pivot the DataFrame based on 'option_type'
df_option = df_option.pivot_table(index=['quote_datetime', 'strike', 'underlying_bid', 'underlying_ask'], 
                          columns='option_type', 
                          values=['bid', 'ask'], 
                          aggfunc='first')

# Flatten the column MultiIndex
df_option.columns = [f'{col[0]}_{col[1]}' for col in df_option.columns]

# Reset the index to turn it back into a standard DataFrame
df_option = df_option.reset_index()

In [16]:
df_option.columns

Index(['quote_datetime', 'strike', 'underlying_bid', 'underlying_ask', 'ask_C',
       'ask_P', 'bid_C', 'bid_P'],
      dtype='object')

In [17]:
# df1 = df.copy()

In [18]:
df_option = df_option.sort_values(by='quote_datetime')

In [19]:
# Extract date and time components for filtering
df_spot['date'] = df_spot['Date_plus_1min'].dt.date
df_spot['time'] = df_spot['Date_plus_1min'].dt.time

In [20]:
def find_first_high_after_noon(df):
    # Ensure the 'time' column is in datetime format
    #df['time'] = pd.to_datetime(df['time'])
    
    # Define the time ranges
    before_noon = pd.Timestamp('10:32').time()
    start_time = pd.Timestamp('10:32').time()
    end_time = pd.Timestamp('14:00').time()
    start_filter_time = pd.Timestamp('09:32').time()
    
    # List to store results
    results = []

    # Group by each date
    for date, group in df.groupby('date'):
        # Filter rows before 12:00
        before_noon_group = group[(group['time'] >= start_filter_time) & (group['time'] < before_noon)]
        
        if not before_noon_group.empty:
            # Find the max 'High' before 12:00
            max_high_before_noon = before_noon_group['High'].max()
            
            # Filter rows between 12:00 and 14:00
            noon_to_two_group = group[(group['time'] >= start_time) & (group['time'] <= end_time)]
            
            # Find the first row where 'High' exceeds max_high_before_noon
            higher_high = noon_to_two_group[noon_to_two_group['High'] > max_high_before_noon]
            
            if not higher_high.empty:
                # Get the first timestamp where the condition is met
                first_higher_high_time = higher_high['time'].iloc[0]
                results.append({'date': date, 'first_higher_high_time': first_higher_high_time})
            # If no higher high, skip the group (do nothing)
    
    # Create a DataFrame from the results
    result_df = pd.DataFrame(results)
    
    # Merge back with the original DataFrame if needed
    df = df.merge(result_df, on='date', how='left')
    
    return df

In [21]:
#Example usage
df_spot = find_first_high_after_noon(df_spot)

In [22]:
def calculate_day_low_till_entry(df):
    df['day_low_till_entry'] = np.nan
    
    for date, group in df.groupby('date'):
        # Filter rows before the specific time for the date
        before_group = group[group['time'] < group['first_higher_high_time']]
        
        if not before_group.empty:
            # Find the min 'Low' before the specified time
            min_low = before_group['Low'].min()
            df.loc[group.index, 'day_low_till_entry'] = min_low
    
    return df

In [23]:
df_spot = calculate_day_low_till_entry(df_spot)

In [24]:
df_spot.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,gap_new931931,date1,high931,low931,close931,open931,date,time,first_higher_high_time,day_low_till_entry
0,2683.73,2686.18,2683.73,2685.58,2018-01-02 09:31:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:31:00,10:37:00,2682.36
1,2685.6,2685.6,2684.3,2684.3,2018-01-02 09:32:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:32:00,10:37:00,2682.36
2,2684.32,2685.74,2684.23,2685.3,2018-01-02 09:33:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:33:00,10:37:00,2682.36
3,2685.18,2685.18,2683.73,2683.73,2018-01-02 09:34:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:34:00,10:37:00,2682.36
4,2683.8,2684.18,2683.05,2683.07,2018-01-02 09:35:00,2695.81,2673.61,2673.61,2692.12,yes,...,11.97,2018-01-02,2686.18,2683.73,2685.58,2683.73,2018-01-02,09:35:00,10:37:00,2682.36


In [25]:
# Calculate rolling mean (SMA) and standard deviation for each date
df_spot['Rolling_MA'] = (
    df_spot.groupby(df_spot['Date_plus_1min'].dt.date)['Close']
    .transform(lambda x: x.rolling(window=20, min_periods=1).mean())
)

df_spot['Rolling_STD'] = (
    df_spot.groupby(df_spot['Date_plus_1min'].dt.date)['Close']
    .transform(lambda x: x.rolling(window=20, min_periods=1).std())
)

# Calculate Upper and Lower Bollinger Bands
df_spot['Upper_Band'] = df_spot['Rolling_MA'] + 2 * df_spot['Rolling_STD']
df_spot['Lower_Band'] = df_spot['Rolling_MA'] - 2 * df_spot['Rolling_STD']
df_spot['Lower_Band3'] = df_spot['Rolling_MA'] - 3 * df_spot['Rolling_STD']

In [26]:
def low_find(df):
    # Ensure 'low_find' is initialized as False
    df['low_find'] = False

    # Group by each date
    for date, group in df.groupby('date'):
        # Check if 'first_higher_high_time' is NaN for the group
        first_higher_high_time_str = group['first_higher_high_time'].iloc[0]
        if pd.isna(first_higher_high_time_str):
            continue  # Skip this group if 'first_higher_high_time' is NaN
        
        # Calculate the 10-minute offset
        ten_min_offset = pd.Timedelta(minutes=10)
        
        # Convert 'first_higher_high_time' to a datetime object
        first_higher_high_time = pd.to_datetime(str(date) + ' ' + str(first_higher_high_time_str))
        
        # Filter rows within the 10-minute window after the first higher high time
        before_noon_group = group[(group['Date_plus_1min'] > first_higher_high_time) & 
                                  (group['Date_plus_1min'] <= first_higher_high_time + ten_min_offset)]
        
        # Identify the low candle time within the specified range
        low_candle = group.loc[(group['Date_plus_1min'] == first_higher_high_time), 'Low']
        
        if not low_candle.empty:
            # Print debug information
            # print(f"Date: {date}, First Higher High Time: {first_higher_high_time}")
            # print(f"Low Candle: {low_candle.values}")
        
            if not before_noon_group.empty:
                # Find the min 'Close' within the 10-minute window
                max_high_before_noon = before_noon_group['Close'].min()
                # print(f"Max High Before Noon: {max_high_before_noon}")
                
                # Update 'low_find' based on the condition
                df.loc[group.index, 'low_find'] = low_candle.values[0] > max_high_before_noon
    
    return df

In [27]:
df_spot = low_find(df_spot)

In [28]:
df_spot.head()   

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find
0,2683.73,2686.18,2683.73,2685.58,2018-01-02 09:31:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,,True
1,2685.6,2685.6,2684.3,2684.3,2018-01-02 09:32:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,09:32:00,10:37:00,2682.36,2684.94,0.9051,2686.7502,2683.1298,2682.2247,True
2,2684.32,2685.74,2684.23,2685.3,2018-01-02 09:33:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,09:33:00,10:37:00,2682.36,2685.06,0.6729,2686.4058,2683.7142,2683.0413,True
3,2685.18,2685.18,2683.73,2683.73,2018-01-02 09:34:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,09:34:00,10:37:00,2682.36,2684.7275,0.8626,2686.4527,2683.0023,2682.1397,True
4,2683.8,2684.18,2683.05,2683.07,2018-01-02 09:35:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,09:35:00,10:37:00,2682.36,2684.396,1.0524,2686.5008,2682.2912,2681.2388,True


In [29]:
df_spot1 = df_spot.copy()

In [30]:
df_spot1 = df_spot1[df_spot1['first_higher_high_time'].notna()]

In [31]:
df_spot1 = df_spot1[df_spot1['time'] >= df_spot1['first_higher_high_time']]

In [32]:
df_spot1.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find
66,2692.09,2692.39,2692.06,2692.33,2018-01-02 10:37:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:37:00,10:37:00,2682.36,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True
67,2692.35,2692.35,2690.16,2690.16,2018-01-02 10:38:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:38:00,10:37:00,2682.36,2691.016,0.6866,2692.3891,2689.6429,2688.9563,True
68,2690.07,2690.07,2688.18,2688.76,2018-01-02 10:39:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:39:00,10:37:00,2682.36,2690.9205,0.8505,2692.6215,2689.2195,2688.369,True
69,2688.77,2689.92,2688.75,2689.84,2018-01-02 10:40:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:40:00,10:37:00,2682.36,2690.8935,0.8767,2692.6469,2689.1401,2688.2633,True
70,2689.82,2690.07,2689.47,2690.07,2018-01-02 10:41:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:41:00,10:37:00,2682.36,2690.876,0.8901,2692.6561,2689.0959,2688.2058,True


In [33]:
# Calculate the number of unique days
num_unique_days = df_spot1['Date_plus_1min'].dt.date.nunique()

# Display the result
print(num_unique_days)

1025


In [34]:
def find_lbb(df):
    filtered_df_list = []
    
    # Group by each date
    for date, group in df.groupby('date'):
        # Ensure 'quote_datetime' is sorted
        group = group.sort_values(by='Date_plus_1min')
                
        # Apply condition: spot < instrady_ma in the last 30 minutes
        condition = group['Close'] > group['Lower_Band']
        
        # Find the first row where the condition fails
        if condition.any():
            first_fail_idx = condition[~condition].index[0] if (~condition).any() else None
            
            # Keep rows up to the first failure in group_last_30
            if first_fail_idx is not None:
                filtered_group = group.loc[first_fail_idx:]
            
            filtered_df_list.append(filtered_group)
    
    return pd.concat(filtered_df_list, ignore_index=True)
    
    # Concatenate all filtered DataFrames into a single DataFrame
    # return pd.concat(filtered_df_list, ignore_index=True) if filtered_df_list else pd.DataFrame()

In [35]:
df_spot1 = find_lbb(df_spot1)

In [36]:
df_spot1.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find
0,2690.07,2690.07,2688.18,2688.76,2018-01-02 10:39:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:39:00,10:37:00,2682.36,2690.9205,0.8505,2692.6215,2689.2195,2688.369,True
1,2688.77,2689.92,2688.75,2689.84,2018-01-02 10:40:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:40:00,10:37:00,2682.36,2690.8935,0.8767,2692.6469,2689.1401,2688.2633,True
2,2689.82,2690.07,2689.47,2690.07,2018-01-02 10:41:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:41:00,10:37:00,2682.36,2690.876,0.8901,2692.6561,2689.0959,2688.2058,True
3,2690.06,2690.75,2689.98,2690.28,2018-01-02 10:42:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:42:00,10:37:00,2682.36,2690.88,0.887,2692.6541,2689.1059,2688.2189,True
4,2690.28,2690.46,2689.85,2689.94,2018-01-02 10:43:00,2695.81,2673.61,2673.61,2692.12,yes,...,2018-01-02,10:43:00,10:37:00,2682.36,2690.8515,0.9085,2692.6684,2689.0346,2688.1261,True


In [37]:
def find_close_high(df):
    filtered_df_list = []
    
    # Group by each date
    for date, group in df.groupby('date'):
        # Ensure 'Date_plus_1min' is sorted
        group = group.sort_values(by='Date_plus_1min')
        
        # Filter rows based on 'first_higher_high_time' condition
        group = group[group['Date_plus_1min'].dt.time >= group['first_higher_high_time'].iloc[0]]
        
        # Iterate through rows to find the first instance where Close > prev Close
        for i in range(1, len(group)):  # Start from the second row
            if group['Close'].iloc[i] > group['Close'].iloc[i - 1]:
                # Filter from this row onward
                filtered_group = group.iloc[i:]
                filtered_df_list.append(filtered_group)
                break  # Stop after finding the first instance
    
    # Concatenate all filtered DataFrames into a single DataFrame
    return pd.concat(filtered_df_list, ignore_index=True) if filtered_df_list else pd.DataFrame()

In [38]:
df_spot1 = find_close_high(df_spot1)

In [39]:
df_spot1['first_time'] = df_spot1.groupby('date')['time'].transform('first').ffill()

In [40]:
df_spot1.head()

Unnamed: 0,Open,High,Low,Close,Date_plus_1min,day_close,prev_close,PDL,PDH,prev_day_neg,...,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find,first_time
0,2688.77,2689.92,2688.75,2689.84,2018-01-02 10:40:00,2695.81,2673.61,2673.61,2692.12,yes,...,10:40:00,10:37:00,2682.36,2690.8935,0.8767,2692.6469,2689.1401,2688.2633,True,10:40:00
1,2689.82,2690.07,2689.47,2690.07,2018-01-02 10:41:00,2695.81,2673.61,2673.61,2692.12,yes,...,10:41:00,10:37:00,2682.36,2690.876,0.8901,2692.6561,2689.0959,2688.2058,True,10:40:00
2,2690.06,2690.75,2689.98,2690.28,2018-01-02 10:42:00,2695.81,2673.61,2673.61,2692.12,yes,...,10:42:00,10:37:00,2682.36,2690.88,0.887,2692.6541,2689.1059,2688.2189,True,10:40:00
3,2690.28,2690.46,2689.85,2689.94,2018-01-02 10:43:00,2695.81,2673.61,2673.61,2692.12,yes,...,10:43:00,10:37:00,2682.36,2690.8515,0.9085,2692.6684,2689.0346,2688.1261,True,10:40:00
4,2689.94,2690.14,2689.89,2690.12,2018-01-02 10:44:00,2695.81,2673.61,2673.61,2692.12,yes,...,10:44:00,10:37:00,2682.36,2690.8385,0.9174,2692.6732,2689.0038,2688.0864,True,10:40:00


In [41]:
# df_spot1[df_spot1['date'] == pd.to_datetime('2022-04-04').date()]

In [42]:
df_option = pd.merge(df_option, df_spot, left_on = 'quote_datetime', right_on = 'Date_plus_1min', how = 'left')

In [43]:
df_option = df_option[df_option['first_higher_high_time'].notna()]

In [44]:
# Calculate the number of unique days
num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

772


In [45]:
# Check if column3 lies between 0.99 * column1 and 1.01 * column2
mask = (df_option['Close'] >= 0.99 * df_option['underlying_bid']) & (df_option['Close'] <= 1.01 * df_option['underlying_ask'])

# You can apply this mask to filter the DataFrame or add it as a new column
df_option['in_range'] = mask

# To filter rows where column3 satisfies the condition
df_option = df_option[mask]

In [46]:
df_option[df_option['in_range']==False]

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find,in_range


In [47]:
# Group by date and calculate the number of unique times per date
df_option = df_option.groupby(df_option['quote_datetime'].dt.date).filter(
    lambda x: len(x['quote_datetime'].dt.time.unique()) == 390
)

In [48]:
# df_option = df_option[df_option['low_find'] == False]

In [49]:
# Calculate the number of unique days
num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

771


In [50]:
# df2 = df.copy()

In [51]:
# # Calculate the number of unique days
# num_unique_days = df['quote_datetime'].dt.date.nunique()

# # Display the result
# print(num_unique_days)

In [52]:
# Filter the DataFrame for '2020-04-28'
filtered_df = df_option[df_option['quote_datetime'].dt.date == pd.to_datetime('2022-08-30').date()]

# Extract the time part and get the unique times
unique_times = filtered_df['quote_datetime'].dt.time.unique()

# Display the unique times
print(len(unique_times))

0


In [53]:
del df_option['in_range']

In [54]:
df_option.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,date,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find
0,2018-01-02 09:31:00,2635.0,2683.49,2687.85,52.3,0.05,48.2,0.0,2683.73,2686.18,...,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,,True
1,2018-01-02 09:31:00,2765.0,2683.49,2687.85,0.05,82.5,0.0,76.2,2683.73,2686.18,...,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,,True
2,2018-01-02 09:31:00,2760.0,2683.49,2687.85,0.05,77.5,0.0,71.2,2683.73,2686.18,...,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,,True
3,2018-01-02 09:31:00,2755.0,2683.49,2687.85,0.05,72.5,0.0,66.2,2683.73,2686.18,...,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,,True
4,2018-01-02 09:31:00,2750.0,2683.49,2687.85,0.05,67.5,0.0,61.2,2683.73,2686.18,...,2018-01-02,09:31:00,10:37:00,2682.36,2685.58,,,,,True


In [55]:
df_option['date'] = df_option['Date_plus_1min'].dt.date 

In [56]:
# #Read the CSV file
# dates = pd.read_excel('E:\marker34date.xlsx')

# #Extract the 'Date' column (replace 'Date' with the actual column name in your CSV)
# dates_to_plot = dates['date'].astype(str).tolist()

In [57]:
# dates_to_plot = pd.to_datetime(dates_to_plot).date
# # Filter the DataFrame for the specified dates
# df_option = df_option[df_option['date'].isin(dates_to_plot)]

In [58]:
# # df2 = df2[df2['date'] >= pd.to_datetime('2021-01-01').date()]
del df_option['date']

In [59]:
df_option = df_option[df_option['quote_datetime'].dt.date != pd.to_datetime('2020-03-09').date()]

In [60]:
df_option = df_option[df_option['quote_datetime'].dt.date != pd.to_datetime('2020-11-09').date()]

In [61]:
# Calculate the number of unique days
num_unique_days = df_option['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [62]:
df_option = df_option.reset_index(drop = True)

In [63]:
df_option['first_higher_high_time_10'] = (pd.to_datetime(df_option['first_higher_high_time'].astype(str)) 
                              + pd.Timedelta(minutes=10)).dt.time

In [64]:
df_option.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,time,first_higher_high_time,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find,first_higher_high_time_10
0,2018-01-02 09:31:00,2635.0,2683.49,2687.85,52.3,0.05,48.2,0.0,2683.73,2686.18,...,09:31:00,10:37:00,2682.36,2685.58,,,,,True,10:47:00
1,2018-01-02 09:31:00,2765.0,2683.49,2687.85,0.05,82.5,0.0,76.2,2683.73,2686.18,...,09:31:00,10:37:00,2682.36,2685.58,,,,,True,10:47:00
2,2018-01-02 09:31:00,2760.0,2683.49,2687.85,0.05,77.5,0.0,71.2,2683.73,2686.18,...,09:31:00,10:37:00,2682.36,2685.58,,,,,True,10:47:00
3,2018-01-02 09:31:00,2755.0,2683.49,2687.85,0.05,72.5,0.0,66.2,2683.73,2686.18,...,09:31:00,10:37:00,2682.36,2685.58,,,,,True,10:47:00
4,2018-01-02 09:31:00,2750.0,2683.49,2687.85,0.05,67.5,0.0,61.2,2683.73,2686.18,...,09:31:00,10:37:00,2682.36,2685.58,,,,,True,10:47:00


In [65]:
# Calculate the number of unique days
num_unique_days = df_spot1['Date_plus_1min'].dt.date.nunique()

# Display the result
print(num_unique_days)

1025


In [66]:
df_option = pd.merge(df_option, df_spot1[['Date_plus_1min', 'first_time']], left_on = 'quote_datetime', right_on = 'Date_plus_1min', how = 'left')

In [67]:
# Filter rows where the time falls between 9:35 AM and 4:10 PM
# start_time = pd.to_datetime('09:32:00').time()
end_time = pd.to_datetime('16:00:00').time()

# Apply the filter based on the time component of 'quote_datetime'
df3 = df_option[(df_option['time'] >= df_option['first_higher_high_time_10']) & (df_option['quote_datetime'].dt.time <= end_time)]

In [68]:
df3.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,day_low_till_entry,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find,first_higher_high_time_10,Date_plus_1min_y,first_time
1782,2018-01-02 10:37:00,2710.0,2691.83,2692.81,0.1,20.5,0.05,15.7,2692.09,2692.39,...,2682.36,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,
1783,2018-01-02 10:37:00,2715.0,2691.83,2692.81,0.05,25.5,0.0,20.6,2692.09,2692.39,...,2682.36,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,
1784,2018-01-02 10:37:00,2720.0,2691.83,2692.81,0.05,30.5,0.0,25.1,2692.09,2692.39,...,2682.36,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,
1785,2018-01-02 10:37:00,2725.0,2691.83,2692.81,0.05,35.5,0.0,30.1,2692.09,2692.39,...,2682.36,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,
1786,2018-01-02 10:37:00,2730.0,2691.83,2692.81,0.05,40.5,0.0,35.1,2692.09,2692.39,...,2682.36,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,


In [69]:
df3['atm'] = (df3['strike'] + df3['bid_C'] - df3['ask_P']).apply(lambda x: 5 * round(x / 5) if not np.isnan(x) else np.nan)

In [70]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [71]:
# df3[df3['date'] == pd.to_datetime('2022-04-04').date()]

In [72]:
df3.head()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,Rolling_MA,Rolling_STD,Upper_Band,Lower_Band,Lower_Band3,low_find,first_higher_high_time_10,Date_plus_1min_y,first_time,atm
1782,2018-01-02 10:37:00,2710.0,2691.83,2692.81,0.1,20.5,0.05,15.7,2692.09,2692.39,...,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,,2690
1783,2018-01-02 10:37:00,2715.0,2691.83,2692.81,0.05,25.5,0.0,20.6,2692.09,2692.39,...,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,,2690
1784,2018-01-02 10:37:00,2720.0,2691.83,2692.81,0.05,30.5,0.0,25.1,2692.09,2692.39,...,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,,2690
1785,2018-01-02 10:37:00,2725.0,2691.83,2692.81,0.05,35.5,0.0,30.1,2692.09,2692.39,...,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,,2690
1786,2018-01-02 10:37:00,2730.0,2691.83,2692.81,0.05,40.5,0.0,35.1,2692.09,2692.39,...,2691.0175,0.6846,2692.3868,2689.6482,2688.9636,True,10:47:00,NaT,,2690


In [73]:
df3['straddle_exit'] = (df3['bid_C'] + df3['ask_C'] + df3['bid_P'] + df3['ask_P'])/2

In [74]:
df3['abs_diff'] = abs((df3['bid_C'] + df3['ask_C'] - df3['bid_P'] - df3['ask_P'])/2)

In [75]:
df3['call_price'] = (df3['bid_C'] + df3['ask_C'])/2
df3['put_price'] = (df3['bid_P'] + df3['ask_P'])/2

In [76]:
#df3 = df3[df3['below_sma_50'] == 'No']

In [77]:
# df3 = df3[df3['quote_datetime'].dt.date == pd.to_datetime('2024-12-04').date()] 

In [78]:
# def process_group(group):
#     # Filter rows for 15:30
#     group_1530 = group[group['quote_datetime'].dt.time == pd.to_datetime('15:30:00').time()]
    
#     if group_1530.empty:
#         return group  # No 15:30 rows, return original group

#     # Get unique ATM values at 15:30
#     unique_atms = group_1530['atm'].unique()

#     # Define the range based on 'Close' at 15:30
#     close_value = group_1530['Close'].iloc[0]
#     lower_bound = close_value - 15
#     upper_bound = close_value + 15

#     # Filter unique_atms within range
#     filtered_atms = [atm for atm in unique_atms if lower_bound <= atm <= upper_bound]

#     # Filter 15:30 rows by strike and straddle_exit
#     matching_rows = group_1530[(group_1530['strike'].isin(filtered_atms)) & (group_1530['straddle_exit'] > 1)]
    
#     if not matching_rows.empty:
#         # Sort by abs_diff and select the row with the smallest value
#         selected_row = matching_rows.loc[matching_rows['abs_diff'].idxmin()]
#     else:
#         # Adjust the first 15:30 row's values if no match
#         first_row = group_1530.iloc[0]
#         group.loc[first_row.name, 'straddle_exit'] -= abs(first_row['strike'] - first_row['atm'])
#         group.loc[first_row.name, 'strike'] = first_row['atm']
#         selected_row = group.loc[first_row.name]

#     # Remove all 15:30 rows from the group
#     group = group.drop(group_1530.index)

#     # Append the selected row
#     group = pd.concat([group, pd.DataFrame([selected_row])], ignore_index=True)

#     return group

In [79]:
def process_group(group):
    # Step 1: Filter rows where the time is 15:30
    group_1530 = group[group['quote_datetime'].dt.time == group['first_higher_high_time_10']]
    
    # Step 2: Get a list of unique ATM values at 15:30
    unique_atms = group_1530['atm'].unique()
    
    # Step 3: Define the range based on 'Close' at 15:30
    if not group_1530.empty:
        close_value = group_1530['Close'].iloc[0]
        lower_bound = close_value - 15
        upper_bound = close_value + 15

        # Step 4: Trim unique_atms to include only those within the ±50 range
        unique_atms = [atm for atm in unique_atms if lower_bound <= atm <= upper_bound]
    
    # Step 3: Filter rows where strike is in unique_atms
    matching_rows = group_1530[(group_1530['strike'].isin(unique_atms)) & (group_1530['straddle_exit'] > 1)]
    
    # Step 4: If there are matching rows
    if not matching_rows.empty:
        # Sort by abs_diff to get the minimum value, then drop duplicates keeping the first occurrence
        matching_rows = matching_rows.sort_values('abs_diff').head(1)
        selected_rows = matching_rows
    else:
        # Step 5: If no match, adjust the first 15:30 row's strike and straddle_exit for each ATM
        if not group_1530.empty:
            first_row_idx = group_1530.index[0]
            group.loc[first_row_idx, 'straddle_exit'] -= abs(group_1530['strike'].iloc[0] - group_1530['atm'].iloc[0])
            group.at[group.index[0], 'call_price'] += group['strike'].iloc[0] - group['atm'].iloc[0]
            group.at[group.index[0], 'put_price'] += group['atm'].iloc[0] - group['strike'].iloc[0]
            group.loc[first_row_idx, 'strike'] = group_1530['atm'].iloc[0]
            selected_rows = group.loc[[first_row_idx]]
        else:
            selected_rows = pd.DataFrame(columns=group.columns)  # Empty DataFrame if no rows are available

    # Drop all 15:30 rows from the original group
    group = group.drop(group_1530.index)

    # Use pd.concat to combine the original group with the selected rows
    group = pd.concat([group, selected_rows], ignore_index=True)

    # Return the modified group
    return group

In [80]:
# Apply the function to each date group
df3 = df3.groupby(df3['quote_datetime'].dt.date, group_keys=False).apply(process_group)

In [81]:
# df3 = df3.sort_values(by='quote_datetime').reset_index(drop=True)

# chunks = []
# chunk_size = 10**6  # Adjust chunk size based on available memory
# for i in range(0, len(df3), chunk_size):
#     chunk = df3.iloc[i:i+chunk_size].sort_values(by='quote_datetime')
#     chunks.append(chunk)

# df3_sorted = pd.concat(chunks).sort_values(by='quote_datetime').reset_index(drop=True)

In [82]:
df3.tail()

Unnamed: 0,quote_datetime,strike,underlying_bid,underlying_ask,ask_C,ask_P,bid_C,bid_P,Open,High,...,Lower_Band3,low_find,first_higher_high_time_10,Date_plus_1min_y,first_time,atm,straddle_exit,abs_diff,call_price,put_price
16401,2024-12-05 16:00:00,6000.0,6074.91,6076.89,81.5,0.05,71.5,0.0,6074.3,6076.01,...,6069.5669,False,12:57:00,2024-12-05 16:00:00,13:21:00,6070,76.525,76.475,76.5,0.025
16402,2024-12-05 16:00:00,5995.0,6074.91,6076.89,86.5,0.05,76.5,0.0,6074.3,6076.01,...,6069.5669,False,12:57:00,2024-12-05 16:00:00,13:21:00,6070,81.525,81.475,81.5,0.025
16403,2024-12-05 16:00:00,5990.0,6074.91,6076.89,91.5,0.05,81.5,0.0,6074.3,6076.01,...,6069.5669,False,12:57:00,2024-12-05 16:00:00,13:21:00,6070,86.525,86.475,86.5,0.025
16404,2024-12-05 16:00:00,6025.0,6074.91,6076.89,53.2,0.05,48.0,0.0,6074.3,6076.01,...,6069.5669,False,12:57:00,2024-12-05 16:00:00,13:21:00,6075,50.625,50.575,50.6,0.025
16405,2024-12-05 12:47:00,6095.0,6091.15,6094.15,3.3,5.1,3.2,5.0,6091.86,6092.57,...,6088.7319,False,12:57:00,NaT,,6095,8.3,1.8,3.25,5.05


In [83]:
df3['sell_strike'] = df3['strike'].where(df3['quote_datetime'].dt.time == df3['first_higher_high_time_10'])
df3['sell_strike'] = df3['sell_strike'].bfill()  

In [84]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [85]:
# def update_straddle_and_filter(group):
#     # Check if there are matching rows
#     sell_strike = group['sell_strike'].iloc[0]
#     matching_rows = group[group['strike'] == sell_strike]

#     if matching_rows.empty:
#         # Directly update the first row
#         group.iloc[0, group.columns.get_loc('straddle_exit')] -= abs(group['strike'].iloc[0] - sell_strike)
#         group.iloc[0, group.columns.get_loc('call_price')] += group['strike'].iloc[0] - sell_strike
#         group.iloc[0, group.columns.get_loc('put_price')] += sell_strike - group['strike'].iloc[0]
#         group.iloc[0, group.columns.get_loc('strike')] = sell_strike
#         # Keep only the updated first row
#         return group.iloc[[0]]
#     else:
#         # Return only the matching rows
#         return matching_rows

In [86]:
# yeh baad ke strikes theek krha
def update_straddle_and_filter(group):
    # Step 1: Filter rows where 'strike' matches the 'strike' value at iloc[0]
    matching_rows = group[group['strike'] == group['sell_strike'].iloc[0]]
    
    # Step 2: If there are no matching rows, update the first row's 'straddle_exit' and 'strike'
    if matching_rows.empty:
        group.at[group.index[0], 'straddle_exit'] -= abs(group['strike'].iloc[0] - group['sell_strike'].iloc[0])
        group.at[group.index[0], 'call_price'] += group['strike'].iloc[0] - group['sell_strike'].iloc[0]
        group.at[group.index[0], 'put_price'] += group['sell_strike'].iloc[0] - group['strike'].iloc[0]
        group.at[group.index[0], 'strike'] = group['sell_strike'].iloc[0]
        # Select only the updated first row
        selected_rows = group.loc[[group.index[0]]]
    else:
        # If matches are found, use the matching rows
        selected_rows = matching_rows
    
    # Return only the filtered or updated rows
    return selected_rows

# # Apply the function to each group of the same datetime and reset the index
# df3 = df3.groupby('quote_datetime').apply(update_straddle_and_filter).reset_index(drop=True)

In [87]:
# # Get the unique dates
# unique_dates = df3['quote_datetime'].dt.date.unique()

# # Initialize a list to store processed results
# processed_chunks = []

# # Process data for each unique date
# for date in unique_dates:
#     # Filter rows for the current date
#     daily_data = df3[df3['quote_datetime'].dt.date == date]
    
#     # Group by 'quote_datetime' (time within the day) and apply the function
#     processed_chunk = daily_data.groupby('quote_datetime', group_keys=False).apply(update_straddle_and_filter)
#     processed_chunk = processed_chunk.sort_values(by='quote_datetime').reset_index(drop=True)
    
#     # Append the processed data for the current date
#     processed_chunks.append(processed_chunk)

# # Concatenate all processed chunks
# df3 = pd.concat(processed_chunks, ignore_index=True)

In [88]:
# df3 = df3.groupby('quote_datetime', group_keys=False).apply(update_straddle_and_filter)

In [89]:
df3 = df3.groupby('quote_datetime').apply(update_straddle_and_filter).reset_index(drop=True)

In [90]:
columns_to_drop = ['underlying_bid', 'underlying_ask','abs_diff', 'Date_plus_1min_x', 'Date_plus_1min_y', 'atm']
df3 = df3.drop(columns=columns_to_drop)

In [91]:
df3.isna().any().any()

True

In [92]:
df3['date'] = df3['quote_datetime'].dt.date

In [93]:
# df3.to_csv('options_data_eod_fianl1.csv')

In [94]:
# df3.head(50)

In [95]:
df3 = df3.sort_values(by='quote_datetime').reset_index(drop=True)

In [96]:
# df3 = df3.reset_index(drop=True)

In [97]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [98]:
# Identify dates where all `straddle_exit` values are >= 0 for every ticker
valid_dates = df3.groupby('date')['straddle_exit'].apply(lambda x: (x >= 0).all())

In [99]:
# Filter `df_atm` to keep only rows from the valid dates
df3 = df3[df3['date'].isin(valid_dates.index)]

In [100]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [101]:
df3['time'] = df3['quote_datetime'].dt.time

In [102]:
df3['sell_call'] = df3['call_price'].where(df3['quote_datetime'].dt.time == df3['first_higher_high_time_10'])
df3['sell_call'] = df3['sell_call'].ffill()   

In [103]:
df3['sell_put'] = df3['put_price'].where(df3['quote_datetime'].dt.time == df3['first_higher_high_time_10'])
df3['sell_put'] = df3['sell_put'].ffill()   

In [104]:
df3 = df3[df3['sell_call'] > 0]

In [105]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [106]:
# Iterate through groups by 'date'
for date, group in df3.groupby('date'):
    group_updated = group.copy()
    
    # Keep iterating until all zeros are replaced or no progress is made
    while (group_updated['call_price'] == 0).any():
        for idx in group_updated[group_updated['call_price'] == 0].index:  # Use the original index
            # Find the previous and next rows within the group
            prev_rows = group_updated.loc[:idx].iloc[:-1]  # Previous rows
            next_rows = group_updated.loc[idx:].iloc[1:]  # Next rows

            prev_row = prev_rows.iloc[-1:] if not prev_rows.empty else pd.DataFrame()
            next_row = next_rows.iloc[:1] if not next_rows.empty else pd.DataFrame()

            # Initialize replacement value
            replacement_value = None

            # Check conditions
            if not prev_row.empty and not next_row.empty:  # Both exist
                prev_price = prev_row['call_price'].values[0]
                next_price = next_row['call_price'].values[0]
                if prev_price != 0 and next_price != 0:
                    replacement_value = (prev_price + next_price) / 2  # Take the average
                elif prev_price != 0:
                    replacement_value = prev_price  # Use the non-zero previous price
                elif next_price != 0:
                    replacement_value = next_price  # Use the non-zero next price
            elif not prev_row.empty:  # Only previous row exists
                prev_price = prev_row['call_price'].values[0]
                if prev_price != 0:
                    replacement_value = prev_price  # Use previous price
            elif not next_row.empty:  # Only next row exists
                next_price = next_row['call_price'].values[0]
                if next_price != 0:
                    replacement_value = next_price  # Use next price

            # Update the call_price value if a replacement was found
            if replacement_value is not None:
                group_updated.loc[idx, 'call_price'] = replacement_value

        # Break the loop if no more replacements can be made
        if (group_updated['call_price'] == group['call_price']).all():
            break

    # Update the original DataFrame with the processed group
    df3.loc[group_updated.index, 'call_price'] = group_updated['call_price']

In [107]:
# Iterate through groups by 'date'
for date, group in df3.groupby('date'):
    group_updated = group.copy()
    
    # Keep iterating until all zeros are replaced or no progress is made
    while (group_updated['put_price'] == 0).any():
        for idx in group_updated[group_updated['put_price'] == 0].index:  # Use the original index
            # Find the previous and next rows within the group
            prev_rows = group_updated.loc[:idx].iloc[:-1]  # Previous rows
            next_rows = group_updated.loc[idx:].iloc[1:]  # Next rows

            prev_row = prev_rows.iloc[-1:] if not prev_rows.empty else pd.DataFrame()
            next_row = next_rows.iloc[:1] if not next_rows.empty else pd.DataFrame()

            # Initialize replacement value
            replacement_value = None

            # Check conditions
            if not prev_row.empty and not next_row.empty:  # Both exist
                prev_price = prev_row['put_price'].values[0]
                next_price = next_row['put_price'].values[0]
                if prev_price != 0 and next_price != 0:
                    replacement_value = (prev_price + next_price) / 2  # Take the average
                elif prev_price != 0:
                    replacement_value = prev_price  # Use the non-zero previous price
                elif next_price != 0:
                    replacement_value = next_price  # Use the non-zero next price
            elif not prev_row.empty:  # Only previous row exists
                prev_price = prev_row['put_price'].values[0]
                if prev_price != 0:
                    replacement_value = prev_price  # Use previous price
            elif not next_row.empty:  # Only next row exists
                next_price = next_row['put_price'].values[0]
                if next_price != 0:
                    replacement_value = next_price  # Use next price

            # Update the put_price value if a replacement was found
            if replacement_value is not None:
                group_updated.loc[idx, 'put_price'] = replacement_value

        # Break the loop if no more replacements can be made
        if (group_updated['put_price'] == group['put_price']).all():
            break

    # Update the original DataFrame with the processed group
    df3.loc[group_updated.index, 'put_price'] = group_updated['put_price']

In [108]:
# Calculate the number of unique days
num_unique_days = df3['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

770


In [109]:
df3.tail()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,low_find,first_higher_high_time_10,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put
222754,2024-12-05 15:56:00,6095.0,0.05,21.2,0.0,20.4,6076.28,6076.95,6073.77,6073.93,...,False,12:57:00,13:21:00,20.825,0.025,20.8,6095.0,2024-12-05,3.25,5.05
222755,2024-12-05 15:57:00,6095.0,0.05,20.6,0.0,19.7,6073.83,6075.21,6073.83,6074.32,...,False,12:57:00,13:21:00,20.175,0.025,20.15,6095.0,2024-12-05,3.25,5.05
222756,2024-12-05 15:58:00,6095.0,0.05,21.2,0.0,20.0,6074.2,6074.4,6072.9,6074.18,...,False,12:57:00,13:21:00,20.625,0.025,20.6,6095.0,2024-12-05,3.25,5.05
222757,2024-12-05 15:59:00,6095.0,0.05,20.8,0.0,19.3,6074.09,6074.95,6073.9,6074.13,...,False,12:57:00,13:21:00,20.075,0.025,20.05,6095.0,2024-12-05,3.25,5.05
222758,2024-12-05 16:00:00,6095.0,0.05,22.0,0.0,16.8,6074.3,6076.01,6073.06,6075.95,...,False,12:57:00,13:21:00,19.425,0.025,19.4,6095.0,2024-12-05,3.25,5.05


In [110]:
# df_atm[df_atm['quote_datetime'].dt.date == pd.to_datetime('2024-09-09').date()].to_csv('ch.csv')

In [111]:
# df3[df3['quote_datetime'].dt.date == pd.to_datetime('2024-09-09').date()].to_csv('ch1.csv')

In [112]:
# df3 = df_atm2.copy()

In [113]:
df3.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'Lower_Band3', 'low_find',
       'first_higher_high_time_10', 'first_time', 'straddle_exit',
       'call_price', 'put_price', 'sell_strike', 'date', 'sell_call',
       'sell_put'],
      dtype='object')

In [114]:
# df3['put_price'] /= 2
# df3['sell_put'] /= 2

In [115]:
df3['straddle_exit'] = df3['call_price'] + df3['put_price']

In [116]:
df3['sell_straddle'] = df3['straddle_exit'].where(df3['quote_datetime'].dt.time == df3['first_higher_high_time_10'])
df3['sell_straddle'] = df3['sell_straddle'].ffill()    

In [117]:
df3['sell_spot'] = df3['Close'].where(df3['quote_datetime'].dt.time == df3['first_higher_high_time_10'])
df3['sell_spot'] = df3['sell_spot'].ffill() 

In [118]:
df3.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'Lower_Band3', 'low_find',
       'first_higher_high_time_10', 'first_time', 'straddle_exit',
       'call_price', 'put_price', 'sell_strike', 'date', 'sell_call',
       'sell_put', 'sell_straddle', 'sell_spot'],
      dtype='object')

In [119]:
# df3 = df3[df3['close931'] > df3['sell_spot']]

In [120]:
# def exit_strategy(df):
#     filtered_df_list = []
    
#     # Group by each date
#     for date, group in df.groupby('date'):
#         # Step 1: Get the first 'Close' value for each date (first_close)
#         #first_close = group['Close'].iloc[0]
#         first_put = group['call_price'].iloc[0]
        
#         # Step 2: Calculate the condition and filter the rows accordingly
#         condition = group['call_price'] < 1.7 * first_put
        
#         # Step 3: Find the first row where the condition fails and filter accordingly
#         if condition.any():
#             first_fail_idx = condition[~condition].index[0] if (~condition).any() else None
            
#             # Keep rows before the first failure (inclusive of the row where it fails)
#             if first_fail_idx:
#                 filtered_group = group.loc[:first_fail_idx+1]
#             else:
#                 filtered_group = group  # Keep the entire group if the condition never fails
            
#             filtered_df_list.append(filtered_group)
    
#     return pd.concat(filtered_df_list, ignore_index=True)

In [121]:
# # Step 5: Apply the exit strategy
# df3 = exit_strategy(df3)

In [122]:
# def exit_strategy1(df):
#     filtered_df_list = []
    
#     # Group by each date
#     for date, group in df.groupby('date'):
#         # Step 1: Get the first 'put_price' and 'call_price' for the date
#         first_put = group['put_price'].iloc[0]
#         first_put = group['call_price'].iloc[0]
        
#         # Step 2: Define the stop-loss level
#         stop_loss_level = 1.7 * first_put
#         condition_level = 1.5 * first_put
        
#         # Step 3: Apply the stop-loss logic
#         stop_loss_triggered = False  # Track if the stop-loss condition has been hit
        
#         group['put_stop1_price'] = np.nan
#         group['put_stoploss_time1'] = np.nan
        
#         for i in range(len(group)):
#             # Stop-loss condition
#             if not stop_loss_triggered and group['put_price'].iloc[i] >= stop_loss_level:
#                 stop_loss_triggered = True
#                 stop_row_index = i
#                 group['put_stop1_price'] = stop_loss_level
#                 group['put_stoploss_time1'] = group['time'].iloc[i]
#                 #group = group.iloc[:stop_row_index + 1]  # Keep rows up to the stop row
#                 #break  # Exit loop once stop-loss is triggered
            
#             # Condition logic: No slicing here
#             elif group['call_price'].iloc[i] >= condition_level:
#                 group['put_stop1_price'] = group['put_price'].iloc[i]
#                 group['put_stoploss_time1'] = group['time'].iloc[i]
        
#         # Step 4: Add the adjusted group to the filtered list
#         filtered_df_list.append(group)
    
#     # Combine all the filtered groups
#     return pd.concat(filtered_df_list, ignore_index=True)

In [123]:
# # Sort the DataFrame to ensure proper order
# df3 = df3.sort_values(by=['date', 'time'])

# # Define a function to calculate the max of the first 15 rows for 'call_price'
# def calculate_call_max_15(group):
#     group['call_max_15'] = group['call_price'].iloc[:15].max()
#     return group

# # Apply the function to each group based on 'date'
# df3 = df3.groupby('date').apply(calculate_call_max_15).reset_index(drop=True)

# # Forward-fill any missing values in 'call_max_15'
# df3['call_max_15'] = df3['call_max_15'].ffill()

In [124]:
df3.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
0,2018-01-02 10:37:00,2690.0,4.5,1.95,4.3,1.85,2692.09,2692.39,2692.06,2692.33,...,,6.3,4.4,1.9,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
1,2018-01-02 10:38:00,2690.0,3.1,3.1,2.8,2.9,2692.35,2692.35,2690.16,2690.16,...,,5.95,2.95,3.0,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
2,2018-01-02 10:39:00,2690.0,2.45,3.8,2.3,3.4,2690.07,2690.07,2688.18,2688.76,...,,5.975,2.375,3.6,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
3,2018-01-02 10:40:00,2690.0,3.0,2.95,2.85,2.75,2688.77,2689.92,2688.75,2689.84,...,10:40:00,5.775,2.925,2.85,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
4,2018-01-02 10:41:00,2690.0,3.3,2.75,2.95,2.55,2689.82,2690.07,2689.47,2690.07,...,10:40:00,5.775,3.125,2.65,2690.0,2018-01-02,4.4,1.9,6.3,2692.33


In [125]:
# def calculate_day_lbb(df):
#     df['lbb_cross_time'] = np.nan
#     df['lbb3_cross_time'] = np.nan    
    
#     for date, group in df.groupby('date'):
#         # Filter rows where 'Close' is less than 'Lower_Band' and 'Lower_Band3'
#         before_group1 = group[group['Close'] < group['Lower_Band']]
#         before_group2 = group[group['Close'] < group['Lower_Band3']]
        
#         # Check if before_group1 is not empty and set 'lbb_cross_time'
#         if not before_group1.empty:
#             df.loc[group.index, 'lbb_cross_time'] = before_group1['time'].iloc[0]
        
#         # Check if before_group2 is not empty and set 'lbb3_cross_time'
#         if not before_group2.empty:
#             df.loc[group.index, 'lbb3_cross_time'] = before_group2['time'].iloc[0]
    
#     return df

In [126]:
# df3 = calculate_day_lbb(df3)

In [127]:
# df3 = exit_strategy(df3)

In [128]:
df3.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
0,2018-01-02 10:37:00,2690.0,4.5,1.95,4.3,1.85,2692.09,2692.39,2692.06,2692.33,...,,6.3,4.4,1.9,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
1,2018-01-02 10:38:00,2690.0,3.1,3.1,2.8,2.9,2692.35,2692.35,2690.16,2690.16,...,,5.95,2.95,3.0,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
2,2018-01-02 10:39:00,2690.0,2.45,3.8,2.3,3.4,2690.07,2690.07,2688.18,2688.76,...,,5.975,2.375,3.6,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
3,2018-01-02 10:40:00,2690.0,3.0,2.95,2.85,2.75,2688.77,2689.92,2688.75,2689.84,...,10:40:00,5.775,2.925,2.85,2690.0,2018-01-02,4.4,1.9,6.3,2692.33
4,2018-01-02 10:41:00,2690.0,3.3,2.75,2.95,2.55,2689.82,2690.07,2689.47,2690.07,...,10:40:00,5.775,3.125,2.65,2690.0,2018-01-02,4.4,1.9,6.3,2692.33


In [129]:
df3.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'Lower_Band3', 'low_find',
       'first_higher_high_time_10', 'first_time', 'straddle_exit',
       'call_price', 'put_price', 'sell_strike', 'date', 'sell_call',
       'sell_put', 'sell_straddle', 'sell_spot'],
      dtype='object')

In [130]:
def exit_strategy12(df):
    filtered_df_list = []
    
    # Group by each date
    for date, group in df.groupby('date'):
        # Ensure 'quote_datetime' is sorted
        group = group.sort_values(by='quote_datetime')
        
        # Determine the last 30 minutes' data
        last_30_min = group['quote_datetime'].max() - pd.Timedelta(minutes=30)
        group_before30 = group[group['quote_datetime'] < last_30_min]
        group_last_30 = group[group['quote_datetime'] >= last_30_min]
        
        # Apply condition: spot < instrady_ma in the last 30 minutes
        condition = group_last_30['Close'] > group_last_30['Rolling_MA']
        
        # Find the first row where the condition fails
        if condition.any():
            first_fail_idx = condition[~condition].index[0] if (~condition).any() else None
            
            # Keep rows up to the first failure in group_last_30
            if first_fail_idx is not None:
                filtered_last_30 = group_last_30.loc[:first_fail_idx]
            else:
                filtered_last_30 = group_last_30  # Keep all rows if the condition never fails
            
            # Concatenate group_before30 with the filtered group_last_30
            concatenated_group = pd.concat([group_before30, filtered_last_30])
            filtered_df_list.append(concatenated_group)
    
    # Concatenate all filtered DataFrames into a single DataFrame
    return pd.concat(filtered_df_list, ignore_index=True) if filtered_df_list else pd.DataFrame()

In [131]:
df6 = exit_strategy12(df3)
# df6 = df3.copy()

In [132]:
# df6[df6['date'] == pd.to_datetime('2022-04-04').date()]

In [133]:
df6.tail()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
199611,2024-12-05 15:26:00,6095.0,0.1,15.3,0.05,14.9,6079.75,6079.75,6079.02,6079.24,...,13:21:00,15.175,0.075,15.1,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
199612,2024-12-05 15:27:00,6095.0,0.1,17.0,0.05,16.5,6079.25,6079.25,6077.37,6078.06,...,13:21:00,16.825,0.075,16.75,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
199613,2024-12-05 15:28:00,6095.0,0.1,17.1,0.05,16.7,6077.97,6078.02,6076.84,6077.5,...,13:21:00,16.975,0.075,16.9,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
199614,2024-12-05 15:29:00,6095.0,0.1,17.6,0.05,17.2,6077.47,6078.08,6077.11,6077.11,...,13:21:00,17.475,0.075,17.4,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
199615,2024-12-05 15:30:00,6095.0,0.1,17.3,0.05,16.9,6077.05,6078.14,6076.95,6077.33,...,13:21:00,17.175,0.075,17.1,6095.0,2024-12-05,3.25,5.05,8.3,6092.51


In [134]:
# df31 = df6[df6['low_find'] == False]
# df32 = df6[df6['low_find'] == True]

In [135]:
# df31 = df6[df6['low_find'] == False]

In [136]:
# def calculate_check(df):
#     check_list = []

#     # Group by 'date'
#     for date, group in df.groupby('date'):
#         try:
#             # Extract the put_price at first_higher_high_time
#             put_price_high = group.loc[group['quote_datetime'].dt.time == group['first_higher_high_time'].iloc[0], 'put_price'].iloc[0]
            
#             # Extract the put_price at first_higher_high_time
#             put_price_high_10 = group.loc[group['quote_datetime'].dt.time == group['first_higher_high_time_10'].iloc[0], 'put_price'].iloc[0]
            
#             # Calculate the condition
#             check_value = 1.25 * put_price_high < put_price_high_10
#         except (IndexError, KeyError):  # Handle cases where the required rows are missing
#             check_value = False

#         # Assign the result to all rows of the group
#         group['check'] = check_value
#         check_list.append(group)
    
#     # Concatenate all groups back into a single DataFrame
#     return pd.concat(check_list, ignore_index=True)

In [137]:
# # Apply the function to df31
# df31 = calculate_check(df31)

In [138]:
# df31['put_at_high'] = df31['put_price'].where(df31['quote_datetime'].dt.time == df31['first_higher_high_time'])
# df31['put_at_high'] = df31['put_at_high'].ffill() 

In [139]:
# df31.head()

In [140]:
# df6 = df6[df6['quote_datetime'].dt.time >= df6['first_higher_high_time_10']]

In [141]:
# df31.head()

In [142]:
def exit_strategy1(df):
    filtered_df_list = []
    
    # Group by each date
    for date, group in df.groupby('date'):
        # Step 1: Get the first 'Close' value for each date (first_close)
        #first_close = group['Close'].iloc[0]        
        first_put = group['put_price'].iloc[0]

        #group = group[group['quote_datetime'].dt.time >= group['first_higher_high_time_10']]
        
        # Step 2: Calculate the condition and filter the rows accordingly
        condition = group['put_price'] < 1.5 * first_put
        
        # Step 3: Find the first row where the condition fails and filter accordingly
        if condition.any():
            first_fail_idx = condition[~condition].index[0] if (~condition).any() else None
            
            # Keep rows before the first failure (inclusive of the row where it fails)
            if first_fail_idx:
                filtered_group = group.loc[:first_fail_idx]
            else:
                filtered_group = group  # Keep the entire group if the condition never fails
            
            filtered_df_list.append(filtered_group)
    
    return pd.concat(filtered_df_list, ignore_index=True)

In [143]:
# df31

In [144]:
# df32

In [145]:
df31 = exit_strategy1(df6)

In [146]:
df31[df31['date'] == pd.to_datetime('2022-04-04').date()]

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
65043,2022-04-04 10:48:00,4570.0,4.5,16.00,4.2,15.30,4559.00,4559.14,4557.56,4558.87,...,10:46:00,20.000,4.35,15.650,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65044,2022-04-04 10:49:00,4570.0,5.6,13.00,5.4,12.50,4558.90,4562.72,4558.90,4562.72,...,10:46:00,18.250,5.50,12.750,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65045,2022-04-04 10:50:00,4570.0,5.4,13.60,5.1,13.10,4562.58,4563.33,4562.10,4562.10,...,10:46:00,18.600,5.25,13.350,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65046,2022-04-04 10:51:00,4570.0,5.3,13.70,5.1,13.20,4562.19,4563.17,4561.94,4562.19,...,10:46:00,18.650,5.20,13.450,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65047,2022-04-04 10:52:00,4570.0,5.6,13.00,5.4,12.50,4562.23,4563.07,4562.14,4562.96,...,10:46:00,18.250,5.50,12.750,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65321,2022-04-04 15:26:00,4570.0,10.7,1.20,10.4,1.15,4579.33,4579.73,4578.97,4579.46,...,10:46:00,11.725,10.55,1.175,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65322,2022-04-04 15:27:00,4570.0,10.0,1.25,9.5,1.20,4579.48,4579.77,4578.74,4578.82,...,10:46:00,10.975,9.75,1.225,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65323,2022-04-04 15:28:00,4570.0,9.5,1.45,9.1,1.35,4578.81,4578.88,4577.62,4578.09,...,10:46:00,10.700,9.30,1.400,4570.0,2022-04-04,6.1,12.45,18.55,4563.8
65324,2022-04-04 15:29:00,4570.0,9.0,1.70,8.5,1.55,4578.05,4578.05,4576.87,4577.31,...,10:46:00,10.375,8.75,1.625,4570.0,2022-04-04,6.1,12.45,18.55,4563.8


In [147]:
df31.tail()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
124324,2024-12-05 13:15:00,6095.0,2.6,5.6,2.55,5.4,6092.07,6092.13,6091.62,6091.84,...,,8.075,2.575,5.5,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124325,2024-12-05 13:16:00,6095.0,2.7,5.4,2.6,5.2,6091.63,6092.2,6091.61,6091.91,...,,7.95,2.65,5.3,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124326,2024-12-05 13:17:00,6095.0,2.2,6.3,2.15,6.1,6091.89,6091.93,6090.32,6090.47,...,,8.375,2.175,6.2,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124327,2024-12-05 13:18:00,6095.0,1.9,7.0,1.85,6.8,6090.33,6090.35,6089.43,6089.43,...,,8.775,1.875,6.9,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124328,2024-12-05 13:19:00,6095.0,1.5,8.2,1.4,8.0,6089.36,6089.58,6088.17,6088.17,...,,9.55,1.45,8.1,6095.0,2024-12-05,3.25,5.05,8.3,6092.51


In [148]:
# df5 = pd.concat([df31, df32], ignore_index=True)

In [149]:
# df32 = exit_strategy1(df32)

In [412]:
df5 = df31.copy()

In [413]:
# Sort by 'quote_datetime'
df5 = df5.sort_values(by='quote_datetime').reset_index(drop=True)

In [416]:
df5

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,first_time,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot
0,2018-01-02 10:47:00,2690.0,3.1,2.85,2.80,2.65,2690.39,2690.58,2690.08,2690.15,...,10:40:00,5.700,2.950,2.750,2690.0,2018-01-02,4.40,1.90,6.3,2692.33
1,2018-01-02 10:48:00,2690.0,3.4,2.60,3.00,2.50,2690.13,2690.57,2690.12,2690.57,...,10:40:00,5.750,3.200,2.550,2690.0,2018-01-02,4.40,1.90,6.3,2692.33
2,2018-01-02 10:49:00,2690.0,3.3,2.70,2.90,2.50,2690.57,2690.67,2690.34,2690.40,...,10:40:00,5.700,3.100,2.600,2690.0,2018-01-02,4.40,1.90,6.3,2692.33
3,2018-01-02 10:50:00,2690.0,3.3,2.60,3.00,2.55,2690.39,2690.71,2690.30,2690.40,...,10:40:00,5.725,3.150,2.575,2690.0,2018-01-02,4.40,1.90,6.3,2692.33
4,2018-01-02 10:51:00,2690.0,3.4,2.60,3.00,2.40,2690.48,2690.77,2690.46,2690.61,...,10:40:00,5.700,3.200,2.500,2690.0,2018-01-02,4.40,1.90,6.3,2692.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124324,2024-12-05 13:15:00,6095.0,2.6,5.60,2.55,5.40,6092.07,6092.13,6091.62,6091.84,...,,8.075,2.575,5.500,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124325,2024-12-05 13:16:00,6095.0,2.7,5.40,2.60,5.20,6091.63,6092.20,6091.61,6091.91,...,,7.950,2.650,5.300,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124326,2024-12-05 13:17:00,6095.0,2.2,6.30,2.15,6.10,6091.89,6091.93,6090.32,6090.47,...,,8.375,2.175,6.200,6095.0,2024-12-05,3.25,5.05,8.3,6092.51
124327,2024-12-05 13:18:00,6095.0,1.9,7.00,1.85,6.80,6090.33,6090.35,6089.43,6089.43,...,,8.775,1.875,6.900,6095.0,2024-12-05,3.25,5.05,8.3,6092.51


In [417]:
# Calculate the number of unique days
num_unique_days = df5['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

754


In [420]:
def exit_strategy4(df):
    filtered_df_list = []
    
    # Group by each date
    for date, group in df.groupby('date'):
        first_put = group['put_price'].iloc[0]
        
        # Step 1: Find the timestamp where 'put_price' drops to 0.5 * first_put
        drop_idx = group[group['put_price'] <= 0.5 * first_put].index.min()
                
        threshold = 1.5 * first_put
        group['adjusted_threshold'] = threshold
        
        # If the drop index is found, adjust the threshold after this point
        if pd.notna(drop_idx):
            # Set 1 * first_put after the drop_idx
            group.loc[drop_idx + 1:, 'adjusted_threshold'] = 1 * first_put
        # else:
        #     # Otherwise, use the original threshold for the entire group
        #     group['adjusted_threshold'] = threshold
        
        # Step 2: Calculate the condition and filter the rows accordingly
        condition = group['put_price'] < group['adjusted_threshold']
        
        # Step 3: Find the first row where the condition fails and filter accordingly
        if condition.any():
            first_fail_idx = condition[~condition].index[0] if (~condition).any() else None
            
            # Keep rows before the first failure (inclusive of the row where it fails)
            if first_fail_idx:
                filtered_group = group.loc[:first_fail_idx]
            else:
                filtered_group = group  # Keep the entire group if the condition never fails
            
            filtered_df_list.append(filtered_group)
    
    return pd.concat(filtered_df_list, ignore_index=True)

In [422]:
df4 = exit_strategy4(df5)

In [423]:
# df4 = df5.copy()

In [424]:
df4.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold
0,2018-01-02 10:47:00,2690.0,3.1,2.85,2.8,2.65,2690.39,2690.58,2690.08,2690.15,...,5.7,2.95,2.75,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
1,2018-01-02 10:48:00,2690.0,3.4,2.6,3.0,2.5,2690.13,2690.57,2690.12,2690.57,...,5.75,3.2,2.55,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
2,2018-01-02 10:49:00,2690.0,3.3,2.7,2.9,2.5,2690.57,2690.67,2690.34,2690.4,...,5.7,3.1,2.6,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
3,2018-01-02 10:50:00,2690.0,3.3,2.6,3.0,2.55,2690.39,2690.71,2690.3,2690.4,...,5.725,3.15,2.575,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
4,2018-01-02 10:51:00,2690.0,3.4,2.6,3.0,2.4,2690.48,2690.77,2690.46,2690.61,...,5.7,3.2,2.5,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125


In [425]:
# df4

In [426]:
# def close_lbb(df):
#     df['Close_at_LBB'] = np.nan  # Initialize the column with NaN values
    
#     # Group by each date
#     for date, group in df.groupby('date'):
#         # Step 1: Get the first put price for the date
#         first_put = group['put_price'].iloc[0]
        
#         # Step 2: Find the timestamp where 'Close' drops to the Lower_Band
#         drop_idx = group[group['Close'] <= group['Lower_Band']].index.min()
        
#         if pd.notna(drop_idx):
#             close_value = group.loc[drop_idx, 'Close']
#             df.loc[group.index, 'Close_at_LBB'] = close_value
    
#     return df

In [427]:
# df4 = close_lbb(df4)

In [428]:
# df4[df4['Close_at_LBB'].notna()]

In [436]:
# df4 = calculate_day_lbb(df4)

In [437]:
df4.head()

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,straddle_exit,call_price,put_price,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold
0,2018-01-02 10:47:00,2690.0,3.1,2.85,2.8,2.65,2690.39,2690.58,2690.08,2690.15,...,5.7,2.95,2.75,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
1,2018-01-02 10:48:00,2690.0,3.4,2.6,3.0,2.5,2690.13,2690.57,2690.12,2690.57,...,5.75,3.2,2.55,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
2,2018-01-02 10:49:00,2690.0,3.3,2.7,2.9,2.5,2690.57,2690.67,2690.34,2690.4,...,5.7,3.1,2.6,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
3,2018-01-02 10:50:00,2690.0,3.3,2.6,3.0,2.55,2690.39,2690.71,2690.3,2690.4,...,5.725,3.15,2.575,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125
4,2018-01-02 10:51:00,2690.0,3.4,2.6,3.0,2.4,2690.48,2690.77,2690.46,2690.61,...,5.7,3.2,2.5,2690.0,2018-01-02,4.4,1.9,6.3,2692.33,4.125


In [438]:
df4.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'Lower_Band3', 'low_find',
       'first_higher_high_time_10', 'first_time', 'straddle_exit',
       'call_price', 'put_price', 'sell_strike', 'date', 'sell_call',
       'sell_put', 'sell_straddle', 'sell_spot', 'adjusted_threshold'],
      dtype='object')

In [441]:
# df4['putlbb'] = df4['put_price'].where(df4['quote_datetime'].dt.time == df4['lbb_cross_time'])
# df4['putlbb3'] = df4['put_price'].where(df4['quote_datetime'].dt.time == df4['lbb3_cross_time'])

In [444]:
df4['put10'] = df4['put_price'].where(df4['quote_datetime'].dt.time == df4['first_higher_high_time_10'])
df4['call10'] = df4['call_price'].where(df4['quote_datetime'].dt.time == df4['first_higher_high_time_10'])
df4['spot10'] = df4['Close'].where(df4['quote_datetime'].dt.time == df4['first_higher_high_time_10'])

In [445]:
# df4[df4['putlbb'].notna()].head()

In [448]:
# Identify the last row for each date
is_last_row = df4.groupby('date')['quote_datetime'].transform('idxmax') == df4.index

# Get the 'putlbb' value for the row where time equals 'lbb_cross_time' for each date
lbb_values = df4[df4['quote_datetime'].dt.time == df4['first_higher_high_time_10']].set_index('date')['put10']

# Get the 'putlbb3' value for the row where time equals 'lbb3_cross_time' for each date
lbb3_values = df4[df4['quote_datetime'].dt.time == df4['first_higher_high_time_10']].set_index('date')['call10']

# Get the 'putlbb3' value for the row where time equals 'lbb3_cross_time' for each date
lbb3_values1 = df4[df4['quote_datetime'].dt.time == df4['first_higher_high_time_10']].set_index('date')['spot10']

# Fill the last row of each date with the corresponding values
df4.loc[is_last_row, 'put10'] = df4['date'].map(lbb_values)
df4.loc[is_last_row, 'call10'] = df4['date'].map(lbb3_values)
df4.loc[is_last_row, 'spot10'] = df4['date'].map(lbb3_values1)

In [449]:
df4

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10
0,2018-01-02 10:47:00,2690.0,3.10,2.85,2.80,2.65,2690.39,2690.58,2690.08,2690.15,...,2690.0,2018-01-02,4.40,1.90,6.3,2692.33,4.125,2.75,2.95,2690.15
1,2018-01-02 10:48:00,2690.0,3.40,2.60,3.00,2.50,2690.13,2690.57,2690.12,2690.57,...,2690.0,2018-01-02,4.40,1.90,6.3,2692.33,4.125,,,
2,2018-01-02 10:49:00,2690.0,3.30,2.70,2.90,2.50,2690.57,2690.67,2690.34,2690.40,...,2690.0,2018-01-02,4.40,1.90,6.3,2692.33,4.125,,,
3,2018-01-02 10:50:00,2690.0,3.30,2.60,3.00,2.55,2690.39,2690.71,2690.30,2690.40,...,2690.0,2018-01-02,4.40,1.90,6.3,2692.33,4.125,,,
4,2018-01-02 10:51:00,2690.0,3.40,2.60,3.00,2.40,2690.48,2690.77,2690.46,2690.61,...,2690.0,2018-01-02,4.40,1.90,6.3,2692.33,4.125,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113108,2024-12-05 13:14:00,6095.0,2.75,5.30,2.65,5.20,6092.46,6092.51,6092.03,6092.04,...,6095.0,2024-12-05,3.25,5.05,8.3,6092.51,6.900,,,
113109,2024-12-05 13:15:00,6095.0,2.60,5.60,2.55,5.40,6092.07,6092.13,6091.62,6091.84,...,6095.0,2024-12-05,3.25,5.05,8.3,6092.51,6.900,,,
113110,2024-12-05 13:16:00,6095.0,2.70,5.40,2.60,5.20,6091.63,6092.20,6091.61,6091.91,...,6095.0,2024-12-05,3.25,5.05,8.3,6092.51,6.900,,,
113111,2024-12-05 13:17:00,6095.0,2.20,6.30,2.15,6.10,6091.89,6091.93,6090.32,6090.47,...,6095.0,2024-12-05,3.25,5.05,8.3,6092.51,6.900,,,


In [451]:
df_trade2 = df4.groupby('date').tail(1).reset_index(drop=True)

In [454]:
# Calculate the number of unique days
num_unique_days = df_trade2['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

754


In [456]:
df_trade2[df_trade2['day_close'].isna()]

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10


In [458]:
df_trade2

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_strike,date,sell_call,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10
0,2018-01-02 11:33:00,2690.0,2.15,3.00,1.95,2.70,2689.30,2689.30,2689.02,2689.06,...,2690.0,2018-01-02,4.40,1.900,6.300,2692.33,4.125,2.75,2.95,2690.15
1,2018-01-03 10:55:00,2705.0,3.20,1.80,2.90,1.65,2706.40,2706.60,2706.34,2706.53,...,2705.0,2018-01-03,3.85,1.350,5.200,2707.56,1.725,1.15,4.30,2708.24
2,2018-01-05 15:44:00,2735.0,4.30,0.25,3.00,0.15,2739.31,2739.37,2738.82,2738.82,...,2735.0,2018-01-05,1.85,2.700,4.550,2734.13,3.975,2.65,1.80,2734.19
3,2018-01-08 15:30:00,2740.0,7.40,0.10,5.40,0.05,2745.88,2745.97,2745.68,2745.69,...,2740.0,2018-01-08,3.80,1.475,5.275,2742.35,1.575,1.05,4.60,2743.61
4,2018-01-10 10:50:00,2745.0,2.35,3.90,2.20,3.60,2743.74,2743.81,2743.13,2743.34,...,2745.0,2018-01-10,3.70,2.500,6.200,2745.83,5.175,3.45,2.45,2743.77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,2024-11-22 13:41:00,5970.0,4.10,8.90,4.00,8.70,5966.47,5966.47,5965.62,5966.02,...,5970.0,2024-11-22,8.40,5.750,14.150,5972.61,9.450,6.30,6.95,5970.33
750,2024-11-26 16:00:00,6010.0,13.00,0.05,10.20,0.00,6023.07,6023.35,6020.28,6020.74,...,6010.0,2024-11-26,6.15,7.400,13.550,6008.09,10.875,7.25,6.25,6009.31
751,2024-12-02 14:17:00,6050.0,0.80,8.80,0.75,8.50,6043.45,6043.45,6042.08,6042.10,...,6050.0,2024-12-02,4.25,6.600,10.850,6047.18,8.550,5.70,4.55,6048.64
752,2024-12-04 13:33:00,6070.0,3.40,9.90,3.30,9.70,6066.20,6066.65,6063.96,6063.96,...,6070.0,2024-12-04,8.35,5.900,14.250,6072.72,9.000,6.00,7.90,6072.64


In [460]:
# Update 'call_price' based on the condition
df_trade2['call_price'] = np.where(
    (df_trade2['time'] == pd.Timestamp('16:00').time()) & ~df_trade2['day_close'].isna(),
    np.maximum(df_trade2['day_close'] - df_trade2['strike'], 0),
    df_trade2['call_price']  # Set to NaN if 'time' is not 16:00 or 'day_close' is NaN
)

In [462]:
df_trade2['put_price'] = np.where(
    (df_trade2['time'] == pd.Timestamp('16:00').time()) & ~df_trade2['day_close'].isna(),
    np.maximum(-df_trade2['day_close'] + df_trade2['strike'], 0),
    df_trade2['put_price']  # Set to NaN if 'time' is not 16:00 or 'day_close' is NaN
)

In [464]:
df_trade2['call_pnl'] = df_trade2['call10'] - df_trade2['call_price']
df_trade2['put_pnl'] = df_trade2['put10'] - df_trade2['put_price']
df_trade2['straddle_pnl'] = df_trade2['put_pnl'] + df_trade2['call_pnl']

In [466]:
df_trade2

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10,call_pnl,put_pnl,straddle_pnl
0,2018-01-02 11:33:00,2690.0,2.15,3.00,1.95,2.70,2689.30,2689.30,2689.02,2689.06,...,1.900,6.300,2692.33,4.125,2.75,2.95,2690.15,0.900,-0.100,0.800
1,2018-01-03 10:55:00,2705.0,3.20,1.80,2.90,1.65,2706.40,2706.60,2706.34,2706.53,...,1.350,5.200,2707.56,1.725,1.15,4.30,2708.24,1.250,-0.575,0.675
2,2018-01-05 15:44:00,2735.0,4.30,0.25,3.00,0.15,2739.31,2739.37,2738.82,2738.82,...,2.700,4.550,2734.13,3.975,2.65,1.80,2734.19,-1.850,2.450,0.600
3,2018-01-08 15:30:00,2740.0,7.40,0.10,5.40,0.05,2745.88,2745.97,2745.68,2745.69,...,1.475,5.275,2742.35,1.575,1.05,4.60,2743.61,-1.800,0.975,-0.825
4,2018-01-10 10:50:00,2745.0,2.35,3.90,2.20,3.60,2743.74,2743.81,2743.13,2743.34,...,2.500,6.200,2745.83,5.175,3.45,2.45,2743.77,0.175,-0.300,-0.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,2024-11-22 13:41:00,5970.0,4.10,8.90,4.00,8.70,5966.47,5966.47,5965.62,5966.02,...,5.750,14.150,5972.61,9.450,6.30,6.95,5970.33,2.900,-2.500,0.400
750,2024-11-26 16:00:00,6010.0,13.00,0.05,10.20,0.00,6023.07,6023.35,6020.28,6020.74,...,7.400,13.550,6008.09,10.875,7.25,6.25,6009.31,-5.380,7.250,1.870
751,2024-12-02 14:17:00,6050.0,0.80,8.80,0.75,8.50,6043.45,6043.45,6042.08,6042.10,...,6.600,10.850,6047.18,8.550,5.70,4.55,6048.64,3.775,-2.950,0.825
752,2024-12-04 13:33:00,6070.0,3.40,9.90,3.30,9.70,6066.20,6066.65,6063.96,6063.96,...,5.900,14.250,6072.72,9.000,6.00,7.90,6072.64,4.550,-3.800,0.750


In [468]:
# df_trade2['call_price_eod'] = (df_trade2['day_close', 'intraday_sma_20'] - df_trade2['strike']).clip(lower=0)

In [470]:
df_trade2['call_pnl'] *= -1

In [472]:
# final_df = pd.concat([df3_isna1, df3_notna_call_above1, df3_notna_call_below1], ignore_index=True)

In [474]:
df_trade2 = df_trade2.sort_values(by='quote_datetime').reset_index(drop=True)

In [476]:
df_trade2

Unnamed: 0,quote_datetime,strike,ask_C,ask_P,bid_C,bid_P,Open,High,Low,Close,...,sell_put,sell_straddle,sell_spot,adjusted_threshold,put10,call10,spot10,call_pnl,put_pnl,straddle_pnl
0,2018-01-02 11:33:00,2690.0,2.15,3.00,1.95,2.70,2689.30,2689.30,2689.02,2689.06,...,1.900,6.300,2692.33,4.125,2.75,2.95,2690.15,-0.900,-0.100,0.800
1,2018-01-03 10:55:00,2705.0,3.20,1.80,2.90,1.65,2706.40,2706.60,2706.34,2706.53,...,1.350,5.200,2707.56,1.725,1.15,4.30,2708.24,-1.250,-0.575,0.675
2,2018-01-05 15:44:00,2735.0,4.30,0.25,3.00,0.15,2739.31,2739.37,2738.82,2738.82,...,2.700,4.550,2734.13,3.975,2.65,1.80,2734.19,1.850,2.450,0.600
3,2018-01-08 15:30:00,2740.0,7.40,0.10,5.40,0.05,2745.88,2745.97,2745.68,2745.69,...,1.475,5.275,2742.35,1.575,1.05,4.60,2743.61,1.800,0.975,-0.825
4,2018-01-10 10:50:00,2745.0,2.35,3.90,2.20,3.60,2743.74,2743.81,2743.13,2743.34,...,2.500,6.200,2745.83,5.175,3.45,2.45,2743.77,-0.175,-0.300,-0.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,2024-11-22 13:41:00,5970.0,4.10,8.90,4.00,8.70,5966.47,5966.47,5965.62,5966.02,...,5.750,14.150,5972.61,9.450,6.30,6.95,5970.33,-2.900,-2.500,0.400
750,2024-11-26 16:00:00,6010.0,13.00,0.05,10.20,0.00,6023.07,6023.35,6020.28,6020.74,...,7.400,13.550,6008.09,10.875,7.25,6.25,6009.31,5.380,7.250,1.870
751,2024-12-02 14:17:00,6050.0,0.80,8.80,0.75,8.50,6043.45,6043.45,6042.08,6042.10,...,6.600,10.850,6047.18,8.550,5.70,4.55,6048.64,-3.775,-2.950,0.825
752,2024-12-04 13:33:00,6070.0,3.40,9.90,3.30,9.70,6066.20,6066.65,6063.96,6063.96,...,5.900,14.250,6072.72,9.000,6.00,7.90,6072.64,-4.550,-3.800,0.750


In [478]:
df_trade2 = df_trade2[df_trade2['call_price'] >= 0]

In [480]:
df_trade2 = df_trade2[df_trade2['put_price'] >= 0]

In [482]:
# df_trade2.head(6).to_csv('che.csv')

In [484]:
# df_filter.heed1['time'] = df_filtered1['quote_datetime'].dt.time

In [486]:
# df_trade2['straddle_pnl_bps'] =  (df_trade2['straddle_pnl']/ df_trade2['sell_spot'])*10000
df_trade2['put_pnl_bps'] =  (df_trade2['put_pnl']/ df_trade2['sell_spot'])*10000
df_trade2['call_pnl_bps'] =  (df_trade2['call_pnl']/ df_trade2['sell_spot'])*10000

In [488]:
df_trade2.columns

Index(['quote_datetime', 'strike', 'ask_C', 'ask_P', 'bid_C', 'bid_P', 'Open',
       'High', 'Low', 'Close', 'day_close', 'prev_close', 'PDL', 'PDH',
       'prev_day_neg', 'range', 'realized_volatility_post30min',
       'realized_volatility_full_day', 'sma_50', 'sma_21', 'below_sma_50',
       'gap_new931', 'intraday_sma_11', 'intraday_sma_20', 'gap_new931931',
       'date1', 'high931', 'low931', 'close931', 'open931', 'time',
       'first_higher_high_time', 'day_low_till_entry', 'Rolling_MA',
       'Rolling_STD', 'Upper_Band', 'Lower_Band', 'Lower_Band3', 'low_find',
       'first_higher_high_time_10', 'first_time', 'straddle_exit',
       'call_price', 'put_price', 'sell_strike', 'date', 'sell_call',
       'sell_put', 'sell_straddle', 'sell_spot', 'adjusted_threshold', 'put10',
       'call10', 'spot10', 'call_pnl', 'put_pnl', 'straddle_pnl',
       'put_pnl_bps', 'call_pnl_bps'],
      dtype='object')

In [490]:
df5 = df_trade2[['quote_datetime', 'date', 'time', 'sell_strike', 'low931', 'high931', 'open931', 'close931', 'Close', 'sell_spot', 'gap_new931', 'realized_volatility_post30min', 'prev_close', 'PDL', 'PDH', 'gap_new931', 'prev_day_neg', 'range', 'realized_volatility_full_day', 'sma_50', 'sma_21', 'intraday_sma_11', 'below_sma_50', 'day_close', 'intraday_sma_20', 'sell_straddle', 'straddle_exit', 'straddle_pnl', 'first_higher_high_time', 'first_higher_high_time_10', 'sell_put', 'put_price', 'put_pnl', 'put_pnl_bps', 'sell_call', 'call_price', 'call_pnl', 'call_pnl_bps', 'day_low_till_entry', 'low_find', 'put10', 'call10', 'spot10', 'first_time']]

In [492]:
# Calculate the number of unique days
num_unique_days = df5['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

754


In [494]:
df5['year'] = df5['quote_datetime'].dt.year

In [496]:
# df5[df5['Close_at_LBB'].isna()]

In [498]:
# df5 = df5[df5['below_sma_50'] == 'Yes']

In [500]:
df5 = df5[df5['close931']-df5['PDL']>0]

In [502]:
# Calculate the number of unique days
num_unique_days = df5['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

646


In [504]:
filtered_df = df5[
        (df5['realized_volatility_full_day'] > 0.1) |  # Either 'rv' is greater than 0.1
        ((df5['realized_volatility_full_day'] <= 0.1) & (df5['prev_day_neg'] == "no"))  # Or 'rv' <= 0.1 and 'prev_Day_neg' is "no"
    ]

In [506]:
# Calculate the number of unique days
num_unique_days = filtered_df['quote_datetime'].dt.date.nunique()

# Display the result
print(num_unique_days)

533


In [508]:
# Initialize an empty DataFrame to store yearly drawdown information
yearly_drawdowns = []

# Group the DataFrame by year and calculate the drawdowns
for year, group in filtered_df.groupby('year'):
    # Calculate the cumulative sum of pnl_bps
    group['cumulative_pnl'] = group['put_pnl_bps'].cumsum()
    
    # Calculate the running maximum of the cumulative pnl
    group['running_max'] = group['cumulative_pnl'].cummax()
    
    # Calculate the drawdown as the difference between the running max and the current cumulative pnl
    group['drawdown'] = group['running_max'] - group['cumulative_pnl']
    
    # Find the maximum drawdown for the year
    max_drawdown = group['drawdown'].max()
    
    # Append the results to the list
    yearly_drawdowns.append({'year': year, 'max_drawdown': max_drawdown})

# Create a DataFrame from the yearly drawdown information
yearly_drawdowns_df = pd.DataFrame(yearly_drawdowns)

print(yearly_drawdowns_df)

   year  max_drawdown
0  2018      118.5666
1  2019       23.2678
2  2020       63.3892
3  2021       37.6702
4  2022      195.6631
5  2023       70.0221
6  2024       49.3396


In [510]:
filtered_df.to_excel('lbb_entry50stop_alldays_50decay3_true.xlsx')

In [200]:
# 279 low break hua 

In [201]:
# 266

In [202]:
# rv< 0.1 then pdp, rv> 0.1