# Hypothesis Testing:
* How often can we expect to be able to close a trade at a profit (5%, 10%, 15%) by 1 PM if we open it that morning?

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import datetime

In [None]:
# Enter Parameters
# Analysis period as ISO date strings; both are parsed with pd.to_datetime further down.
start_date = '2024-01-01'
end_date = '2024-12-31'

# NOTE: uncomment and fill in both paths before running. The later cells read
# `read_from_filepath` and write to `save_to_filepath`, and raise NameError
# while these two assignments stay commented out.
# read_from_filepath = r"ENTER FILE PATH TO READ DATA FROM"
# save_to_filepath = r"ENTER FILE PATH TO SAVE TO"

## How often can we expect to be able to close a trade at a profit (5%, 10%, 15%) by 1 PM if we open it that morning?

In [3]:
def check_profitable_exit(df, start_time='09:31:00', end_time='16:00:00', profit_targets=(0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50)):
    """Check, per contract and trading day, whether profit targets are reached after entry.

    For every 10-minute entry time between ``start_time`` and ``end_time``, and for
    every (Date, symbol) pair, the first bar at or after the entry time is taken as
    the entry. The highest close ('c') from that bar until ``end_time`` on the SAME
    date is then compared against ``entry_price * (1 + target)`` for each target.

    Args:
        df: Bar data with the columns 'symbol', 'Date', 'Time', 'c', 'moneyness',
            'Option Type' and 'Strike Distance'. 'Time' may hold ``datetime.time``
            objects or values parseable as ``%H:%M:%S``. The expiration date is read
            as YYMMDD from ``symbol[-15:-9]``. The frame is not modified.
        start_time: First entry time, ISO ``HH:MM:SS``.
        end_time: Last bar considered for entry and exit, ISO ``HH:MM:SS``.
        profit_targets: Fractional gains to test (0.05 means +5%).

    Returns:
        A DataFrame with one row per (entry time, Date, symbol) holding the entry
        details plus one boolean column per target, labelled e.g. '5%'. The frame
        is empty when no bar qualifies.
    """
    window_open = datetime.time.fromisoformat(start_time)
    window_close = datetime.time.fromisoformat(end_time)

    # Work on a private copy so the caller's frame is never altered
    df = df.copy()
    df['Date'] = pd.to_datetime(df['Date'])  # Ensure 'Date' is in datetime format

    # Extract expiration date from symbol and calculate DTE
    def calculate_dte(symbol, current_date):
        try:
            # The 6 characters ending 9 positions from the right are the expiration (YYMMDD)
            expiration_str = symbol[-15:-9]
            expiration_date = pd.to_datetime('20' + expiration_str, format='%Y%m%d')
            return (expiration_date - current_date).days
        except (TypeError, ValueError) as e:
            # Non-string symbols or unparseable dates: report and let the row be dropped
            print(f"Error parsing DTE for symbol {symbol}: {e}")
            return None

    # A list comprehension (rather than DataFrame.apply) also works on an empty frame
    df['DTE'] = [calculate_dte(symbol, current_date)
                 for symbol, current_date in zip(df['symbol'], df['Date'])]

    # Drop rows where DTE could not be calculated
    df = df.dropna(subset=['DTE']).copy()

    # Normalise 'Time' to datetime.time BEFORE any comparison against time objects;
    # values that already are time objects are kept untouched
    if not df['Time'].map(lambda value: isinstance(value, datetime.time)).all():
        df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S').dt.time

    # Name of the weekday ('Monday', 'Tuesday', ...)
    df['Day of Week'] = df['Date'].dt.day_name()

    # True for bars strictly before 1 PM
    df['Before 1 PM'] = df['Time'] < datetime.time(13, 0)

    # Keep only bars inside the trading window, ordered by time so that the first
    # row of each group really is the earliest bar (stable sort keeps file order on ties)
    in_window = (df['Time'] >= window_open) & (df['Time'] <= window_close)
    df_entry_points = df[in_window].sort_values('Time', kind='stable')

    # Generate 10-minute interval time points
    entry_times = pd.date_range(start=start_time, end=end_time, freq='10min').time

    results = []
    for entry_time in entry_times:
        # Every bar that is still ahead of (or at) this entry time
        candidates = df_entry_points[df_entry_points['Time'] >= entry_time]

        # Group by date AND symbol: entry and exit must happen on the same trading day
        for (_, contract_symbol), session in candidates.groupby(['Date', 'symbol']):
            entry_row = session.iloc[0]
            entry_price = entry_row['c']  # close price of the entry bar

            # Highest close from the entry bar until the end of the window that day
            max_close_price = session['c'].max()

            # round() rather than int(): 0.29 * 100 is 28.999... and must be labelled '29%'
            profit_achieved = {
                f'{round(profit * 100)}%': max_close_price >= entry_price * (1 + profit)
                for profit in profit_targets
            }

            # One consolidated record per contract, day and entry time
            results.append({
                'Date': entry_row['Date'],
                'Day of Week': entry_row['Day of Week'],
                'Time': entry_row['Time'],  # time of the bar actually used as entry
                'Before 1 PM': 'Y' if entry_row['Before 1 PM'] else 'N',
                'entry_time': entry_time,  # nominal 10-minute entry slot
                'contract_symbol': contract_symbol,
                'moneyness': entry_row['moneyness'],
                'Option Type': entry_row['Option Type'],
                'Strike Distance': entry_row['Strike Distance'],
                'DTE': entry_row['DTE'],
                'entry_price': entry_price,
                **profit_achieved
            })

    # Convert results to DataFrame for better readability
    return pd.DataFrame(results)

In [None]:
# Step 1: Load the CSV just once
df_processed = pd.read_csv(read_from_filepath)

# Step 2: Parse 't' as UTC, convert to US Eastern wall-clock time (DST-aware), then
# drop the timezone. utc=True treats naive values as UTC and converts values that
# already carry an offset, so both kinds of input are accepted.
df_processed['t'] = (
    pd.to_datetime(df_processed['t'], utc=True)
    .dt.tz_convert('America/New_York')
    .dt.tz_localize(None)
)

# Step 3: Create 'Date' and 'Time' columns
df_processed['Date'] = df_processed['t'].dt.date  # Extract just the date
df_processed['Time'] = df_processed['t'].dt.time  # Extract just the time

# Step 4: Add 'Option Type' column based on contract symbol.
# The type letter is read 9 characters from the right (just before the last 8
# characters), matching how the expiration is sliced from the right elsewhere in
# this notebook, so it does not depend on the length of the ticker root.
df_processed['Option Type'] = df_processed['symbol'].apply(lambda x: 'Call' if x[-9] == 'C' else 'Put')

# Step 5: Calculate 'Strike Distance' (strike minus 'close', rounded down to an integer)
df_processed['Strike Distance'] = df_processed['strike_price'] - df_processed['close']
df_processed['Strike Distance'] = np.floor(df_processed['Strike Distance']).astype(int)

# Function to filter data by month and run the analysis
def process_monthly_data(df_processed, start_month, end_month):
    """Run ``check_profitable_exit`` month by month and combine the results.

    Every calendar month from the month containing ``start_month`` through the
    month containing ``end_month`` (both inclusive, whole months) is processed
    separately with the default arguments of ``check_profitable_exit``.

    Args:
        df_processed: Bar data with a 'Date' column (date objects, date strings
            or datetime64 values are all accepted).
        start_month: Any timestamp inside the first month to process.
        end_month: Any timestamp inside the last month to process.

    Returns:
        The concatenated monthly results, or an empty DataFrame when no month
        in the range contains data.
    """
    results_all = []

    # Normalise once so the month filter works for any representation of 'Date'
    dates = pd.to_datetime(df_processed['Date'])

    first_period = pd.Timestamp(start_month).to_period('M')
    last_period = pd.Timestamp(end_month).to_period('M')

    # Iterating over month periods avoids day-of-month drift and guarantees the
    # month containing end_month is included
    for period in pd.period_range(first_period, last_period, freq='M'):
        month_start = period.start_time
        next_month_start = (period + 1).start_time
        month_end = period.end_time  # last instant of the month, used for display

        # Half-open interval [month_start, next_month_start) keeps the whole last day
        df_filtered = df_processed[(dates >= month_start) & (dates < next_month_start)]

        print(f"Processing data from {month_start.date()} to {month_end.date()} with {len(df_filtered)} rows.")

        # Run the analysis on the filtered data
        if not df_filtered.empty:
            results_all.append(check_profitable_exit(df_filtered))

    # Concatenate all the monthly results into one final DataFrame
    if results_all:
        return pd.concat(results_all, ignore_index=True)
    return pd.DataFrame()

# Turn the configured start_date / end_date strings into Timestamps that bound the analysis period
start_month = pd.to_datetime(start_date)
end_month = pd.to_datetime(end_date)

# Run the monthly processing function
final_result = process_monthly_data(df_processed, start_month, end_month)

# Save the final result to CSV at save_to_filepath (row index omitted)
final_result.to_csv(save_to_filepath, index=False)
