In [1]:
# ====================== IMPORTS ======================
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.graph_objects as go

from analysis.metrics import (
    calculate_volume_metrics,
    calculate_price_changes,
    identify_breakout_signals,
    calculate_forward_returns,
    generate_breakout_summary,
    generate_signals_report,
)
from data.stock_data import get_stock_data
from visualization.charts import create_stock_chart

# Blanket filter: with no category/module argument this suppresses every
# Python warning raised for the rest of the kernel session.
# NOTE(review): this also hides library deprecation warnings (e.g. from pandas);
# consider narrowing it to the specific category/module that is noisy.
warnings.filterwarnings("ignore")



In [2]:
# ====================== DATA FETCHING ======================
# Fetch price/volume history for `ticker` from 2023-01-01 up to "now".
# NOTE: end_date is evaluated at run time, so the number of rows and every
# downstream statistic can change each time the notebook is re-run.
start_date = datetime(2023, 1, 1)
end_date = datetime.now()
ticker = "AAPL"

# NOTE(review): holding_period is forwarded to the loader unchanged; presumably
# it controls how much data is fetched -- confirm in data.stock_data.
df = get_stock_data(ticker, start_date, end_date, holding_period=10)

# Fail fast with a clear message: later cells index rows by position
# (e.g. df.index[25]) and would otherwise die with an opaque IndexError.
if df.empty:
    raise ValueError(
        f"No data returned for {ticker} between "
        f"{start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}"
    )

print("Initial Data Check:")
print("Data Shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nData Types:")
print(df.dtypes)


2025-01-01 00:31:53.600 
  command:

    streamlit run c:\Users\shrey\anaconda3\envs\quant\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


Initial Data Check:
Data Shape: (502, 5)

First few rows:
Price            Close        High         Low        Open     Volume
Date                                                                 
2023-01-03  123.768448  129.537772  122.877812  128.924229  112117500
2023-01-04  125.045044  127.321112  123.778365  125.569527   89113600
2023-01-05  123.718979  126.440361  123.461690  125.807022   80962700
2023-01-06  128.271103  128.934129  123.590330  124.698677   87754700
2023-01-09  128.795563  132.021647  128.538274  129.112240   70790800

Data Types:
Price
Close     float64
High      float64
Low       float64
Open      float64
Volume      int64
dtype: object


In [3]:
# ====================== VOLUME METRICS TESTING ======================
# Test 1: Verify the shift operation
print("\nTesting Volume Metrics Calculation:")
df_vol = calculate_volume_metrics(df)

# Length of the moving-average window being verified. The row offsets used
# below are all derived from it so the checks stay aligned with each other.
ma_window = 20

# Check if Volume_MA is actually shifted by 1 day: the "Shifted MA" printed for
# a row should equal the "Raw 20-day MA" printed for the previous row.
print("\nVerifying shift operation:")
print("Date | Volume | Raw 20-day MA | Shifted MA")
print("-" * 50)
raw_ma = df['Volume'].rolling(window=ma_window).mean()
for i in range(ma_window, ma_window + 5):  # Look at a few days after we have enough data
    # .iloc makes the positional lookup explicit; plain series[i] on a
    # date-indexed Series relies on a deprecated positional fallback.
    print(
        f"{df.index[i].date()} | {df['Volume'].iloc[i]:,.0f} | "
        f"{raw_ma.iloc[i]:,.0f} | {df_vol['Volume_MA'].iloc[i]:,.0f}"
    )

# Verify no lookahead bias: the MA reported for a given day must equal the mean
# of the `ma_window` trading days strictly before it.
print("\nChecking for lookahead bias:")
check_pos = ma_window + 5
future_date = df.index[check_pos]
past_dates = df.index[check_pos - ma_window:check_pos]
ma_value = df_vol.loc[future_date, 'Volume_MA']
volumes_used = df.loc[past_dates, 'Volume']
print(f"MA for {future_date.date()} uses data from: {past_dates[0].date()} to {past_dates[-1].date()}")
print(f"Calculated MA: {ma_value:,.0f}")
print(f"Manual MA: {volumes_used.mean():,.0f}")

# Turn the eyeball comparison into an enforced check (tolerant of float noise).
assert np.isclose(ma_value, volumes_used.mean()), (
    "Volume_MA does not match the mean of the preceding window -- possible lookahead bias"
)



Testing Volume Metrics Calculation:

Verifying shift operation:
Date | Volume | Raw 20-day MA | Shifted MA
--------------------------------------------------
2023-02-01 | 77,663,600 | 70,459,930 | 72,182,625
2023-02-02 | 118,339,000 | 71,921,200 | 70,459,930
2023-02-03 | 154,357,300 | 75,590,930 | 71,921,200
2023-02-06 | 69,858,300 | 74,696,110 | 75,590,930
2023-02-07 | 83,322,600 | 75,322,700 | 74,696,110

Checking for lookahead bias:
MA for 2023-02-08 uses data from: 2023-01-10 to 2023-02-07
Calculated MA: 75,322,700
Manual MA: 75,322,700


In [4]:

# ====================== PRICE CHANGES TESTING ======================
print("\nTesting Price Change Calculation:")
df_price = calculate_price_changes(df_vol)

# Verify price change calculation: recompute the day-over-day percentage change
# by hand from consecutive closes and print it next to the library's value.
print("\nVerifying price change calculations:")
print("Date | Close | Previous Close | Calculated Change | Manual Change")
print("-" * 80)
for i in range(1, 6):  # Check first 5 days (row 0 has no previous close)
    # .iloc makes the positional lookup explicit; plain series[i] on a
    # date-indexed Series relies on a deprecated positional fallback.
    curr_close = df['Close'].iloc[i]
    prev_close = df['Close'].iloc[i - 1]
    calc_change = df_price['Price_Change_Pct'].iloc[i]
    manual_change = (curr_close - prev_close) / prev_close * 100
    print(f"{df.index[i].date()} | {curr_close:.2f} | {prev_close:.2f} | {calc_change:.2f}% | {manual_change:.2f}%")



Testing Price Change Calculation:

Verifying price change calculations:
Date | Close | Previous Close | Calculated Change | Manual Change
--------------------------------------------------------------------------------
2023-01-04 | 125.05 | 123.77 | 1.03% | 1.03%
2023-01-05 | 123.72 | 125.05 | -1.06% | -1.06%
2023-01-06 | 128.27 | 123.72 | 3.68% | 3.68%
2023-01-09 | 128.80 | 128.27 | 0.41% | 0.41%
2023-01-10 | 129.37 | 128.80 | 0.45% | 0.45%


In [5]:
# ====================== BREAKOUT SIGNAL TESTING ======================
# Thresholds handed to the signal generator and re-applied by hand below.
volume_threshold = 2.0
price_threshold = 2.0

df_signals = identify_breakout_signals(df_price, volume_threshold, price_threshold)

# Show the flagged rows together with a manual re-check of both conditions.
print("\nVerifying breakout signals:")
breakouts = df_signals[df_signals['Is_Breakout']]
print(f"\nTotal breakout signals: {len(breakouts)}")
print("\nFirst 5 breakout days with conditions:")
print("Date | Volume Ratio | Price Change | Meets Volume? | Meets Price? | Is Breakout?")
print("-" * 100)
for signal_date in breakouts.index[:5]:
    ratio = df_signals.loc[signal_date, 'Volume_Ratio']
    pct_move = df_signals.loc[signal_date, 'Price_Change_Pct']
    flagged = df_signals.loc[signal_date, 'Is_Breakout']
    # Independent re-evaluation of each condition against its threshold.
    volume_ok = ratio > volume_threshold
    price_ok = pct_move > price_threshold
    report_fields = (
        f"{signal_date.date()}",
        f"{ratio:.2f}x",
        f"{pct_move:.2f}%",
        f"{volume_ok}",
        f"{price_ok}",
        f"{flagged}",
    )
    print(" | ".join(report_fields))



Verifying breakout signals:

Total breakout signals: 2

First 5 breakout days with conditions:
Date | Volume Ratio | Price Change | Meets Volume? | Meets Price? | Is Breakout?
----------------------------------------------------------------------------------------------------
2024-06-11 | 3.22x | 7.26% | True | True | True
2024-06-12 | 3.38x | 2.86% | True | True | True


In [6]:
# ====================== FORWARD RETURNS TESTING ======================
holding_period = 10
df_returns = calculate_forward_returns(df_signals, holding_period)

# Cross-check the library's forward return against a by-hand calculation:
# enter at the signal day's close, exit `holding_period` rows later.
print("\nVerifying forward returns calculation:")
print("Breakout Date | Entry Price | Exit Date | Exit Price | Calculated Return | Manual Return")
print("-" * 100)
row_count = len(df_returns)
for signal_date in breakouts.index[:5]:
    entry_price = df_returns.loc[signal_date, 'Close']
    exit_pos = df_returns.index.get_loc(signal_date) + holding_period
    if exit_pos >= row_count:
        # Exit row would fall past the end of the data; nothing is printed.
        continue
    exit_date = df_returns.index[exit_pos]
    exit_price = df_returns.loc[exit_date, 'Close']
    calc_return = df_returns.loc[signal_date, 'Forward_Returns']
    manual_return = (exit_price - entry_price) / entry_price * 100
    columns = (
        f"{signal_date.date()}",
        f"{entry_price:.2f}",
        f"{exit_date.date()}",
        f"{exit_price:.2f}",
        f"{calc_return:.2f}%",
        f"{manual_return:.2f}%",
    )
    print(" | ".join(columns))



Verifying forward returns calculation:
Breakout Date | Entry Price | Exit Date | Exit Price | Calculated Return | Manual Return
----------------------------------------------------------------------------------------------------
2024-06-11 | 206.68 | 2024-06-26 | 212.77 | 2.94% | 2.94%
2024-06-12 | 212.59 | 2024-06-27 | 213.62 | 0.48% | 0.48%


In [7]:
# ====================== SUMMARY STATISTICS TESTING ======================
summary = generate_breakout_summary(df_returns)

# Print one "name: value" line per metric; floats are shown with two decimals,
# everything else (counts, timestamps, ...) is printed as-is.
print("\nStrategy Summary:")
for metric, value in summary.items():
    shown = f"{value:.2f}" if isinstance(value, float) else f"{value}"
    print(f"{metric}: {shown}")



Strategy Summary:
Total_Breakout_Days: 2
Average_Return: 1.71
Win_Rate: 100.00
Return_Std_Dev: 1.74
Best_Trade: 2.94
Worst_Trade: 0.48
Avg_Win_Size: 1.71
Avg_Loss_Size: 0
Avg_Days_Between_Signals: 1.00
Avg_Volume_Ratio: 3.30
Max_Volume_Ratio: 3.38
Min_Volume_Ratio: 3.22
Volume_Ratio_Std: 0.12
Avg_Price_Change: 5.06
Max_Price_Change: 7.26
Min_Price_Change: 2.86
First_Signal: 2024-06-11 00:00:00
Last_Signal: 2024-06-12 00:00:00
Total_Days_Analyzed: 1


In [8]:
# ====================== SIGNALS REPORT TESTING ======================
# Exercises generate_signals_report and cross-checks its two outputs (a summary
# table and a per-signal table) against values computed elsewhere in the notebook.
print("\nTesting generate_signals_report function:")
summary_df, signals_df = generate_signals_report(df_returns)
print("\nSummary Statistics DataFrame:")
print("Shape:", summary_df.shape)
print("\nFirst few rows of summary:")
print(summary_df.head())

print("\nSignals DataFrame:")
print("Shape:", signals_df.shape)
print("\nColumns:", signals_df.columns.tolist())
print("\nFirst few signals:")
print(signals_df.head())

# Verify key metrics in both formats match
print("\nVerifying consistency between summary formats:")
summary_dict = dict(zip(summary_df['Metric'], summary_df['Value']))
original_summary = generate_breakout_summary(df_returns)

for key in ['Total_Breakout_Days', 'Average_Return', 'Win_Rate']:
    if key in summary_dict and key in original_summary:
        print(f"\n{key}:")
        print(f"In summary_df: {summary_dict[key]}")
        print(f"In original : {original_summary[key]:.2f}")

# Verify signal dates match breakout days
print("\nVerifying signal dates:")
breakout_dates = df_returns[df_returns['Is_Breakout']].index
signal_dates = signals_df.index
print(f"Number of breakout dates: {len(breakout_dates)}")
print(f"Number of signals in report: {len(signal_dates)}")
# A subset test alone would report True even if the report dropped signals,
# so also require the counts to agree (assumes dates are unique in both).
dates_match = (
    len(signal_dates) == len(breakout_dates)
    and all(d in breakout_dates for d in signal_dates)
)
print(f"Dates match: {dates_match}")

# Check for any missing or unexpected columns
expected_columns = [
    'Signal_Date', 'Entry_Price', 'Volume', 'Volume_MA_20',
    'Volume_Ratio', 'Price_Change_Pct', 'Forward_Return',
    'Exit_Date', 'Exit_Price'
]

print("\nChecking for required columns:")
missing_cols = [col for col in expected_columns if col not in signals_df.columns]
extra_cols = [col for col in signals_df.columns if col not in expected_columns]
print(f"Missing columns: {missing_cols}")
print(f"Extra columns: {extra_cols}")

# Verify calculations for first signal
if len(signals_df) > 0:
    first_signal = signals_df.iloc[0]
    print("\nDetailed verification of first signal:")
    print(f"Signal Date: {first_signal.name}")
    print(f"Entry Price: {first_signal['Entry_Price']:.2f}")
    print(f"Exit Price: {first_signal['Exit_Price']:.2f}")
    print(f"Forward Return: {first_signal['Forward_Return']:.2f}%")
    print(f"Volume Ratio: {first_signal['Volume_Ratio']:.2f}x")

    # Manual calculation of return from the report's own entry/exit prices
    manual_return = (first_signal['Exit_Price'] - first_signal['Entry_Price']) / first_signal['Entry_Price'] * 100
    print(f"Manually calculated return: {manual_return:.2f}%")


Testing generate_signals_report function:

Summary Statistics DataFrame:
Shape: (19, 2)

First few rows of summary:
                Metric    Value
0  Total_Breakout_Days        2
1       Average_Return    1.71%
2             Win_Rate  100.00%
3       Return_Std_Dev    1.74%
4           Best_Trade     2.94

Signals DataFrame:
Shape: (2, 9)

Columns: ['Signal_Date', 'Entry_Price', 'Volume', 'Volume_MA_20', 'Volume_Ratio', 'Price_Change_Pct', 'Forward_Return', 'Exit_Date', 'Exit_Price']

First few signals:
           Signal_Date  Entry_Price     Volume  Volume_MA_20  Volume_Ratio  \
Date                                                                         
2024-06-11  2024-06-11   206.683105  172373300    53564850.0          3.22   
2024-06-12  2024-06-12   212.589783  198134300    58581275.0          3.38   

            Price_Change_Pct  Forward_Return  Exit_Date  Exit_Price  
Date                                                                 
2024-06-11              7.26        

In [9]:
# ====================== VISUALIZATION TESTING ======================
print("\nGenerating Interactive Charts:")

# 1. Main Stock Chart with Breakout Points
print("\n1. Creating main stock chart with breakouts...")
stock_chart = create_stock_chart(df_returns, title=f"{ticker} Stock Price with Breakout Signals")
stock_chart.show()

# Summarise what the chart should be showing so it can be checked by eye.
# The breakout filter is computed once and reused for the count and the ranking.
flagged_rows = df_returns[df_returns['Is_Breakout']]

print("\nVisualization Analysis:")
print(f"Total Breakout Points Marked: {len(flagged_rows)}")
print(f"Date Range: {df_returns.index[0].date()} to {df_returns.index[-1].date()}")
print("\nBreakout Points Details:")
# NOTE: this rebinds `breakouts` (first defined in the breakout-signal cell) to
# a copy sorted by volume ratio, so re-running earlier cells afterwards will
# see a different row order.
breakouts = flagged_rows.sort_values('Volume_Ratio', ascending=False)
print("\nTop 5 Breakouts by Volume Ratio:")
for idx, row in breakouts.head().iterrows():
    print(f"Date: {idx.date()} | Volume Ratio: {row['Volume_Ratio']:.2f}x | Return: {row['Forward_Returns']:.2f}%")


Generating Interactive Charts:

1. Creating main stock chart with breakouts...



Visualization Analysis:
Total Breakout Points Marked: 2
Date Range: 2023-01-03 to 2024-12-31

Breakout Points Details:

Top 5 Breakouts by Volume Ratio:
Date: 2024-06-12 | Volume Ratio: 3.38x | Return: 0.48%
Date: 2024-06-11 | Volume Ratio: 3.22x | Return: 2.94%
