In [21]:
import sys
from pathlib import Path
import pandas as pd
import os
from IPython.display import display, Markdown  # Assuming you use these for display

# --- 1. PANDAS DISPLAY OPTIONS ---
# Show every column and use a very wide console so wide frames are not wrapped.
# (Row limit left at the pandas default; uncomment to raise it.)
# pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)

# --- 2. IPYTHON AUTORELOAD ---
# Reload edited modules (e.g. 'utils') automatically before each cell runs.
%load_ext autoreload
%autoreload 2

# --- 3. PATH CONFIGURATION ---

# Current working directory of the kernel.
# NOTE(review): this equals the notebook's folder only when the kernel was
# started from that folder — confirm for your Jupyter setup.
NOTEBOOK_DIR = Path.cwd()

# The project root is assumed to be exactly two levels above the working
# directory. This is a fixed offset: if the notebook is moved to a different
# depth, adjust the number of `.parent` hops (the asserts below will catch it).
ROOT_DIR = NOTEBOOK_DIR.parent.parent

# Key project directories, all relative to the root
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'
# Output directory path (defined here but not checked for existence below)
OUTPUT_DIR = ROOT_DIR / 'output'

# Add the 'src' directory to the Python path (once) so 'utils' can be imported
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 4. VERIFICATION ---
# Echo the resolved paths so a wrong root is obvious in the cell output.
print(f"✅ Project Root Directory: {ROOT_DIR}")
print(f"✅ Notebook Directory: {NOTEBOOK_DIR}")
print(f"✅ Source Directory (for utils): {SRC_DIR}")
print(f"✅ Data Directory (for input): {DATA_DIR}")

# Fail fast if any required directory is missing, before anything is loaded.
assert ROOT_DIR.exists(), f"ROOT directory not found at: {ROOT_DIR}"
assert SRC_DIR.exists(), f"Source directory not found at: {SRC_DIR}"
assert DATA_DIR.exists(), f"Data directory not found at: {DATA_DIR}"

# --- 5. IMPORT THE PROJECT MODULE ---
# Must come after the sys.path update above, since 'utils' lives in SRC_DIR.
import utils
print("\n✅ Successfully imported 'utils' module.")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
✅ Project Root Directory: c:\Users\ping\Files_win10\python\py311\stocks
✅ Notebook Directory: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_rank\_working
✅ Source Directory (for utils): c:\Users\ping\Files_win10\python\py311\stocks\src
✅ Data Directory (for input): c:\Users\ping\Files_win10\python\py311\stocks\data

✅ Successfully imported 'utils' module.


In [22]:
# Collect every parquet snapshot whose name contains the merged stocks/ETFs
# pattern. count=None requests all matches instead of only the newest N.
# NOTE(review): the helper is expected to return names newest-first — confirm in utils.
file_list = utils.get_recent_files(
    directory_path=DATA_DIR,
    extension='parquet',
    prefix=None,
    contains_pattern='df_finviz_merged_stocks_etfs',
    count=None,
)

# Numbered listing of the matched files, one per line.
print(f'\nfile_list (len(file_list): {len(file_list)}:')
for i, f in enumerate(file_list):
    print(f'{i}: {f}')



file_list (len(file_list): 59:
0: 2025-07-18_df_finviz_merged_stocks_etfs.parquet
1: 2025-07-17_df_finviz_merged_stocks_etfs.parquet
2: 2025-07-16_df_finviz_merged_stocks_etfs.parquet
3: 2025-07-15_df_finviz_merged_stocks_etfs.parquet
4: 2025-07-14_df_finviz_merged_stocks_etfs.parquet
5: 2025-07-11_df_finviz_merged_stocks_etfs.parquet
6: 2025-07-10_df_finviz_merged_stocks_etfs.parquet
7: 2025-07-09_df_finviz_merged_stocks_etfs.parquet
8: 2025-07-08_df_finviz_merged_stocks_etfs.parquet
9: 2025-07-07_df_finviz_merged_stocks_etfs.parquet
10: 2025-07-03_df_finviz_merged_stocks_etfs.parquet
11: 2025-06-06_df_finviz_merged_stocks_etfs.parquet
12: 2025-06-05_df_finviz_merged_stocks_etfs.parquet
13: 2025-06-04_df_finviz_merged_stocks_etfs.parquet
14: 2025-06-03_df_finviz_merged_stocks_etfs.parquet
15: 2025-06-02_df_finviz_merged_stocks_etfs.parquet
16: 2025-05-30_df_finviz_merged_stocks_etfs.parquet
17: 2025-05-29_df_finviz_merged_stocks_etfs.parquet
18: 2025-05-28_df_finviz_merged_stocks_etf

In [23]:
# The listing above is ordered newest-first, so the first entry is the most
# recent snapshot file.
latest_df_finviz_merged_stocks_etfs = file_list[0]
print(f'Latest df_finviz_merged_stocks_etfs:\n{latest_df_finviz_merged_stocks_etfs}')

# Load that snapshot from the data directory with the pyarrow engine.
df_finviz_merged_stocks_etfs = pd.read_parquet(
    DATA_DIR / latest_df_finviz_merged_stocks_etfs,
    engine='pyarrow',
)

# Preview the first 20 rows as plain text.
print(f'\ndf_finviz_merged_stocks_etfs.head(20):\n{df_finviz_merged_stocks_etfs.head(20)}')


Latest df_finviz_merged_stocks_etfs:
2025-07-18_df_finviz_merged_stocks_etfs.parquet

df_finviz_merged_stocks_etfs.head(20):
       No.                                 Company               Index                  Sector                        Industry Country Exchange                                               Info  MktCap AUM, M  Rank  Market Cap, M     P/E  Fwd P/E    PEG    P/S    P/B     P/C   P/FCF    Book/sh    Cash/sh  Dividend %  Dividend TTM Dividend Ex Date  Payout Ratio %       EPS  EPS next Q  EPS this Y %  EPS next Y %  EPS past 5Y %  EPS next 5Y %  Sales past 5Y %  Sales Q/Q %  EPS Q/Q %  EPS YoY TTM %  Sales YoY TTM %  Sales, M  Income, M  EPS Surprise %  Revenue Surprise %  Outstanding, M  Float, M  Float %  Insider Own %  Insider Trans %  Inst Own %  Inst Trans %  Short Float %  Short Ratio  Short Interest, M  ROA %   ROE %  ROIC %  Curr R  Quick R  LTDebt/Eq  Debt/Eq  Gross M %  Oper M %  Profit M %  Perf 3D %  Perf Week %  Perf Month %  Perf Quart %  Perf Half %  

In [24]:
# Sanity check: confirm a handful of tickers appear as index labels in the
# latest snapshot.
tickers_to_check = ['AVAV', 'RKLB', 'JOBY', 'SYM', 'SOFI', 'MRNA', 'SN', 'U', 'EVR', 'CG']

# One boolean per ticker, in the same order as tickers_to_check.
snapshot_index = df_finviz_merged_stocks_etfs.index
is_in_index = [ticker in snapshot_index for ticker in tickers_to_check]

# Report each ticker (left-aligned to 4 characters) with its membership flag.
print('Are these tickers in the DataFrame index?')
for ticker, is_in in zip(tickers_to_check, is_in_index):
    print(f"{ticker:<4}: {is_in}")

Are these tickers in the DataFrame index?
AVAV: True
RKLB: True
JOBY: True
SYM : True
SOFI: True
MRNA: True
SN  : True
U   : True
EVR : True
CG  : True


In [25]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

def create_rank_history_df(file_list, data_dir):
    """
    Compile a per-ticker rank history from daily parquet snapshot files.

    The date for each file is taken from the part of its file name before the
    first underscore (e.g. ``2025-07-18_df_finviz_merged_stocks_etfs.parquet``
    -> 2025-07-18). Each file must contain a 'Rank' column; the file's index
    labels become the rows of the result.

    Args:
        file_list (list): Parquet file names (str or Path), in any order.
        data_dir (Path or str): The directory where the files are stored.

    Returns:
        pd.DataFrame: One row per index label found in the files (tickers),
                      one column per file date (sorted ascending), and ranks
                      as the values. NaN indicates the ticker was not present
                      in that day's file.

    Raises:
        ValueError: If file_list is empty, or if a file name does not start
                    with a parseable date.
        KeyError: If a file has no 'Rank' column.
    """
    file_list = list(file_list)
    if not file_list:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" error from pd.concat.
        raise ValueError("file_list is empty: no daily rank files to combine.")

    data_dir = Path(data_dir)

    daily_ranks_list = []
    for filename in file_list:
        # Use only the final path component so Path entries (or names with a
        # sub-directory) still yield the leading date token.
        date_str = Path(filename).name.split('_')[0]

        df_daily = pd.read_parquet(data_dir / filename)

        # rename() returns a new Series named after the date, so the column
        # stored inside df_daily is never mutated.
        daily_ranks_list.append(df_daily['Rank'].rename(pd.to_datetime(date_str)))

    # Align all daily Series on the ticker index, then order columns by date
    # in case the file list was not perfectly sorted.
    return pd.concat(daily_ranks_list, axis=1).sort_index(axis=1)


In [26]:
import plotly.express as px
import pandas as pd
import math

def plot_rank_with_criteria(df_rank_history, ticker_list, title_suffix="", filter_criteria=None,
                            width=1100, height=700):
    """
    Plot the rank history of selected tickers as an interactive Plotly line chart.

    - The y-axis is reversed so that smaller rank numbers appear at the top.
    - When filter_criteria supplies positive 'lookback_days' and 'recent_days'
      and enough dates exist, the lookback and recent periods are shaded and a
      dashed vertical line marks the start of the recent period.
    - 'Clear All' sets traces to 'legendonly', hiding them and graying out the legend item.
    - 'Reset View' makes all traces fully visible.
    - Non-None entries of filter_criteria are printed below the plot, three per row.

    Args:
        df_rank_history (pd.DataFrame): The full rank history DataFrame
            (tickers as index, dates as columns).
        ticker_list (list): A list of ticker symbols to plot. Every symbol must
            be present in df_rank_history.index.
        title_suffix (str, optional): Text to append to the main plot title.
        filter_criteria (dict, optional): A dictionary of filter parameters to display.
        width (int, optional): The width of the figure in pixels.
        height (int, optional): The height of the figure in pixels.

    Returns:
        The Plotly figure that was shown, or None when ticker_list is empty.

    Raises:
        KeyError: If a ticker in ticker_list is not in df_rank_history.index.
    """

    if not ticker_list:
        print("Ticker list is empty. Nothing to plot.")
        return

    # Date axes take tick spacing in milliseconds (or "M<n>" for months).
    ONE_DAY_MS = 24 * 60 * 60 * 1000

    # Annotation font metrics. Defined unconditionally because the bottom
    # margin below needs LINE_HEIGHT even when there is no criteria text.
    FONT_SIZE = 14
    LINE_HEIGHT = FONT_SIZE * 1.4        # ~40 % leading is comfortable

    # Prepare data for plotting: dates down the rows, one column per ticker
    plot_df = df_rank_history.loc[ticker_list].T
    plot_df.index = pd.to_datetime(plot_df.index)
    first_plotted_date = plot_df.index.min()
    last_plotted_date = plot_df.index.max()

    custom = px.colors.qualitative.Plotly.copy()
    # Replace the colour at index 7 (the 8th entry, '#B6E880') with a darker blue
    custom[7] = '#1F77B4'

    fig = px.line(
        plot_df,
        x=plot_df.index,
        y=plot_df.columns,
        color_discrete_sequence=custom,
        title=f"Rank History: {title_suffix}",
        labels={'value': 'Rank', 'x': 'Date', 'variable': 'Ticker'}
    )

    # Y-axis: reversed so the best (smallest) rank is on top, grid every 100 ranks
    fig.update_yaxes(
        autorange="reversed",
        dtick=100,
        showgrid=True,
        gridcolor='LightGrey'
    )

    # X-axis: labelled major grid line every 7 days, unlabeled minor line every day.
    # The "D<n>" dtick codes apply to log axes only, so spacing is given in ms here.
    fig.update_xaxes(
        title_text="Date",
        title_standoff=25,
        type='date',
        showgrid=True,
        gridcolor='LightGrey',
        tick0=first_plotted_date,      # anchor the weekly ticks on the first plotted date
        dtick=7 * ONE_DAY_MS,
        tickformat="%b %d",            # e.g. "May 04"
        minor=dict(
            showgrid=True,
            gridcolor="rgba(235, 235, 235, 0.5)",  # fainter than the major grid
            dtick=ONE_DAY_MS
        ),
        range=[first_plotted_date, last_plotted_date]
    )

    # --- Lookback / recent period markers ---
    if filter_criteria:
        recent_days = filter_criteria.get('recent_days')
        lookback_days = filter_criteria.get('lookback_days')
        all_dates = pd.to_datetime(df_rank_history.columns)
        # Both periods must be positive: with recent_days == 0 the index -0
        # would select the FIRST date rather than the last ones.
        periods_valid = bool(recent_days) and bool(lookback_days) and recent_days > 0 and lookback_days > 0
        if periods_valid and len(all_dates) >= (lookback_days + recent_days):
            last_date = all_dates[-1]
            recent_period_start_date = all_dates[-recent_days]
            lookback_period_end_date = all_dates[-(recent_days + 1)]
            lookback_period_start_date = all_dates[-(recent_days + lookback_days)]
            fig.add_vrect(x0=recent_period_start_date, x1=last_date, fillcolor="LightSkyBlue", opacity=0.2, layer="below", line_width=0, annotation_text="Recent", annotation_position="top left")
            fig.add_vrect(x0=lookback_period_start_date, x1=lookback_period_end_date, fillcolor="LightGreen", opacity=0.2, layer="below", line_width=0, annotation_text="Lookback", annotation_position="top left")
            fig.add_vline(x=recent_period_start_date, line_width=2, line_dash="dash", line_color="grey")

    # --- Interactive buttons: show all traces / hide all traces ---
    num_traces = len(fig.data)
    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons", direction="left", x=0.01, xanchor="left",
                y=1.1, yanchor="top", showactive=False,
                buttons=[
                    dict(label="Reset View", method="restyle", args=[{"visible": [True] * num_traces}]),
                    dict(label="Clear All", method="restyle", args=[{"visible": ['legendonly'] * num_traces}]),
                ],
            )
        ]
    )

    # --- Filter criteria annotation: non-None entries, three per row ---
    num_rows = 0
    if filter_criteria:
        col_width = 38
        items = [(k, v) for k, v in filter_criteria.items() if v is not None]
        criteria_lines = [
            "".join(f"  • {k}: {v}".ljust(col_width) for k, v in items[i:i + 3])
            for i in range(0, len(items), 3)
        ]
        num_rows = len(criteria_lines)
        criteria_text = "<b>Filter Criteria:</b><br>" + "<br>".join(criteria_lines)

        fig.add_annotation(
            showarrow=False,
            text=criteria_text,
            xref="paper", yref="paper",
            x=0, y=-0.25,
            xanchor="left", yanchor="top",
            align="left",
            font=dict(family="Courier New, monospace", size=FONT_SIZE)
        )

    # --- Plot-area boundary lines ---
    # xref/yref = "paper" uses the plot area as reference:
    # (0, 0) is the lower-left corner and (1, 1) the upper-right corner.
    fig.add_shape(type="line",
                  xref="paper", yref="paper",
                  x0=0, x1=1,
                  y0=1, y1=1,
                  line=dict(color="red", width=2))   # red line along the top edge

    fig.add_shape(type="line",
                  xref="paper", yref="paper",
                  x0=0, x1=1,
                  y0=0, y1=0,
                  line=dict(color="blue", width=2))  # blue line along the bottom edge

    # --- Layout: leave room below the plot for the criteria rows ---
    bottom_margin = 140 + num_rows * LINE_HEIGHT
    fig.update_layout(width=width, height=height, margin=dict(b=bottom_margin))

    fig.show()
    return fig

In [27]:
import pandas as pd
import numpy as np

def calculate_rank_metrics(df_rank_history, tickers_list, lookback_days=20, recent_days=4):
    """
    Calculates a comprehensive set of rank metrics for a given list of tickers.

    The last ``lookback_days + recent_days`` columns of df_rank_history form
    the analysis window: the first ``lookback_days`` of them are the "lookback"
    period and the final ``recent_days`` are the "recent" period.

    This function does NOT filter tickers based on performance criteria. It processes
    every ticker provided in the tickers_list and returns its calculated metrics,
    making it suitable for generating features for analysis or other models.

    Args:
        df_rank_history (pd.DataFrame): DataFrame with tickers as index and dates as columns
            (columns ordered oldest to newest).
        tickers_list (list): A list of ticker symbols to calculate metrics for.
        lookback_days (int): The number of days for the "lookback" period (>= 1;
            at least 2 are needed for a meaningful slope).
        recent_days (int): The number of days for the "recent" period (>= 1).

    Returns:
        list: A list of dictionaries, where each dictionary contains the calculated
              rank metrics for one ticker. Tickers that are missing from the index
              or have any NaN rank inside the window are skipped. An empty list is
              returned (after printing a message) when the history has fewer
              columns than the window requires.

    Raises:
        ValueError: If lookback_days or recent_days is smaller than 1.
    """
    # --- Argument validation ---
    # A zero or negative period would make the negative-index window arithmetic
    # select the wrong dates (e.g. index -0 is the FIRST column), so reject it.
    if lookback_days < 1 or recent_days < 1:
        raise ValueError(
            "lookback_days and recent_days must both be >= 1 "
            f"(got lookback_days={lookback_days}, recent_days={recent_days})."
        )

    # --- Guard Clause & Date Setup ---
    total_days_needed = lookback_days + recent_days
    all_dates = df_rank_history.columns
    if len(all_dates) < total_days_needed:
        print(f"Error: Not enough data. Need {total_days_needed} days, have {len(all_dates)}.")
        return []

    # Positional split of the trailing window into the two periods
    window_dates = all_dates[-total_days_needed:]
    lookback_dates = window_dates[:lookback_days]
    recent_dates = window_dates[lookback_days:]

    all_ticker_metrics = []

    print(f"Calculating metrics for {len(tickers_list)} tickers...")

    # --- Calculation Loop ---
    for ticker in tickers_list:
        # Skip if ticker is not in the dataframe index
        if ticker not in df_rank_history.index:
            continue

        lookback_ranks = df_rank_history.loc[ticker, lookback_dates].dropna()
        recent_ranks = df_rank_history.loc[ticker, recent_dates].dropna()

        # Skip tickers with any missing rank inside either period
        if len(lookback_ranks) < lookback_days or len(recent_ranks) < recent_days:
            continue

        # Linear trend of rank over the lookback period (negative = improving rank)
        slope, _ = np.polyfit(
            np.arange(len(lookback_ranks)),
            lookback_ranks.to_numpy(dtype=float),
            1,
        )
        all_ranks_in_period = pd.concat([lookback_ranks, recent_ranks])

        # Key reference points (a smaller rank number is a better rank)
        current_rank = int(recent_ranks.iloc[-1])
        rank_at_start_of_recent = int(recent_ranks.iloc[0])
        recent_bottom_rank = int(recent_ranks.max())       # worst rank in recent period
        total_peak_rank = int(all_ranks_in_period.min())   # best rank over the whole window

        # This dictionary holds all calculated metrics for one ticker
        metrics_dict = {
            'ticker': ticker,
            'lookback_slope': round(slope, 2),

            # Key Ranks
            'current': current_rank,
            'recent_start': rank_at_start_of_recent,
            'lookback_start': int(lookback_ranks.iloc[0]),
            'lookback_end': int(lookback_ranks.iloc[-1]),

            # Best/Worst Ranks by Period
            'best_lookback': int(lookback_ranks.min()),
            'worst_lookback': int(lookback_ranks.max()),
            'best_recent': int(recent_ranks.min()),
            'worst_recent': recent_bottom_rank,
            'best_total': total_peak_rank,
            'worst_total': int(all_ranks_in_period.max()),

            # Derived Metrics (differences between the reference ranks above)
            'current_to_total_peak': current_rank - total_peak_rank,
            'current_to_recent_start': current_rank - rank_at_start_of_recent,
            'recent_bottom_to_recent_start': recent_bottom_rank - rank_at_start_of_recent,
            'recent_bottom_to_current': recent_bottom_rank - current_rank,
        }
        all_ticker_metrics.append(metrics_dict)

    return all_ticker_metrics

In [28]:
import pandas as pd

def filter_rank_metrics(
    all_metrics_data,
    min_lookback_improvement=15,
    # --- Mode-specific parameters ---
    min_current_to_recent_start=None,  # 'Dip' mode: positive number means recent_start is better than current
    min_recent_bottom_to_recent_start=None, # 'Reversal' mode: positive number means recent_start is better than recent_bottom
    min_recent_bottom_to_current=None,  # 'Reversal' mode: positive number means current is better than recent_bottom
    # --- General filters ---
    current_rank_bracket_start=1,
    current_rank_bracket_end=None):
    """
    Filters pre-calculated rank metrics based on specified criteria.

    This function acts as the second stage in a two-part pipeline, taking the output of
    `calculate_rank_metrics` and applying filtering rules to find specific opportunities.

    General filters (always applied):
      1. ``lookback_start - lookback_end >= min_lookback_improvement``
      2. ``lookback_slope < 0`` (rank improving over the lookback period)
      3. ``current`` inside the requested rank bracket

    Mode-specific filters:
      - 'Dip' mode is selected when min_current_to_recent_start is given.
      - 'Reversal' mode is selected when either Reversal threshold is given;
        each threshold that is supplied is applied.
      - With no mode parameters, only the general filters are used and a
        warning is printed.

    Args:
        all_metrics_data (list or pd.DataFrame): The list of dictionaries or DataFrame
                                                  from `calculate_rank_metrics`.
        min_lookback_improvement (int): Min improvement from the start to the end of the lookback period.
        min_current_to_recent_start (int, optional): For 'Dip' mode, positive number means recent_start is better than current.
        min_recent_bottom_to_recent_start (int, optional): For 'Reversal' mode, positive number means recent_start is better than recent_bottom.
        min_recent_bottom_to_current (int, optional): For 'Reversal' mode, positive number means current is better than recent_bottom.
        current_rank_bracket_start (int): The minimum current rank to include.
        current_rank_bracket_end (int, optional): The maximum current rank to include.

    Returns:
        pd.DataFrame: The rows that meet all criteria, sorted by 'lookback_slope'
                      ascending (steepest improvement first). The input is never modified.

    Raises:
        ValueError: If parameters for both 'Dip' and 'Reversal' modes are given.
    """
    # --- Parameter Validation (Mode Detection) ---
    dip_requested = min_current_to_recent_start is not None
    reversal_requested = (
        min_recent_bottom_to_recent_start is not None
        or min_recent_bottom_to_current is not None
    )
    if dip_requested and reversal_requested:
        raise ValueError("Cannot specify parameters for both 'Dip' and 'Reversal' modes.")

    # --- Data Preparation ---
    if isinstance(all_metrics_data, pd.DataFrame):
        df = all_metrics_data.copy()  # work on a copy so the caller's frame is untouched
    else:
        df = pd.DataFrame(all_metrics_data)

    if df.empty:
        return df  # nothing to filter

    # --- General filters, combined into a single row mask ---
    lookback_improvement = df['lookback_start'] - df['lookback_end']
    keep = (
        (lookback_improvement >= min_lookback_improvement)
        & (df['lookback_slope'] < 0)
        & (df['current'] >= current_rank_bracket_start)
    )
    if current_rank_bracket_end is not None:
        keep &= df['current'] <= current_rank_bracket_end

    # --- Mode-specific filters ---
    if dip_requested:
        print("Filtering in 'Dip' mode...")
        keep &= df['current_to_recent_start'] >= min_current_to_recent_start
    elif reversal_requested:
        print("Filtering in 'Reversal' mode...")
        # Apply each Reversal threshold that was supplied, so giving only one
        # of them is honoured rather than silently ignored.
        if min_recent_bottom_to_recent_start is not None:
            keep &= df['recent_bottom_to_recent_start'] >= min_recent_bottom_to_recent_start
        if min_recent_bottom_to_current is not None:
            keep &= df['recent_bottom_to_current'] >= min_recent_bottom_to_current
    else:
        print("Warning: No mode-specific filters applied. Returning candidates based on general filters only.")

    # --- Final Sorting ---
    # Most negative slope (strongest rank improvement in the lookback) first.
    df_filtered = df[keep].sort_values(by='lookback_slope', ascending=True)

    return df_filtered

# WOW keep this one
    df_tickers_rank_history,
    lookback_days=20,
    recent_days=4,
    min_lookback_improvement=15,
    min_recent_bottom_to_recent_start=0,       # 0 = no minimum dip depth required (recent bottom may equal recent start)
    min_recent_bottom_to_current=0, # 0 = no minimum recovery from the recent bottom required
    # current_rank_bracket_start=601,
    current_rank_bracket_end=800,

In [29]:
# Build the ticker-by-date rank table from every snapshot file found earlier.
df_tickers_rank_history = create_rank_history_df(file_list, DATA_DIR)

# Summary: dimensions, the most recent date column, then the frame itself.
print("Shape of the resulting DataFrame:", df_tickers_rank_history.shape)
last_history_date = df_tickers_rank_history.columns[-1]
print(f'\nLast date in df_tickers_rank_history: {last_history_date:%Y-%m-%d}')
print(f'\ndf_tickers_rank_history:\n{df_tickers_rank_history}')

Shape of the resulting DataFrame: (1615, 59)

Last date in df_tickers_rank_history: 2025-07-18

df_tickers_rank_history:
       2025-04-25  2025-04-28  2025-04-29  2025-04-30  2025-05-01  2025-05-02  2025-05-05  2025-05-06  2025-05-07  2025-05-08  2025-05-09  2025-05-12  2025-05-13  2025-05-14  2025-05-15  2025-05-16  2025-05-19  2025-05-20  2025-05-21  2025-05-22  2025-05-23  2025-05-27  2025-05-28  2025-05-29  2025-05-30  2025-06-02  2025-06-03  2025-06-04  2025-06-05  2025-06-06  2025-06-09  2025-06-10  2025-06-11  2025-06-12  2025-06-13  2025-06-16  2025-06-17  2025-06-18  2025-06-19  2025-06-20  2025-06-23  2025-06-24  2025-06-25  2025-06-26  2025-06-27  2025-06-30  2025-07-01  2025-07-02  2025-07-03  2025-07-07  2025-07-08  2025-07-09  2025-07-10  2025-07-11  2025-07-14  2025-07-15  2025-07-16  2025-07-17  2025-07-18
NVDA          3.0         3.0         3.0         3.0         3.0         3.0         3.0         3.0         3.0         3.0         3.0         3.0         3.0    

In [30]:
# Parameters for the two-stage screen. The first two size the analysis window;
# the rest are thresholds passed to filter_rank_metrics. A value of None
# disables that threshold. Dip and Reversal thresholds are mutually exclusive.
filter_criteria = dict(
    # Window sizes (number of date columns)
    lookback_days=20,
    recent_days=4,
    # Minimum rank improvement from lookback start to lookback end
    min_lookback_improvement=0,
    # 'Dip' mode only: positive means recent_start is better than current
    min_current_to_recent_start=None,
    # 'Reversal' mode only: positive means recent_start is better than recent_bottom
    min_recent_bottom_to_recent_start=0,
    # 'Reversal' mode only: positive means current is better than recent_bottom
    min_recent_bottom_to_current=2,
    # Inclusive bracket for the current rank
    current_rank_bracket_start=1,
    current_rank_bracket_end=1000,
)

In [31]:
# Two-stage screen over df_tickers_rank_history using the thresholds in
# filter_criteria.

# === STAGE 1: compute metrics for every ticker, once, with no filtering ===
all_tickers = df_tickers_rank_history.index.tolist()

all_metrics_data = calculate_rank_metrics(
    df_tickers_rank_history,
    tickers_list=all_tickers,
    lookback_days=filter_criteria['lookback_days'],
    recent_days=filter_criteria['recent_days'],
)

# 'all_metrics_data' is a list of dicts (one per ticker). It can also be:
#   - turned into a DataFrame:  metrics_df = pd.DataFrame(all_metrics_data)
#   - used as a feature set for modelling
#   - filtered by hand, e.g.
#       dip_candidates = [d for d in all_metrics_data if d['current_to_recent_start'] >= 2]

# === STAGE 2: filter the pre-computed metrics ===
# Thresholds can be changed and this stage re-run without repeating stage 1.
filter_argument_names = (
    'min_lookback_improvement',
    # Mode parameters
    'min_current_to_recent_start',
    'min_recent_bottom_to_recent_start',
    'min_recent_bottom_to_current',
    # Bracket filters
    'current_rank_bracket_start',
    'current_rank_bracket_end',
)
filter_arguments = {name: filter_criteria[name] for name in filter_argument_names}
df_candidates = filter_rank_metrics(all_metrics_data, **filter_arguments)

# Preview the strongest candidates (sorted by lookback slope).
print(f'df_candidates.head(20):\n{df_candidates.head(20)}')

Calculating metrics for 1615 tickers...
Filtering in 'Reversal' mode...
df_candidates.head(20):
    ticker  lookback_slope  current  recent_start  lookback_start  lookback_end  best_lookback  worst_lookback  best_recent  worst_recent  best_total  worst_total  current_to_total_peak  current_to_recent_start  recent_bottom_to_recent_start  recent_bottom_to_current
872   AVAV          -13.25      888           894            1160           901            901            1160          876           894         876         1160                     12                       -6                              0                         6
845   JOBY          -12.77      860          1005            1288          1072           1072            1333          860          1005         860         1333                      0                     -145                              0                       145
565   RKLB          -11.48      575           638             923           667            667      

In [32]:
import pprint

# Finviz fields to attach to every candidate, listed in the order in which they
# should appear immediately after the first column ('ticker').
finviz_columns = ['MktCap AUM, M', 'Price', 'Change %', 'ATR/Price %']

# Look each field up by ticker. Series.map with a Series argument matches the
# 'ticker' values against that Series' index, so this relies on
# df_finviz_merged_stocks_etfs being indexed by ticker symbol; tickers that are
# missing from it get NaN.
# assign() returns a new frame instead of adding columns in place to the object
# produced by the filter step, which keeps this cell idempotent on re-run and
# avoids SettingWithCopyWarning if that object is a slice of another frame.
df_candidates = df_candidates.assign(**{
    column: df_candidates['ticker'].map(df_finviz_merged_stocks_etfs[column])
    for column in finviz_columns
})

# Final layout: first column, then the Finviz fields in the order above, then
# every remaining column in its existing order.
other_columns = [c for c in df_candidates.columns if c not in finviz_columns]
columns = other_columns[:1] + finviz_columns + other_columns[1:]
df_candidates = df_candidates[columns]

# Display the updated DataFrame
print(f'\ndf_candidates.head(50):\n{df_candidates.head(50)}')


df_candidates.head(50):
    ticker  MktCap AUM, M    Price  Change %  ATR/Price %  lookback_slope  current  recent_start  lookback_start  lookback_end  best_lookback  worst_lookback  best_recent  worst_recent  best_total  worst_total  current_to_total_peak  current_to_recent_start  recent_bottom_to_recent_start  recent_bottom_to_current
872   AVAV        13520.0   274.03     -1.63     4.838886          -13.25      888           894            1160           901            901            1160          876           894         876         1160                     12                       -6                              0                         6
845   JOBY        14080.0    17.78      0.34     5.680540          -12.77      860          1005            1288          1072           1072            1333          860          1005         860         1333                      0                     -145                              0                       145
565   RKLB        24630.0    5

In [41]:
# Sort priority, most significant key first. Each value is the `ascending`
# flag handed to DataFrame.sort_values for that key.
# (To rank by trend first, move 'lookback_slope' to the front.)
sort_order_dict = {'current_to_total_peak': True,  # small is better, best is 0
                   'recent_bottom_to_current': False,  # sort large to small, large means big dip before recovered to current level
                   'lookback_slope': True,}  # negative slope (i.e. small) is better

# keys / flags in the exact order they appear in the dict
sort_keys = list(sort_order_dict.keys())
sort_values = list(sort_order_dict.values())
df_sorted_candidates = df_candidates.sort_values(by=sort_keys, ascending=sort_values)

# Column layout for display: identifying columns first, then the sort keys,
# then everything else in its existing order.
leading_columns = ['ticker', 'MktCap AUM, M', 'Price', 'Change %', 'ATR/Price %', 'current', 'best_total']
fixed_cols = set(leading_columns) | set(sort_keys)
remaining_columns = [c for c in df_candidates.columns if c not in fixed_cols]

# dict.fromkeys drops repeated labels while keeping first-seen order, so a sort
# key that is also a leading column (e.g. 'current') does not make reindex()
# emit the same column twice.
new_order = list(dict.fromkeys(leading_columns + sort_keys + remaining_columns))
df_sorted_candidates = df_sorted_candidates.reindex(columns=new_order)

# The ten best-ranked candidates are the ones handed to the plotting cell.
tickers_to_plot = df_sorted_candidates.head(10)['ticker'].tolist()

print("filter_criteria:")
pprint.pprint(filter_criteria, sort_dicts=False)
print("\nsort order:")
# sort_dicts=False keeps the dictionary's insertion order, i.e. the sort priority.
pprint.pprint(sort_order_dict, sort_dicts=False)
print(f'\ndf_sorted_candidates.head(20):\n{df_sorted_candidates.head(20)}')
print(f'\ntickers_to_plot: {tickers_to_plot}')


filter_criteria:
{'lookback_days': 20,
 'recent_days': 4,
 'min_lookback_improvement': 0,
 'min_current_to_recent_start': None,
 'min_recent_bottom_to_recent_start': 0,
 'min_recent_bottom_to_current': 2,
 'current_rank_bracket_start': 1,
 'current_rank_bracket_end': 1000}

sort order:
{'current_to_total_peak': True,
 'recent_bottom_to_current': False,
 'lookback_slope': True}

df_sorted_candidates.head(20):
    ticker  MktCap AUM, M   Price  Change %  ATR/Price %  current  best_total  current_to_total_peak  recent_bottom_to_current  lookback_slope  recent_start  lookback_start  lookback_end  best_lookback  worst_lookback  best_recent  worst_recent  worst_total  current_to_recent_start  recent_bottom_to_recent_start
845   JOBY        14080.0   17.78      0.34     5.680540      860         860                      0                       145          -12.77          1005            1288          1072           1072            1333          860          1005         1333                 

In [42]:
# Plot window: the lookback and recent periods plus 10 extra days of context.
analysis_days = filter_criteria['lookback_days'] + filter_criteria['recent_days']
plot_days = analysis_days + 10

# Plot the selected candidates, or say so when the selection is empty.
if not tickers_to_plot:
    print("No candidates found to plot.")
else:
    # Keep only the last `plot_days` columns of the rank history
    # (presumably one column per trading day -- confirm against the loader).
    rank_history_window = df_tickers_rank_history.iloc[:, -plot_days:]
    plot_rank_with_criteria(
        df_rank_history=rank_history_window,
        ticker_list=tickers_to_plot,
        title_suffix="'Reversal' Candidates",
        filter_criteria=filter_criteria,  # criteria dictionary is passed through as-is
        width=1150,
        height=700,
    )

In [35]:
# Hand-maintained list of portfolio ticker symbols.
portfolio_tickers = [
    "JOBY", "SYM", "RKLB", "MSTR",
    "ORCL", "SHOP", "COIN", "VGT",
    "AVAV", "META", "NVDA",
]

In [36]:
# Plot the rank history of the hand-picked portfolio tickers, using the same
# plot window (`plot_days`) and criteria dictionary as the candidate plot.
# These tickers did not go through the candidate filter, so the title and the
# empty-list message name the portfolio rather than "'Reversal' Candidates".
if portfolio_tickers:
    plot_rank_with_criteria(
        # Keep only the last `plot_days` columns of the rank history.
        df_rank_history=df_tickers_rank_history.iloc[:, -plot_days:],
        ticker_list=portfolio_tickers,
        title_suffix="Portfolio Tickers",
        filter_criteria=filter_criteria,  # criteria dictionary is passed through as-is
        width=1150,
        height=700,
    )
else:
    print("No portfolio tickers to plot.")

In [37]:
# Display order for the metrics table columns.
metrics_column_order = [
    'ticker',
    'lookback_slope', 'current_to_total_peak',
    # recent window
    'current', 'recent_start', 'best_recent', 'worst_recent',
    # lookback window
    'lookback_end', 'lookback_start', 'best_lookback', 'worst_lookback',
    # whole period
    'best_total', 'worst_total',
    # derived differences
    'current_to_recent_start',
    'recent_bottom_to_recent_start',
    'recent_bottom_to_current',
]

In [38]:
# 1. Build a DataFrame from the unfiltered metrics records and put its columns
#    in the display order defined by `metrics_column_order`.
metrics_df = pd.DataFrame(all_metrics_data).loc[:, metrics_column_order]

# Preview the first 200 rows.
print('metrics_df.head(200):\n' + str(metrics_df.head(200)))

metrics_df.head(200):
    ticker  lookback_slope  current_to_total_peak  current  recent_start  best_recent  worst_recent  lookback_end  lookback_start  best_lookback  worst_lookback  best_total  worst_total  current_to_recent_start  recent_bottom_to_recent_start  recent_bottom_to_current
0     NVDA           -0.07                      0        1             1            1             1             1               2              1               2           1            2                        0                              0                         0
1     MSFT            0.07                      1        2             2            2             2             2               1              1               2           1            2                        0                              0                         0
2     AAPL           -0.00                      0        3             3            3             3             3               3              3               3           3  

In [39]:
# Sort priority for the full metrics table, most significant key first.
# Each value is the `ascending` flag for that key.
sort_order_dict = {
    'lookback_slope': True,             # negative slope (i.e. small) is better
    'current_to_total_peak': True,      # small is better, best is 0
    'recent_bottom_to_current': False,  # sort large to small, large means big dip before recovered to current level
}

# Unpack the dict into parallel lists, preserving its insertion order.
sort_keys = [*sort_order_dict]
sort_values = [*sort_order_dict.values()]

sorted_metrics_df = metrics_df.sort_values(by=sort_keys, ascending=sort_values)

print('sorted_metrics_df:\n' + str(sorted_metrics_df))

sorted_metrics_df:
     ticker  lookback_slope  current_to_total_peak  current  recent_start  best_recent  worst_recent  lookback_end  lookback_start  best_lookback  worst_lookback  best_total  worst_total  current_to_recent_start  recent_bottom_to_recent_start  recent_bottom_to_current
872    AVAV          -13.25                     12      888           894          876           894           901            1160            901            1160         876         1160                       -6                              0                         6
845    JOBY          -12.77                      0      860          1005          860          1005          1072            1288           1072            1333         860         1333                     -145                              0                       145
565    RKLB          -11.48                      0      575           638          575           638           667             923            667             923         575 

In [40]:
# Keep only the metrics rows whose 'ticker' appears in portfolio_tickers.
# Rows stay in metrics_df order, not in the order of the ticker list.
in_portfolio = metrics_df['ticker'].isin(portfolio_tickers)
portfolio_metrics = metrics_df[in_portfolio]

print('portfolio_metrics: \n' + str(portfolio_metrics))

portfolio_metrics: 
    ticker  lookback_slope  current_to_total_peak  current  recent_start  best_recent  worst_recent  lookback_end  lookback_start  best_lookback  worst_lookback  best_total  worst_total  current_to_recent_start  recent_bottom_to_recent_start  recent_bottom_to_current
0     NVDA           -0.07                      0        1             1            1             1             1               2              1               2           1            2                        0                              0                         0
6     META           -0.00                      0        7             7            7             7             7               7              7               7           7            7                        0                              0                         0
16    ORCL           -0.07                      1       17            18           16            18            18              19             18              20          16    