pmgus.ipynb
"Pre-Market Gap-Up Screener"

In [568]:
import pandas as pd
import numpy as np
# show all pandas row width
pd.set_option('display.max_rows', None)
# show all pandas column width
pd.set_option('display.max_columns', None)

import yfinance as yf
import datetime as datetime
from datetime import timedelta

| LEVEL #0 - DATA IMPORT |
|-|
| file import setup |

In [569]:
# base file path and file path setup
base_dir_path = '/Users/sudz4/Desktop/SPS_local/sps/x_pre_market_gap_up_screener/' 
std_file_name_str = 'tv_screen_gap-up_'

#####---------------------#####
# screen_date = '2024-11-05' 
# screen_date = '2024-11-07'

# screen_date = '2024-11-15'
screen_date = '2024-11-20'
#####---------------------#####

file_type = '.csv'
filename = base_dir_path + std_file_name_str + screen_date + file_type

# read the csv file
print(filename)
trading_view_df = pd.read_csv(filename)
print(len(trading_view_df))
# print first 5 ticker Symbols only
print(trading_view_df['Symbol'].head(5))

/Users/sudz4/Desktop/SPS_local/sps/x_pre_market_gap_up_screener/tv_screen_gap-up_2024-11-20.csv
1437
0     NVDA
1     MSFT
2     META
3    BRK.B
4      LLY
Name: Symbol, dtype: object


| LEVEL #1 - SCREENER |
|-|
| xxx |

In [570]:
def categorize_market_cap(df):
    """Categorize stocks based on market capitalization."""
    df['Market capitalization'] = pd.to_numeric(df['Market capitalization'], errors='coerce')
    conditions = [
        (df['Market capitalization'] >= 200_000_000_000),  # Titans
        (df['Market capitalization'] >= 10_000_000_000) & (df['Market capitalization'] < 200_000_000_000),  # Large caps
        (df['Market capitalization'] >= 2_000_000_000) & (df['Market capitalization'] < 10_000_000_000),  # Mid caps
        (df['Market capitalization'] >= 300_000_000) & (df['Market capitalization'] < 2_000_000_000),  # Small caps
        (df['Market capitalization'] > 50_000_000) & (df['Market capitalization'] < 300_000_000),  # Micro caps
        (df['Market capitalization'] <= 50_000_000)  # Shrimp
    ]
    categories = ['Titans', 'Large caps', 'Mid caps', 'Small caps', 'Micro caps', 'Shrimp']
    df['marketCapType'] = np.select(conditions, categories, default='Undefined')
    return df

# execute categorization
category_setup_df = categorize_market_cap(trading_view_df).copy()

# drop Undefined marketCapType
category_setup_df = category_setup_df[category_setup_df['marketCapType'] != 'Undefined']

# convert necessary columns to numeric
def convert_columns_to_numeric(df, columns):
    """Convert specified columns to numeric types."""
    for col in columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

# list of columns to convert
numeric_columns = [
    'Market capitalization', 'Float shares outstanding', 'Relative Volume 1 day',
    'Relative Volume at Time', 'Pre-market Change %', 'Pre-market Gap %',
    'Price', 'Volume Weighted Average Price 1 day', 'Volatility 1 day',
    'Volatility 1 week', 'Volatility 1 month', 'Pre-market Volume'
]

# Apply conversion
category_setup_df = convert_columns_to_numeric(category_setup_df, numeric_columns)

# Criteria configuration for each market cap category
criteria_config = {
    "Titans": {
        "pre_market_change_pct_threshold": 0.002,  # 0.2% for Titans
        "float_shares_outstanding_threshold": 1_000_000_000,  # 1 billion shares
        "relative_volume_threshold": 1.2,
        "relative_volume_at_time_threshold": 0.03,
        "pre_market_gap_percentage_threshold": 0.001,  # 0.1%
        "pre_market_vwap_drawdown_threshold": 0.003,  # 0.3% drawdown from VWAP
        "pre_market_volume_threshold": 50_000  # Minimum pre-market volume
    },
    "Large caps": {
        "pre_market_change_pct_threshold": 0.005,  # 0.5% for Large caps
        "float_shares_outstanding_threshold": 200000000,  # 200 million shares
        "relative_volume_threshold": 1.3,  # More inclusive
        "relative_volume_at_time_threshold": 0.04,  # More inclusive
        "pre_market_gap_percentage_threshold": 0.005,  # 0.5%
        "pre_market_vwap_drawdown_threshold": 0.004,  # 0.4% drawdown from VWAP
        "pre_market_volume_threshold": 50000  # Minimum pre-market volume
    },
    # "Midlers" in TradingView
    "Midlers": { 
        "pre_market_change_pct_threshold": 0.02,  # 2% for Midlers 
        "float_shares_outstanding_threshold": 50000000,  # 50 million shares
        "relative_volume_threshold": 1.3,
        "relative_volume_at_time_threshold": 0.05,
        "pre_market_gap_percentage_threshold": 0.02,
        "pre_market_vwap_drawdown_threshold": 0.005,  # 0.5% drawdown from VWAP
        "pre_market_volume_threshold": 50000  # Minimum pre-market volume
    },
    "Small caps": {
        "pre_market_change_pct_threshold": 0.03,  # 3% for Small caps
        "float_shares_outstanding_threshold": 20000000,  # 20 million shares
        "relative_volume_threshold": 1.2,
        "relative_volume_at_time_threshold": 0.05,
        "pre_market_gap_percentage_threshold": 0.03,
        "pre_market_vwap_drawdown_threshold": 0.006,  # 0.6% drawdown from VWAP
        "pre_market_volume_threshold": 50000  # Minimum pre-market volume
    },
    "Micro caps": {
        "pre_market_change_pct_threshold": 0.04,  # 4% for Micro caps
        "float_shares_outstanding_threshold": 5000000,  # 5 million shares
        "relative_volume_threshold": 1.1,
        "relative_volume_at_time_threshold": 0.05,
        "pre_market_gap_percentage_threshold": 0.04,
        "pre_market_vwap_drawdown_threshold": 0.007,  # 0.7% drawdown from VWAP
        "pre_market_volume_threshold": 50000  # Minimum pre-market volume
    },
    "Shrimp": {
        "pre_market_change_pct_threshold": 0.05,  # 5% for Shrimp
        "float_shares_outstanding_threshold": 1000000,  # 1 million shares
        "relative_volume_threshold": 1.0,
        "relative_volume_at_time_threshold": 0.05,
        "pre_market_gap_percentage_threshold": 0.05,
        "pre_market_vwap_drawdown_threshold": 0.008, # 0.8% drawdown from VWAP
        "pre_market_volume_threshold": 50000  # Minimum pre-market volume
    }
}

def filter_stocks(df, config):
    """Filter stocks based on configuration criteria."""
    conditions = (
        (df['Pre-market Change %'] >= config.get('pre_market_change_pct_threshold', 0)) &
        (df['Float shares outstanding'] <= config.get('float_shares_outstanding_threshold', float('inf'))) &
        (df['Relative Volume 1 day'] >= config.get('relative_volume_threshold', 0)) &
        (df['Relative Volume at Time'] >= config.get('relative_volume_at_time_threshold', 0)) &
        (df['Pre-market Gap %'] >= config.get('pre_market_gap_percentage_threshold', 0)) &
        (df['Price'] >= df['Volume Weighted Average Price 1 day'] * (1 - config.get('pre_market_vwap_drawdown_threshold', 0))) &
        (df['Volatility 1 day'] >= df['Volatility 1 week']) &
        (df['Volatility 1 day'] >= df['Volatility 1 month']) &
        (df['Pre-market Volume'] >= config.get('pre_market_volume_threshold', 0))
    )
    return df[conditions]

def screen_stocks_by_category(df, category):
    """Filter stocks in a category using predefined criteria."""
    config = criteria_config.get(category, {})
    filtered_df = filter_stocks(df, config)
    return filtered_df

# execute filtering
smash_df = pd.DataFrame()
categories = category_setup_df['marketCapType'].unique()

for category in categories:
    category_df = category_setup_df[category_setup_df['marketCapType'] == category]
    gap_up_stage_df = screen_stocks_by_category(category_df, category)
    smash_df = pd.concat([smash_df, gap_up_stage_df], ignore_index=True)

# add 'Market capitalization' to the reordered columns list
cols_list = [
    'Symbol', 
    'Description', 
    'marketCapType', 
    'Pre-market Change %', 
    'Pre-market Gap %', 
    'marketCapType',
    'Market capitalization',
    'Price', 
    'Pre-market Open', 
    'Industry', 
    'Index', 
    'Sector', 
    'Exchange',
    'Recent earnings date', 
    'Upcoming earnings date', 
    'Float shares outstanding', 
    'Average Volume 10 days',
    'Average Volume 30 days', 
    'Average Volume 90 days',
    'Relative Volume 1 day', 
    'Relative Volume 5 minutes', 
    'Relative Volume 30 minutes', 
    'Relative Volume at Time', 
    'Analyst Rating'
]

# filter columns to only include those present in the DataFrame
existing_cols = [col for col in cols_list if col in smash_df.columns]
smash_df = smash_df[existing_cols]

# sort and reset index
smash_df = smash_df.sort_values(
    by=['Pre-market Change %', 'Price'],
    ascending=[False, False]).reset_index(drop=True)

print(f"{smash_df.shape[0]} stockes found in the screener.")
# ## ISSUE ##
# #### HARD CODED FILE PATH ####
# # also change this if you add another level, you probably will to screener_smoke 
# # output_filename = f'pre_market_gap_up_screener_targets_{today_date_str}.csv'
# output_filename = f'/Users/sudz4/Desktop/BOOK-II/nowbear/stgy_pm-gap-up-screener/data_pmgus/output_pmgus/pre_market_gap_up_screener_targets_2024-09-26.csv'

# smash_df.to_csv(output_filename, index=False)

84 stockes found in the screener.


In [571]:
# display(smash_df.head(4))
display(smash_df)

Unnamed: 0,Symbol,Description,marketCapType,Pre-market Change %,Pre-market Gap %,marketCapType.1,Market capitalization,Price,Pre-market Open,Industry,Index,Sector,Exchange,Recent earnings date,Upcoming earnings date,Float shares outstanding,Average Volume 10 days,Average Volume 30 days,Average Volume 90 days,Relative Volume 1 day,Relative Volume 5 minutes,Relative Volume 30 minutes,Relative Volume at Time,Analyst Rating
0,XCUR,"Exicure, Inc.",Shrimp,21.649485,3.092784,Shrimp,12642920.0,5.82,6.0,Biotechnology,NASDAQ Composite,Health technology,NASDAQ,2024-11-14,2025-03-06,660325.3,699298.9,355235.4,1093804.0,11.137062,1.21737,1.389796,12.228141,
1,WIX,Wix.com Ltd.,Large caps,14.846426,6.496331,Large caps,10236040000.0,183.95,195.9,Information technology services,"NASDAQ Composite, ISE CTA Cloud Computing, NAS...",Technology services,NASDAQ,2024-11-20,2025-02-19,53457700.0,576124.3,433619.7,415283.0,1.604416,4.068996,4.587508,1.724574,Strong buy
2,GLBE,Global-E Online Ltd.,Mid caps,9.967244,0.0,Mid caps,7137246000.0,42.74,42.74,Internet retail,"NASDAQ Composite, NASDAQ Computer",Retail trade,NASDAQ,2024-11-20,2025-02-19,94224230.0,1175714.4,945741.1,974229.1,1.90907,3.89696,2.03237,2.2184,Strong buy
3,ZIM,ZIM Integrated Shipping Services Ltd.,Mid caps,9.036594,2.165795,Mid caps,3223106000.0,26.78,27.36,Marine shipping,,Transportation,NYSE,2024-11-20,2025-03-18,99229210.0,5217970.6,4874967.0,5355548.0,1.232968,4.107419,3.104733,1.242141,Sell
4,MSTR,MicroStrategy Incorporated,Large caps,7.311748,3.830074,Large caps,87246340000.0,430.54,447.03,Internet software/Services,"NASDAQ Composite, Nasdaq US Large Cap Growth, ...",Technology services,NASDAQ,2024-10-30,2025-02-04,162924300.0,33836953.4,24856920.0,16278090.0,1.668274,3.192327,2.885155,1.626463,Strong buy
5,LMND,"Lemonade, Inc.",Mid caps,5.580468,0.672646,Mid caps,2866178000.0,40.14,40.41,Property/Casualty insurance,"Russell 2000, Nasdaq US Small Cap Growth, Russ...",Finance,NYSE,2024-10-30,2025-03-04,51635240.0,3170123.2,2286955.0,1723081.0,2.197303,4.12091,2.836229,2.405141,Sell
6,EE,"Excelerate Energy, Inc.",Mid caps,5.058626,5.058626,Mid caps,3172592000.0,29.85,31.36,Gas distributors,"Russell 2000, Russell 3000, Mini-Russell 2000",Utilities,NYSE,2024-11-06,2025-03-31,24491150.0,338810.5,256295.5,220565.1,1.113509,5.796261,3.395804,0.961408,Buy
7,RUM,Rumble Inc.,Mid caps,4.401408,5.457746,Mid caps,2212463000.0,5.68,5.99,Internet software/Services,"NASDAQ Composite, Nasdaq US Small Cap Growth, ...",Technology services,NASDAQ,2024-11-12,2025-03-27,74207310.0,4628863.9,3346150.0,1798119.0,1.668649,3.674887,1.21473,1.709191,Neutral
8,CORZ,"Core Scientific, Inc.",Mid caps,3.885853,3.035823,Mid caps,4599297000.0,16.47,16.97,Data processing services,"NASDAQ Composite, Russell 2000, Nasdaq US Smal...",Technology services,NASDAQ,2024-11-06,2025-04-01,238432400.0,12857316.2,11127290.0,11203630.0,0.827972,5.67792,2.529089,0.847713,Strong buy
9,BPMC,Blueprint Medicines Corporation,Mid caps,2.970402,3.054968,Mid caps,6009544000.0,94.6,97.49,Biotechnology,"NASDAQ Composite, Russell 2000, Nasdaq US Mid ...",Health technology,NASDAQ,2024-10-30,2025-02-13,62341740.0,644300.5,668460.5,676459.7,1.179957,7.165445,4.02253,1.209767,Buy


In [572]:
# return smash_list of Symbols
smash_list = smash_df['Symbol'].tolist()


print(len(smash_df))
print(len(smash_list))
print(smash_list)


84
84
['XCUR', 'WIX', 'GLBE', 'ZIM', 'MSTR', 'LMND', 'EE', 'RUM', 'CORZ', 'BPMC', 'LNTH', 'ACHR', 'KT', 'ERJ', 'HAFN', 'SKYW', 'MTSI', 'EXAS', 'CRDO', 'SOBO', 'UFPI', 'BKE', 'SOUN', 'KNF', 'OSCR', 'AMBA', 'MGNI', 'AEIS', 'RSI', 'GVA', 'NXE', 'KBH', 'TRUP', 'CRS', 'ROAD', 'ENIC', 'GEO', 'U', 'PRKS', 'MASI', 'SWTX', 'RDNT', 'NOV', 'PI', 'PTON', 'LFST', 'CXW', 'CXT', 'NPWR', 'MAIN', 'BRBR', 'INTA', 'SHAK', 'CWAN', 'CSWI', 'SMTC', 'IESC', 'CPA', 'CNXC', 'ADUS', 'OBDC', 'BROS', 'ALK', 'CZR', 'VRNA', 'FSK', 'WB', 'S', 'CRGY', 'CLF', 'PLNT', 'STR', 'AVPT', 'BXSL', 'RVLV', 'GOGL', 'CALX', 'ITRI', 'SG', 'ADMA', 'VCTR', 'CFLT', 'AXSM', 'RLX']


return current stock price

"quick back test"
which tickers from smash_list did well on today
1-baseline buy sell w/same time b/s
2-dynamic sell same buy

1-specific day to back test
2-move code over for specific times
3-baseline times
4-other ideas time segments
5-dynamic buy windows IF only?

In [None]:
# start----quick back test function
print(screen_date)

2024-11-20


In [580]:
from datetime import datetime

# Use datetime.strptime() where needed

In [583]:
def quick_backtest(df, test_date, num_stocks=4):
    """
    Perform quick backtest for specified stocks on a specific historical date.
    
    Parameters:
    df (pd.DataFrame): DataFrame containing screened stocks
    test_date (str): Date to test in 'YYYY-MM-DD' format
    num_stocks (int): Number of stocks to test (default=4)
    
    Returns:
    pd.DataFrame: DataFrame with historical intraday data for tested stocks
    """
    # Take first n stocks
    symbols = df['Symbol'].tolist()[:num_stocks]
    stock_data = {}
    
    # Calculate end date (next day)
    start_date = datetime.strptime(test_date, '%Y-%m-%d')
    end_date = start_date + timedelta(days=1)
    
    for symbol in symbols:
        try:
            ticker = yf.Ticker(symbol)
            # Get 1-minute data for the specified day
            hist_data = ticker.history(
                start=start_date.strftime('%Y-%m-%d'),
                end=end_date.strftime('%Y-%m-%d'),
                interval='1m'
                # interval='5m'
            )
            
            if not hist_data.empty:
                # Store all minute data for the day
                stock_data[symbol] = {
                    'Open': hist_data['Open'].tolist(),
                    'High': hist_data['High'].tolist(),
                    'Low': hist_data['Low'].tolist(),
                    'Close': hist_data['Close'].tolist(),
                    'Volume': hist_data['Volume'].tolist(),
                    'Timestamps': hist_data.index.tolist(),
                    'Day_High': hist_data['High'].max(),
                    'Day_Low': hist_data['Low'].min(),
                    'Open_Price': hist_data['Open'].iloc[0],
                    'Close_Price': hist_data['Close'].iloc[-1],
                    'Total_Volume': hist_data['Volume'].sum()
                }
                
                # Calculate basic statistics
                stock_data[symbol]['Intraday_Return'] = (
                    (stock_data[symbol]['Close_Price'] - stock_data[symbol]['Open_Price']) 
                    / stock_data[symbol]['Open_Price'] * 100
                )
                
                stock_data[symbol]['Max_Gain'] = (
                    (stock_data[symbol]['Day_High'] - stock_data[symbol]['Open_Price'])
                    / stock_data[symbol]['Open_Price'] * 100
                )

                ####---- add time of day high
                stock_data[symbol]['Time_of_Day_High'] = hist_data['High'].idxmax()
                ####----- add time of day low
                stock_data[symbol]['Time_of_Day_Low'] = hist_data['Low'].idxmin()
        
                stock_data[symbol]['Max_Loss'] = (
                    (stock_data[symbol]['Day_Low'] - stock_data[symbol]['Open_Price'])
                    / stock_data[symbol]['Open_Price'] * 100
                )
                
            else:
                print(f"No data available for {symbol} on {test_date}")
                stock_data[symbol] = {
                    'Open': None, 'High': None, 'Low': None, 'Close': None,
                    'Volume': None, 'Timestamps': None, 'Day_High': None,
                    'Day_Low': None, 'Open_Price': None, 'Close_Price': None,
                    'Total_Volume': None, 'Intraday_Return': None,
                    'Max_Gain': None, 'Max_Loss': None
                }
                
        except Exception as e:
            print(f"Error processing {symbol}: {str(e)}")
            continue

    # Create DataFrame from results
    results_df = pd.DataFrame.from_dict(stock_data, orient='index')
    
    # Add symbol as a column instead of index
    results_df.reset_index(inplace=True)
    results_df.rename(columns={'index': 'Symbol'}, inplace=True)
    
    return results_df

In [584]:
# execute
print(screen_date)

2024-11-20


In [585]:
qbt_df = quick_backtest(smash_df, screen_date)

print(len(qbt_df))
display(qbt_df)

# # Display summary results
# summary_cols = ['Symbol', 'Open_Price', 'Close_Price', 'Day_High', 'Day_Low', 
#                 'Intraday_Return', 'Max_Gain', 'Max_Loss', 'Total_Volume']
# print("\nBacktest Summary:")
# print(qbt_df[summary_cols])

4


Unnamed: 0,Symbol,Open,High,Low,Close,Volume,Timestamps,Day_High,Day_Low,Open_Price,Close_Price,Total_Volume,Intraday_Return,Max_Gain,Time_of_Day_High,Time_of_Day_Low,Max_Loss
0,XCUR,"[6.90500020980835, 7.079999923706055, 7.25, 7....","[7.079999923706055, 7.289999961853027, 7.74779...","[6.829999923706055, 7.079999923706055, 7.25, 7...","[7.079999923706055, 7.199999809265137, 7.56500...","[246043, 11270, 34654, 23639, 27908, 14637, 93...","[2024-11-20 09:30:00-05:00, 2024-11-20 09:31:0...",14.8,6.83,6.905,11.01,22744149,59.449673,114.337433,2024-11-20 11:43:00-05:00,2024-11-20 09:30:00-05:00,-1.086174
1,WIX,"[209.0, 204.17999267578125, 200.0, 199.9400024...","[210.0, 205.7100067138672, 200.98989868164062,...","[203.7449951171875, 198.0, 198.0, 199.57000732...","[204.68499755859375, 199.54100036621094, 199.5...","[217488, 27110, 30141, 33769, 37109, 30589, 51...","[2024-11-20 09:30:00-05:00, 2024-11-20 09:31:0...",213.860001,198.0,209.0,209.970001,2583625,0.464115,2.325359,2024-11-20 09:52:00-05:00,2024-11-20 09:31:00-05:00,-5.263158
2,GLBE,"[49.2599983215332, 49.279998779296875, 49.7249...","[49.2599983215332, 50.0, 51.619998931884766, 5...","[49.25, 48.619998931884766, 49.5, 50.650001525...","[49.25, 49.75, 51.619998931884766, 50.94039916...","[167165, 50085, 129740, 89728, 40835, 82442, 6...","[2024-11-20 09:30:00-05:00, 2024-11-20 09:31:0...",52.264999,46.57,49.259998,47.779999,5470914,-3.004465,6.100287,2024-11-20 09:33:00-05:00,2024-11-20 10:17:00-05:00,-5.460818
3,ZIM,"[29.670000076293945, 29.459999084472656, 29.77...","[29.690000534057617, 29.799999237060547, 29.79...","[29.309999465942383, 29.389999389648438, 29.29...","[29.3799991607666, 29.770000457763672, 29.5100...","[1989131, 114888, 768496, 317891, 309882, 3512...","[2024-11-20 09:30:00-05:00, 2024-11-20 09:31:0...",30.15,26.92,29.67,27.0,23406376,-8.998989,1.617794,2024-11-20 09:47:00-05:00,2024-11-20 14:43:00-05:00,-9.268621


In [574]:
####---quick start back tests
def quick_backtest(df, day):
    symbols = df['Symbol'].tolist()
    stock_data = {}

    for symbol in symbols:
        ticker = yf.Ticker(symbol)
        qbt_data = ticker.history(start=day, end=(datetime.strptime(day, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d'), interval='1m')

        if not qbt_data.empty:
            stock_data[symbol] = {
                'High': qbt_data['High'].values[0],
                'Low': qbt_data['Low'].values[0],
                'Open': qbt_data['Open'].values[0],
                'Close': qbt_data['Close'].values[0],
                'Volume': qbt_data['Volume'].values[0],
                'Date': qbt_data.index[0]
            }
        else:
            qbt_data[symbol] = {
                'High': None,
                'Low': None,
                'Open': None,
                'Close': None,
                'Volume': None,
                'Date': None
            }

    qbt_df = pd.DataFrame(qbt_data).T.reset_index().rename(columns={'index': 'Symbol'})
    
    ####

    return qbt_df

In [None]:
# create a DataFrame from the stock data dictionary
    stock_data_df = pd.DataFrame(stock_data).T.reset_index().rename(columns={'index': 'Symbol'})
    
    # merge the new stock data with the existing DataFrame
    updated_df = pd.merge(df, stock_data_df, on='Symbol', how='left')
    
    return updated_df

old code

In [None]:
# add high low close
#### ---- Quick Stary Backtesting ---- ####
def basic_daily_technicals(df):
    symbols = df['Symbol'].tolist()
    stock_data = {}

    for symbol in symbols:
        stock_info = yf.Ticker(symbol)
        today_data = stock_info.history(period='1d')
        
        if not today_data.empty:
            stock_data[symbol] = {
                'High': today_data['High'].values[0],
                'Low': today_data['Low'].values[0],
                'Open': today_data['Open'].values[0],
                'Close': today_data['Close'].values[0],
                'Volume': today_data['Volume'].values[0],
                'Date': today_data.index[0]
            }
        else:
            stock_data[symbol] = {
                'High': None,
                'Low': None,
                'Open': None,
                'Close': None,
                'Volume': None,
                'Date': None
            }

    # create a DataFrame from the stock data dictionary
    stock_data_df = pd.DataFrame(stock_data).T.reset_index().rename(columns={'index': 'Symbol'})
    
    # merge the new stock data with the existing DataFrame
    updated_df = pd.merge(df, stock_data_df, on='Symbol', how='left')
    
    return updated_df

| LEVEL #2 - SCREENER |
|-|
| TECHNICAL INDICATORS - via yahoo finance |

*note yahoo finance data source transition
*feels like this is more of a swing screen????

In [517]:
# import yfinance as yf

# from ta.trend import MACD
# from ta.momentum import RSIIndicator
# from ta.trend import SMAIndicator
# from ta.momentum import RSIIndicator, StochasticOscillator
# from ta.trend import SMAIndicator, MACD, PSARIndicator
# from ta.volatility import BollingerBands, AverageTrueRange
# from ta.volume import OnBalanceVolumeIndicator
# # import vectorbt as vbt
# # vbt.settings.set_theme('dark')


In [518]:
# import yfinance as yf
# import pandas as pd
# from ta.momentum import RSIIndicator
# from ta.trend import SMAIndicator, MACD

need to add start date as a var for retroactive testing / backtesting

In [519]:
# def compute_technical_indicators(df):
#     tech_data = []

#     for symbol in df['Symbol'].unique():
#         data = yf.download(symbol, period="6mo", interval="1d")

#         if not data.empty:
#             close_prices = data['Close'].squeeze()  # Ensure it's a 1-dimensional series
#             rsi = RSIIndicator(close_prices, window=14).rsi().iloc[-1] # last value!
#             ma20 = SMAIndicator(close_prices, window=20).sma_indicator().iloc[-1] # last value!
#             ma50 = SMAIndicator(close_prices, window=50).sma_indicator().iloc[-1] # last value!
#             macd = MACD(close_prices).macd_diff().iloc[-1] # last value!

#             tech_data.append({
#                 'Symbol': symbol,
#                 'RSI': rsi,
#                 'MA20': ma20,
#                 'MA50': ma50,
#                 'MACD': macd,
#                 'Yahoo Price': close_prices.iloc[-1], # last value! (price from Yahoo!)
#             })

#     tech_df = pd.DataFrame(tech_data)
#     return tech_df

note the last value because,
technically for back testing you want to pay attention here.
real time go time i think we want it most recent. meaning yesterday
if we are making decisions and in pre-market trading hours based on the Close from yesterday
yesterday = last Close $

In [520]:
# explode_df = compute_technical_indicators(smash_df)

# print(explode_df.shape)
# print(explode_df.head())

In [521]:
# print(explode_df.shape)
# print(explode_df.head())

# print(smash_df.shape)
# print(smash_df.iloc[:5,:3])

In [522]:
# def screen_stocks_by_technical_indicators(df):
#     # filter based on RSI, MA, and MACD
#     df = df[(df['RSI'] >= 0) & (df['RSI'] < 70)]  # RSI filter condition
#     df = df[df['Yahoo Price'] > df['MA20']]
#     df = df[df['Yahoo Price'] > df['MA50']]
#     df = df[df['MACD'] > 0]
#     """ 
#     # should i add more to this?
#     # level 3 will start to get more advanced filering
#     # dont want to overfilter to soon also and miss something
#     # patience
#     """
#     return df

In [523]:
# explode_df = screen_stocks_by_technical_indicators(explode_df)

# print(len(explode_df))
# print(explode_df.head())

In [524]:
# # return explode_list of Symbols
# explode_list = explode_df['Symbol'].tolist()


# print(len(smash_df))
# print()

# print(len(explode_df))
# print(len(explode_list))
# print(explode_list)

In [525]:
# print(explode_df)

In [526]:
# print(screen_date)

In [527]:
# # sory descending by RSI
# explode_df = explode_df.sort_values(by='RSI', ascending=True)

# print(explode_df)

| LEVEL #3 - SCREENER |
|-|
| xxx |

i think this is the place for those small tactical screeners
maybe even think about deploying dynamically for instance one above this ta technical screener filter
different days, different vols, and volatilty

see index specific
see vix
companies in same industry, same sector, same location? 

| LEVEL #4 - SCREENER |
|-|
| xxx |

could think about doing additional available (but separate now) on the subset, additional technical indicators from ta library

In [528]:
# from ta.momentum import RSIIndicator, StochasticOscillator
# from ta.trend import SMAIndicator, MACD, CCIIndicator
# from ta.volatility import BollingerBands
# from ta.volume import OnBalanceVolumeIndicator
# from ta.volatility import AverageTrueRange

In [529]:
# def compute_additional_indicators(df):
#     additional_data = []

#     for symbol in df['Symbol'].unique():
#         data = yf.download(symbol, period="6mo", interval="1d")

#         if not data.empty:
#             close_prices = data['Close'].squeeze()  # Ensure it's a 1-dimensional series
#             high_prices = data['High'].squeeze()
#             low_prices = data['Low'].squeeze()
#             volume = data['Volume'].squeeze()

#             # Compute additional indicators
#             bb = BollingerBands(close_prices)
#             atr = AverageTrueRange(high=high_prices, low=low_prices, close=close_prices)
#             stoch = StochasticOscillator(close=close_prices, high=high_prices, low=low_prices)
#             obv = OnBalanceVolumeIndicator(close=close_prices, volume=volume)
#             cci = CCIIndicator(high=high_prices, low=low_prices, close=close_prices)

#             additional_data.append({
#                 'Symbol': symbol,
#                 'Bollinger High': bb.bollinger_hband().iloc[-1],
#                 'Bollinger Low': bb.bollinger_lband().iloc[-1],
#                 'ATR': atr.average_true_range().iloc[-1],
#                 'Stochastic Oscillator': stoch.stoch().iloc[-1],
#                 'OBV': obv.on_balance_volume().iloc[-1],
#                 'CCI': cci.cci().iloc[-1],
#             })

#     additional_df = pd.DataFrame(additional_data)
#     return additional_df

In [530]:
# double_explode_df = compute_additional_indicators(explode_df)

# print(len(explode_df))
# print(len(double_explode_df))

In [531]:
# double_explode_df

In [532]:
# def filter_additional_indicators(df):
#     # Example filter criteria
#     filtered_df = df[
#         (df['Stochastic Oscillator'] < 20) &  # Stochastic Oscillator indicates oversold
#         (df['CCI'] < -100) &  # CCI indicates oversold
#         (df['ATR'] > 1)  # ATR indicates high volatility
#     ]
#     return filtered_df

In [533]:
# filtered_double_explode_df = filter_additional_indicators(double_explode_df)


# print(len(explode_df))
# print(len(filtered_double_explode_df))

| LEVEL #5,#6 - SCREENER |
|-|
| (mini) tight screeners - (toggle on and off w/comments?) |

return index data for relevant?
good time to see if that chart works opensource similar to tradingview

could also plot historical data for 53 stocks who cares. check it out on same chart?
watch a youtube
restart as new feature 

what about news
what about europe /asia price action
market and industry?

| Notes |
|-|
| END of WORKING CODE |

In [534]:
# # read data file from TradingView to pandas df
# """ 
# Market : US
# Exchange : NASDAQ, NYSE
# Pre-market Chg > $0.00 USD

# Stocks = ~1100
# """
# base_file_path = '/Users/sudz4/Desktop/SPS_local/sps/x_pre_market_gap_up_screener/'
# file_name =  'tv_screen_gap-up_2024-10-29.csv'
# pmgus_df = pd.read_csv(base_file_path + file_name)


# print(len(pmgus_df))
# pmgus_df.tail(2)

In [535]:
# def categorize_market_cap(df):
#     """Categorize stocks based on market capitalization."""
#     df['Market capitalization'] = pd.to_numeric(df['Market capitalization'], errors='coerce')
#     conditions = [
#         (df['Market capitalization'] >= 200_000_000_000),  # Titans
#         (df['Market capitalization'] >= 10_000_000_000) & (df['Market capitalization'] < 200_000_000_000),  # Large caps
#         (df['Market capitalization'] >= 2_000_000_000) & (df['Market capitalization'] < 10_000_000_000),  # Mid caps
#         (df['Market capitalization'] >= 300_000_000) & (df['Market capitalization'] < 2_000_000_000),  # Small caps
#         (df['Market capitalization'] > 50_000_000) & (df['Market capitalization'] < 300_000_000),  # Micro caps
#         (df['Market capitalization'] <= 50_000_000)  # Shrimp
#     ]
#     categories = ['Titans', 'Large caps', 'Mid caps', 'Small caps', 'Micro caps', 'Shrimp']
#     df['marketCapType'] = np.select(conditions, categories, default='Undefined')
#     # drop Undefined
#     df = df[df['marketCapType'] != 'Undefined']
#     # convert to numeric the Market capitalization column
#     df['Market capitalization'] = pd.to_numeric(df['Market capitalization'])
    
#     return df

In [536]:
# # APPLY - categorize market cap
# pmgus_df = categorize_market_cap(pmgus_df).copy()
# print(len(pmgus_df))


In [537]:
# # convert necessary columns to numeric
# def convert_columns_to_numeric(df, columns):
#     """Convert specified columns to numeric types."""
#     for col in columns:
#         df[col] = pd.to_numeric(df[col], errors='coerce')
#     return df

# # list of columns to convert
# numeric_columns = [
#     'Market capitalization', 'Float shares outstanding', 'Relative Volume 1 day',
#     'Relative Volume at Time', 'Pre-market Change %', 'Pre-market Gap %',
#     'Price', 'Volume Weighted Average Price 1 day', 'Volatility 1 day',
#     'Volatility 1 week', 'Volatility 1 month', 'Pre-market Volume'
# ]

In [538]:
# # APPLY - convert columns to numeric
# pmgus_df = convert_columns_to_numeric(pmgus_df, numeric_columns).copy()
# print(len(pmgus_df))

In [539]:
# # Criteria configuration for each market cap category
# criteria_config = {
#     "Titans": {
#         "pre_market_change_pct_threshold": 0.002,  # 0.2% for Titans
#         "float_shares_outstanding_threshold": 1_000_000_000,  # 1 billion shares
#         "relative_volume_threshold": 1.2,
#         "relative_volume_at_time_threshold": 0.03,
#         "pre_market_gap_percentage_threshold": 0.001,  # 0.1%
#         "pre_market_vwap_drawdown_threshold": 0.003,  # 0.3% drawdown from VWAP
#         "pre_market_volume_threshold": 50_000  # Minimum pre-market volume
#     },
#     "Large caps": {
#         "pre_market_change_pct_threshold": 0.005,  # 0.5% for Large caps
#         "float_shares_outstanding_threshold": 200000000,  # 200 million shares
#         "relative_volume_threshold": 1.3,  # More inclusive
#         "relative_volume_at_time_threshold": 0.04,  # More inclusive
#         "pre_market_gap_percentage_threshold": 0.005,  # 0.5%
#         "pre_market_vwap_drawdown_threshold": 0.004,  # 0.4% drawdown from VWAP
#         "pre_market_volume_threshold": 50000  # Minimum pre-market volume
#     },
#     # "Midlers" in TradingView
#     "Midlers": { 
#         "pre_market_change_pct_threshold": 0.02,  # 2% for Midlers 
#         "float_shares_outstanding_threshold": 50000000,  # 50 million shares
#         "relative_volume_threshold": 1.3,
#         "relative_volume_at_time_threshold": 0.05,
#         "pre_market_gap_percentage_threshold": 0.02,
#         "pre_market_vwap_drawdown_threshold": 0.005,  # 0.5% drawdown from VWAP
#         "pre_market_volume_threshold": 50000  # Minimum pre-market volume
#     },
#     "Small caps": {
#         "pre_market_change_pct_threshold": 0.03,  # 3% for Small caps
#         "float_shares_outstanding_threshold": 20000000,  # 20 million shares
#         "relative_volume_threshold": 1.2,
#         "relative_volume_at_time_threshold": 0.05,
#         "pre_market_gap_percentage_threshold": 0.03,
#         "pre_market_vwap_drawdown_threshold": 0.006,  # 0.6% drawdown from VWAP
#         "pre_market_volume_threshold": 50000  # Minimum pre-market volume
#     },
#     "Micro caps": {
#         "pre_market_change_pct_threshold": 0.04,  # 4% for Micro caps
#         "float_shares_outstanding_threshold": 5000000,  # 5 million shares
#         "relative_volume_threshold": 1.1,
#         "relative_volume_at_time_threshold": 0.05,
#         "pre_market_gap_percentage_threshold": 0.04,
#         "pre_market_vwap_drawdown_threshold": 0.007,  # 0.7% drawdown from VWAP
#         "pre_market_volume_threshold": 50000  # Minimum pre-market volume
#     },
#     "Shrimp": {
#         "pre_market_change_pct_threshold": 0.05,  # 5% for Shrimp
#         "float_shares_outstanding_threshold": 1000000,  # 1 million shares
#         "relative_volume_threshold": 1.0,
#         "relative_volume_at_time_threshold": 0.05,
#         "pre_market_gap_percentage_threshold": 0.05,
#         "pre_market_vwap_drawdown_threshold": 0.008, # 0.8% drawdown from VWAP
#         "pre_market_volume_threshold": 50000  # Minimum pre-market volume
#     }
# }

In [540]:
# def filter_stocks(df, config):
#     """Filter stocks based on configuration criteria."""
#     conditions = (
#         (df['Pre-market Change %'] >= config.get('pre_market_change_pct_threshold', 0)) &
#         (df['Float shares outstanding'] <= config.get('float_shares_outstanding_threshold', float('inf'))) &
#         (df['Relative Volume 1 day'] >= config.get('relative_volume_threshold', 0)) &
#         (df['Relative Volume at Time'] >= config.get('relative_volume_at_time_threshold', 0)) &
#         (df['Pre-market Gap %'] >= config.get('pre_market_gap_percentage_threshold', 0)) &
#         (df['Price'] >= df['Volume Weighted Average Price 1 day'] * (1 - config.get('pre_market_vwap_drawdown_threshold', 0))) &
#         (df['Volatility 1 day'] >= df['Volatility 1 week']) &
#         (df['Volatility 1 day'] >= df['Volatility 1 month']) &
#         (df['Pre-market Volume'] >= config.get('pre_market_volume_threshold', 0))
#     )
#     return df[conditions]

# def screen_stocks_by_category(df, category):
#     """Filter stocks in a category using predefined criteria."""
#     config = criteria_config.get(category, {})
#     filtered_df = filter_stocks(df, config)
#     return filtered_df


In [541]:
# for category in pmgus_df['marketCapType'].unique():
#     category_df = pmgus_df[pmgus_df['marketCapType'] == category]
#     gap_up_stage_df = screen_stocks_by_category(category_df, category)
#     pmgus_two_df = pd.concat([category_df, gap_up_stage_df], ignore_index=True)

# print(len(pmgus_two_df))


enhanced volume screening -L2

In [542]:
# market_cap_volume_thresholds = {
#     "Titans": {
#         "min_pm_volume_vs_adv": 0.001,  # 0.1% of ADV minimum in pre-market
#         "min_rel_vol_5min": 1.5,        # 50% above normal 5-min volume
#         "min_rel_vol_15min": 1.3        # 30% above normal 15-min volume
#     },
#     "Large caps": {
#         "min_pm_volume_vs_adv": 0.002,  # 0.2% of ADV
#         "min_rel_vol_5min": 1.8,
#         "min_rel_vol_15min": 1.5
#     },
#     "Midlers": {
#         "min_pm_volume_vs_adv": 0.003,  # 0.3% of ADV
#         "min_rel_vol_5min": 2.0,
#         "min_rel_vol_15min": 1.7
#     },
#     "Small caps": {
#         "min_pm_volume_vs_adv": 0.004,  # 0.4% of ADV
#         "min_rel_vol_5min": 2.5,
#         "min_rel_vol_15min": 2.0
#     },
#     "Micro caps": {
#         "min_pm_volume_vs_adv": 0.005,  # 0.5% of ADV
#         "min_rel_vol_5min": 3.0,
#         "min_rel_vol_15min": 2.5
#     },
#     "Shrimp": {
#         "min_pm_volume_vs_adv": 0.008,  # 0.8% of ADV
#         "min_rel_vol_5min": 3.5,
#         "min_rel_vol_15min": 3.0
#     }
# }

# def analyze_premarket_volume_by_cap(df, conditions):
#     """
#     Enhanced volume analysis based on market cap category with progressive thresholds
#     """
#     df['PM_Volume_Ratio'] = df['Pre-market Volume'] / df['Average Volume 10 days']
#     df['Volume_Acceleration'] = df['Relative Volume 5 minutes'] / df['Relative Volume 15 minutes']
    
#     # Apply filters based on market cap category
#     conditions = []
#     for cap_type, thresholds in market_cap_volume_thresholds.items():
#         cap_condition = (
#             (df['marketCapType'] == cap_type) &
#             (df['PM_Volume_Ratio'] >= thresholds['min_pm_volume_vs_adv']) &
#             (df['Relative Volume 5 minutes'] >= thresholds['min_rel_vol_5min']) &
#             (df['Relative Volume 15 minutes'] >= thresholds['min_rel_vol_15min'])
#         )
#         conditions.append(cap_condition)
    
#     return pd.concat([df[cond] for cond in conditions])

In [543]:
# # show all pandas row width
# pd.set_option('display.max_rows', None)
# # show all pandas column width
# pd.set_option('display.max_columns', None)

In [544]:
# pmgus_two_df = analyze_premarket_volume_by_cap(pmgus_two_df, market_cap_volume_thresholds)


# print(len(pmgus_two_df))
# display(pmgus_two_df.head(5))

In [545]:
# # save to csv
# new_pmgus_df.to_csv(base_file_path + 'new_pmgus_2024-10-29_final.csv', index=False)

In [546]:
# print(len(pmgus_two_df))
# print(len(pmgus_two_df.columns))
# print(pmgus_two_df.columns)

# final L1 FILTERS

In [547]:
# print(len(pmgus_two_df))

In [548]:
# # define no negatives
# def no_negatives(df):
#     """Remove negative values in the data frame."""
#     return df[(df['Pre-market Change %'] >= 0) & (df['Pre-market Gap %'] >= 0)]

# # apply no negatives
# pmgus_two_df = no_negatives(pmgus_two_df)
# print(len(pmgus_two_df))

In [549]:
# def technical_price_filter(df):
#     """
#     Filter stocks based on their position relative to key technical levels
#     """
#     conditions = (
#         # Price near recent highs suggesting momentum
#         (df['Price'] >= df['High 1 month'] * 0.85) |  
        
#         # Price above all major SMAs showing strength
#         (df['Price'] > df['Simple Moving Average (5) 1 minute']) &
#         (df['Price'] > df['Simple Moving Average (13) 5 minutes']) &
        
#         # Price near upper Bollinger Band suggesting strength
#         (df['Price'] >= df['Bollinger Bands (20) 5 minutes, Basis'])
#     )
#     return df[conditions]

# # apply technical price filter
# pmgus_two_df = technical_price_filter(pmgus_two_df)
# print(len(pmgus_two_df))

might need to back off a little on the volatility filter?

In [550]:
# def fundamental_filter(df):
#     """
#     Filter using analyst ratings and price targets
#     """
#     conditions = (
#         # Price well below analyst targets suggesting upside
#         (df['Target price 1 year'] > df['Price'] * 1.2) &
        
#         # Strong analyst ratings
#         (df['Analyst Rating'].isin(['Strong buy', 'Buy']))
#     )
#     return df[conditions]

# # apply fundamental filter
# pmgus_two_df = fundamental_filter(pmgus_two_df)

# # sort by change percentage descending
# pmgus_two_df.sort_values('Pre-market Change %', ascending=False, inplace=True)


# print(len(pmgus_two_df))

In [551]:
# def volatility_filter(df):
#     """
#     Filter for stocks showing increasing volatility
#     """
#     conditions = (
#         # Increasing volatility pattern
#         (df['Volatility 1 day'] > df['Volatility 1 week']) &
#         (df['Volatility 1 week'] > df['Volatility 1 month']) &
        
#         # Beta filter for more responsive stocks
#         (df['Beta 1 year'] > 1.0)
#     )
#     return df[conditions]

# # apply volatility filter
# pmgus_two_df = volatility_filter(pmgus_two_df)
# print(len(pmgus_two_df))

In [552]:
# # sort by largest change percentage
# pmgus_two_df.sort_values('Pre-market Change %', ascending=False, inplace=True)

# # look at data after initial filters
# print(len(pmgus_two_df))
# # display(pmgus_two_df)

In [553]:
# # return symbol and price and analyst rating
# pmgus_two_df[['Symbol', 'Price', 'Analyst Rating', 'marketCapType']]

# ADDITIONAL GRANULAR FILTERS (when needed?)

In [554]:
# if you want another volume one slightly more granular

In [555]:
# # Price above short-term moving averages indicating immediate strength
# price_conditions = (
#     (pmgus_two_df['Price'] > pmgus_two_df['Simple Moving Average (5) 1 minute']) &
#     (pmgus_two_df['Price'] > pmgus_two_df['Simple Moving Average (8) 1 minute'])
# )

# # Apply the conditions to filter the dataframe
# pmgus_three_df = pmgus_two_df[price_conditions]

# #3
# print(len(pmgus_three_df))

In [556]:
# # Volume additioal acceleration
# vol_addtl_accel_conditions = (
#     (pmgus_two_df['Relative Volume 1 minute'] > pmgus_two_df['Relative Volume 5 minutes']) &
#     (pmgus_two_df['Relative Volume 5 minutes'] > 1.5)  &  # Strong recent volume
#     (pmgus_two_df['Relative Volume 5 minutes'] > pmgus_two_df['Relative Volume 15 minutes']) &  # Accelerating volume
#     (pmgus_two_df['Relative Volume 15 minutes'] > pmgus_two_df['Relative Volume 30 minutes'])) # building momentum 

# # Apply the conditions to filter the dataframe
# pmgus_three_df = pmgus_two_df[vol_addtl_accel_conditions]

# #3
# print(len(pmgus_three_df))

# End of additional filters when needed

In [557]:
# open source tradingview type chart view. 
# then use historical data and plat the daily for the Symbols in the final_pmgus_df