In [491]:
# libs
import json
import math
import os
import warnings

import pandas as pd
import requests
from dotenv import load_dotenv #pip3 install python-dotenv
from IPython.display import JSON

# load secrets
load_dotenv()


In [492]:
# iex cloud - api data request
# Read the API token from the environment (the .env file is loaded in the imports cell).
iex_key = os.getenv("IEX_API_KEY")
if not iex_key:
    # Fail early with an actionable message instead of a TypeError from str + None.
    raise RuntimeError("IEX_API_KEY is not set; add it to the environment or the .env file")

# The token is passed through `params` so requests URL-encodes it and the
# secret is not embedded in the `url` variable.
url = 'https://cloud.iexapis.com/stable/ref-data/symbols'
response = requests.get(url, params={'token': iex_key}, timeout=30)
# Surface HTTP errors here rather than building a DataFrame from an error payload.
response.raise_for_status()
data = response.json()
symbols_df = pd.DataFrame(data)
# print 
symbols_df.head()

Unnamed: 0,symbol,exchange,exchangeSuffix,exchangeName,exchangeSegment,exchangeSegmentName,name,date,type,iexId,region,currency,isEnabled,figi,cik,lei
0,A,XNYS,,New York Stock Exchange Inc,XNYS,New York Stock Exchange Inc,Agilent Technologies Inc.,2024-02-21,cs,IEX_46574843354B2D52,US,USD,True,BBG000C2V3D6,1090872,QUIX8Y7A2WP0XRMW7G29
1,AA,XNYS,,New York Stock Exchange Inc,XNYS,New York Stock Exchange Inc,Alcoa Corp,2024-02-21,cs,IEX_4238333734532D52,US,USD,True,BBG00B3T3HD3,1675149,549300T12EZ1F6PWWU29
2,AAA,ARCX,,Nyse Arca,ARCX,Nyse Arca,Investment Managers Series Trust II - AXS Firs...,2024-02-21,et,IEX_5030314338392D52,US,USD,True,BBG01B0JRCS6,1587982,549300SU7ER9OFETRU41
3,AAAU,BATS,,Cboe Bzx U S Equities Exchange,BATS,Cboe Bzx U S Equities Exchange,Goldman Sachs Physical Gold ETF Trust - Goldma...,2024-02-21,et,IEX_474B433136332D52,US,USD,True,BBG00LPXX872,1708646,
4,AACG,XNAS,,Nasdaq All Markets,XNMS,Nasdaq Nms Global Market,ATA Creativity Global - ADR,2024-02-21,ad,IEX_44595A4C53392D52,US,USD,True,BBG000V2S3P6,1420529,


In [493]:
# NYSE and NASDAQ symbols only: rows whose 'exchange' is XNYS or XNAS,
# restricted to the three columns used downstream (single .loc selection)
filtered_df = symbols_df.loc[
    symbols_df['exchange'].isin(['XNYS', 'XNAS']),
    ['symbol', 'exchange', 'exchangeName'],
]

# preview the first rows, then report how many symbols passed the filter
print(filtered_df.head())
data_size = filtered_df.shape[0]
print()
print(f"Data size: {data_size}")

  symbol exchange                 exchangeName
0      A     XNYS  New York Stock Exchange Inc
1     AA     XNYS  New York Stock Exchange Inc
4   AACG     XNAS           Nasdaq All Markets
5   AACI     XNAS           Nasdaq All Markets
6  AACIU     XNAS           Nasdaq All Markets

Data size: 7791


In [494]:
# Number of symbols sent in each batch request
batch_size = 100
# filtered_df must contain a 'symbol' column (it is sliced by the batch loop).
# Batches needed to cover every row; the last batch may be partial
total_batches = math.ceil(len(filtered_df) / batch_size)
print(f"Total batches: {total_batches}")

# Accumulates one {'symbol', 'marketcap'} record per requested symbol
combined_data = []

# function -> fetch market cap data in batches
def fetch_market_cap(symbols_batch, iex_key, timeout=10):
    """Request IEX Cloud batch ``quote`` data for a group of symbols.

    Parameters
    ----------
    symbols_batch : iterable of str
        Ticker symbols to include in one batch call.
    iex_key : str
        IEX Cloud API token.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    dict
        The parsed JSON body on HTTP 200. ``{}`` when the batch is empty,
        the request fails, or the status is not 200; a warning is emitted
        in the two failure cases so dropped batches are visible.
    """
    symbols_batch = list(symbols_batch)
    if not symbols_batch:
        # Nothing to ask for: skip the network round trip entirely.
        return {}

    url = 'https://cloud.iexapis.com/stable/stock/market/batch'
    # `params` lets requests URL-encode the values, so symbols containing
    # reserved characters (e.g. '=' or '+') cannot corrupt the query string.
    params = {'symbols': ','.join(symbols_batch), 'types': 'quote', 'token': iex_key}
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Only the exception type is reported: the exception text can embed
        # the request URL, which carries the API token.
        warnings.warn(
            f"quote batch request failed ({type(exc).__name__}); "
            f"{len(symbols_batch)} symbols skipped",
            stacklevel=2,
        )
        return {}

    if response.status_code != 200:
        warnings.warn(
            f"quote batch request returned HTTP {response.status_code}; "
            f"{len(symbols_batch)} symbols skipped",
            stacklevel=2,
        )
        return {}
    return response.json()
    
# Progress-bar bookkeeping: bar width in characters and batches finished so far
progress_bar_length = 50
completed_batches = 0

# batch processing - IMPORTANT (API efficiency)
# enumerate(start=1) keeps completed_batches equal to the number of batches done
for completed_batches, start in enumerate(range(0, len(filtered_df), batch_size), start=1):
    batch_symbols = filtered_df['symbol'].iloc[start:start + batch_size].tolist()
    batch_data = fetch_market_cap(batch_symbols, iex_key)  # iex_key must be defined and valid

    # one record per requested symbol; marketcap is None when the response lacks it
    combined_data.extend(
        {
            'symbol': ticker,
            'marketcap': batch_data.get(ticker, {}).get('quote', {}).get('marketCap', None),
        }
        for ticker in batch_symbols
    )

    # fraction of batches done -> number of filled bar cells
    progress = completed_batches / total_batches
    filled_length = int(round(progress_bar_length * progress))

    # filled cells first, padded with '-' up to the bar width
    bar = ('█' * filled_length).ljust(progress_bar_length, '-')

    # redraw the bar in place on the same output line
    print(f"\rProgress: |{bar}| {progress*100:.2f}% Complete", end="\r")

# move to a fresh line so later output does not overwrite the bar
print()

# build the frame from the collected records and attach exchange / exchangeName
# from filtered_df by symbol
screener_df = pd.DataFrame(combined_data).merge(
    filtered_df[['symbol', 'exchange', 'exchangeName']],
    on='symbol',
    how='left',
)

Total batches: 78
Progress: |██████████████████████████████████████████████████| 100.00% Complete


In [495]:
# Preview the merged screener frame and report its row count
print(screener_df.head())
print()
print(f'Length: {len(screener_df.index)}')

  symbol     marketcap exchange                 exchangeName
0      A  3.930835e+10     XNYS  New York Stock Exchange Inc
1     AA  4.763415e+09     XNYS  New York Stock Exchange Inc
2   AACG  4.209672e+07     XNAS           Nasdaq All Markets
3   AACI  8.880890e+07     XNAS           Nasdaq All Markets
4  AACIU  8.848655e+07     XNAS           Nasdaq All Markets

Length: 7791


In [496]:
# drop NAs: discard every row with a missing value in any column
# (the name is rebound to the result rather than mutated in place)
screener_df = screener_df.dropna()

# report the row count left after dropping NAs
print(f'New length: {len(screener_df)}')
print()

screener_df.head(5)

New length: 7688



Unnamed: 0,symbol,marketcap,exchange,exchangeName
0,A,39308350000.0,XNYS,New York Stock Exchange Inc
1,AA,4763415000.0,XNYS,New York Stock Exchange Inc
2,AACG,42096720.0,XNAS,Nasdaq All Markets
3,AACI,88808900.0,XNAS,Nasdaq All Markets
4,AACIU,88486550.0,XNAS,Nasdaq All Markets


In [497]:
# Number of symbols sent in each batch request
batch_size = 100
# Batches needed to cover every row of screener_df; the last may be partial
total_batches = math.ceil(len(screener_df) / batch_size)
print(f"Total batches: {total_batches}")

# Accumulates one quote record (dict) per requested symbol
quote_data_list = []

def fetch_quote_data(symbols_batch, iex_key, timeout=10):
    """Request IEX Cloud batch ``quote`` data for a group of symbols.

    Parameters
    ----------
    symbols_batch : iterable of str
        Ticker symbols to include in one batch call.
    iex_key : str
        IEX Cloud API token.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    dict
        The parsed JSON body on HTTP 200. ``{}`` when the batch is empty,
        the request fails, or the status is not 200; a warning is emitted
        in the two failure cases so dropped batches are visible.
    """
    symbols_batch = list(symbols_batch)
    if not symbols_batch:
        # Nothing to ask for: skip the network round trip entirely.
        return {}

    url = 'https://cloud.iexapis.com/stable/stock/market/batch'
    # `params` lets requests URL-encode the values, so symbols containing
    # reserved characters (e.g. '=' or '+') cannot corrupt the query string.
    params = {'symbols': ','.join(symbols_batch), 'types': 'quote', 'token': iex_key}
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Only the exception type is reported: the exception text can embed
        # the request URL, which carries the API token.
        warnings.warn(
            f"quote batch request failed ({type(exc).__name__}); "
            f"{len(symbols_batch)} symbols skipped",
            stacklevel=2,
        )
        return {}

    if response.status_code != 200:
        warnings.warn(
            f"quote batch request returned HTTP {response.status_code}; "
            f"{len(symbols_batch)} symbols skipped",
            stacklevel=2,
        )
        return {}
    return response.json()
    
# progress-bar bookkeeping: bar width in characters and batches finished so far
progress_bar_length = 50
completed_batches = 0

# Quote fields copied into every record, in output-column order.
# iex* fields: IEX real time prices - after hours without UTP authorization (maybe)
# latestTime / latestUpdate: updates overkill
quote_fields = [
    'latestPrice', 'close', 'previousClose',
    'extendedPrice', 'extendedChange', 'extendedChangePercent',
    'latestVolume', 'volume', 'previousVolume',
    'primaryExchange', 'avgTotalVolume', 'calculationPrice',
    'change', 'changePercent', 'companyName',
    'iexClose', 'iexCloseTime', 'iexRealtimePrice', 'iexLastUpdated', 'iexVolume',
    'latestTime', 'latestUpdate',
    'isUSMarketOpen', 'closeSource', 'openSource', 'iexOpen',
]

# scalable endpoint variable data request
# NOTE: this requests the same `types=quote` batch endpoint as fetch_market_cap.
for completed_batches, start in enumerate(range(0, len(screener_df), batch_size), start=1):
    batch_symbols = screener_df['symbol'].iloc[start:start + batch_size].tolist()
    batch_data = fetch_quote_data(batch_symbols, iex_key)

    # one record per requested symbol; fields absent from the response stay None
    for ticker in batch_symbols:
        quote_data = batch_data.get(ticker, {}).get('quote', {})
        record = {'symbol': ticker}  # KEY
        record.update((field, quote_data.get(field, None)) for field in quote_fields)
        quote_data_list.append(record)

    # fraction of batches done -> number of filled bar cells
    progress = completed_batches / total_batches
    filled_length = int(round(progress_bar_length * progress))

    # filled cells first, padded with '-' up to the bar width
    bar = ('█' * filled_length).ljust(progress_bar_length, '-')

    # redraw the bar in place on the same output line
    print(f"\rProgress: |{bar}| {progress*100:.2f}% Complete", end="\r")

# collected quote records -> DataFrame
quote_df = pd.DataFrame(quote_data_list)

# MERGE DATA: attach the quote columns to screener_df by symbol
screener_df = screener_df.merge(quote_df, on='symbol', how='left')

screener_df.head(5)

Total batches: 77
Progress: |██████████████████████████████████████████████████| 100.00% Complete

Unnamed: 0,symbol,marketcap,exchange,exchangeName,latestPrice,close,previousClose,extendedPrice,extendedChange,extendedChangePercent,...,iexCloseTime,iexRealtimePrice,iexLastUpdated,iexVolume,latestTime,latestUpdate,isUSMarketOpen,closeSource,openSource,iexOpen
0,A,39308350000.0,XNYS,New York Stock Exchange Inc,134.14,134.14,134.84,134.14,0.0,0.0,...,1708463000000.0,134.14,1708463000000.0,43483.0,"February 20, 2024",1708462801959,False,official,official,134.07
1,AA,4763415000.0,XNYS,New York Stock Exchange Inc,26.69,26.69,27.4,26.73,0.04,0.0015,...,1708463000000.0,26.7,1708463000000.0,153167.0,"February 20, 2024",1708462875282,False,official,official,26.77
2,AACG,42096720.0,XNAS,Nasdaq All Markets,1.3406,1.3406,1.46,1.36,0.0194,0.01447,...,1708117000000.0,0.0,0.0,0.0,"February 20, 2024",1708462800000,False,official,official,
3,AACI,88808900.0,XNAS,Nasdaq All Markets,11.02,,10.98,11.02,0.0,0.0,...,1707235000000.0,0.0,0.0,0.0,"February 20, 2024",1708462800000,False,official,official,
4,AACIU,88486550.0,XNAS,Nasdaq All Markets,10.9,,10.89,10.9,0.0,0.0,...,1706906000000.0,0.0,0.0,0.0,"February 2, 2024",1706907600000,False,official,official,


In [498]:
def fetch_shares_outstanding_data(symbols_batch, iex_key, timeout=10):
    """Request IEX Cloud batch ``stats`` data for a group of symbols.

    Parameters
    ----------
    symbols_batch : iterable of str
        Ticker symbols to include in one batch call.
    iex_key : str
        IEX Cloud API token.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    dict
        The parsed JSON body on HTTP 200. ``{}`` when the batch is empty,
        the request fails, or the status is not 200; a warning is emitted
        in the two failure cases so dropped batches are visible.
    """
    symbols_batch = list(symbols_batch)
    if not symbols_batch:
        # Nothing to ask for: skip the network round trip entirely.
        return {}

    url = 'https://cloud.iexapis.com/stable/stock/market/batch'
    # `params` lets requests URL-encode the values, so symbols containing
    # reserved characters (e.g. '=' or '+') cannot corrupt the query string.
    params = {'symbols': ','.join(symbols_batch), 'types': 'stats', 'token': iex_key}
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Only the exception type is reported: the exception text can embed
        # the request URL, which carries the API token.
        warnings.warn(
            f"stats batch request failed ({type(exc).__name__}); "
            f"{len(symbols_batch)} symbols skipped",
            stacklevel=2,
        )
        return {}

    if response.status_code != 200:
        warnings.warn(
            f"stats batch request returned HTTP {response.status_code}; "
            f"{len(symbols_batch)} symbols skipped",
            stacklevel=2,
        )
        return {}
    return response.json()

# Batch plan: screener_df supplies the symbols, batch_size symbols per request
batch_size = 100
total_batches = math.ceil(len(screener_df) / batch_size)
print(f"Total batches: {total_batches}")

# one record per requested symbol is collected here
shares_outstanding_data_list = []
# stats fields copied into every record, in output-column order
stats_fields = ('sharesOutstanding', 'avg10Volume', 'avg30Volume')

# progress-bar bookkeeping: bar width in characters and batches finished so far
progress_bar_length = 50
completed_batches = 0

for completed_batches, start in enumerate(range(0, len(screener_df), batch_size), start=1):
    batch_symbols = screener_df['symbol'].iloc[start:start + batch_size].tolist()
    batch_data = fetch_shares_outstanding_data(batch_symbols, iex_key)

    # fields absent from the response stay None
    for ticker in batch_symbols:
        stats_data = batch_data.get(ticker, {}).get('stats', {})
        record = {'symbol': ticker}
        record.update((field, stats_data.get(field, None)) for field in stats_fields)
        shares_outstanding_data_list.append(record)

    # fraction of batches done -> number of filled bar cells
    progress = completed_batches / total_batches
    filled_length = int(round(progress_bar_length * progress))

    # filled cells first, padded with '-' up to the bar width
    bar = ('█' * filled_length).ljust(progress_bar_length, '-')

    # redraw the bar in place on the same output line
    print(f"\rProgress: |{bar}| {progress*100:.2f}% Complete", end="\r")

# collected stats records -> DataFrame
shares_outstanding_df = pd.DataFrame(shares_outstanding_data_list)

# MERGE DATA: attach the stats columns to screener_df by symbol
screener_df = screener_df.merge(shares_outstanding_df, on='symbol', how='left')

screener_df.head(5)

Total batches: 77
Progress: |██████████████████████████████████████████████████| 100.00% Complete

Unnamed: 0,symbol,marketcap,exchange,exchangeName,latestPrice,close,previousClose,extendedPrice,extendedChange,extendedChangePercent,...,iexVolume,latestTime,latestUpdate,isUSMarketOpen,closeSource,openSource,iexOpen,sharesOutstanding,avg10Volume,avg30Volume
0,A,39308350000.0,XNYS,New York Stock Exchange Inc,134.14,134.14,134.84,134.14,0.0,0.0,...,43483.0,"February 20, 2024",1708462801959,False,official,official,134.07,293039707,1249900,1311345
1,AA,4763415000.0,XNYS,New York Stock Exchange Inc,26.69,26.69,27.4,26.73,0.04,0.0015,...,153167.0,"February 20, 2024",1708462875282,False,official,official,26.77,178471908,5162225,5761875
2,AACG,42096720.0,XNAS,Nasdaq All Markets,1.3406,1.3406,1.46,1.36,0.0194,0.01447,...,0.0,"February 20, 2024",1708462800000,False,official,official,,31401405,42101,20623
3,AACI,88808900.0,XNAS,Nasdaq All Markets,11.02,,10.98,11.02,0.0,0.0,...,0.0,"February 20, 2024",1708462800000,False,official,official,,8058884,461,14409
4,AACIU,88486550.0,XNAS,Nasdaq All Markets,10.9,,10.89,10.9,0.0,0.0,...,0.0,"February 2, 2024",1706907600000,False,official,official,,15000000,0,6983


In [499]:
# categorize market cap
def categorize_market_cap(x):
    """Map a market capitalisation value to its size bucket.

    Buckets (lower bound inclusive, upper bound exclusive):
        0 to 300 million           -> 'Micro-Cap'
        300 million to 2 billion   -> 'Small-Cap'
        2 billion to 10 billion    -> 'Mid-Cap'
        10 billion to 200 billion  -> 'Large-Cap'
        200 billion and above      -> 'Mega-Cap'

    Parameters
    ----------
    x : float or None
        Market capitalisation, in the same units as the thresholds above.

    Returns
    -------
    str
        The bucket label, or 'Unknown' for missing (None / NaN) or negative
        values.
    """
    # Missing values must be tested first: comparing None raises TypeError and
    # every comparison with NaN is False. Negative caps are invalid, so they
    # are reported as 'Unknown' instead of falling into the '< 300e6' bucket.
    if pd.isna(x) or x < 0:
        return 'Unknown'

    # (exclusive upper bound, label), checked in ascending order
    upper_bounds = (
        (300e6, 'Micro-Cap'),
        (2e9, 'Small-Cap'),
        (10e9, 'Mid-Cap'),
        (200e9, 'Large-Cap'),
    )
    for upper, label in upper_bounds:
        if x < upper:
            return label
    return 'Mega-Cap'

# Label every row: run categorize_market_cap on each 'marketcap' value and
# store the result in a new 'marketcapType' column
screener_df['marketcapType'] = screener_df['marketcap'].apply(categorize_market_cap)

# Show the first ten rows, including the new label column
screener_df.head(10)

Unnamed: 0,symbol,marketcap,exchange,exchangeName,latestPrice,close,previousClose,extendedPrice,extendedChange,extendedChangePercent,...,latestTime,latestUpdate,isUSMarketOpen,closeSource,openSource,iexOpen,sharesOutstanding,avg10Volume,avg30Volume,marketcapType
0,A,39308350000.0,XNYS,New York Stock Exchange Inc,134.14,134.14,134.84,134.14,0.0,0.0,...,"February 20, 2024",1708462801959,False,official,official,134.07,293039707,1249900,1311345,Large-Cap
1,AA,4763415000.0,XNYS,New York Stock Exchange Inc,26.69,26.69,27.4,26.73,0.04,0.0015,...,"February 20, 2024",1708462875282,False,official,official,26.77,178471908,5162225,5761875,Mid-Cap
2,AACG,42096720.0,XNAS,Nasdaq All Markets,1.3406,1.3406,1.46,1.36,0.0194,0.01447,...,"February 20, 2024",1708462800000,False,official,official,,31401405,42101,20623,Micro-Cap
3,AACI,88808900.0,XNAS,Nasdaq All Markets,11.02,,10.98,11.02,0.0,0.0,...,"February 20, 2024",1708462800000,False,official,official,,8058884,461,14409,Micro-Cap
4,AACIU,88486550.0,XNAS,Nasdaq All Markets,10.9,,10.89,10.9,0.0,0.0,...,"February 2, 2024",1706907600000,False,official,official,,15000000,0,6983,Micro-Cap
5,AACT,527000000.0,XNYS,New York Stock Exchange Inc,10.54,10.54,10.52,10.54,0.0,0.0,...,"February 20, 2024",1708462800000,False,official,official,,50000000,140291,214072,Small-Cap
6,AACT=,526000000.0,XNYS,New York Stock Exchange Inc,10.629,,10.6,10.629,0.0,0.0,...,"February 15, 2024",1708030800000,False,official,official,,50000000,600,4678,Small-Cap
7,AADI,46844390.0,XNAS,Nasdaq All Markets,1.91,1.91,1.9,1.96,0.05,0.02618,...,"February 20, 2024",1708462800381,False,official,official,1.89,24525860,241834,227696,Micro-Cap
8,AADR,25708500.0,XNAS,Nasdaq All Markets,57.13,57.13,57.19,57.13,0.0,0.0,...,"February 20, 2024",1708462800000,False,official,official,,450000,1002,730,Micro-Cap
9,AAGR,46293460.0,XNAS,Nasdaq All Markets,0.8,0.8,0.829,0.8339,0.0339,0.04237,...,"February 20, 2024",1708462800146,False,official,official,0.801,57866830,87929,107901,Micro-Cap


In [533]:
# calculate iexChange and iexChangePercent - custom calculation
# u_iexChange: 'iexClose' minus 'close'
screener_df['u_iexChange'] = screener_df['iexClose'] - screener_df['close']
# u_iexChangePercent: that difference as a fraction of 'close'.
# It must read the 'u_iexChange' column created on the previous line; the frame
# has no 'iexChange' column, so that name raises KeyError on a fresh kernel.
screener_df['u_iexChangePercent'] = screener_df['u_iexChange'] / screener_df['close']

In [537]:
# List every column currently present in screener_df
print(screener_df.columns)

Index(['symbol', 'marketcap', 'exchange', 'exchangeName', 'latestPrice',
       'close', 'previousClose', 'extendedPrice', 'extendedChange',
       'extendedChangePercent', 'latestVolume', 'volume', 'previousVolume',
       'primaryExchange', 'avgTotalVolume', 'calculationPrice', 'change',
       'changePercent', 'companyName', 'iexClose', 'iexCloseTime',
       'iexRealtimePrice', 'iexLastUpdated', 'iexVolume', 'latestTime',
       'latestUpdate', 'isUSMarketOpen', 'closeSource', 'openSource',
       'iexOpen', 'sharesOutstanding', 'avg10Volume', 'avg30Volume',
       'marketcapType', 'u_iexChange', 'u_iexChangePercent'],
      dtype='object')


RESET FROM HERE

In [539]:
# Columns kept for the screener view, grouped by theme. Dict insertion order,
# and the order inside each list, is the final column order.
screener_col_groups = {
    'key': ['symbol'],
    'market cap': ['marketcap', 'marketcapType'],
    'price(s)': ['close', 'latestPrice', 'extendedPrice'],
    # 'change' is mandatory
    'relative change': ['calculationPrice', 'change', 'changePercent'],
    'shares (approx. float)': ['sharesOutstanding'],
    'volume': ['latestVolume', 'avgTotalVolume', 'avg10Volume', 'avg30Volume', 'iexVolume'],
    'info': ['isUSMarketOpen', 'exchange', 'exchangeName', 'companyName',
             'closeSource', 'openSource'],
    # IEX real time prices - after hours without UTP authorization
    'iex': ['iexClose', 'iexCloseTime', 'iexRealtimePrice',
            'u_iexChange', 'u_iexChangePercent', 'iexOpen'],
}
# TODO: ADD - CHANGE % (CALCULATION - custom)

# flatten the groups into the column filter
screener_cols = [column for group in screener_col_groups.values() for column in group]

screener_df_x = screener_df[screener_cols]

screener_df_x.head(25)

Unnamed: 0,symbol,marketcap,marketcapType,close,latestPrice,extendedPrice,calculationPrice,change,changePercent,sharesOutstanding,...,exchangeName,companyName,closeSource,openSource,iexClose,iexCloseTime,iexRealtimePrice,u_iexChange,u_iexChangePercent,iexOpen
0,A,39308350000.0,Large-Cap,134.14,134.14,134.14,close,-0.7,-0.00519,293039707,...,New York Stock Exchange Inc,Agilent Technologies Inc.,official,official,134.14,1708463000000.0,134.14,0.0,0.0,134.07
1,AA,4763415000.0,Mid-Cap,26.69,26.69,26.73,close,-0.71,-0.02591,178471908,...,New York Stock Exchange Inc,Alcoa Corp,official,official,26.7,1708463000000.0,26.7,0.01,0.000375,26.77
2,AACG,42096720.0,Micro-Cap,1.3406,1.3406,1.36,close,-0.1194,-0.08178,31401405,...,Nasdaq All Markets,ATA Creativity Global - ADR,official,official,1.46,1708117000000.0,0.0,0.1194,0.089065,
3,AACI,88808900.0,Micro-Cap,,11.02,11.02,close,0.04,0.00364,8058884,...,Nasdaq All Markets,Armada Acquisition Corp I,official,official,10.93,1707235000000.0,0.0,,,
4,AACIU,88486550.0,Micro-Cap,,10.9,10.9,close,0.01,0.00092,15000000,...,Nasdaq All Markets,Armada Acquisition Corp I - Units (1 Ord & 1/2...,official,official,10.9,1706906000000.0,0.0,,,
5,AACT,527000000.0,Small-Cap,10.54,10.54,10.54,close,0.02,0.0019,50000000,...,New York Stock Exchange Inc,Ares Acquisition Corporation II - Class A,official,official,10.525,1708108000000.0,0.0,-0.015,-0.001423,
6,AACT=,526000000.0,Small-Cap,,10.629,10.629,close,0.029,0.00274,50000000,...,New York Stock Exchange Inc,Ares Acquisition Corporation II - Units (1 Ord...,official,official,10.57,1706714000000.0,0.0,,,
7,AADI,46844390.0,Micro-Cap,1.91,1.91,1.96,close,0.01,0.00526,24525860,...,Nasdaq All Markets,Aadi Bioscience Inc,official,official,1.92,1708463000000.0,1.92,0.01,0.005236,1.89
8,AADR,25708500.0,Micro-Cap,57.13,57.13,57.13,close,-0.06,-0.00105,450000,...,Nasdaq All Markets,Advisorshares Trust - AdvisorShares Dorsey Wri...,official,official,56.57,1707926000000.0,0.0,-0.56,-0.009802,
9,AAGR,46293460.0,Micro-Cap,0.8,0.8,0.8339,close,-0.029,-0.03498,57866830,...,Nasdaq All Markets,African Agriculture Holdings Inc,official,official,0.776,1708463000000.0,0.776,-0.024,-0.03,0.801


In [540]:
# One dataframe per marketcap type: each is the subset of screener_df_x whose
# 'marketcapType' equals the label in the matching position of the tuple
microcap_df, smallcap_df, midcap_df, largecap_df, megacap_df = (
    screener_df_x[screener_df_x['marketcapType'].eq(cap_label)]
    for cap_label in ('Micro-Cap', 'Small-Cap', 'Mid-Cap', 'Large-Cap', 'Mega-Cap')
)

In [541]:
# Preview the first five Large-Cap rows
largecap_df.head(5)

Unnamed: 0,symbol,marketcap,marketcapType,close,latestPrice,extendedPrice,calculationPrice,change,changePercent,sharesOutstanding,...,exchangeName,companyName,closeSource,openSource,iexClose,iexCloseTime,iexRealtimePrice,u_iexChange,u_iexChangePercent,iexOpen
0,A,39308350000.0,Large-Cap,134.14,134.14,134.14,close,-0.7,-0.00519,293039707,...,New York Stock Exchange Inc,Agilent Technologies Inc.,official,official,134.14,1708463000000.0,134.14,0.0,0.0,134.07
29,ABEV,40329810000.0,Large-Cap,2.56,2.56,2.58,close,0.01,0.00392,15753833284,...,New York Stock Exchange Inc,Ambev S.A. - ADR,official,official,2.57,1708463000000.0,2.57,0.01,0.003906,2.62
38,ABNB,95168180000.0,Large-Cap,148.15,148.15,146.5,close,-4.36,-0.02859,642377183,...,Nasdaq All Markets,Airbnb Inc - Class A,official,official,148.15,1708463000000.0,148.15,0.0,0.0,150.07
70,ACGL,31857690000.0,Large-Cap,85.37,85.37,85.13,close,-0.74,-0.00859,373171909,...,Nasdaq All Markets,Arch Capital Group Ltd,official,official,85.42,1708463000000.0,85.42,0.05,0.000586,85.885
71,ACGLN,32293150000.0,Large-Cap,20.27,20.27,20.18,close,0.1,0.00496,20000000,...,Nasdaq All Markets,Arch Capital Group Ltd - 4.55% PRF PERPETUAL U...,official,official,20.17,1708462000000.0,20.17,-0.1,-0.004933,20.34


In [543]:
# List the columns of the Large-Cap frame (same selection as screener_df_x)
print(largecap_df.columns)

Index(['symbol', 'marketcap', 'marketcapType', 'close', 'latestPrice',
       'extendedPrice', 'calculationPrice', 'change', 'changePercent',
       'sharesOutstanding', 'latestVolume', 'avgTotalVolume', 'avg10Volume',
       'avg30Volume', 'iexVolume', 'isUSMarketOpen', 'exchange',
       'exchangeName', 'companyName', 'closeSource', 'openSource', 'iexClose',
       'iexCloseTime', 'iexRealtimePrice', 'u_iexChange', 'u_iexChangePercent',
       'iexOpen'],
      dtype='object')


In [505]:
def filter_stocks(df, 
                  market_cap_type=None, 
                  price_min=None, 
                  change_min_percent=None,  # Pre-market gap percentage
                  volume_min=None, 
                  volume_avg_comparison=None,  # '10day' or '30day'
                  shares_outstanding_min=None, 
                  shares_outstanding_max=None,
                  extended_price_min=None,  # For pre-market gap analysis
                  u_iexChange_min=None,  # Min value of the custom u_iexChange column
                  u_iexChangePercent_min=None):  # Min value of the custom u_iexChangePercent column
    """Return the rows of ``df`` that pass every filter that was supplied.

    A filter left as ``None`` is skipped, and only the columns needed by the
    supplied filters have to exist in ``df``. Rows whose compared value is NaN
    fail the comparison and are dropped by that filter.

    Parameters
    ----------
    df : pandas.DataFrame
        Screener frame to filter; it is not modified.
    market_cap_type : str, optional
        Keep rows whose 'marketcapType' equals this label.
    price_min : float, optional
        Minimum 'latestPrice'.
    change_min_percent : float, optional
        Minimum 'changePercent', compared directly against the column. The
        notebook passes 0.02, so the column appears to hold fractions
        rather than percent values. TODO confirm.
    volume_min : float, optional
        Minimum 'latestVolume'.
    volume_avg_comparison : {'10day', '30day'}, optional
        Keep rows whose 'latestVolume' is at least 'avg10Volume' or
        'avg30Volume' respectively.
    shares_outstanding_min, shares_outstanding_max : float, optional
        Inclusive bounds on 'sharesOutstanding'.
    extended_price_min : float, optional
        Minimum 'extendedPrice'.
    u_iexChange_min : float, optional
        Minimum 'u_iexChange' (computed in this notebook as
        'iexClose' - 'close').
    u_iexChangePercent_min : float, optional
        Minimum 'u_iexChangePercent'.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, with the original index preserved.

    Raises
    ------
    ValueError
        If ``volume_avg_comparison`` is neither None, '10day' nor '30day'.
    """
    # Validate first: an unrecognised value used to be ignored silently, so a
    # typo returned rows with no volume filter applied.
    volume_avg_columns = {'10day': 'avg10Volume', '30day': 'avg30Volume'}
    if volume_avg_comparison is not None and volume_avg_comparison not in volume_avg_columns:
        raise ValueError(
            f"volume_avg_comparison must be one of {sorted(volume_avg_columns)} or None, "
            f"got {volume_avg_comparison!r}"
        )

    # Market Cap Type
    if market_cap_type is not None:
        df = df[df['marketcapType'] == market_cap_type]

    # Every "column >= minimum" filter; the result is the intersection of
    # all filters, so the order in which they run does not change it.
    minimum_filters = (
        ('latestPrice', price_min),
        ('changePercent', change_min_percent),
        ('latestVolume', volume_min),
        ('sharesOutstanding', shares_outstanding_min),
        ('extendedPrice', extended_price_min),
        ('u_iexChange', u_iexChange_min),
        ('u_iexChangePercent', u_iexChangePercent_min),
    )
    for column, minimum in minimum_filters:
        if minimum is not None:
            df = df[df[column] >= minimum]

    # SHARES OUTSTANDING upper bound
    if shares_outstanding_max is not None:
        df = df[df['sharesOutstanding'] <= shares_outstanding_max]

    # Dynamic Volume Comparison: latest volume against the chosen average
    if volume_avg_comparison is not None:
        average_column = volume_avg_columns[volume_avg_comparison]
        df = df[df['latestVolume'] >= df[average_column]]

    return df


In [506]:
#### MICRO-CAP FILTER ####
# Filter, rank and re-index in one chain:
#   1. keep Micro-Cap rows with latestPrice >= 0.50, changePercent >= .02 and
#      latestVolume at or above the 10-day average
#   2. order descending by changePercent
#   3. replace the index with a fresh 0..n-1 range
microcap_df = (
    filter_stocks(
        microcap_df,
        market_cap_type='Micro-Cap',
        price_min=0.50,
        change_min_percent=.02,
        volume_avg_comparison='10day',
    )
    .sort_values('changePercent', ascending=False)
    .reset_index(drop=True)
)

# report how many rows passed the filter
print(f"Length: {len(microcap_df)}")
microcap_df.head(10)

Length: 228


Unnamed: 0,symbol,marketcap,marketcapType,close,latestPrice,extendedPrice,calculationPrice,change,changePercent,sharesOutstanding,...,isUSMarketOpen,exchange,exchangeName,companyName,closeSource,openSource,iexClose,iexCloseTime,iexRealtimePrice,iexOpen
0,CPOP,131482976.0,Micro-Cap,7.19,7.19,5.72,close,4.54,1.71321,18286923,...,False,XNAS,Nasdaq All Markets,Pop Culture Group Co Ltd - Class A,official,official,7.07,1708463000000.0,7.07,2.885
1,MEDS,10953523.0,Micro-Cap,9.09,9.09,8.91,close,4.5,0.98039,1205008,...,False,XNAS,Nasdaq All Markets,Trxade Health Inc,official,official,9.07,1708463000000.0,9.07,11.145
2,MNPR,9945622.0,Micro-Cap,0.669,0.669,0.5944,close,0.3264,0.95271,14866400,...,False,XNAS,Nasdaq All Markets,Monopar Therapeutics Inc,official,official,0.6371,1708463000000.0,0.6371,1.44
3,AVGR,7465499.0,Micro-Cap,5.4488,5.4488,4.57,close,2.4738,0.83153,1370118,...,False,XNAS,Nasdaq All Markets,Avinger Inc,official,official,5.39,1708463000000.0,5.39,2.81
4,VINC,66065961.0,Micro-Cap,3.09,3.09,2.99,close,1.28,0.70718,21380570,...,False,XNAS,Nasdaq All Markets,Vincerx Pharma Inc,official,official,3.02,1708463000000.0,3.02,1.82
5,SISI,147496746.0,Micro-Cap,2.3,2.3,2.17,close,0.825,0.55932,64129020,...,False,XNAS,Nasdaq All Markets,Shineco Inc,official,official,2.2,1708463000000.0,2.2,2.3
6,BFRI,1745272.0,Micro-Cap,1.15,1.15,1.02,close,0.4005,0.53436,1517628,...,False,XNAS,Nasdaq All Markets,Biofrontera Inc,official,official,1.13,1708463000000.0,1.13,1.27
7,LUNR,282835991.0,Micro-Cap,10.99,10.99,12.09,close,3.67,0.50137,25735759,...,False,XNAS,Nasdaq All Markets,Intuitive Machines Inc - Class A,official,official,10.97,1708463000000.0,12.62,9.44
8,MNY,73354528.0,Micro-Cap,3.06,3.06,3.1,close,0.96,0.45714,23972068,...,False,XNAS,Nasdaq All Markets,MoneyHero Ltd - Class A,official,official,3.03,1708463000000.0,3.03,3.18
9,XTKG,4646650.0,Micro-Cap,0.995,0.995,0.955,close,0.2235,0.2897,4670000,...,False,XNAS,Nasdaq All Markets,X3 Holdings Co Ltd.,official,official,0.9461,1708461000000.0,0.9461,0.781


In [507]:
#### SMALL-CAP FILTER ####

In [508]:
#### MID-CAP FILTER ####

In [509]:
#### LARGE-CAP FILTER ####

In [510]:
#### MEGA-CAP FILTER ####

In [511]:
# ADD - relative volume (ratio) data???
# some of this you might have already covered, but might be able to get more granular with the data

In [512]:
# create daily watchlist dataframe
# combine the filtered dataframes

In [513]:
# news

In [514]:
# news international

In [515]:
# sentiment - custom

In [516]:
# RBV - robust value score - custom

In [517]:
# key levels from historical data
# other data

In [518]:
# exogenous data

In [519]:
# do work with BENZINGA BZ squawk box - see if there is an API

In [520]:
# RSI, MACD, etc. - custom

In [521]:
# ichimoku cloud - custom

END - END - END - IN PROGRESS - END - END - END END - END - END - IN PROGRESS - END - END - END END - END - END - IN PROGRESS - END - END - END


In [522]:
# YAHOO data - testing

# data feed testing
import yfinance as yf
import pandas as pd

# List of stock symbols
symbols = ['PLTR', 'TSLA','NVDA']

# Columns read from the ticker's info mapping: output label -> info key
info_columns = (
    ('Average Volume', 'averageVolume'),
    ('After Hours Price', 'postMarketPrice'),  # after hours current price
    ('Change in Price', 'regularMarketChange'),
    ('Change in Percentage', 'regularMarketChangePercent'),
)

# Fetch data: one summary record per symbol
data = []
for symbol in symbols:
    stock = yf.Ticker(symbol)
    hist = stock.history(period="1d")  # history requested with period="1d"
    info = stock.info  # general stock info

    # Close / Volume come from the last history row; None when no rows came back
    if hist.empty:
        close_price = volume = None
    else:
        close_price = hist['Close'].iloc[-1]
        volume = hist['Volume'].iloc[-1]

    record = {'Symbol': symbol, 'Close Price': close_price, 'Volume': volume}
    # .get() leaves None for keys missing from info
    for label, info_key in info_columns:
        record[label] = info.get(info_key)
    data.append(record)

# Convert to DataFrame
df = pd.DataFrame(data)

print(df)


  Symbol  Close Price     Volume  Average Volume After Hours Price  \
0   PLTR    23.400000   93067100        69071645              None   
1   TSLA   193.759995  104332300       111586877              None   
2   NVDA   694.520020   70171600        44411586              None   

  Change in Price Change in Percentage  
0            None                 None  
1            None                 None  
2            None                 None  


In [523]:
"""
IMPORTANT: Need UTP authorization to continue
https://iexcloud.io/documentation/using-core-data/getting-nasdaq-listed-utp-otc-stock-data.html
Step 1: https://www.utpplan.com/DOC/VendorAgreement.pdf
Step 2: https://www.utpplan.com/datafeed_approval
Step 3: https://www.utpplan.com/system_application

"""

# Read utp_auth_columns.xlsx into a dataframe and, in the same chain,
# replace every missing cell with a single space
utp_auth_df = pd.read_excel('utp_auth_columns.xlsx').fillna(" ")

utp_auth_df

Unnamed: 0,utp_auth_required,humbled_trader_TARGETs,iex_real-time_alternatives,u_screener_df_columns_ACTIVE
0,close,Symbol,,
1,closeTime,Price ($),,
2,delayedPrice,Float (shares),,
3,delayedPriceTime,Change Close (%),,
4,extendedPrice,Volume Today (shares),,
5,extendedPriceTime,Avereage Volume (5day) (shares/day),,
6,extendedChange,Market Cap ($),,
7,extendedChangePercent,Held By Institutions (%),,
8,high,Sector,,
9,low,Company Name,,


In [524]:
# # DEV - marketcap groupby count
# marketcap_counts = screener_df['marketcapType'].value_counts()

# # convert SERIES to DF
# marketcap_counts_df = marketcap_counts.reset_index()
# marketcap_counts_df.columns = ['marketcapType', 'count']

# # add percentage column
# total_count = marketcap_counts.sum()
# marketcap_counts_df['percentage'] = (marketcap_counts_df['count'] / total_count) * 100

# # print
# marketcap_counts_df

In [525]:
# # DEV - checker
# total_count_CHECK = marketcap_counts_df['count'].sum()

# # if statement comparing the two values, return 'Data is correct' if they match, otherwise return 'Data is incorrect'
# if total_count_CHECK == len(screener_df):
#     print('Data lengths match! SUCCESS')
# else:    
#     print('Data mismatch! ERROR')
#     print()
#     print(f'Code checker: {total_count_CHECK}')
#     print(f'Data length: {len(screener_df)}')

In [526]:
# def print_available_stats_parameters(symbol, iex_key):
#     url = f'https://cloud.iexapis.com/stable/stock/{symbol}/stats?token={iex_key}'
#     response = requests.get(url)
#     if response.status_code == 200:
#         data = response.json()
#         print(json.dumps(data, indent=4, sort_keys=True))
#     else:
#         print(f"Failed to fetch stats for {symbol}. Status code: {response.status_code}")

# pltr = 'PLTR'
# print_available_stats_parameters(pltr, iex_key)

In [527]:
# def fetch_deep_trades_data(symbols_batch, iex_key):
#     symbols_str = ','.join(symbols_batch)
#     url = f'https://cloud.iexapis.com/stable/deep/trades?symbols={symbols_str}&token={iex_key}'
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.json()
#     else:
#         return {}

# # Assuming screener_df is a DataFrame containing the symbols
# batch_size = 100
# # Example for illustrative purposes
# # screener_df = pd.DataFrame({'symbol': ['AAPL', 'MSFT', 'GOOGL']})
# total_batches = math.ceil(len(screener_df) / batch_size)
# print(f"Total batches: {total_batches}")

# deep_data_list = []

# for i in range(0, len(screener_df), batch_size):
#     batch_symbols = screener_df['symbol'].iloc[i:i+batch_size].tolist()
#     batch_data = fetch_deep_trades_data(batch_symbols, iex_key)
    
#     # Process and infer volume data for each symbol in the batch
#     for symbol in batch_symbols:
#         if symbol in batch_data and batch_data[symbol]:
#             volume = sum(trade['size'] for trade in batch_data[symbol])
#             deep_data_list.append({
#                 'symbol': symbol,
#                 'volume': volume,
#             })

# # Convert the combined data into a DataFrame
# iex_deep_df = pd.DataFrame(deep_data_list)

# print(iex_deep_df.head(20)) # data check
# print(len(iex_deep_df.index)) # data check

In [528]:
# print(f"screener_df column count: {len(screener_df.columns)}") # col count
# screener_df = screener_df.merge(shares_outstanding_df, on='symbol', how='left')

In [529]:
# # My Watchlist
# watchlist = ['PLTR', 'TSLA', 'NOW', 'SNOW','FB', 'NVDA', 'PYPL', 'ADBE', 'NFLX']
# watchlist_df = screener_df[screener_df['symbol'].isin(watchlist)]

# watchlist_df

In [530]:
# # DEV - API endpoint parameter availability. 
# schema_base_test_url = 'https://cloud.iexapis.com/stable'
# test_symbol = 'PLTR' # test ticker symbol
# schema_test_url = f"{schema_base_test_url}/stock/{test_symbol}/quote?schema=true&token={iex_key}"
# # GET request
# schema_test_response = requests.get(schema_test_url)

# print(json.dumps(schema_test_response.json(), indent=4))


IDEAS
1. Robust value score (RBV)
2. create data frames for each market cap
3. create a function that does math and querying based on rules / conditions
4. apply function to the market cap data sets
5. Look at other STATS now that the API is connected for the sharesOutstanding

NOTES
1. Float
1. Free Flt Mkt Cap
1. Free Flt
1. Shares outstanding

MANDATORY FIELDS
1. 