In [159]:
# libs
import os
import pandas as pd
import requests
from dotenv import load_dotenv #pip3 install python-dotenv
# load secrets
load_dotenv()
import math
import json
from IPython.display import JSON


In [160]:
# iex cloud - api data request
iex_key = os.getenv("IEX_API_KEY")
url = 'https://cloud.iexapis.com/stable/ref-data/symbols?token=' + iex_key
response = requests.get(url)
data = response.json()
symbols_df = pd.DataFrame(data)
# print 
symbols_df.head()

Unnamed: 0,symbol,exchange,exchangeSuffix,exchangeName,exchangeSegment,exchangeSegmentName,name,date,type,iexId,region,currency,isEnabled,figi,cik,lei
0,A,XNYS,,New York Stock Exchange Inc,XNYS,New York Stock Exchange Inc,Agilent Technologies Inc.,2024-02-16,cs,IEX_46574843354B2D52,US,USD,True,BBG000C2V3D6,1090872,QUIX8Y7A2WP0XRMW7G29
1,AA,XNYS,,New York Stock Exchange Inc,XNYS,New York Stock Exchange Inc,Alcoa Corp,2024-02-16,cs,IEX_4238333734532D52,US,USD,True,BBG00B3T3HD3,1675149,549300T12EZ1F6PWWU29
2,AAA,ARCX,,Nyse Arca,ARCX,Nyse Arca,Investment Managers Series Trust II - AXS Firs...,2024-02-16,et,IEX_5030314338392D52,US,USD,True,BBG01B0JRCS6,1587982,549300SU7ER9OFETRU41
3,AAAU,BATS,,Cboe Bzx U S Equities Exchange,BATS,Cboe Bzx U S Equities Exchange,Goldman Sachs Physical Gold ETF Trust - Goldma...,2024-02-16,et,IEX_474B433136332D52,US,USD,True,BBG00LPXX872,1708646,
4,AACG,XNAS,,Nasdaq All Markets,XNMS,Nasdaq Nms Global Market,ATA Creativity Global - ADR,2024-02-16,ad,IEX_44595A4C53392D52,US,USD,True,BBG000V2S3P6,1420529,


In [161]:
# NYSE and NASDAQ symbols only (filter)
filtered_df = symbols_df[symbols_df['exchange'].isin(['XNYS', 'XNAS'])]

# select cols
filtered_df = filtered_df[['symbol', 'exchange', 'exchangeName']]

# filter df
print(filtered_df.head())
data_size = len(filtered_df.index)
print()
print(f"Data size: {data_size}") # get length of dataframe

  symbol exchange                 exchangeName
0      A     XNYS  New York Stock Exchange Inc
1     AA     XNYS  New York Stock Exchange Inc
4   AACG     XNAS           Nasdaq All Markets
5   AACI     XNAS           Nasdaq All Markets
6  AACIU     XNAS           Nasdaq All Markets

Data size: 7799


In [162]:
batch_size = 100
# Assuming filtered_df is a DataFrame that contains a 'symbol' column.
# Calculate total batches
total_batches = math.ceil(len(filtered_df) / batch_size)
print(f"Total batches: {total_batches}")

combined_data = []

# function -> fetch market cap data in batches
def fetch_market_cap(symbols_batch, iex_key):
    symbols_str = ','.join(symbols_batch)
    url = f'https://cloud.iexapis.com/stable/stock/market/batch?symbols={symbols_str}&types=quote&token={iex_key}'
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return {}
    
# Initialize a counter for completed batches
completed_batches = 0

# Define the length of the progress bar
progress_bar_length = 50

# batch processing - IMPORTANT (API efficiency)
for i in range(0, len(filtered_df), batch_size):
    batch_symbols = filtered_df['symbol'].iloc[i:i+batch_size].tolist()
    batch_data = fetch_market_cap(batch_symbols, iex_key)  # Make sure iex_key is defined and valid
    
    # process and append data for each symbol in the batch
    for symbol in batch_symbols:
        market_cap = batch_data.get(symbol, {}).get('quote', {}).get('marketCap', None)
        combined_data.append({'symbol': symbol, 'marketcap': market_cap})
    
    # Increment the completed batches counter
    completed_batches += 1

    # Calculate the progress
    progress = (completed_batches / total_batches)
    filled_length = int(round(progress_bar_length * progress))
    
    # Create the progress bar
    bar = '█' * filled_length + '-' * (progress_bar_length - filled_length)
    
    # Print the progress bar with the percentage
    print(f"\rProgress: |{bar}| {progress*100:.2f}% Complete", end="\r")

# Ensure the next print happens on the next line
print()

# convert combined data into a DataFrame
screener_df = pd.DataFrame(combined_data)

# join with the filtered_df - this adds exchange and exchangeName data
screener_df = screener_df.merge(filtered_df[['symbol', 'exchange', 'exchangeName']], on='symbol', how='left')

Total batches: 78
Progress: |██████████████████████████████████████████████████| 100.00% Complete


In [163]:
print(screener_df.head())
print()
print(len(screener_df.index))

  symbol     marketcap exchange                 exchangeName
0      A  3.951347e+10     XNYS  New York Stock Exchange Inc
1     AA  4.890130e+09     XNYS  New York Stock Exchange Inc
2   AACG  4.584605e+07     XNAS           Nasdaq All Markets
3   AACI  2.594957e+07     XNAS           Nasdaq All Markets
4  AACIU  2.587867e+07     XNAS           Nasdaq All Markets

7799


In [164]:
# drop NAs
screener_df.dropna(inplace=True)

In [165]:
# determins the count of droped NAs
print(f'New length: {len(screener_df)}')
print()
print(screener_df.head())

New length: 7692

  symbol     marketcap exchange                 exchangeName
0      A  3.951347e+10     XNYS  New York Stock Exchange Inc
1     AA  4.890130e+09     XNYS  New York Stock Exchange Inc
2   AACG  4.584605e+07     XNAS           Nasdaq All Markets
3   AACI  2.594957e+07     XNAS           Nasdaq All Markets
4  AACIU  2.587867e+07     XNAS           Nasdaq All Markets


In [166]:
# categorize market cap
def categorize_market_cap(x):
    if x < 300e6:  #  < 300 million
        return 'Micro-Cap'
    elif 300e6 <= x < 2e9:  # 300 million to 2 billion
        return 'Small-Cap'
    elif 2e9 <= x < 10e9:  # 2 billion to 10 billion
        return 'Mid-Cap'
    elif 10e9 <= x < 200e9:  # 10 billion to 200 billion
        return 'Large-Cap'
    elif x >= 200e9:  # >= 200 billion
        return 'Mega-Cap'
    else:
        return 'Unknown'  # handles negative or NaN values

# apply function to 'marketcap' column AND create new 'marketcapType' column
screener_df['marketcapType'] = screener_df['marketcap'].apply(categorize_market_cap)
print(screener_df.head(10))

   symbol     marketcap exchange                 exchangeName marketcapType
0       A  3.951347e+10     XNYS  New York Stock Exchange Inc     Large-Cap
1      AA  4.890130e+09     XNYS  New York Stock Exchange Inc       Mid-Cap
2    AACG  4.584605e+07     XNAS           Nasdaq All Markets     Micro-Cap
3    AACI  2.594957e+07     XNAS           Nasdaq All Markets     Micro-Cap
4   AACIU  2.587867e+07     XNAS           Nasdaq All Markets     Micro-Cap
5    AACT  5.262500e+08     XNYS  New York Stock Exchange Inc     Small-Cap
7   AACT=  5.262500e+08     XNYS  New York Stock Exchange Inc     Small-Cap
8    AADI  4.659913e+07     XNAS           Nasdaq All Markets     Micro-Cap
9    AADR  2.580885e+07     XNAS           Nasdaq All Markets     Micro-Cap
10   AAGR  4.797160e+07     XNAS           Nasdaq All Markets     Micro-Cap


In [167]:
# # DEV - marketcap groupby count
# marketcap_counts = screener_df['marketcapType'].value_counts()

# # convert SERIES to DF
# marketcap_counts_df = marketcap_counts.reset_index()
# marketcap_counts_df.columns = ['marketcapType', 'count']

# # add percentage column
# total_count = marketcap_counts.sum()
# marketcap_counts_df['percentage'] = (marketcap_counts_df['count'] / total_count) * 100

# # print
# marketcap_counts_df

In [168]:
# # DEV - checker
# total_count_CHECK = marketcap_counts_df['count'].sum()

# # if statement comparing the two values, return 'Data is correct' if they match, otherwise return 'Data is incorrect'
# if total_count_CHECK == len(screener_df):
#     print('Data lengths match! SUCCESS')
# else:    
#     print('Data mismatch! ERROR')
#     print()
#     print(f'Code checker: {total_count_CHECK}')
#     print(f'Data length: {len(screener_df)}')

In [169]:
# screener_df.head(2)

In [170]:
batch_size = 100
total_batches = math.ceil(len(screener_df) / batch_size)
print(f"Total batches: {total_batches}")

quote_data_list = []

def fetch_quote_data(symbols_batch, iex_key):
    symbols_str = ','.join(symbols_batch)
    url = f'https://cloud.iexapis.com/stable/stock/market/batch?symbols={symbols_str}&types=quote&token={iex_key}'
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return {}
    
# initialize a counter for completed batches
completed_batches = 0

# define the length of the progress bar
progress_bar_length = 50

"""scalable endpoint variable data request"""
for i in range(0, len(screener_df), batch_size):
    batch_symbols = screener_df['symbol'].iloc[i:i+batch_size].tolist()
    batch_data = fetch_quote_data(batch_symbols, iex_key)
    
    # process and append data for each symbol in the batch
    for symbol in batch_symbols:
        quote_data = batch_data.get(symbol, {}).get('quote', {})
        quote_data_list.append({
            'symbol': symbol, # KEY
            'latestPrice': quote_data.get('latestPrice', None),
            'previousClose': quote_data.get('previousClose', None), # PREV DAY CLOSE
            'extendedPrice': quote_data.get('extendedPrice', None), # EXTENDED HOURS PRICE
            'extendedChangePercent': quote_data.get('extendedChangePercent', None), # EXTENDED HOURS CHANGE
            'extendedPriceTime': quote_data.get('extendedPriceTime', None), # EXTENDED HOURS PRICE TIME
            'latestVolume': quote_data.get('latestVolume', None), # LATEST VOLUME
            'companyName': quote_data.get('companyName', None) # COMPANY NAME
        })
    
    # increment completed batches counter
    completed_batches += 1

    # calculate progress
    progress = (completed_batches / total_batches)
    filled_length = int(round(progress_bar_length * progress))
    
    # create progress bar
    bar = '█' * filled_length + '-' * (progress_bar_length - filled_length)
    
    # print progress bar with percentage
    print(f"\rProgress: |{bar}| {progress*100:.2f}% Complete", end="\r")

# Convert the combined data into a DataFrame
quote_df = pd.DataFrame(quote_data_list)

# Merge with the screener_df
screener_df = screener_df.merge(quote_df[['symbol',
                                        'latestPrice',
                                        'previousClose', #MOVED UP
                                        'extendedPrice', #NEW
                                        'extendedChangePercent', #NEW #IMPORTANT
                                        'extendedPriceTime', #NEW  
                                        'latestVolume', #NEW
                                        'companyName']], on='symbol', how='left')


Total batches: 77
Progress: |██████████████████████████████████████████████████| 100.00% Complete

In [171]:
# def print_available_stats_parameters(symbol, iex_key):
#     url = f'https://cloud.iexapis.com/stable/stock/{symbol}/stats?token={iex_key}'
#     response = requests.get(url)
#     if response.status_code == 200:
#         data = response.json()
#         print(json.dumps(data, indent=4, sort_keys=True))
#     else:
#         print(f"Failed to fetch stats for {symbol}. Status code: {response.status_code}")

# pltr = 'PLTR'
# print_available_stats_parameters(pltr, iex_key)

In [172]:
def fetch_shares_outstanding_data(symbols_batch, iex_key):
    symbols_str = ','.join(symbols_batch)
    url = f'https://cloud.iexapis.com/stable/stock/market/batch?symbols={symbols_str}&types=stats&token={iex_key}'
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return {}

# Assuming screener_df is a DataFrame containing the symbols
batch_size = 100
total_batches = math.ceil(len(screener_df) / batch_size)
print(f"Total batches: {total_batches}")

shares_outstanding_data_list = []

# initialize a counter for completed batches
completed_batches = 0
# define the length of the progress bar
progress_bar_length = 50

# Replace 'screener_df' with the actual DataFrame containing the symbols
for i in range(0, len(screener_df), batch_size):
    batch_symbols = screener_df['symbol'].iloc[i:i+batch_size].tolist()
    batch_data = fetch_shares_outstanding_data(batch_symbols, iex_key)
    
    # Process and append data for each symbol in the batch
    for symbol in batch_symbols:
        stats_data = batch_data.get(symbol, {}).get('stats', {})
        shares_outstanding_data_list.append({
            'symbol': symbol,
            'sharesOutstanding': stats_data.get('sharesOutstanding', None),
            'avg10Volume': stats_data.get('avg10Volume', None),
            'avg30Volume': stats_data.get('avg30Volume', None),
        })

    # increment completed batches counter
    completed_batches += 1

    # calculate progress
    progress = (completed_batches / total_batches)
    filled_length = int(round(progress_bar_length * progress))
    
    # create progress bar
    bar = '█' * filled_length + '-' * (progress_bar_length - filled_length)
    
    # print progress bar with percentage
    print(f"\rProgress: |{bar}| {progress*100:.2f}% Complete", end="\r")

# Convert the combined data into a DataFrame
shares_outstanding_df = pd.DataFrame(shares_outstanding_data_list)

Total batches: 77
Progress: |██████████████████████████████████████████████████| 100.00% Complete

In [173]:
# shares outstanding dataframe - 1 
shares_outstanding_df.head(10)

Unnamed: 0,symbol,sharesOutstanding,avg10Volume,avg30Volume
0,A,293039707,1314703,1337534
1,AA,178471908,5108389,6162207
2,AACG,31401405,38869,18008
3,AACI,2363349,8995,15402
4,AACIU,15000000,0,6983
5,AACT,50000000,311051,209019
6,AACT=,50000000,600,4678
7,AADI,24525860,228299,247570
8,AADR,450000,975,739
9,AAGR,57866830,82754,103377


In [174]:
def fetch_deep_trades_data(symbols_batch, iex_key):
    symbols_str = ','.join(symbols_batch)
    url = f'https://cloud.iexapis.com/stable/deep/trades?symbols={symbols_str}&token={iex_key}'
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return {}

# Assuming screener_df is a DataFrame containing the symbols
batch_size = 100
# Example for illustrative purposes
# screener_df = pd.DataFrame({'symbol': ['AAPL', 'MSFT', 'GOOGL']})
total_batches = math.ceil(len(screener_df) / batch_size)
print(f"Total batches: {total_batches}")

deep_data_list = []

for i in range(0, len(screener_df), batch_size):
    batch_symbols = screener_df['symbol'].iloc[i:i+batch_size].tolist()
    batch_data = fetch_deep_trades_data(batch_symbols, iex_key)
    
    # Process and infer volume data for each symbol in the batch
    for symbol in batch_symbols:
        if symbol in batch_data and batch_data[symbol]:
            volume = sum(trade['size'] for trade in batch_data[symbol])
            deep_data_list.append({
                'symbol': symbol,
                'volume': volume,
            })

# Convert the combined data into a DataFrame
iex_deep_df = pd.DataFrame(deep_data_list)

Total batches: 77


In [186]:
print(iex_deep_df.head(20)) # data check
print(len(iex_deep_df.index)) # data check

   symbol  volume
0       A   32764
1      AA  126927
2    AACG     248
3    AACT      50
4    AADI    3396
5    AAGR    2786
6     ACT    5373
7    ACTG     925
8     ACV     547
9    ACVA   31754
10   ACWI   28321
11   ACWX   20745
12   ACXP     400
13   ADAG    1564
14   ADAP    8380
15   ADBE     912
16  AGM-D      23
17   AGMH     300
18   AGNC  182256
19  AGNCL     196
454


In [187]:
print(f"screener_df column count: {len(screener_df.columns)}") # col count
screener_df = screener_df.merge(shares_outstanding_df, on='symbol', how='left')

screener_df column count: 16


In [None]:
# merge shares_outstanding_df AND iex_deep_df with screener_df
screener_df = screener_df.merge(iex_deep_df, on='symbol', how='left')
print(f"screener_df column count: {len(screener_df.columns)}") # col count

screener_df.head(24) # data check

In [None]:
# My Watchlist
watchlist = ['PLTR', 'TSLA', 'NOW', 'SNOW','FB', 'NVDA', 'PYPL', 'ADBE', 'NFLX']
watchlist_df = screener_df[screener_df['symbol'].isin(watchlist)]

watchlist_df

In [179]:
# screener_df.drop(columns=['avg10Volume'], inplace=True)

In [180]:
# screener_df.head(1)

In [181]:
# # filtered dataframe for selected stocks (TSLA, PLTR, SNOW)
# selected_stocks = screener_df[(screener_df['symbol'] == 'TSLA') | (screener_df['symbol'] == 'PLTR') | (screener_df['symbol'] == 'SNOW')]
# # fix index
# selected_stocks.reset_index(drop=True, inplace=True)

In [182]:
# # look at filterd dataframe
# selected_stocks

In [183]:
# # drop columns other than 'sybol', 'marketcap', 'exchange', 'exchangeName', 'marketcapType'
# screener_df = screener_df[['symbol', 'marketcap', 'exchange', 'exchangeName', 'marketcapType',]]

# screener_df.head(10)

In [184]:
# # DEV - API endpoing parameter availability. 
# schema_base_test_url = 'https://cloud.iexapis.com/stable'
# test_symbol = 'PLTR' # test ticker symbol
# schema_test_url = f"{schema_base_test_url}/stock/{test_symbol}/quote?schema=true&token={iex_key}"
# # GET request
# schema_test_response = requests.get(schema_test_url)

# print(json.dumps(schema_test_response.json(), indent=4))


In [185]:
# # shares outstanding
# symbol = 'PLTR'
# so_test_url = f'https://cloud.iexapis.com/stable/stock/{symbol}/stats/sharesOutstanding?token={iex_key}'

# response = requests.get(so_test_url)
# if response.status_code == 200:
#     shares_outstanding = response.json()
#     print(f'Average Shares Outstanding for {symbol}: {shares_outstanding}')
# else:
#     print(f'Failed to fetch data for {symbol}. Status code: {response.status_code}')


IDEAS
1. Robust value score (RBV)
2. create data frames for each market cap
3. create a function that does math and querying based on rules / conditions
4. apply function to the market cap data sets
5. Look at other STATS now that the API is connected for the sharesOutstanding

NOTES
1. Float
1. Free Flt Mkt Cap
1. Free Flt
1. Shares outstanding

MANDATORY FIELDS
1. 