In [None]:
'''
The code below will generate a complete list of WSB accepted tickers when run on a home PC. Hosted environments such as
Google Colab are blocked by NYSE (or whoever is being queried), so it won't work 'in the cloud'.

The output of the 'filtered' variable should be used for the uppercase non-$-prefixed list. The 'complete' variable
should be used for the $-prefixed list, since a $-prefixed ticker won't match common words.
'''

In [None]:
%%capture
# Install the dictionary lookup library
!pip3 install pyenchant
import enchant
dic = enchant.Dict("en_US")

In [None]:
%%capture
# Install the ticker lookup library
!pip3 install get-all-tickers
from get_all_tickers import get_tickers as gt

In [None]:
# List of accepted acronyms/tickers.
# Entries are written in lowercase for readability and uppercased in one step so they
# compare equal to the uppercase tickers; the filtering cell removes anything listed
# here from the non-$-prefixed output.
# ('eod' used to appear twice; the duplicate entry has been dropped.)
common = [
    word.upper()
    for word in (
        'wsb', 'salt', 'irl', 'gf', 'pdfs', 'sand', 'prpl', 'ppt', 'eod', 'cuz',
        'star', 'amc', 'spce', 'alot', 'kodk', 'robo', 'meds', 'ay', 'csv', 'bj',
        'gpro', 'stim', 'cspr', 'jwn', 'ogs', 'gen', 'hyln',
    )
]

In [None]:
# Fetch the tickers selected by a market-cap filter.
# mktcap_max is in millions (so 1000 presumably means a $1B ceiling; confirm against
# the get-all-tickers docs). A minimum bound can be used instead for other cuts.
list_of_tickers = gt.get_tickers_filtered(mktcap_max=1000)

In [None]:
# Optional: run this cell to dump the raw generated ticker list for inspection
print(list_of_tickers)

In [None]:
# Put the current automod list here to see the intersection between AutoMod and newly generated list
d = []

# Note, everything is uppercase
# Get rid of the $ signs (and any stray padding) so both lists are compared on bare symbols
de = [x.strip().strip("$") for x in d]

# The generated tickers can carry padding spaces (the later cells strip them before use),
# so trim them here too; otherwise a padded symbol never matches its AutoMod counterpart
# and the "in common" count comes out too low.
generated_symbols = {x.strip() for x in list_of_tickers}

print("%s tickers in our current list" % (len(de)))
print("%s tickers in our generated list" % (len(list_of_tickers)))
print("%s tickers in common" % (len(generated_symbols.intersection(de))))

In [None]:
# Run all of our tickers through the dictionary and make a list of tickers that are words.
# The symbol is trimmed for the lookup because the generated tickers can carry padding
# (later cells strip it), and blank entries are skipped because enchant's Dict.check()
# raises ValueError on an empty string.
# The original, untrimmed value is what gets stored, so membership tests against
# list_of_tickers entries keep working.
fakes = [x for x in list_of_tickers if x.strip() and dic.check(x.strip())]

In [None]:
# SPAC tickers, kept as a separate list so they can be excluded from the generated output.
# Written as a whitespace-separated block and split into a list of symbols.
spacs = """
PSTH CCIV BFT FEAC PRPB WPF SBE IPOB JWS IPOC
DGNR CRHC GSAH FTOC RTP RBAC LCA AACQ GMHI CIIC
CCX VGAC LGVW STPK AGC ETAC TWCT GOAC SEAH HZAC
CFII NGA GRSV TREB TRNE LEAP FMAC APXT LCY BOWX
VSPR CMLF GHIV PCPL THCB RMG QELL EQD ACND ENPC
SVAC HYAC HEC SFTW HCAC DMYT IPV DMYD PIC SNPR
PDAC IGAC RPLA SMMC JIH THBR FAII BTAQ FUSE FCAC
YAC ERES DEH TWND PIAI ACAM GIK SOAC KSMT VACQ
CHPM GXGX BSN INAQ FIII HOL CLII PSAC CCAC ALUS
LOAK ACTC SRAC AONE EXPC IACA CPSR SSPK FSRV CRSA
LATN OACB STWO HPX FTIV ASAQ MNCL HSAQ DFPH ASPL
LCAP SCVX NPA BCTG DGNS ACEV THCA DBDR GIX GIX^
NSH SBG PMVC OAC GNRS VIH TMTS RCHG FST CLA
PTAC RACA CGRO ESSC GLEO SCPE DFNS BLSA NHIC NOVS
SAII MCMJ DFHT ARYA GSMG HCCH PANA NMMC TDAC PTK
MLAC CAPA SAMA LSAC LFAC ROCH HLXA FSDC AMCI OPES
NBAC VMAC CHAQ AHAC JYAC AMHC LSAQ PAIC FVAM LIVK
MCAC ITAC GRNV ZGYH LACQ AGBA BRLI TZAC GRCY CCNC
ALAC ANDA TOTA ID MFAC BRPA LOAC APSG AVAN DSAC
EMPW FGNA IMPX IPOD IPOF MAAC MACU NBA PACE TPGY
VYGG
""".split()

In [None]:
# Filtered = all tickers that aren't spacs or english words or common acronyms/accepted tickers.
# Each symbol is trimmed BEFORE the exclusion checks: trimming afterwards let a padded
# symbol (e.g. "ABC ") slip past every exclusion list and end up in the output.
# The exclusions are gathered into one set so each lookup is a hash probe rather than
# a scan of three lists; fakes/spacs are trimmed too so they match the trimmed symbols.
excluded = {x.strip() for x in spacs} | {x.strip() for x in fakes} | set(common)
trimmed_tickers = [x.strip() for x in list_of_tickers]
# Blank entries carry no symbol, so they are dropped rather than emitted as ''.
filtered = [x for x in trimmed_tickers if x and x not in excluded]
len(filtered)

In [None]:
# Display the filtered tickers. Use this output for the non-$-prefixed list
print(filtered)

In [None]:
# For good measure let's get the unfiltered list (english words not filtered) with $ in front so we don't miss anything.
# Use this for the $-prefixed list.
# Symbols are trimmed BEFORE the SPAC check: trimming afterwards let a padded SPAC
# symbol (e.g. "PSTH ") slip past the exclusion. Blank entries are dropped so a bare "$"
# is never emitted.
spac_symbols = {x.strip() for x in spacs}
complete_symbols = [x.strip() for x in list_of_tickers]
complete = ["$" + x for x in complete_symbols if x and x not in spac_symbols]

In [None]:
# Show how many $-prefixed tickers were produced, then the list itself on the next line
print(len(complete), complete, sep="\n")