In [1]:
import numerapi
import pyEX
import pandas as pd
import os
from tqdm import tqdm

In [2]:
# Numerai Signals credentials come from the environment; a missing variable
# raises KeyError here rather than failing later inside an API call.
numerai_public_id = os.environ["NUMERAIAPIPUBLIC"]
numerai_secret_key = os.environ["NUMERAIAPIPRIVATE"]
napi = numerapi.SignalsAPI(numerai_public_id, numerai_secret_key)

# IEX client built with library defaults.
# NOTE(review): presumably pyEX picks its token up from the environment — confirm.
client = pyEX.Client()

In [3]:
# Fetch the Numerai Signals ticker universe. The mapping loop further down
# iterates it and splits every entry on a single space into (ticker, country code).
numerai_tickers = napi.ticker_universe()

In [4]:
# Country code used in the Numerai ticker universe (key) -> region code passed to
# the IEX international symbols endpoint (value). Insertion order is kept as-is
# because it determines the order in which regions are requested from IEX.
country_map = {
    "AU": "AU",  # Australia
    "AV": "AT",  # Austria (Vienna)
    "BB": "BE",  # Belgium (Brussels)
    "BZ": "BR",  # Brazil (B3)
    "CN": "CA",  # Canada (Toronto)
    "CP": "CZ",  # Czech Republic (Prague)
    "DC": "DK",  # Denmark (Copenhagen)
    "FH": "FI",  # Finland (Helsinki)
    "FP": "FR",  # France (Paris)
    "GA": "GR",  # Greece (Athens)
    "GR": "DE",  # Germany (Xetra)
    "HB": "HU",  # Hungary (Budapest)
    "HK": "HK",  # Hong Kong
    "ID": "IE",  # Ireland (Dublin)
    "IJ": "ID",  # Indonesia (Jakarta)
    "IM": "IT",  # Italy (Milan)
    "IT": "IL",  # Israel (Tel Aviv)
    "JP": "JP",  # Japan
    "KS": "KR",  # Korea
    "LN": "GB",  # United Kingdom (London)
    "MF": "MX",  # Mexico
    "MK": "MY",  # Malaysia (Kuala Lumpur)
    "NA": "NL",  # Netherlands (Amsterdam)
    "NO": "NO",  # Norway (Oslo)
    "NZ": "NZ",  # New Zealand
    "PL": "PT",  # Portugal (Lisbon)
    "PM": "PH",  # Philippines
    "PW": "PL",  # Poland (Warsaw)
    "SJ": "ZA",  # South Africa (Johannesburg)
    "SM": "ES",  # Spain (Madrid)
    "SP": "SG",  # Singapore
    "SS": "SE",  # Sweden (Stockholm)
    "SW": "CH",  # Switzerland
    "TB": "TH",  # Thailand
    "TI": "TR",  # Turkey (Istanbul)
    "TT": "TW",  # Taiwan
    "US": "US",  # United States (RegNMS)
}

In [5]:
# assemble symbols for IEX
# One symbol table is requested per IEX region listed in country_map; the
# mapping loop later filters each table by its index, so the index is expected
# to hold the IEX symbol strings.
symbols_per_region = dict(
    (iex_region_code, client.internationalSymbolsDF(region=iex_region_code))
    for iex_region_code in country_map.values()
)

In [6]:
# Known Tickers
# assemble from prior runs
# ticker_map.csv is the file this notebook writes in its export cell, so it does
# not exist on a first run; start with no overrides in that case instead of
# failing with FileNotFoundError.
if os.path.exists("ticker_map.csv"):
    # dtype=str / keep_default_na=False: read every cell as literal text so that
    # symbols such as "NA" or "NULL" are not silently converted to missing values.
    df_known = pd.read_csv("ticker_map.csv", dtype=str, keep_default_na=False).fillna("")
else:
    df_known = pd.DataFrame(columns=["numerai_ticker", "iex_ticker"])

# First column is the Numerai ticker, second the IEX ticker; rows with a blank
# IEX ticker were unresolved in the prior run and are looked up again.
overrides = {
    known_numerai_ticker: known_iex_ticker
    for known_numerai_ticker, known_iex_ticker in df_known.iloc[:, :2].itertuples(index=False, name=None)
    if known_iex_ticker != ""
}

In [7]:
# Build the Numerai -> IEX ticker map.
# Every universe entry is expected to be "<ticker> <country code>" separated by
# exactly one space; anything else makes the unpacking below raise ValueError.
iex_tickers = []       # [numerai_ticker, iex_ticker] rows; iex_ticker is "" when unresolved
no_matches = []        # [ticker, iex_region] for lookups that found no candidate
multiple_matches = []  # [ticker, iex_region] for lookups that found several candidates

# adjust suffix to filter down multiple matches
region_suffix = {
    "HK": "HK",  # omit HKHKSG and HKHKSZ
    "ES": "SA",  # use Bolsa de Madrid instead of Valencia
    "ID": "IJ",  # use main exchange
}

# Normalize spacing: in these regions a trailing share-class letter is looked up
# as "<name> <letter>" rather than "<name><letter>".
share_class_letters = {
    "DK": ("B",),
    "SE": ("A", "B", "C"),
}

for numerai_ticker in tqdm(numerai_tickers):
    ticker, country = numerai_ticker.split(" ")

    # Use .get() so an unknown country code or an unloaded region is reported and
    # recorded as unresolved instead of aborting the whole run with a KeyError.
    iex_region = country_map.get(country)
    region_symbols = symbols_per_region.get(iex_region)
    if iex_region is None:
        print(f"unmapped country code: {country}")
    elif region_symbols is None:
        print(f"missing region: {iex_region}")

    # short circuit for known
    if numerai_ticker in overrides:
        iex_tickers.append([numerai_ticker, overrides[numerai_ticker]])
        continue

    # short circuit for RegNMS
    if iex_region == "US":
        # skip lookup
        # NOTE(review): the "/" -> "." rewrite applied further down is not applied
        # on this path, so a US ticker containing "/" is passed through unchanged —
        # confirm that is intended.
        iex_tickers.append([numerai_ticker, ticker])
        continue

    # No symbol table to search: record the ticker as unresolved and move on.
    if region_symbols is None:
        no_matches.append([ticker, iex_region])
        iex_tickers.append([numerai_ticker, ""])
        continue

    # adjust lengths for numerical exchanges
    if iex_region == "HK":
        # left-pad with zeros to five characters (longer tickers are unchanged)
        ticker = ticker.rjust(5, "0")

    suffix = region_suffix.get(iex_region, "")

    # endswith() with an empty tuple is always False, so regions without
    # share-class handling are left untouched.
    if ticker.endswith(share_class_letters.get(iex_region, ())):
        ticker = f"{ticker[:-1]} {ticker[-1]}"

    if country == "GR":
        # use Xetra
        suffix = "GY"

    # IEX formatting
    suffix = "-" + suffix
    ticker = ticker.replace("/", ".").replace("*", "")

    # Prefix match on the symbol index; with an empty suffix this matches any
    # listing of the ticker in the region.
    is_candidate = region_symbols.index.str.startswith(f"{ticker}{suffix}")
    satisfying_records = region_symbols[is_candidate].index

    if len(satisfying_records) == 1:
        iex_tickers.append([numerai_ticker, satisfying_records[0]])
        continue

    if len(satisfying_records) == 0:
        # FIXME no coverage
        if iex_region != "ZA":
            no_matches.append([ticker, iex_region])
    else:
        multiple_matches.append([ticker, iex_region])

    iex_tickers.append([numerai_ticker, ""])
    

100%|██████████| 5377/5377 [00:00<00:00, 102849.67it/s]


In [8]:
# Number of tickers the mapping loop recorded in no_matches
len(no_matches)

10

In [9]:
# Number of tickers the mapping loop recorded in multiple_matches (ambiguous lookups)
len(multiple_matches)

0

In [10]:
# Persist the mapping. The same file is read back by the "Known Tickers" cell on
# the next run, where rows with a non-empty iex_ticker become overrides.
mapping_columns = ["numerai_ticker", "iex_ticker"]
df = pd.DataFrame(data=iex_tickers, columns=mapping_columns)
df.to_csv(path_or_buf="ticker_map.csv", index=False)

In [11]:
# Audit
# Count of Numerai tickers whose IEX ticker is still blank after the mapping loop.
int(df["iex_ticker"].eq("").sum())

53

In [12]:
# Original
# Size of the Numerai ticker universe; the denominator of the coverage figure below.
len(numerai_tickers)

5377

In [13]:
# Mapped
# Row count of the exported map, followed by the share of the Numerai universe
# that received a non-empty IEX ticker.
print(len(df))

mapped_count = int(df["iex_ticker"].ne("").sum())
universe_size = len(numerai_tickers)
coverage_pct = mapped_count / universe_size * 100
print(f"Coverage {coverage_pct:0.1f}%")

5377
Coverage 99.0%
