# Imports

In [1]:
import json
import numpy as np
import pandas as pd

from tqdm import tqdm

# Check coin coverage

## Load coins from CoinMarketCap

In [None]:
# CoinMarketCap data is retrieved with the API call below. Note that instead of
# iterating in a loop, it's possible to specify multiple statuses, separated 
# by a comma, like so: {"listing_status":"active,inactive,untracked"}. However,
# a single query can return only 10000 results max, so for the best coverage
# it's better to iterate through status values.

# from requests import Session

# headers = {
#   "Accepts": "application/json",
#   "X-CMC_PRO_API_KEY": "MY_API_KEY",
# }
# session = Session()
# session.headers.update(headers)

# url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/map"

# for status in ["active", "inactive", "untracked"]:
#     parameters = {
#       "listing_status":status,
#     }
#     response = session.get(url, params=parameters)
#     data = json.loads(response.text)
#     with open(f"coinmarketcap_coins_{status}.json", "w") as f:
#         json.dump(data, f)

In [2]:
with open("coins_coinmarketcap.json") as user_file:
    coinmarketcap_data = json.load(user_file)
coins_coinmarketcap = pd.DataFrame(coinmarketcap_data["data"])
print(f"Loaded {len(coins_coinmarketcap)} coins from CoinMarketCap")

Loaded 20745 coins from CoinMarketCap


In [3]:
coins_coinmarketcap.head()

Unnamed: 0,id,name,symbol,slug,rank,displayTV,manualSetTV,tvCoinSymbol,is_active,first_historical_data,last_historical_data,platform
0,1,Bitcoin,BTC,bitcoin,1.0,1,0,,1,2013-04-28T18:47:21.000Z,2023-02-23T18:29:00.000Z,
1,2,Litecoin,LTC,litecoin,14.0,1,0,,1,2013-04-28T18:47:22.000Z,2023-02-23T18:29:00.000Z,
2,3,Namecoin,NMC,namecoin,664.0,1,0,,1,2013-04-28T18:47:22.000Z,2023-02-23T18:29:00.000Z,
3,4,Terracoin,TRC,terracoin,1790.0,1,0,,1,2013-04-28T18:47:22.000Z,2023-02-23T18:29:00.000Z,
4,5,Peercoin,PPC,peercoin,766.0,1,0,,1,2013-04-28T18:47:23.000Z,2023-02-23T18:29:00.000Z,


In [4]:
len(coins_coinmarketcap[coins_coinmarketcap["is_active"] == 1])

9006

In [5]:
# All the coins from our target currency pairs are included in the CoinMarketCap data.
target_coins = [
    "APE", "AVAX", "AXS", "BAKE", "BNB", "BTC", "BUSD", "CRV", "CTK", "DOGE", "DOT", "DYDX", "ETH", "FTM", 
    "GMT", "LINK", "MATIC", "NEAR", "OGN", "RUNE", "SAND", "SOL", "STORJ", "UNFI", "USDT", "WAVES", "XRP"     
]
assert all(coin in coins_coinmarketcap["symbol"].unique() for coin in target_coins)

### Some symbols belong to more than one coin

In [6]:
duplicate_symbols = coins_coinmarketcap[coins_coinmarketcap.duplicated("symbol")]
duplicate_symbols

Unnamed: 0,id,name,symbol,slug,rank,displayTV,manualSetTV,tvCoinSymbol,is_active,first_historical_data,last_historical_data,platform
98,659,Bitswift,BITS,bitswift,1562.0,1,0,,1,2014-10-05T17:44:24.000Z,2023-02-23T18:29:00.000Z,
417,2135,Revain,REV,revain,422.0,1,0,,1,2017-11-01T20:49:27.000Z,2023-02-23T18:29:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
455,2246,CyberMiles,CMT,cybermiles,1275.0,1,0,,1,2017-12-06T03:19:47.000Z,2023-02-23T18:29:00.000Z,
534,2419,Profile Utility Token,PUT,profile-utility-token,7636.0,1,0,,1,2018-01-17T17:34:26.000Z,2023-02-23T18:29:00.000Z,
653,2682,Holo,HOT,holo,113.0,1,0,,1,2018-04-30T22:14:25.000Z,2023-02-23T18:29:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
...,...,...,...,...,...,...,...,...,...,...,...,...
20735,19920,SNACKCLUB,SNK,snackclub,,1,0,,0,,,
20739,19927,VINCI,VINCI,byvinci,,1,0,,0,2022-08-16T14:45:00.000Z,2022-08-25T18:20:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."
20740,19928,DoctorS Token,DST,doctors-token,,1,0,,0,2022-05-04T07:50:00.000Z,2022-10-05T18:20:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."
20742,19930,Aree Shards,AES,aree-shards,,1,0,,0,,,"{'id': 3890, 'name': 'Polygon', 'symbol': 'MAT..."


In [7]:
len(duplicate_symbols["symbol"].unique())

2999

In [8]:
coins_coinmarketcap[coins_coinmarketcap["symbol"] == "REV"]

Unnamed: 0,id,name,symbol,slug,rank,displayTV,manualSetTV,tvCoinSymbol,is_active,first_historical_data,last_historical_data,platform
382,2021,RChain,REV,rchain,1181.0,1,0,,1,2017-10-06T22:39:24.000Z,2023-02-23T18:29:00.000Z,
417,2135,Revain,REV,revain,422.0,1,0,,1,2017-11-01T20:49:27.000Z,2023-02-23T18:29:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
5770,17172,Revolution,REV,revolution,8548.0,1,0,,1,2022-01-12T04:29:00.000Z,2023-02-23T18:29:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."
9917,1202,Revenu,REV,revenu,,1,0,,0,2016-03-22T20:01:24.000Z,2017-11-21T16:29:14.000Z,
19514,17425,Revolutin,REV,revolutin,,1,0,,0,2022-01-20T16:40:00.000Z,2022-03-05T18:25:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."


### Some names belong to more than one coin

In [9]:
duplicate_names = coins_coinmarketcap[coins_coinmarketcap.duplicated("name")]
duplicate_names

Unnamed: 0,id,name,symbol,slug,rank,displayTV,manualSetTV,tvCoinSymbol,is_active,first_historical_data,last_historical_data,platform
2558,8619,Moola,MLA,moola,8058.0,1,0,,1,2021-03-02T07:45:06.000Z,2023-02-23T18:25:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."
2601,8705,Bifrost,BNC,bifrost-bnc,878.0,1,0,,1,2021-10-21T12:51:06.000Z,2023-02-23T18:26:00.000Z,
3216,10293,Swarm,BZZ,ethereum-swarm,542.0,1,0,,1,2021-06-21T14:43:03.000Z,2023-02-23T18:28:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
4121,12472,Elysian,ELS,elysian-els,5989.0,1,0,,1,2021-11-25T22:35:04.000Z,2023-02-23T18:25:00.000Z,"{'id': 52, 'name': 'XRP', 'symbol': 'XRP', 'sl..."
4282,12863,MYCE,MYCE,my-ceremonial-event,5843.0,1,0,,1,2021-10-19T04:20:06.000Z,2023-02-23T18:25:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
...,...,...,...,...,...,...,...,...,...,...,...,...
20624,19664,Belka,BELKA,belka-project,,1,0,,0,2022-04-21T05:44:00.000Z,2022-10-08T10:24:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."
20626,19668,Astronaut,ASTRO,astronaut-guru,,1,0,,0,2022-04-21T07:23:00.000Z,2022-05-10T02:53:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."
20659,19746,BabyApe,BAPE,babyape,,1,0,,0,2022-04-25T09:05:00.000Z,2022-10-31T08:10:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
20676,19780,AlgoDAO,SIGMA,algodao-sigma,,1,0,,0,,,


In [10]:
len(duplicate_names["name"].unique())

251

In [11]:
coins_coinmarketcap[coins_coinmarketcap["name"] == "Moola"]

Unnamed: 0,id,name,symbol,slug,rank,displayTV,manualSetTV,tvCoinSymbol,is_active,first_historical_data,last_historical_data,platform
558,2466,Moola,AXPR,axpr-token,1925.0,1,0,,1,2018-01-25T03:39:28.000Z,2023-02-23T18:29:00.000Z,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ET..."
2558,8619,Moola,MLA,moola,8058.0,1,0,,1,2021-03-02T07:45:06.000Z,2023-02-23T18:25:00.000Z,"{'id': 1839, 'name': 'BNB', 'symbol': 'BNB', '..."


## Load RavenPack coins 

In [12]:
# Load RP entities of the CURR type.
currencies_rp = pd.read_csv("currency_2023-02-15.csv")
print(currencies_rp.shape)
currencies_rp.head()

(11892, 6)


Unnamed: 0,RP_ENTITY_ID,ENTITY_TYPE,DATA_TYPE,DATA_VALUE,RANGE_START,RANGE_END
0,1000.0,CURR,COUNTRY_ID,F93C11,2020-03-20,
1,1000.0,CURR,CURRENCY_TYPE_ID,2BCD97,,
2,1000.0,CURR,ENTITY_NAME,Hive,,
3,1000.0,CURR,NAME,Hive,2020-03-20,
4,1000.0,CURR,SYMBOL,HIVE,2020-03-20,


In [13]:
# Cryptocurrencies have CURRENCY_TYPE_ID = 2BCD97.
crypto_rp_ids = currencies_rp[currencies_rp["DATA_VALUE"] == "2BCD97"]["RP_ENTITY_ID"].unique()
print(f"RP tracks {len(crypto_rp_ids)} cryptocurrencies")

RP tracks 1908 cryptocurrencies


In [14]:
# Select and reformat RP cryptocurrency data.
crypto_rp = currencies_rp[currencies_rp["RP_ENTITY_ID"].isin(crypto_rp_ids)]
crypto_rp = crypto_rp[crypto_rp["DATA_TYPE"].isin(["ENTITY_NAME", "SYMBOL"])].reset_index(drop=True)
crypto_rp = crypto_rp.groupby(["RP_ENTITY_ID", "DATA_TYPE"])["DATA_VALUE"].aggregate("first").unstack().reset_index()
crypto_rp.columns.name = False
print(crypto_rp.shape)
crypto_rp.head()

(1908, 3)


Unnamed: 0,RP_ENTITY_ID,ENTITY_NAME,SYMBOL
0,0001E3,Hive,HIVE
1,000B28,WhiteCoin,XWC
2,000E81,Arno Token,ARNO
3,0028A2,Steem,STEEM
4,003022,Squeezer,SQR


### Some symbols belong to more than one coin

In [15]:
duplicate_symbols_rp = crypto_rp[crypto_rp.duplicated("SYMBOL")]
duplicate_symbols_rp = duplicate_symbols_rp.dropna()
duplicate_symbols_rp

Unnamed: 0,RP_ENTITY_ID,ENTITY_NAME,SYMBOL
220,1CB0EF,Flowchain,FLC
324,2C3DBB,Mdex,MDX
428,3BCDBA,BitTorrent (BTTOLD),BTT
471,409F30,Mandala,MDX
509,45B912,Multi-Collateral DAI,DAI
555,4BB08E,Neurotoken,NTK
675,5E1282,IMPT,IMPT
710,6203E1,Vlux,VLX
895,7DF732,Rise,RISE
947,83D334,Will Smith Inu,WSI


In [16]:
len(duplicate_symbols_rp["SYMBOL"].unique())

27

In [17]:
crypto_rp[crypto_rp["SYMBOL"] == "MDX"]

Unnamed: 0,RP_ENTITY_ID,ENTITY_NAME,SYMBOL
311,2AB926,Meduse Coin,MDX
324,2C3DBB,Mdex,MDX
471,409F30,Mandala,MDX


### Some coins do not have a symbol

In [18]:
coins_without_symbol = crypto_rp[crypto_rp["SYMBOL"].isna()]
coins_without_symbol

Unnamed: 0,RP_ENTITY_ID,ENTITY_NAME,SYMBOL
9,009CC5,Cryptaur,
17,0198BC,Safe Haven (SHA),
22,023285,APENFT,
38,042F77,Difo Network,
39,044ABF,Cronos,
...,...,...,...
1815,F2D6A1,Healthereum,
1846,F78685,Santiment Network Token,
1848,F81117,DAOstack,
1852,F8BAC9,Kuai Token,


In [19]:
# Show most highly ranked coins that are missing a symbol.
coin_no_symbol_to_rank = {}
for i in tqdm(range(len(coins_without_symbol))):
    name = coins_without_symbol.iloc[i]["ENTITY_NAME"]
    name_cmc = coins_coinmarketcap[coins_coinmarketcap["name"] == name]
    if not len(name_cmc):
        continue
    rank = name_cmc.iloc[0]["rank"]
    if not np.isnan(rank):
        coin_no_symbol_to_rank[name] = rank
sorted(coin_no_symbol_to_rank.items(), key=lambda item: item[1])[0:15]

100%|██████████| 203/203 [00:00<00:00, 330.79it/s]


[('Solana', 11.0),
 ('Cronos', 35.0),
 ('Internet Computer', 37.0),
 ('BitDAO', 46.0),
 ('Curve DAO Token', 66.0),
 ('Basic Attention Token', 91.0),
 ('Compound', 110.0),
 ('Arweave', 120.0),
 ('Amp', 124.0),
 ('Balancer', 126.0),
 ('Harmony', 132.0),
 ('Siacoin', 158.0),
 ('APENFT', 194.0),
 ('ConstitutionDAO', 196.0),
 ('Augur', 284.0)]

## Find CoinMarketCap coins in RP

In [20]:
coins_to_rp_entity_ids = {}
missing_in_rp = []
crypto_rp["ENTITY_NAME_lower"] = crypto_rp["ENTITY_NAME"].apply(str.lower)
for i, row in tqdm(coins_coinmarketcap.iterrows()):
    # Search by symbol.
    coin_slice = crypto_rp[crypto_rp["SYMBOL"] == row["symbol"]]
    if len(coin_slice) > 1:
        # Disambiguate by name.
        coin_slice = coin_slice[coin_slice["ENTITY_NAME_lower"] == row["name"].lower()]
    if not len(coin_slice):
        # Search by name only (some RP entities have a null symbol).
        coin_slice = crypto_rp[crypto_rp["ENTITY_NAME_lower"] == row["name"].lower()]
    if not len(coin_slice):    
        missing_in_rp.append((row["symbol"], row["name"]))
        continue
    # Verify that the mapping is unique.
    assert len(coin_slice["RP_ENTITY_ID"].unique()) == 1
    coin_rp_id = coin_slice.iloc[0]["RP_ENTITY_ID"]
    coins_to_rp_entity_ids[(row["symbol"], row["name"])] = coin_rp_id
del crypto_rp["ENTITY_NAME_lower"]
print(f"{len(missing_in_rp)} coins are not found in the RP data ({round(len(missing_in_rp)*100/len(coins_coinmarketcap), 2)}%)")

20745it [00:30, 682.75it/s]

18164 coins are not found in the RP data (87.56%)





In [21]:
missing_in_rp[:5]

[('FRC', 'Freicoin'),
 ('IXC', 'Ixcoin'),
 ('WDC', 'WorldCoin'),
 ('DGC', 'Digitalcoin'),
 ('GLC', 'Goldcoin')]

In [22]:
coin_to_rank = {}
for coin in tqdm(missing_in_rp):
    rank = coins_coinmarketcap[coins_coinmarketcap["symbol"] == coin[0]].iloc[0]["rank"]
    coin_to_rank[coin] = rank
sorted(coin_to_rank.items(), key=lambda item: item[1])[0:15]

100%|██████████| 18164/18164 [00:51<00:00, 349.40it/s]


[(('SOL', 'Wrapped Solana'), 11.0),
 (('DAI', 'Dai'), 18.0),
 (('USDP', 'USDP Stablecoin'), 59.0),
 (('MINA', 'Mina'), 62.0),
 (('FXS', 'Frax Share'), 65.0),
 (('BTT', 'BitTorrent-New'), 73.0),
 (('BTT', 'Bitteam token'), 73.0),
 (('SNX', 'Synthetix'), 76.0),
 (('FLR', 'Flare'), 86.0),
 (('BONE', 'Bone ShibaSwap'), 94.0),
 (('BONE', 'Bulldog Billionaires'), 94.0),
 (('BONE', 'Bone'), 94.0),
 (('DYDX', 'dYdX'), 100.0),
 (('LUNA', 'Terra'), 109.0),
 (('BLUR', 'Blur'), 118.0)]

NB! Some of them are not really missing, just couldn't be mapped due to inconsistencies in naming.

## Find RP coins in CoinMarketCap

In [23]:
missing_in_coinmarketcap = []
for i, row in tqdm(crypto_rp.iterrows()):
    if type(row["SYMBOL"]) == str and row["SYMBOL"] not in coins_coinmarketcap["symbol"].unique():
        missing_in_coinmarketcap.append((row["SYMBOL"], row["ENTITY_NAME"]))
print(f"{len(missing_in_coinmarketcap)} coins are not found in the CoinMarketCap data ({round(len(missing_in_coinmarketcap)*100/len(crypto_rp), 2)}%)")

1908it [00:06, 277.20it/s]

268 coins are not found in the CoinMarketCap data (14.05%)





In [24]:
missing_in_coinmarketcap[:25]

[('ZUUM', 'Zuum'),
 ('RF', 'Raido Financial'),
 ('ENCN', 'EndChain'),
 ('SKTG', 'SpeedkingTokenGold'),
 ('LYN', 'Lynchpin'),
 ('HSR', 'Hshare'),
 ('USDM', 'Moneta'),
 ('EFYT', 'Ergo'),
 ('GSCP', 'GSC Platform'),
 ('DGO', 'DINNGO'),
 ('LOKI', 'Loki'),
 ('CATER', 'Newcater'),
 ('WEIS', 'WeiCrowd'),
 ('VAD', 'Varanida'),
 ('WINS', 'WinStars'),
 ('2021COIN', '2021coin'),
 ('LINDA', 'Metrix Coin'),
 ('COF', 'Cryptoffer'),
 ('RWRD', 'RewardsToken'),
 ('MYDFS', 'MyDFS'),
 ('ARN', 'Aeron'),
 ('REFER', 'Plentix'),
 ('OCR', 'OZCAR'),
 ('ULED', 'Ledder'),
 ('TM-SNP1', 'Skynavpro')]