In [1]:
import os
import scipy
import scipy.cluster
import sklearn
import sklearn.cluster
import numpy as np
from PIL import Image
from PIL import ImageColor

In [2]:
def write(json_data: "dict | list", file_name: str):
    """Serialize ``json_data`` as JSON and write it to ``file_name``.

    The target file is created, or truncated if it already exists.

    Args:
        json_data: Any JSON-serializable value; this notebook passes the list
            of currency dicts.
        file_name: Path of the file to write.
    """
    # Imported locally so the helper does not depend on a later cell having
    # already run `import json`.
    import json

    # Pin the encoding so the result does not depend on the platform locale.
    with open(file_name, 'w', encoding='utf-8') as f:
        json.dump(json_data, f)

In [3]:
# Download the list of all currencies (including their platform addresses)
import json
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded; import the submodule that is actually used.
import urllib.request

url = 'https://api.coingecko.com/api/v3/coins/list?include_platform=true'
# The context manager closes the HTTP connection once the body has been read.
with urllib.request.urlopen(url) as response:
    currencies = json.loads(response.read())
write(currencies, 'tmp_step_01.json')
print('Downloaded list of ', len(currencies))

Downloaded list of  12951


In [4]:
# Include only native or ethereum ERC20s

def include(currency: dict) -> bool:
    """Decide whether a currency entry is kept.

    An entry with an empty ``platforms`` mapping is kept (treated as native).
    Otherwise it is kept only when ``platforms['ethereum']`` holds a non-empty
    value.

    Args:
        currency: Entry with a ``'platforms'`` mapping of platform name to
            contract address.

    Returns:
        True if the entry should be kept, False otherwise.
    """
    platforms = currency['platforms']
    if len(platforms) == 0:
        return True
    # .get() plus truthiness rejects a missing key, an empty string and a null
    # address alike; calling len() on a null address would raise TypeError.
    return bool(platforms.get('ethereum'))

# Keep only the currencies accepted by include().
currencies = [currency for currency in currencies if include(currency)]

# Change id to coinGeckoId
for currency in currencies:
    # Guarded so that re-running this cell on already-renamed entries does not
    # raise KeyError: 'id'.
    if 'id' in currency:
        currency['coinGeckoId'] = currency.pop('id')

write(currencies, 'tmp_step_02.json')
print('Filtered out unsupported, remaining count', len(currencies))

Filtered out unsupported, remaining count 6304


In [5]:
# Fetch per-currency metadata: image URL, rank, homepage link, twitter handle
# and English description.
import time
import urllib.request
import requests

base_url = 'https://api.coingecko.com/api/v3/coins/'
query_params = '?localization=false&tickers=false&market_data=false&community_data=false&developer_data=false&sparkline=false'

for idx, currency in enumerate(currencies):
    url = base_url + currency['coinGeckoId'] + query_params
    # The context manager closes the connection after each request instead of
    # leaving one open response per currency.
    with urllib.request.urlopen(url) as response:
        info = json.loads(response.read())

    currency['imageUrl'] = info['image']['large']
    currency['rank'] = info['coingecko_rank']

    links = info['links']

    # Only the first homepage entry is used. The truthiness checks cover an
    # empty list (which would raise IndexError on [0]) as well as None / ''.
    homepages = links['homepage']
    if homepages and homepages[0]:
        currency['link'] = homepages[0]

    # Optional fields are stored only when non-empty, matching the description.
    if links['twitter_screen_name']:
        currency['twitter'] = links['twitter_screen_name']

    if info['description']['en']:
        currency['description'] = info['description']['en']

    # Rewrites the full snapshot on every iteration (presumably a checkpoint so
    # an interrupted run keeps what was already fetched).
    write(currencies, 'tmp_step_03.json')
    print('\r', 'Downloaded metadata', idx, end='')
    # Pause between requests (presumably to respect the API rate limit).
    time.sleep(1.21)

Count 300
 Downloaded metadata 299

In [6]:
# Download images if needed
from urllib.parse import urljoin, urlparse
import os
import platform
import requests
import shutil

def file_name_for_url(url: str) -> str:
    """Return whatever follows the last '/' of ``url`` once its query string
    and fragment have been dropped (by re-joining only the path onto it)."""
    path_only = urlparse(url).path
    rejoined = urljoin(url, path_only)
    _, _, tail = rejoined.rpartition('/')
    return tail

def file_extension_for_url(url: str) -> str:
    """Return the extension (leading dot included) of the file name found in
    ``url``, or an empty string when that name has no extension."""
    _, extension = os.path.splitext(file_name_for_url(url))
    return extension

def cache_image_file_name(id: str, url: str) -> str:
    """Build the cached image file name: ``id``, then '_large@3x', then the
    file extension taken from ``url``."""
    extension = file_extension_for_url(url)
    return ''.join((id, '_large@3x', extension))

# Send the default requests User-Agent extended with the platform string.
headers = requests.utils.default_headers()
default_agent = headers['User-Agent']
headers.update({'User-Agent': default_agent + ' (' + platform.platform() + ')'})
failed = list()

# Make sure the cache directory exists before any file is opened inside it.
os.makedirs('images', exist_ok=True)

for idx, currency in enumerate(currencies):
    # Entries without metadata have no 'imageUrl' key at all; skip those and
    # empty URLs before trying to derive a file name from them.
    image_url = currency.get('imageUrl')
    if not image_url:
        continue

    image_name = cache_image_file_name(currency['coinGeckoId'], image_url)
    image_path = 'images/' + image_name
    if os.path.exists(image_path):
        # Already cached: no request, no write, no delay.
        continue

    # Request this currency's image URL (not a `url` variable left over from
    # another cell) and only inspect the response that was just received.
    with requests.get(image_url, headers = headers, stream = True) as res:
        if res.status_code == 200:
            # Have the raw stream undo any Content-Encoding so the bytes on
            # disk are the image itself.
            res.raw.decode_content = True
            with open(image_path, 'wb') as f:
                shutil.copyfileobj(res.raw, f)
            print('\r', 'Downloaded image', image_name, idx, '         ', end='')
        else:
            failed.append(image_url)
    # Pause between downloads (presumably to avoid being rate limited).
    time.sleep(1.21)

print('Failed count', len(failed))

In [7]:
# Resize images
from PIL import Image
from PIL import ImageColor
import os

# NOTE(review): this loop only builds the path of each cached image; no
# resizing is performed here yet.
for image_name in os.listdir('images/'):
    path = 'images/{}'.format(image_name)


# for idx, currency in enumerate(currencies):
#     image_name = cache_image_file_name(currency['coinGeckoId'], currency['imageUrl'])
#     exists = os.path.exists('images/' + image_name)
#     print(exists, image_name)

roobee_large@3x.png
celeb_large@3x.png
electrum-dark_large@3x.png
stronger_large@3x.png
boost_large@3x.png
mx-token_large@3x.png
moonienft_large@3x.png
dust-token_large@3x.png
muse-2_large@3x.png
delphy_large@3x.png
archethic_large@3x.png
elysian_large@3x.png
entropyfi_large@3x.png
ethereum-yield_large@3x.png
sergs_large@3x.png
sport-and-leisure_large@3x.png
compound-uniswap_large@3x.png
mithril-share_large@3x.png
new-frontier-presents_large@3x.png
clearpool_large@3x.png
edgeless_large@3x.jpg
gazetv_large@3x.png
prism-network_large@3x.png
genexi_large@3x.jpg
darussafaka-sports-club_large@3x.png
alpaca_large@3x.png
3x-long-tomochain-token_large@3x.png
defil_large@3x.png
penguin-party-fish_large@3x.png
9481-wayburn_large@3x.jpg
crowns_large@3x.png
blockswap-network_large@3x.png
pendle_large@3x.png
mcs-token_large@3x.png
yearn-lazy-ape_large@3x.png
maya-preferred-223_large@3x.png
netkoin_large@3x.png
elevate_large@3x.png
dengba-planet_large@3x.jpg
bifi_large@3x.png
aga-token_large@3x.png
