In [1]:
import json
import requests
import platform
import time
import os
import urllib
from urllib.parse import urljoin, urlparse
from PIL import Image

def write(json_data: dict, file_name: str):
    with open(file_name, 'w') as f:
        f.write(json.dumps(json_data))

def file_name_for_url(url: str) -> str:
    return urljoin(url, urlparse(url).path).rsplit('/', 1)[-1]

def file_extension_for_url(url: str) -> str:
    return os.path.splitext(file_name_for_url(url))[-1]

def cache_image_file_name(id: str, url: str) -> str:
    return id + file_extension_for_url(url)

headers = requests.utils.default_headers()
default_agent = headers['User-Agent']
headers.update({'User-Agent': default_agent + ' (' + platform.platform() + ')'})

market_keys_to_remove = [
    'id', 'symbol', 'name', 'image', 'high_24h', 'low_24h', 'price_change_24h', 'market_cap_change_24h', 'market_cap_change_percentage_24h', 'max_supply', 'ath', 'ath_change_percentage', 'ath_date', 'atl', 'atl_change_percentage', 'atl_date', 'last_updated', 'price_change_percentage_24h_in_currency', 'roi'
]

space = '         '
req_interval = 1.25
req_interval_long = 60 * 5

In [2]:
# Download list of all currencies

url = 'https://api.coingecko.com/api/v3/coins/list?include_platform=true'
response = urllib.request.urlopen(url)
currencies = json.loads(response.read())
write(currencies, 'tmp_step_01.json')
print('Downloaded list of ', len(currencies))
time.sleep(req_interval)

Downloaded list of  12961


In [3]:
# Include only native or ethereum ERC20s

def include(currency: dict) -> bool:
    if len(currency['platforms']) == 0:
        return currency['id'] == 'ethereum'
    if 'ethereum' in currency['platforms']:
        return len(currency['platforms']['ethereum']) != 0
    return False

currencies = list(filter(lambda c: include(c) , currencies))

# Change id to coinGeckoId
for currency in currencies:
    currency['coinGeckoId'] = currency['id']
    currency.pop('id', None )

write(currencies, 'tmp_step_02.json')
print('Filtered out unsupported, remaining count', len(currencies))

Filtered out unsupported, remaining count 4819


In [4]:
# Download market data

def url(page: int) -> str:
    url = "https://api.coingecko.com/api/v3/coins/markets"
    url += "?vs_currency=usd&order=market_cap_desc&per_page=250&page=" + str(page)
    url += "&sparkline=false&price_change_percentage=24h"
    return url

page = 0
markets = list()
while True:
    response = requests.get(url(page), headers = headers, stream = False)
    if response.status_code == 200:
        page_markets = json.loads(response.content)
        markets += page_markets
        write(markets, 'tmp_step_03.json')
        print('\r', 'Downloaded markets page', page, space, end='')
        time.sleep(req_interval)
        if len(page_markets) == 0:
            print('\r', 'Ended page count', page, ', market: ', len(markets), space, end='')
            break;
    elif response.status_code == 429:
        print('\r', 'Long sleeps page', page, space, end='')
        time.sleep(req_interval_long)
    else:
        print('\r', 'Ended page count', page,  ', market: ', len(markets), response.status_code, space, end='')
        break;
    page += 1

 Ended page count 53 , market:  13211          

In [5]:
# Update currencies with rank and sort.
markets_dict = dict()

for market in markets:
    markets_dict[market['id']] = market

write(markets_dict, 'tmp_step_04.json')

for currency in currencies:
    if currency['coinGeckoId'] in markets_dict:
        market = markets_dict[currency['coinGeckoId']]
        currency['imageUrl'] = market['image']
        if 'market_cap_rank' in market:
            currency['rank'] = market['market_cap_rank']


currencies_ranked = list()
currencies_no_rank = list()

for currency in currencies:
    if 'rank' in currency and currency['rank'] is not None:
        currencies_ranked.append(currency)
    else:
        currencies_no_rank.append(currency)

print(len(currencies))
print(len(currencies_ranked))
print(len(currencies_no_rank))

sorted_currencies = list(sorted(currencies_ranked, key=lambda x: x['rank']))
sorted_currencies = sorted_currencies + currencies_no_rank
write(sorted_currencies, 'tmp_step_05.json')
currencies = sorted_currencies

4819
2175
2644


In [6]:
# Minimize currencies
for currency in currencies:
    if 'imageUrl' in currency and currency['imageUrl'] == 'missing_large.png':
        currency.pop('imageUrl', None)

    if 'platforms' in currency and len(currency['platforms']) != 0:
        currency['platforms'] = {'ethereum': currency['platforms']['ethereum']}

write(currencies, 'tmp_step_06.json')

# Minimize markets
minimized_markets = dict()
for currency in currencies:
    id = currency['coinGeckoId']
    if id in markets_dict:
        market = markets_dict[id]
        for key in market_keys_to_remove:
            market.pop(key, None )
        minimized_markets[id] = market

write(minimized_markets, 'cache_markets.json')

In [None]:
# Get urls and description

base_url = 'https://api.coingecko.com/api/v3/coins/'
query_params = '?localization=false&tickers=false&market_data=false&community_data=false&developer_data=false&sparkline=false'

for idx, currency in enumerate(currencies):
    url = base_url + currency['coinGeckoId'] + query_params
    info = dict()
    try:
        time.sleep(req_interval)
        response = urllib.request.urlopen(url)
        info = json.loads(response.read())
    except:
        print('\r', 'Sleeping for 61s', idx, space, end='')
        time.sleep(req_interval_long)
        response = urllib.request.urlopen(url)
        info = json.loads(response.read())

    if 'links' in info:
        if 'homepage' in info['links'] and len(info['links']['homepage']) != 0:
            currency['link'] = info['links']['homepage'][0]

        if 'twitter_screen_name' in info['links'] is not None:
            currency['twitter'] = info['links']['twitter_screen_name']

    if 'description' in info and len(info['description']['en']) != 0:
        currency['description'] = info['description']['en']

    write(currencies, 'tmp_step_07.json')
    print('\r', 'Downloaded metadata', idx, end='')

In [9]:
with open('tmp_step_07.json', 'r') as f:
  currencies_with_description = json.load(f)

currencies_with_description_dict = dict()
for currency in currencies_with_description:
    currencies_with_description_dict[currency['coinGeckoId']] = currency

for currency in currencies:
    id = currency['coinGeckoId']
    if id in currencies_with_description_dict:
        meta = currencies_with_description_dict[id]
        if 'link' in meta:
            currency['link'] = meta['link']
        if 'twitter' in meta:
            currency['twitter'] = meta['twitter']
        if 'description' in meta:
            currency['description'] = meta['description']

In [None]:
# Download images if needed
import requests
import shutil

failed = list()

def download_image(currency: dict) -> int:
    if 'imageUrl' not in currency:
        return 200
    image_name = cache_image_file_name(currency['coinGeckoId'], currency['imageUrl'])
    image_path = 'images/' + image_name
    if os.path.exists(image_path) == True or currency['imageUrl'] is None:
        return 200
    else:
        time.sleep(req_interval)
        res = requests.get(currency['imageUrl'], headers = headers, stream = True)
        if res.status_code == 200:
            with open(image_path, 'wb') as f:
                shutil.copyfileobj(res.raw, f)
                print('\r', 'Downloaded image', image_name, idx, space, end='')
        return res.status_code

for idx, currency in enumerate(currencies):
    status_code = download_image(currency)
    if status_code == 429:
        print('\r', 'Sleeping', currency['coinGeckoId'], idx, space, end='')
        time.sleep(req_interval_long)

print('Failed count', len(failed))

In [12]:
# Resize images

image_length = 32 * 3
count = len(os.listdir('images/'))
failed_resize_images = list()

for idx, image_name in enumerate(os.listdir('images/')):
    path = 'images/' + image_name
    image = None
    try:
        image = Image.open(path)
    except:
        os.remove(path)
        print('\r', 'Removing image', image_name, idx, '/', count, space, end='')
    if image is not None:
        if image.size[0] != image_length or image.size[1] != image_length:
            image = image.resize((image_length, image_length), reducing_gap=3.0)
            try:
                image.save(path)
            except:
                failed_resize_images.append(path)
    print('\r', 'Resizing image', image_name, idx, '/', count, space, end='')

print('')
for failed_path in failed_resize_images:
    print(failed_path)
print('Failed images count', len(failed_resize_images))

 Resizing image kaiba-defi.png 4545 / 4546              / 4546                                  
Failed images count 0


In [13]:
# Get color images
from PIL import ImageColor
import numpy as np
import sklearn
import sklearn.cluster
import scipy
import scipy.cluster
import json

count = len(currencies)

def rgba_to_hex(r, g, b, a):
  return ('#{:02X}{:02X}{:02X}{:02X}').format(r, g, b, a)

def rgb_to_hex(r, g, b):
  return ('#{:02X}{:02X}{:02X}').format(r, g, b)

class Color:
    def __init__(self, rgba):
        self.rgba = rgba
        self.hex = rgb_to_hex(rgba[0], rgba[1], rgba[2])
        self.average = (rgba[0] + rgba[1] + rgba[2]) / 3

def dominant_colors(image: Image):  # PIL image input
    ar = np.asarray(image)
    shape = ar.shape
    ar = ar.reshape(np.product(shape[:2]), shape[2]).astype(float)

    kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=10, init="k-means++", max_iter=20, random_state=1000).fit(ar)
    codes = kmeans.cluster_centers_

    vecs, _dist = scipy.cluster.vq.vq(ar, codes)      # assign codes
    counts, _bins = np.histogram(vecs, len(codes))    # count occurrences

    colors = []
    for index in np.argsort(counts)[::-1]:
        colors.append(tuple([int(code) for code in codes[index]]))
    return colors   # returns colors in order of dominance

overrides = dict()
with open('overrides/overrides.json', 'r') as f:
    overrides = json.load(f)

for idx, currency in enumerate(currencies):
    image = None
    try:
        image_name = cache_image_file_name(currency['coinGeckoId'], currency['imageUrl'])
        image_path = 'images/' + image_name
        image = Image.open(image_path)
    except:
        continue

    image = image = image.convert("RGBA")
    colors_rgba = dominant_colors(image)
    colors = map(lambda rgba: Color(rgba), colors_rgba)
    colors = filter(lambda color: color.average != 0 and color.average != 255, colors)
    colors = sorted(colors, key=lambda x: x.average)

    if len(colors) > 0:
        currency["colors"] = [
            colors[int(len(colors) * 0.33)].hex,
            colors[int(len(colors) * 0.66)].hex
        ]

    if currency['coinGeckoId'] in overrides:
        currency['colors'] = overrides[currency['coinGeckoId']]

    write(currencies, 'cache_currencies.json')
    print('\r', 'Colors', image_name, idx, '/', count, space, end='')

 Colors zytara-dollar.png 4818 / 4819                                          

In [14]:
# Rename files for @3x

import shutil

for idx, image_name in enumerate(os.listdir('images/')):
    path = 'images/' + image_name
    parts = image_name.split('.')
    shutil.copyfile(path, 'images@3x/' + parts[0] + '@3x.' + parts[1])

In [15]:
# Split metadata

metadatas = dict()
currencies_stripped = list()

for currency in currencies:
    metadata = dict()
    if 'imageUrl' in currency:
        metadata['imageUrl'] = currency['imageUrl']
    if 'rank' in currency:
        metadata['rank'] = currency['rank']
    if 'colors' in currency:
        metadata['colors'] = currency['colors']
    if 'link' in currency:
        metadata['link'] = currency['link']
    if 'description' in currency:
        metadata['description'] = currency['description']
    metadatas[currency['coinGeckoId']] = metadata

for currency in currencies:
    currency_dict = dict()
    if 'name' in currency:
        currency_dict['name'] = currency['name']
    if 'symbol' in currency:
        currency_dict['symbol'] = currency['symbol']
    if 'decimals' in currency:
        currency_dict['decimals'] = currency['decimals']
    if 'platforms' in currency and len(currency['platforms']) != 0:
        currency_dict['address'] = currency['platforms']['ethereum']
    if 'address' in currency_dict:
        currency_dict['type'] = 2
    else:
        currency_dict['type'] = 1
    if 'coinGeckoId' in currency:
        currency_dict['coinGeckoId'] = currency['coinGeckoId']

    currencies_stripped.append(currency_dict)

write(metadatas, 'cache_metadatas.json')
write(currencies_stripped, 'cache_currencies.json')

In [16]:
# update decimals from cache_currencies_with_decimals

with open('cache_currencies_with_decimals.json', 'r') as f:
  currencies_with_decimals = json.load(f)

decimals = dict()

for currency in currencies_with_decimals:
    if 'decimals' in currency:
        decimals[currency['coinGeckoId']] = currency

for currency in currencies_stripped:
    id = currency['coinGeckoId']
    if id in decimals and decimals[id] != 18:
        currency['decimals'] = decimals[id]['decimals']

write(currencies_stripped, 'cache_currencies.json')