# Download images
---
### This script downloads images from https://battlecatsinfo.github.io saving to [cat](data/cat) an [background](data/background) respectively.

In [3]:
import os
import logging
import urllib
from urllib.error import HTTPError
from concurrent.futures import ThreadPoolExecutor
import requests
from furl import furl
from itertools import count

cats_path = 'data/cat'
bg_path = 'data/background'
base_url = furl('https://battlecatsinfo.github.io')

logger = logging.getLogger()
logger.setLevel(logging.INFO)

### Helper function

In [2]:
from itertools import takewhile


def find_max_index(url: str):
    def key(index: int):
        f = url.format(id=index)
        res = requests.head(f)
        logger.info(f'{f} returned {res.status_code}')
        return res.status_code == 200

    p = list(takewhile(key, (2 ** i for i in count())))
    lo = p[-1]
    hi = 2 * lo
    while lo < hi:
        mid = (lo + hi) // 2
        if key(mid):
            lo = mid + 1
        else:
            hi = mid
    return lo - 1


### Download cat images

In [3]:
# Download cat images

_cats_url = f'{base_url.url}/img/u/{{id:03}}/{{fcs}}/uni{{id:03}}_{{fcs}}00.png'


def download_cats(start: int = 0, force_download=False):
    if not os.path.exists(cats_path):
        os.mkdir(cats_path)
    elif not force_download and any((file.endswith('.png') for file in os.listdir(cats_path))):
        return

    def download(cat_id: int):
        first = _cats_url.format(id=cat_id, fcs='f')
        if requests.head(first).status_code != 200:
            logging.warning(f'failed to download cat[{cat_id}]')
            return
        try:
            for fcs in ['f', 'c', 's']:
                src = _cats_url.format(id=cat_id, fcs=fcs)
                logging.debug(f'Downloading {src}')
                output_path = f'{cats_path}/{cat_id:003}_{fcs}.png'
                image = requests.get(src)
                with open(output_path, "wb") as f:
                    f.write(image.content)
        except HTTPError:
            pass
        logging.info(f'downloaded cat[{cat_id}]')

    if not os.path.exists(cats_path):
        os.mkdir(cats_path)
    elif not force_download and any((file.endswith('.png') for file in os.listdir(cats_path))):
        return

    f_url = f'{base_url.url}/img/u/{{id:03}}/f/uni{{id:03}}_f00.png'
    max_index = find_max_index(f_url)
    logging.info(f'maximum index is {max_index}')

    with ThreadPoolExecutor(max_workers=10) as executor:
        for i in range(start, max_index + 1):
            executor.submit(download, i)


download_cats()

INFO:root:maximum index is 761
INFO:root:downloaded cat[2]
INFO:root:downloaded cat[0]
INFO:root:downloaded cat[8]
INFO:root:downloaded cat[9]
INFO:root:downloaded cat[4]
INFO:root:downloaded cat[5]
INFO:root:downloaded cat[1]
INFO:root:downloaded cat[7]
INFO:root:downloaded cat[3]
INFO:root:downloaded cat[6]
INFO:root:downloaded cat[10]
INFO:root:downloaded cat[13]
INFO:root:downloaded cat[14]
INFO:root:downloaded cat[12]
INFO:root:downloaded cat[16]
INFO:root:downloaded cat[11]
INFO:root:downloaded cat[15]
INFO:root:downloaded cat[18]
INFO:root:downloaded cat[19]
INFO:root:downloaded cat[17]
INFO:root:downloaded cat[20]
INFO:root:downloaded cat[22]
INFO:root:downloaded cat[24]
INFO:root:downloaded cat[21]
INFO:root:downloaded cat[25]
INFO:root:downloaded cat[23]
INFO:root:downloaded cat[26]
INFO:root:downloaded cat[28]
INFO:root:downloaded cat[27]
INFO:root:downloaded cat[29]
INFO:root:downloaded cat[30]
INFO:root:downloaded cat[31]
INFO:root:downloaded cat[32]
INFO:root:downloaded c

### Download backgrounds

In [4]:
# download backgrounds

def download_backgrounds(start=0, force_download=False):
    if not os.path.exists(bg_path):
        os.mkdir(bg_path)
    elif not force_download and any((file.endswith('.png') for file in os.listdir(bg_path))):
        return

    bg_url = f'{base_url}/img/bg/bg{{id:03}}.png'
    max_index = find_max_index(bg_url)
    logging.info(f'maximum index is {max_index}')

    def download(bg_id: int):
        src = base_url.copy().add(path=f'img/bg/bg{bg_id:03}.png').url
        output_path = f'{bg_path}/bg{bg_id:003}.png'
        image = requests.get(src)
        if image.status_code == 200:
            with open(output_path, "wb") as f:
                f.write(image.content)
            logging.info(f'downloaded bg[{bg_id}]')
        else:
            logging.warning(f'failed to download bg[{bg_id}]')

    with ThreadPoolExecutor(max_workers=10) as executor:
        for i in range(start, max_index + 1):
            executor.submit(download, i)


download_backgrounds()

In [4]:
import cv2
import numpy as np

_cats_url = f'{base_url.url}/img/u/{{id:03}}/{{fcs}}/uni{{id:03}}_{{fcs}}00.png'
uurl = _cats_url.format(id=1, fcs='f')
req = urllib.request.urlopen(uurl)
arr = np.asarray(bytearray(req.content), dtype=np.uint8)
img = cv2.imdecode(arr, -1)  # 'Load it as it is'

cv2.imshow('lalala', img)
if cv2.waitKey() & 0xff == 27: quit()

error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window.cpp:1301: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'


In [8]:
import os
cats_path = 'data\\cat'
cats = [cat.path for cat in os.scandir(cats_path) if cat.name.endswith('.png')]

for cat in cats:
    try:
        img = cv2.imread(cat,cv2.IMREAD_UNCHANGED)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    except:
        print(f'failed to read cat[{cat}]')
        os.remove(cat)
