In [1]:
import glob
import json
import os
from pathlib import Path
import requests
import tarfile
from urllib.parse import urlparse

# URL of the data set archive; can be overridden with the DATASET_URL
# environment variable
data_file = os.getenv('DATASET_URL', 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')

# directory (relative to the working directory) that receives the
# extracted archive content
data_dir_name = 'data'

print('Downloading data file {} ...'.format(data_file))
# stream=True defers fetching the body so the archive can be written to disk
# chunk by chunk instead of being held in memory; the (connect, read) timeout
# keeps an unresponsive server from blocking this cell indefinitely
with requests.get(data_file, stream=True, timeout=(10, 60)) as r:
    if r.status_code != 200:
        raise RuntimeError('Could not fetch {}: HTTP status code {}'
                           .format(data_file, r.status_code))

    # extract data set file name from URL
    data_file_name = Path(urlparse(data_file).path).name
    if not data_file_name:
        # without a file name the target path below would be the data
        # directory itself, which cannot be opened for writing
        raise RuntimeError('Could not determine a file name from URL {}'
                           .format(data_file))

    # create the directory where the downloaded file will be stored
    data_dir = Path(data_dir_name)
    data_dir.mkdir(parents=True, exist_ok=True)
    downloaded_data_file = data_dir / data_file_name

    try:
        print('Saving downloaded file "{}" as ...'.format(data_file_name))
        with open(downloaded_data_file, 'wb') as downloaded_file:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                downloaded_file.write(chunk)

        print('Extracting downloaded file in directory "{}" ...'
              .format(data_dir))
        with tarfile.open(downloaded_data_file, 'r') as tar:
            if hasattr(tarfile, 'data_filter'):
                # the archive comes from the network: reject absolute paths,
                # '..' components and links that point outside of data_dir
                tar.extractall(data_dir, filter='data')
            else:
                # NOTE: this interpreter predates tarfile extraction filters;
                # members are extracted unchecked, so only use trusted URLs
                tar.extractall(data_dir)
    finally:
        # remove the (possibly partial) archive even if saving or
        # extracting failed
        if downloaded_data_file.exists():
            print('Removing downloaded file ...')
            downloaded_data_file.unlink()

Downloading data file https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz ...


Saving downloaded file "cifar-10-python.tar.gz" as ...
Extracting downloaded file in directory "data" ...


Removing downloaded file ...
