In [1]:

# --- Download sources -------------------------------------------------------
# Remote locations of the four files this notebook fetches.
# NOTE: the image archive is served over plain HTTP; the MD5 comparison done
# after each download is the only integrity check applied to it.
REPOSITORY_CORE50_NPZ_128 = "http://bias.csr.unibo.it/maltoni/download/core50/core50_imgs.npz"
REPOSITORY_CORE50_PATHS = "https://vlomonaco.github.io/core50/data/paths.pkl"
REPOSITORY_CORE50_LABELS = "https://vlomonaco.github.io/core50/data/labels.pkl"
REPOSITORY_CORE50_LUP = "https://vlomonaco.github.io/core50/data/LUP.pkl"
# Expected MD5 hex digests, keyed by the *basename* of the downloaded file.
# download_file() looks up the entry with os.path.basename(file_path), so every
# file passed to it must have a key here.
md5 = {
    "core50_imgs.npz": "3689d65d0a1c760b87821b114c8c4c6c",
    "labels.pkl": "281c95774306a2196f4505f22fd60ab1",
    "paths.pkl": "b568f86998849184df3ec3465290f1b0",
    "LUP.pkl": "33afc26faa460aca98739137fdfa606e"
}
# Local directory (relative to the notebook's working directory) where the
# downloaded files and the derived raw-binary cache are stored.
FOLDER = "./datasets/core50"
import os
import requests
import pickle
import numpy as np
import sys
from tqdm import tqdm
import hashlib

In [2]:
# Create the dataset directory (and any missing parents) up front so the
# downloads below can write into it; exist_ok makes the cell safe to re-run.
os.makedirs(FOLDER, exist_ok=True)
def download_file(url, file_path):
    """Download ``url`` to ``file_path`` and verify it against its MD5 digest.

    The payload is streamed into ``file_path + ".part"`` and only moved to
    ``file_path`` once the checksum matches. An interrupted or corrupted
    download therefore never leaves a file at ``file_path`` that the
    ``os.path.exists`` guards used by the callers would mistake for a
    complete one.

    Args:
        url: Address of the file to fetch.
        file_path: Destination path. Its basename must be a key of the
            module-level ``md5`` dict, which supplies the expected digest.

    Raises:
        KeyError: If the basename of ``file_path`` has no entry in ``md5``
            (raised before any network traffic).
        requests.RequestException: On connection errors, timeouts or a
            non-2xx HTTP status.
        SystemExit: With code 1 when the downloaded data fails the checksum.
    """
    # Look the digest up first so a missing entry fails fast instead of
    # after a potentially very large download.
    expected_md5 = md5[os.path.basename(file_path)]
    partial_path = file_path + ".part"
    try:
        # (connect, read) timeouts: without them a stalled server would hang
        # the cell forever. The context managers guarantee the connection,
        # the file handle and the progress bar are released on any error.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Do not save an HTML error page as if it were the dataset.
            response.raise_for_status()
            total_size_in_bytes = int(response.headers.get('content-length', 0))
            with open(partial_path, 'wb') as file, tqdm(
                total=total_size_in_bytes, unit='iB', unit_scale=True
            ) as progress_bar:
                for chunk in response.iter_content(1024):
                    progress_bar.update(len(chunk))
                    file.write(chunk)
    except BaseException:
        # Includes KeyboardInterrupt (kernel interrupt): drop the partial
        # file so the next run starts from scratch, then re-raise.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    if not checksum(partial_path, expected_md5):
        print("Checksum failed. Deleting file.")
        os.remove(partial_path)
        sys.exit(1)
    # Atomic rename: file_path only ever exists with verified content.
    os.replace(partial_path, file_path)
    print("Checksum validated for " + file_path)

def checksum(file_path, md5):
    """Tell whether the MD5 digest of a file equals an expected value.

    The file is read in 4096-byte blocks, so it is never loaded into memory
    as a whole.

    Args:
        file_path: Path of the file to hash.
        md5: Expected digest as a hexadecimal string. The comparison is an
            exact string match against ``hexdigest()`` output.

    Returns:
        True if the computed hex digest equals ``md5``, False otherwise.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(4096), b""):
            digest.update(block)
    return digest.hexdigest() == md5

# One row per required file: (file name inside FOLDER, source URL, message
# printed right before the download starts). Files already present on disk
# are skipped, so re-running the cell does not download them again.
downloads = (
    ("core50_imgs.npz", REPOSITORY_CORE50_NPZ_128, "Downloading dataset core50..."),
    ("paths.pkl", REPOSITORY_CORE50_PATHS, "Downloading paths..."),
    ("labels.pkl", REPOSITORY_CORE50_LABELS, "Downloading labels..."),
    ("LUP.pkl", REPOSITORY_CORE50_LUP, "Downloading lup..."),
)
for file_name, source_url, message in downloads:
    target_path = FOLDER + "/" + file_name
    if os.path.exists(target_path):
        continue
    print(message)
    download_file(source_url, target_path)
print("Download completed.")


Downloading paths...


100%|██████████| 5.28M/5.28M [00:00<00:00, 11.1MiB/s]


Checksum validated for ./datasets/core50/paths.pkl
Downloading labels...


100%|██████████| 26.5M/26.5M [00:02<00:00, 11.5MiB/s]


Checksum validated for ./datasets/core50/labels.pkl
Downloading lup...


100%|██████████| 55.5M/55.5M [00:04<00:00, 11.6MiB/s]


Checksum validated for ./datasets/core50/LUP.pkl
Download completed.


In [3]:
# Load the image array, preferring a raw-binary cache of the npz file.
# The .bin file is written with ndarray.tofile, which stores bytes only (no
# shape or dtype), so both are supplied explicitly when reading it back.
# NOTE(review): this assumes the 'x' array in the npz is uint8 with exactly
# this shape, presumably (images, height, width, channels) - confirm.
CORE50_SHAPE = (164866, 128, 128, 3)
# Plain Python ints: the element count exceeds the 32-bit integer range.
expected_bin_bytes = 1
for dim in CORE50_SHAPE:
    expected_bin_bytes *= dim  # uint8 -> one byte per element

bin_path = FOLDER + "/core50_img.bin"
# Only trust the cache when its size is exactly right; a truncated file left
# by an interrupted write would otherwise make reshape fail on every run.
if os.path.exists(bin_path) and os.path.getsize(bin_path) == expected_bin_bytes:
    data = np.fromfile(bin_path, dtype=np.uint8).reshape(CORE50_SHAPE)
else:
    # Context manager closes the underlying zip archive once 'x' is read.
    with np.load(FOLDER + "/core50_imgs.npz") as archive:
        data = archive['x']
    data.tofile(bin_path)

# SECURITY: pickle.load can execute arbitrary code. These files are only
# loaded after their MD5 digest matched the pinned values at download time;
# do not point these paths at untrusted files.
with open(FOLDER + "/labels.pkl", "rb") as labels_file:
    labels = pickle.load(labels_file)
with open(FOLDER + "/paths.pkl", "rb") as paths_file:
    paths = pickle.load(paths_file)
with open(FOLDER + "/LUP.pkl", "rb") as lup_file:
    lup = pickle.load(lup_file)

(164866, 128, 128, 3)
8
164866
8
