In [1]:
import os
import csv
from io import BytesIO

import requests
from PIL import Image
from dotenv import load_dotenv

### Загружаем переменные окружения

In [2]:
# Populate the process environment before the API keys are read with
# os.getenv further down (presumably from a local .env file, which is
# python-dotenv's default lookup -- confirm the file exists next to the notebook).
load_dotenv()

True

### Параметры:

In [3]:
# Download / preprocessing knobs.
IMG_SIZE = (128, 128)  # target size handed to Image.resize for every image
BATCH_SIZE = 50  # upper bound for the "limit" query parameter of one API call

# Output locations: images are written into SAVE_DIR and the label index
# (a CSV file) is stored alongside them.
SAVE_DIR = "dataset/images"
LABELS_FILE = os.path.join(SAVE_DIR, "data.csv")

In [4]:
# Image-search endpoints of the two public APIs, keyed by category name.
API_URLS = dict(
    cat="https://api.thecatapi.com/v1/images/search",
    dog="https://api.thedogapi.com/v1/images/search",
)

In [5]:
# Per-category request headers carrying the API key taken from the
# environment variables API_KEY_cat / API_KEY_dog.
HEADERS = {
    category: {"x-api-key": os.getenv(f"API_KEY_{category}")}
    for category in ("cat", "dog")
}

# A missing variable leaves the header value as None, and requests drops
# None-valued headers without complaint, so the calls would silently go out
# unauthenticated. Surface the misconfiguration here instead.
_missing_api_keys = [
    f"API_KEY_{category}"
    for category, header in HEADERS.items()
    if not header["x-api-key"]
]
if _missing_api_keys:
    print(
        "Warning: environment variable(s) not set: "
        + ", ".join(_missing_api_keys)
        + "; the corresponding API requests will be sent without a key"
    )

### Функции для загрузки изображений

In [6]:
def download_image(url, path, timeout=10):
    """Fetch an image from ``url``, resize it to ``IMG_SIZE`` and save it as PNG.

    Args:
        url: Address of the image to download.
        path: Destination file path; missing parent directories are created.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        True if the image was written to ``path``; False on any failure
        (the error is printed, never raised).
    """
    try:
        # No stream=True: the whole body is needed anyway, and a streamed
        # response that fails raise_for_status() would keep its connection
        # checked out of the pool.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Make sure the target directory exists; otherwise every save fails
        # on a fresh checkout.
        parent_dir = os.path.dirname(path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        img = Image.open(BytesIO(response.content)).resize(IMG_SIZE)
        img.save(path, format="PNG")
        return True
    except Exception as e:
        # Deliberately best-effort: one bad image must not abort the whole run.
        print(f"Error {url}: {e}")
        return False

In [7]:
def get_images(num_images, label, labels, category, max_failed_rounds=5, timeout=10) -> None:
    """Download ``category`` images until ``labels`` holds ``num_images`` entries.

    Counting starts from ``len(labels)``, so ``num_images`` is the target
    *total* length of ``labels`` (shared across calls), not the number of
    images fetched by this particular call. Files are named
    ``image_<n>.png`` with ``n`` continuing that running count.

    Args:
        num_images: Desired total number of entries in ``labels``.
        label: Value stored next to every file name downloaded by this call.
        labels: List extended in place with ``(file_name, label)`` tuples.
        category: Key into ``API_URLS`` / ``HEADERS`` (a KeyError propagates
            for an unknown category).
        max_failed_rounds: Number of consecutive API rounds that add no image
            (request error, unusable payload, empty batch, or every download
            failing) after which the function gives up instead of looping
            forever.
        timeout: Seconds to wait for the search API before treating the
            request as failed.

    Returns:
        None. Results are reported through ``labels`` and the files on disk.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)

    downloaded = len(labels)
    failed_rounds = 0

    while downloaded < num_images:
        count_before_round = downloaded

        try:
            response = requests.get(
                API_URLS[category],
                params={"limit": min(BATCH_SIZE, num_images - downloaded)},
                headers=HEADERS[category],
                timeout=timeout,
            )
            response.raise_for_status()
            images = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            print(f"Error API: {e}")
            images = []

        if not isinstance(images, list):
            # Error payloads may be a JSON object/string rather than a list.
            print(f"Error API: unexpected response payload: {images!r}")
            images = []

        for img_data in images:
            if downloaded >= num_images:
                break

            img_url = img_data.get("url") if isinstance(img_data, dict) else None
            if not img_url:
                continue

            img_name = f"image_{downloaded + 1}.png"
            img_path = os.path.join(SAVE_DIR, img_name)

            if download_image(img_url, img_path):
                labels.append((img_name, label))
                downloaded += 1

        if downloaded > count_before_round:
            failed_rounds = 0
            continue

        # Nothing was added this round; stop after too many in a row so a
        # dead API or an unwritable directory cannot spin the loop forever.
        failed_rounds += 1
        if failed_rounds >= max_failed_rounds:
            print(
                f"Giving up on {category}: no image downloaded in "
                f"{failed_rounds} consecutive attempts "
                f"({downloaded}/{num_images} collected)"
            )
            break
### Список меток изображений

In [8]:
# Shared accumulator of (image file name, label) tuples: get_images appends
# to it in place, and it is written out to LABELS_FILE at the end.
labels = []

### Скачиваем кошек (метка 1) и собак (метка 0)

In [9]:
# Cats get label 1. get_images counts from len(labels), so with a freshly
# reset (empty) list this aims for 1000 cat images.
get_images(1000, 1, labels, "cat")

In [11]:
# Dogs get label 0. The first argument is the target TOTAL length of
# `labels`, not a per-call count: on top of the 1000 cats collected above,
# 2000 means 1000 additional dog images.
get_images(2000, 0, labels, "dog")



### Сохраняем метки в CSV-файл

In [13]:
# Write the label index: a header row followed by one (image, label) row per
# downloaded file. newline="" lets the csv module control line endings.
with open(LABELS_FILE, mode="w", newline="") as labels_csv:
    csv.writer(labels_csv).writerows([("image", "label"), *labels])