In [21]:
# NOTE(review): cv2 is imported by the next cell, yet the opencv-python
# install below is commented out - presumably it is already installed; confirm.
#!pip3 install opencv-python
!pip3 install numpy

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [None]:
import ijson
from PIL import Image
from io import BytesIO
import requests
import cv2
import numpy as np

def download_image(url, timeout=100):
    """Fetch an image over HTTP and return it as a fully decoded PIL image.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address of the image to download.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument. Defaults to 100, the value that used to be hard-coded.

    Returns:
        The loaded ``PIL.Image.Image``, or ``None`` when the request, the HTTP
        status check or the decoding raised (the error is printed).
    """
    print(f"Downloading: {url}")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # turn 4xx/5xx responses into exceptions
        image = Image.open(BytesIO(response.content))
        # Image.open is lazy; load() forces decoding so broken data fails here.
        image.load()
        return image
    except Exception as e:
        # Deliberate best effort: report the failure and let the caller skip.
        print(f"Error downloading image from {url}: {e}")
        return None

def crop_to_multiple_of_64(image, multiple=64):
    """Crop an image from its top-left corner so both sides are multiples of ``multiple``.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method (a PIL image in this notebook).
        multiple: Grid step each side is floored to. Defaults to 64.

    Returns:
        The cropped image, or ``None`` when either side is shorter than one
        grid step or when cropping raised (the error is printed).
    """
    try:
        width, height = image.size
        new_width = (width // multiple) * multiple
        new_height = (height // multiple) * multiple
        if new_width == 0 or new_height == 0:
            # A side shorter than the step floors to 0, which would produce an
            # empty crop; report it like any other cropping failure instead.
            print(f"Error cropping image: size {width}x{height} is smaller than {multiple}")
            return None
        return image.crop((0, 0, new_width, new_height))
    except Exception as e:
        print(f"Error cropping image: {e}")
        return None

def downscale_image_opencv(image, max_size):
    """Resize an image with OpenCV so that its longer side equals ``max_size``.

    The other side is scaled by the same factor and truncated to an integer.
    No check is made that the input is larger than ``max_size``, so smaller
    images are enlarged to it as well.

    Args:
        image: Anything ``np.array`` can turn into an image array.
        max_size: Target length of the longer side, in pixels.

    Returns:
        The resized NumPy array, or ``None`` if anything raised (the error is
        printed).
    """
    try:
        pixels = np.array(image)
        src_h, src_w = pixels.shape[:2]
        # cv2.resize expects the target size as (width, height).
        if src_w > src_h:
            target = (max_size, int(src_h * (max_size / src_w)))
        else:
            target = (int(src_w * (max_size / src_h)), max_size)
        return cv2.resize(pixels, target, interpolation=cv2.INTER_AREA)
    except Exception as err:
        print(f"Error downscaling image: {err}")
        return None

def iterate_range(file_path, start, end, output_dir="/Users/v.kulibaba/Desktop"):
    """Download, resize, crop and save the images for items ``start``..``end``.

    The JSON file is streamed with ``ijson`` (prefix ``'item'``) and its
    elements are counted from 1. For every element inside the range the image
    at ``item['f']`` is downloaded, resized so its longer side is 1536, cropped
    to multiples of 64 and written as ``gb_{start}_{end}_{i}.jpg``. The text in
    ``item['t']`` (newlines removed, underscores replaced by spaces) is written
    next to it as ``gb_{start}_{end}_{i}.txt``.

    Args:
        file_path: Path to the JSON file to stream.
        start: First item number to process (1-based, inclusive).
        end: Last item number to process (inclusive).
        output_dir: Existing folder that receives the ``.jpg``/``.txt`` files.
            Defaults to the folder that used to be hard-coded.

    Returns:
        None. Failures are printed and the affected item is skipped.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        # The file is streamed, so items before ``start`` still have to be
        # read (and skipped) to reach the requested range.
        for i, item in enumerate(ijson.items(source, 'item'), start=1):
            if i < start:
                continue
            if i > end:
                break  # past the range: stop reading the file
            try:
                print(f"Processing item {i}: {item['f']}")

                # Download the image.
                image = download_image(item["f"])
                if image is None:
                    print(f"Skipping item {i} due to download error.")
                    continue

                # Make sure we really got an image-like object.
                if not hasattr(image, "size") or not isinstance(image.size, tuple):
                    print(f"Invalid image size for item {i}. Skipping.")
                    continue

                # JPEG cannot store alpha or palette data, and a palette image
                # would become raw palette indices once converted to an array,
                # so normalise everything that is not RGB/greyscale up front.
                if image.mode not in ("RGB", "L"):
                    image = image.convert("RGB")

                # Resize so the longer side is 1536.
                resized_image = downscale_image_opencv(image, 1536)
                if resized_image is None:
                    print(f"Skipping item {i} due to downscaling error.")
                    continue

                # Crop both sides down to multiples of 64.
                cropped_image = crop_to_multiple_of_64(Image.fromarray(resized_image))
                if cropped_image is None:
                    print(f"Skipping item {i} due to cropping error.")
                    continue

                # Save the cropped result (previously the uncropped image was
                # written and the crop was silently discarded).
                output_path = f"{output_dir}/gb_{start}_{end}_{i}.jpg"
                cropped_image.save(output_path, quality=96)
                print(f"Saved image {i} to {output_path}")

                # Strip line breaks and replace underscores with spaces.
                processed_text = item['t'].replace("\n", "").replace("_", " ")

                # Use a distinct handle name so the input file variable is not
                # shadowed while the parser is still reading from it.
                output_file = f"{output_dir}/gb_{start}_{end}_{i}.txt"
                with open(output_file, 'w', encoding='utf-8') as text_file:
                    text_file.write(processed_text)

                print(f"Текст сохранён в файл: {output_file}")

            except Exception as e:
                # Best effort: one broken item must not stop the whole range.
                print(f"Error processing item {i}: {e}")
                continue

# Example usage: process items 10 through 12 of the dataset file
iterate_range('/Users/v.kulibaba/Pictures/sana/hqdataset.txt', 10, 12)


Processing item 10: https://img3.gelbooru.com/images/00/00/00008e64de644368730f30997cf027d4.jpg
Downloading: https://img3.gelbooru.com/images/00/00/00008e64de644368730f30997cf027d4.jpg
Saved image 10 to /Users/v.kulibaba/Desktop/gb_10_12_10.jpg
Текст сохранён в файл: /Users/v.kulibaba/Desktop/gb_10_12_10.txt
Processing item 11: https://img3.gelbooru.com/images/00/00/000091e6c39a427fd1d82782bda05f6c.jpg
Downloading: https://img3.gelbooru.com/images/00/00/000091e6c39a427fd1d82782bda05f6c.jpg
Saved image 11 to /Users/v.kulibaba/Desktop/gb_10_12_11.jpg
Текст сохранён в файл: /Users/v.kulibaba/Desktop/gb_10_12_11.txt
Processing item 12: https://img3.gelbooru.com/images/00/00/0000a8790f26ff6850b0eec9d7c8f79a.jpg
Downloading: https://img3.gelbooru.com/images/00/00/0000a8790f26ff6850b0eec9d7c8f79a.jpg
Saved image 12 to /Users/v.kulibaba/Desktop/gb_10_12_12.jpg
Текст сохранён в файл: /Users/v.kulibaba/Desktop/gb_10_12_12.txt


In [None]:
# Build a lookup of rounded aspect ratio -> [width, height] for every
# combination of side lengths from 192 to 768 in steps of 64.
ASPECT_RATIO_384 = {}
width = 192   # smallest side length considered
height = 768  # largest side length considered
step = 64

# Walk every width/height combination on the 64-pixel grid.
# NOTE: several pairs round to the same ratio (every square gives 1.0, for
# example); a later pair overwrites an earlier one, so only the pair visited
# last is kept for each key.
for w in range(width, height + 1, step):
    for h in range(width, height + 1, step):
        ratio = round(w / h, 2)  # aspect ratio rounded to two decimals
        ASPECT_RATIO_384[str(ratio)] = [float(w), float(h)]

# Order the entries by numeric ratio. Sorting the string keys directly is
# lexicographic and would misplace values such as "10.0" ahead of "2.0".
ASPECT_RATIO_384 = dict(
    sorted(ASPECT_RATIO_384.items(), key=lambda entry: float(entry[0]))
)

# Render the dictionary as a ready-to-paste Python literal.
output = (
    "ASPECT_RATIO_384 = {\n"
    + "".join(
        f'    "{key}": [{value[0]}, {value[1]}],\n'
        for key, value in ASPECT_RATIO_384.items()
    )
    + "}"
)

print(output)



ASPECT_RATIO_384 = {
    "0.25": [192.0, 768.0],
    "0.27": [192.0, 704.0],
    "0.3": [192.0, 640.0],
    "0.33": [256.0, 768.0],
    "0.36": [256.0, 704.0],
    "0.38": [192.0, 512.0],
    "0.4": [256.0, 640.0],
    "0.42": [320.0, 768.0],
    "0.43": [192.0, 448.0],
    "0.44": [256.0, 576.0],
    "0.45": [320.0, 704.0],
    "0.5": [384.0, 768.0],
    "0.55": [384.0, 704.0],
    "0.56": [320.0, 576.0],
    "0.57": [256.0, 448.0],
    "0.58": [448.0, 768.0],
    "0.6": [384.0, 640.0],
    "0.62": [320.0, 512.0],
    "0.64": [448.0, 704.0],
    "0.67": [512.0, 768.0],
    "0.7": [448.0, 640.0],
    "0.71": [320.0, 448.0],
    "0.73": [512.0, 704.0],
    "0.75": [576.0, 768.0],
    "0.78": [448.0, 576.0],
    "0.8": [512.0, 640.0],
    "0.82": [576.0, 704.0],
    "0.83": [640.0, 768.0],
    "0.86": [384.0, 448.0],
    "0.88": [448.0, 512.0],
    "0.89": [512.0, 576.0],
    "0.9": [576.0, 640.0],
    "0.91": [640.0, 704.0],
    "0.92": [704.0, 768.0],
    "1.0": [768.0, 768.0],
    "1.

In [None]:
import os
from PIL import Image

import cv2
import numpy as np
from PIL import Image

def downscale_image_by(image, max_size,x=64):
    """Resize an image so its longer side is ``max_size`` and both sides are multiples of ``x``.

    The longer side is set to ``max_size``, the other side is scaled by the
    same factor, and both are then floored to a multiple of ``x`` before the
    OpenCV resize, so the aspect ratio may change slightly.

    Args:
        image: A PIL image or anything ``np.array`` can turn into an image
            array.
        max_size: Target length of the longer side before flooring.
        x: Grid step both sides are floored to. Defaults to 64.

    Returns:
        A PIL image (never RGBA), or ``None`` when the target size collapses
        to zero or anything raised (the error is printed).
    """
    try:
        # Palette, alpha and other non-RGB PIL modes do not survive the NumPy
        # round trip as colour data (a "P" image becomes an array of palette
        # indices), so normalise PIL inputs before converting to an array.
        if isinstance(image, Image.Image) and image.mode not in ("RGB", "L"):
            image = image.convert("RGB")

        pixels = np.array(image)
        height, width = pixels.shape[:2]
        if width > height:
            new_width = max_size
            new_height = int(height * (max_size / width))
        else:
            new_height = max_size
            new_width = int(width * (max_size / height))

        # Snap both sides down to the grid.
        new_width = (new_width // x) * x
        new_height = (new_height // x) * x
        if new_width == 0 or new_height == 0:
            # Very elongated images floor to an empty target; fail explicitly.
            print(f"Error downscaling image: target size {new_width}x{new_height} is empty")
            return None

        # cv2.resize expects the target size as (width, height).
        resized = cv2.resize(pixels, (new_width, new_height), interpolation=cv2.INTER_AREA)
        result = Image.fromarray(resized)
        if result.mode == "RGBA":
            # Still reachable for plain 4-channel array inputs.
            result = result.convert("RGB")
        return result
    except Exception as e:
        print(f"Error downscaling image: {e}")
        return None


def process_images_in_directory(input_dir, output_dir, max_size=768):
    """Resize every image in ``input_dir`` and save the result as JPEG in ``output_dir``.

    Files are selected by extension (.png, .jpg, .jpeg, .bmp, .gif,
    case-insensitive). Each one is passed to ``downscale_image_by`` with a grid
    step of 64 and saved as ``<name>.jpg`` with quality 96. Inputs that differ
    only by extension map to the same output name, so the one processed later
    overwrites the earlier one.

    Args:
        input_dir: Folder to read images from.
        output_dir: Folder to write results to; created if missing.
        max_size: Target length of the longer side, forwarded to
            ``downscale_image_by``. Defaults to 768.

    Returns:
        None. Failures are printed and the affected file is skipped.
    """
    # Create the output folder if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    for filename in os.listdir(input_dir):
        # Only files that look like images are processed.
        if not filename.lower().endswith(image_extensions):
            continue
        try:
            input_path = os.path.join(input_dir, filename)
            print(f"Processing image: {input_path}")

            # Open inside a context manager so the file handle is released;
            # honour the caller's max_size instead of a hard-coded 768.
            with Image.open(input_path) as source:
                image = downscale_image_by(source, max_size, 64)
            if image is None:
                print(f"Skipping {filename} due to cropping error.")
                continue

            # Save the processed image under the same base name as JPEG.
            name, _ = os.path.splitext(filename)
            output_path = os.path.join(output_dir, f"{name}.jpg")
            image.save(output_path, quality=96)
            print(f"Saved processed image: {output_path}")

        except Exception as e:
            # Best effort: one broken file must not stop the whole batch.
            print(f"Error processing {filename}: {e}")
            continue

# Example usage
input_directory = "/Users/v.kulibaba/Desktop/1"  # Path to the folder with the source images
output_directory = "/Users/v.kulibaba/Desktop/2"  # Path to the folder for the processed images
process_images_in_directory(input_directory, output_directory)

Processing image: /Users/v.kulibaba/Desktop/1/cropped_image.png
Saved processed image: /Users/v.kulibaba/Desktop/2/cropped_image.png
Processing image: /Users/v.kulibaba/Desktop/1/gb_10_12_11.jpg
Saved processed image: /Users/v.kulibaba/Desktop/2/gb_10_12_11.jpg
Processing image: /Users/v.kulibaba/Desktop/1/gb_10_12_10.jpg
Saved processed image: /Users/v.kulibaba/Desktop/2/gb_10_12_10.jpg
Processing image: /Users/v.kulibaba/Desktop/1/gb_10_12_12.jpg
Saved processed image: /Users/v.kulibaba/Desktop/2/gb_10_12_12.jpg
Processing image: /Users/v.kulibaba/Desktop/1/gb_0_10000_1.jpg
Saved processed image: /Users/v.kulibaba/Desktop/2/gb_0_10000_1.jpg
Processing image: /Users/v.kulibaba/Desktop/1/gb_0_10000_9948.jpg
Saved processed image: /Users/v.kulibaba/Desktop/2/gb_0_10000_9948.jpg
