In [None]:
import os
import cv2
import numpy as np
import shutil
import random
import tensorflow as tf
from PIL import Image
import tifffile as tiff
from imgaug import augmenters as iaa
from imgaug.augmenters import Sequential


In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# List the Drive root as a quick visual check of the mounted content.
!ls "/content/drive/My Drive"

Объявляем пути к файлам с исходными данными.

In [None]:
# Root folder on Google Drive that contains every dataset variant.
base_dir = '/content/drive/MyDrive/tiff/'
# Source TIFF images, grouped into sub-folders (tiff_3 ... tiff_6).
base_dir_tiff = os.path.join(base_dir, 'tif_data')
# Destination for the preprocessed PNG copies.
base_dir_new = os.path.join(base_dir, 'new_tif_data')
# Directories for the training, validation and test subsets.
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val', 'test')
)

Функция вывода информации о цветовом пространстве изображения

In [None]:
def get_color_space(image_path):
    """Return the ``mode`` attribute of the image stored at ``image_path``.

    The file is opened with ``Image.open`` inside a ``with`` block, so it is
    released as soon as the mode has been read.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        The value of the opened image's ``mode`` attribute.
    """
    with Image.open(image_path) as opened_image:
        mode = opened_image.mode
    return mode

# Example: report the colour mode of one sample TIFF file located under /content.
image_path = "/content/01_06_2023_3_1.tif"
color_space = get_color_space(image_path)
print("Цветовое пространство изображения:", color_space)

Функция вывода информации о размерах изображений

In [None]:
def get_size_image(path, folders):
    """Print the smallest, largest and mean image size for each sub-folder.

    Only files whose name ends with ``.tif`` are inspected. "Smallest" and
    "largest" are decided by pixel area (width * height); the reported mean is
    the mean pixel area, rounded to three decimals. When a folder holds no
    matching file, a "no images" message is printed for each statistic.

    Args:
        path: Directory that contains the sub-folders.
        folders: Iterable of sub-folder names to summarise.
    """
    def pixel_area(dimensions):
        # dimensions is the (width, height) tuple reported by Pillow.
        return dimensions[0] * dimensions[1]

    for folder in folders:
        print(f"Для папки {folder}:")
        folder_path = f'{path}/{folder}'

        # Gather the size of every matching image first, then summarise.
        collected_sizes = []
        for entry in os.listdir(folder_path):
            if not entry.endswith(".tif"):
                continue
            with Image.open(os.path.join(folder_path, entry)) as img:
                collected_sizes.append(img.size)

        if collected_sizes:
            # min()/max() return the first extreme element, matching a strict comparison scan.
            smallest_size = min(collected_sizes, key=pixel_area)
            largest_size = max(collected_sizes, key=pixel_area)
            average_size = sum(map(pixel_area, collected_sizes)) / len(collected_sizes)
            print(f"\tСамый маленький размер изображения: {smallest_size}")
            print(f"\tСамый большой размер изображения: {largest_size}")
            print(f"\tСредний размер изображений: {round(average_size, 3)}")
        else:
            print("\tНет изображений для определения самого маленького размера.")
            print("\tНет изображений для определения самого большого размера.")
            print("\tНет изображений для расчета среднего размера.")
        print()

In [None]:
# Sub-folders of base_dir_tiff whose image sizes are summarised.
folders = ['tiff_3', 'tiff_4', 'tiff_5', 'tiff_6']
get_size_image(base_dir_tiff, folders)

Функция расчета количества файлов в определенном диапазоне размеров

In [None]:
# Pixel-area buckets, largest first. Each label "start-end" covers start <= area < end.
_SIZE_RANGE_LABELS = (
    '215000-225000', '205000-215000', '195000-205000', '185000-195000', '175000-185000',
    '165000-175000', '155000-165000', '145000-155000', '135000-145000', '125000-135000',
    '115000-125000', '105000-115000', '95000-105000', '85000-95000', '75000-85000',
    '65000-75000', '55000-65000', '45000-55000', '35000-45000', '25000-35000',
    '15000-25000', '10000-15000', '5000-10000', '2500-5000', '1500-2500',
    '1000-1500', '500-1000', '250-500', '0-250',
)
# Parse the numeric bounds once instead of once per image.
_SIZE_RANGES = [(label, *map(int, label.split('-'))) for label in _SIZE_RANGE_LABELS]


def count_image_sizes(folder_path):
    """Print a pixel-area histogram for every sub-folder of ``folder_path``.

    For each sub-folder, every ``.tif``/``.tiff`` file is read with
    ``tiff.imread`` and its area (height * width) is assigned to one of the
    fixed buckets. The non-empty buckets are printed, followed by the five
    buckets with the highest counts.

    Entries of ``folder_path`` that are not directories are skipped, because
    listing them would raise ``NotADirectoryError``.

    NOTE(review): images whose area is 225000 pixels or more fall outside
    every bucket and are therefore not counted.

    Args:
        folder_path: Directory whose sub-folders contain the TIFF images.
    """
    for filename in os.listdir(folder_path):
        dir_name = os.path.join(folder_path, filename)
        if not os.path.isdir(dir_name):
            # A stray file next to the image folders must not abort the whole report.
            continue

        # Fresh counters for every sub-folder.
        sizes_count = {label: 0 for label, _, _ in _SIZE_RANGES}

        for dir_entry in os.listdir(dir_name):
            if not dir_entry.endswith(('.tiff', '.tif')):
                continue
            img = tiff.imread(os.path.join(dir_name, dir_entry))
            height, width = img.shape[:2]
            img_size = height * width

            for label, start, end in _SIZE_RANGES:
                if start <= img_size < end:
                    sizes_count[label] += 1
                    break

        print(f"Результаты подсчета размеров изображений в папке {dir_name}:")
        for size_range, count in sizes_count.items():
            if count > 0:
                print(f"Размер {size_range} пикселей: {count} изображений")

        valid_sizes = {size: count for size, count in sizes_count.items() if count > 0}

        # The sort is stable, so buckets with equal counts keep the largest-first order.
        top_5_sizes = sorted(valid_sizes.items(), key=lambda x: x[1], reverse=True)[:5]
        print(f"5 самых больших значений (не равных 0) в папке {filename}:")
        for size, count in top_5_sizes:
            print(f"{size}: {count} изображений")
        print()

In [None]:
# Print the size histogram for every sub-folder of base_dir_tiff.
count_image_sizes(base_dir_tiff)

Функция get_size(dir_name) выводит количество файлов в каждой вложенной папке каталога dir_name.

In [None]:
def get_size(dir_name):
    """Print the number of entries in every sub-folder of ``dir_name``.

    Entries of ``dir_name`` that are not directories are skipped; listing a
    regular file would raise ``NotADirectoryError``.

    Args:
        dir_name: Directory whose immediate sub-folders are counted.
    """
    for culture_folder_name in os.listdir(dir_name):
        culture_folder_path = os.path.join(dir_name, culture_folder_name)
        if not os.path.isdir(culture_folder_path):
            continue
        file_count = len(os.listdir(culture_folder_path))
        print(f'Количество файлов в папке {culture_folder_name}: {file_count}')

In [None]:
# Report how many files each sub-folder of base_dir_tiff holds.
get_size (base_dir_tiff)

In [None]:
# Image dimensions in pixels; processing_of_images resizes every image to this size.
img_width = 122
img_height = 122

Функция обработки изображений

In [None]:
def processing_of_images(image_path, output_path):
    """Normalise, resize and contrast-enhance one image, then save it as PNG.

    Pipeline:
      1. read the file without any depth/channel conversion;
      2. min-max stretch the values to 0..65535 and store them as 16-bit;
      3. resize to ``(img_width, img_height)`` with bicubic interpolation;
      4. apply CLAHE (clip limit 2.0, 4x4 tiles);
      5. save the result with Pillow as an uncompressed PNG.

    Args:
        image_path: Path of the source image.
        output_path: Path of the PNG file to write.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise OSError(f"cv2.imread could not read image: {image_path}")

    # Request a 16-bit result explicitly: without dtype the output keeps the
    # source depth, so an 8-bit input would saturate instead of being stretched.
    normalized_image = cv2.normalize(image, None, 0, 65535, cv2.NORM_MINMAX, dtype=cv2.CV_16U)
    resized_image = cv2.resize(normalized_image, (img_width, img_height), interpolation=cv2.INTER_CUBIC)

    # NOTE(review): CLAHE presumably requires a single-channel image here - confirm the inputs are grayscale.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    clahe_image = clahe.apply(resized_image)

    resized_image_pil = Image.fromarray(clahe_image)
    resized_image_pil.save(output_path, "PNG", compress_level=0, bits=16)


In [None]:
# Convert every .tif image under base_dir_tiff into a preprocessed PNG under
# base_dir_new, keeping the same sub-folder layout.
for culture_folder in os.listdir(base_dir_tiff):
    culture_folder_name = os.path.join(base_dir_tiff, culture_folder)
    if not os.path.isdir(culture_folder_name):
        # Skip stray files that sit next to the image folders.
        continue

    # The destination folder must exist before an image can be saved into it.
    output_folder = os.path.join(base_dir_new, culture_folder)
    os.makedirs(output_folder, exist_ok=True)

    for culture_image in os.listdir(culture_folder_name):
        if culture_image.endswith(".tif"):
            image_path = os.path.join(culture_folder_name, culture_image)
            # splitext drops only the final extension, so names containing
            # extra dots are not truncated (and cannot collide) as with split('.')[0].
            out = os.path.join(output_folder, os.path.splitext(culture_image)[0] + '.png')
            processing_of_images(image_path, out)


Разделение файлов на обучающую, проверочную и тестовую выборки.

In [None]:
source_folders = ['tiff_3', 'tiff_4', 'tiff_5', 'tiff_6']

train_ratio = 0.7
val_ratio = 0.15
# Informational only: the test subset receives whatever remains after train and val.
test_ratio = 0.15

# A fixed seed together with a sorted listing makes the assignment reproducible:
# re-running this cell on the same files sends each file to the same subset
# instead of copying it into a different one on top of the previous run.
split_seed = 42
split_rng = random.Random(split_seed)

total_files = {}
train_files = {}
val_files = {}
test_files = {}

for folder in source_folders:
    source_folder = os.path.join(base_dir_new, folder)

    # List once so the count and the file list can never disagree;
    # sort because os.listdir returns entries in arbitrary order.
    file_list = sorted(os.listdir(source_folder))
    total_files[folder] = len(file_list)
    split_rng.shuffle(file_list)

    train_count = int(train_ratio * total_files[folder])
    val_count = int(val_ratio * total_files[folder])
    test_count = total_files[folder] - train_count - val_count

    train_files[folder] = file_list[:train_count]
    val_files[folder] = file_list[train_count:train_count + val_count]
    test_files[folder] = file_list[train_count + val_count:]

    # Copy each subset into <subset dir>/<folder>, creating the folder if needed.
    for target_dir, subset_files in (
        (train_dir, train_files[folder]),
        (validation_dir, val_files[folder]),
        (test_dir, test_files[folder]),
    ):
        target_folder = os.path.join(target_dir, folder)
        os.makedirs(target_folder, exist_ok=True)
        for file in subset_files:
            shutil.copy(os.path.join(source_folder, file), os.path.join(target_folder, file))

Функция аугментации

In [None]:
def apply_transformations(path):
    """Write one augmented copy next to every original PNG in ``path``.

    Files whose name contains ``"augmented"`` are ignored, so re-running the
    function does not augment earlier results. Each image gets a random affine
    transform (rotation within +/-25 degrees, shift within +/-5 % per axis)
    followed by a random perspective transform, and is stored as
    ``<original stem>_augmented.png`` in the same folder.

    Images are read with ``cv2.IMREAD_UNCHANGED`` so the augmented copy keeps
    the bit depth and channel count of its source; the default flag would
    convert every file to 8-bit 3-channel BGR.

    Args:
        path: Folder containing the PNG images to augment.

    Raises:
        OSError: If OpenCV cannot read one of the PNG files.
    """
    image_files = [f for f in os.listdir(path) if f.endswith(".png") and "augmented" not in f]
    aug = Sequential([
        iaa.Affine(rotate=(-25, 25), translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)}),
        iaa.PerspectiveTransform(scale=(0.01, 0.10))
        ])
    for filename in image_files:
        source_path = os.path.join(path, filename)
        image = cv2.imread(source_path, cv2.IMREAD_UNCHANGED)
        if image is None:
            # cv2.imread signals a missing or undecodable file by returning None.
            raise OSError(f"cv2.imread could not read image: {source_path}")
        augmented_image = aug.augment_image(image)
        new_image_path = f'{os.path.splitext(filename)[0]}_augmented.png'
        cv2.imwrite(os.path.join(path, new_image_path), augmented_image)

In [None]:
# Augment every folder of the training subset.
path_tiff_folders = ['tiff_3', 'tiff_4', 'tiff_5', 'tiff_6']
for folder_name in path_tiff_folders:
    # Reuse train_dir instead of repeating the literal path, so the two cannot drift apart.
    folder_path = os.path.join(train_dir, folder_name)
    # isdir() is already False for a missing path, so no separate exists() check is needed.
    if os.path.isdir(folder_path):
        apply_transformations(folder_path)
    else:
        print(f"Папка {folder_name} не найдена или не является директорией.")


Создание директорий для подмножеств

In [None]:
# Create the working directories work_1 ... work_4 under base_dir (one per subset).
for i in range(1, 5):
    work_dir = os.path.join(base_dir, f'work_{i}')
    os.makedirs(work_dir, exist_ok=True)

Функция копирования файлов целевых классов

In [None]:
def copy_files(base_dir, data_type, tiff_num, current_dir):
    """Copy one ``tiff_<tiff_num>`` folder of a subset into ``current_dir``.

    Also makes sure ``current_dir/tiff_all`` exists. The copy merges into an
    already existing destination (``dirs_exist_ok=True``, Python 3.8+), so the
    function can be called again without raising ``FileExistsError``.

    Args:
        base_dir: Root directory that contains the ``data_type`` folders.
        data_type: Subset folder name, e.g. ``'train'``, ``'val'`` or ``'test'``.
        tiff_num: Suffix of the folder to copy (``tiff_<tiff_num>``).
        current_dir: Destination directory for this subset.
    """
    tiff_all_dir = os.path.join(current_dir, 'tiff_all')
    os.makedirs(tiff_all_dir, exist_ok=True)

    source_dir = os.path.join(base_dir, data_type, f'tiff_{tiff_num}')
    target_dir = os.path.join(current_dir, f'tiff_{tiff_num}')
    shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)


Функция копирования файлов нецелевых классов

In [None]:
def select_and_copy_files(base_dir, data_type, nums, current_dir, fraction=0.33):
    """Copy a random share of the files from several folders into ``tiff_all``.

    The files of every ``tiff_<num>`` folder listed in ``nums`` are pooled,
    ``int(fraction * total)`` of them are drawn at random without replacement,
    and the chosen files are copied into ``current_dir/tiff_all``.

    NOTE(review): the destination name is the bare file name, so two selected
    files with the same name from different folders overwrite each other.

    Args:
        base_dir: Root directory that contains the ``data_type`` folders.
        data_type: Subset folder name, e.g. ``'train'``, ``'val'`` or ``'test'``.
        nums: Suffixes of the source folders (``tiff_<num>``).
        current_dir: Destination directory for this subset.
        fraction: Share of the pooled files to copy, between 0 and 1.
            Defaults to 0.33, the value that was previously hard-coded.

    Raises:
        ValueError: If ``fraction`` is outside the range 0..1.
    """
    if not 0 <= fraction <= 1:
        raise ValueError(f"fraction must be between 0 and 1, got {fraction}")

    all_files = []
    for tiff_num in nums:
        class_dir = os.path.join(base_dir, data_type, f'tiff_{tiff_num}')
        all_files.extend(os.path.join(class_dir, file) for file in os.listdir(class_dir))

    # random.sample already returns the selection in random order, so no extra shuffle is needed.
    selected_files = random.sample(all_files, k=int(fraction * len(all_files)))

    tiff_all_dir = os.path.join(current_dir, 'tiff_all')
    os.makedirs(tiff_all_dir, exist_ok=True)

    for file_path in selected_files:
        shutil.copyfile(file_path, os.path.join(tiff_all_dir, os.path.basename(file_path)))


In [None]:
# For working directory work_<i>: tiff_nums[i] is the folder copied in full,
# nums_dict[i] lists the remaining folders that are sampled into tiff_all.
tiff_nums = {1:3, 2:4, 3:5, 4:6}
nums_dict = {1: ['4', '5', '6'], 2: ['3', '5', '6'], 3: ['3', '4', '6'], 4: ['3', '4', '5']}

for i, target_num in tiff_nums.items():
    work_dir = os.path.join(base_dir, f'work_{i}')
    other_nums = nums_dict[i]

    for data_type in ('train', 'test', 'val'):
        current_dir = os.path.join(work_dir, data_type)

        # First the fully copied folder, then the random sample from the others.
        copy_files(base_dir, data_type, target_num, current_dir)
        select_and_copy_files(base_dir, data_type, other_nums, current_dir)