In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import joblib
from torchvision.models import swin_v2_s
import albumentations as A
from albumentations.pytorch import ToTensorV2
from ultralytics import YOLO


# YOLO model run on detection crops by the segmentation cells below.
model_seg = YOLO(model="finalseg.pt")
# YOLO model run on the full image to find boxes to crop.
model_det = YOLO(model="best.pt")
# Number of image files per chunk in the batched cells.
batch_size = 500

# Estimator whose .predict() produces the final class in the "Hybrid RF" cells.
# NOTE(review): joblib.load unpickles the file - only load artifacts from a trusted source.
best_rf = joblib.load('best_rf_model.pkl')
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Class index -> readable name (not referenced by the visible notebook code).
class_labels = {1: 'good', 0: 'bad'}

# Preprocessing for the Swin classifier: resize to 224x224, normalise each of the
# three channels (these are the standard ImageNet statistics) and convert the
# HWC array into a CHW torch tensor.
test_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

def get_swin_model(checkpoint_path='best_swin.pth'):
    """Build the two-class Swin V2-S classifier and load its fine-tuned weights.

    Args:
        checkpoint_path: File read with ``torch.load``. It must contain a dict
            whose ``'model_state_dict'`` entry is the state dict of the
            re-headed network. Defaults to ``'best_swin.pth'``.

    Returns:
        The model, moved to the module-level ``device`` and switched to eval mode.
    """
    # `weights=None` is the supported way to request an uninitialised network;
    # torchvision has deprecated the older `pretrained=` keyword.
    model = swin_v2_s(weights=None)
    # Swap the stock classification head for a 2-output linear layer.
    model.head = nn.Linear(model.head.in_features, 2)
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model

# Build the Swin classifier once; later cells pass it to predict_swin().
swin_model = get_swin_model()

def tensor_from_images(image):
    """Turn one image into a single-item batch tensor on ``device``.

    Args:
        image: A PIL image or anything ``np.array`` can convert to an HWC array.
            PIL images in a mode other than RGB (grayscale, RGBA, palette, ...)
            are converted to RGB first, because ``test_transform`` normalises
            with three-channel mean/std values.

    Returns:
        The output of ``test_transform`` with a leading batch dimension added,
        moved to the module-level ``device``.
    """
    # Duck-typed PIL check: only objects exposing both `.mode` and `.convert`
    # are converted, so numpy arrays pass through unchanged.
    if hasattr(image, 'convert') and getattr(image, 'mode', 'RGB') != 'RGB':
        image = image.convert('RGB')
    pixels = np.array(image)
    tensor = test_transform(image=pixels)['image']
    return tensor.unsqueeze(0).to(device)

def predict_swin(image, model):
    """Return the index of the highest-scoring class `model` assigns to `image`.

    The image is preprocessed with ``tensor_from_images`` and the forward pass
    runs with gradient tracking disabled.
    """
    batch = tensor_from_images(image)
    with torch.no_grad():
        logits = model(batch)
        top = torch.max(logits, dim=1)
    # `top` is a (values, indices) pair; the batch holds a single image.
    return top.indices.item()



In [3]:
# Folder with the images to score. Set the TEST_IMAGES_DIR environment variable to
# run the notebook on another machine; the original path remains the default.
images_folder = os.environ.get('TEST_IMAGES_DIR', 'D:/hackKalin/test_images_submit')

# YOLO detection + SWIN

In [None]:
# Pipeline "YOLO detection + Swin": one CSV row (Name, Bbox, Class) per detected box.
DET_IOU = 0.5        # IoU threshold handed to the detector
DET_CONF = 0.52      # confidence threshold handed to the detector
MIN_CROP_SIDE = 180  # a crop narrower AND shorter than this is labelled 0 without running Swin

# Rows are gathered in a plain list and converted to a DataFrame once at the end;
# calling pd.concat for every detection copies the whole frame each time.
result_rows = []

for image_name in os.listdir(images_folder):
    if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_path = os.path.join(images_folder, image_name)

    # The context manager releases the file handle; convert('RGB') guarantees the
    # three channels that the Swin preprocessing (3-value mean/std) needs.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert('RGB')
    image_width, image_height = image.size
    print(f"Обрабатывается изображение: {image_name}, размер: {image_width}x{image_height}")

    # Detection
    detection_results = model_det.predict(image, iou=DET_IOU, conf=DET_CONF)
    animal_count = sum(len(result.boxes) for result in detection_results)
    print(f"Количество объектов животных на {image_name}: {animal_count}")

    # Crop every detected box
    for result in detection_results:
        for i, box in enumerate(result.boxes):
            # Integer pixel corners, clamped to the image bounds.
            x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(image_width, x_max)
            y_max = min(image_height, y_max)

            cropped_image = image.crop((x_min, y_min, x_max, y_max))
            cropped_width, cropped_height = cropped_image.size
            print(f"Размер обрезанного изображения для объекта {i+1}: {cropped_width}x{cropped_height}")

            # Bbox column: centre x, centre y, width, height as fractions of the image size.
            center_x = (x_min + x_max) / 2 / image_width
            center_y = (y_min + y_max) / 2 / image_height
            norm_width = (x_max - x_min) / image_width
            norm_height = (y_max - y_min) / image_height
            bbox_str = f"{center_x:.5f},{center_y:.5f},{norm_width:.5f},{norm_height:.5f}"

            is_small = cropped_width < MIN_CROP_SIDE and cropped_height < MIN_CROP_SIDE
            # A zero-area crop cannot be resized by the classifier; label it 0 instead of crashing.
            is_degenerate = cropped_width == 0 or cropped_height == 0
            if is_small or is_degenerate:
                pred_det_class_encoded = 0
            else:
                swin_predicted_class = predict_swin(cropped_image, swin_model)
                pred_det_class_encoded = 1 if swin_predicted_class == 1 else 0

            result_rows.append({
                "Name": image_name,
                "Bbox": bbox_str,
                "Class": pred_det_class_encoded,
            })

# Passing `columns` keeps the CSV header even when nothing was detected.
df_results = pd.DataFrame(result_rows, columns=["Name", "Bbox", "Class"])
df_results.to_csv('predictions.csv', index=False)
print("Предсказания сохранены в файл predictions.csv")


Обрабатывается изображение: 1746607.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 54.0ms
Speed: 2.0ms preprocess, 54.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746607.jpg: 1
Размер обрезанного изображения для объекта 1: 1381x1043
Обрабатывается изображение: 1746821.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 1 animal, 45.0ms
Speed: 1.0ms preprocess, 45.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746821.jpg: 2
Размер обрезанного изображения для объекта 1: 132x117
Размер обрезанного изображения для объекта 2: 168x363
Обрабатывается изображение: 1747740.jpg, размер: 1920x1440

0: 480x640 1 animal, 40.0ms
Speed: 2.0ms preprocess, 40.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1747740.jpg: 1
Размер обрезанного изображения для объекта 1: 104x142
Обрабатывается изображение: 1747762.jpg, размер: 1920x1440

0: 480

In [8]:
# Variant of the "YOLO detection + Swin" pipeline that takes the normalised box
# straight from the detector (box.xywhn) instead of recomputing it from pixels.
DET_IOU = 0.5        # IoU threshold handed to the detector
DET_CONF = 0.52      # confidence threshold handed to the detector
MIN_CROP_SIDE = 180  # a crop narrower AND shorter than this is labelled 0 without running Swin

# This cell starts from an empty result list. It previously appended to whatever
# `df_results` an earlier cell had left behind, which duplicated rows in
# predictions.csv (and failed on a fresh kernel if no earlier cell had run).
result_rows = []

for image_name in os.listdir(images_folder):
    if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_path = os.path.join(images_folder, image_name)

    # The context manager releases the file handle; convert('RGB') guarantees the
    # three channels that the Swin preprocessing (3-value mean/std) needs.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert('RGB')
    image_width, image_height = image.size
    print(f"Обрабатывается изображение: {image_name}, размер: {image_width}x{image_height}")

    # Detection
    detection_results = model_det.predict(image, iou=DET_IOU, conf=DET_CONF)
    animal_count = sum(len(result.boxes) for result in detection_results)
    print(f"Количество объектов животных на {image_name}: {animal_count}")

    # Crop every detected box
    for result in detection_results:
        for i, box in enumerate(result.boxes):
            # Centre/size as fractions of the image; scaled back to pixels for the crop.
            center_x, center_y, norm_width, norm_height = box.xywhn[0]

            x_min = max(0, int((center_x - norm_width / 2) * image_width))
            y_min = max(0, int((center_y - norm_height / 2) * image_height))
            x_max = min(image_width, int((center_x + norm_width / 2) * image_width))
            y_max = min(image_height, int((center_y + norm_height / 2) * image_height))

            cropped_image = image.crop((x_min, y_min, x_max, y_max))
            cropped_width, cropped_height = cropped_image.size
            print(f"Размер обрезанного изображения для объекта {i+1}: {cropped_width}x{cropped_height}")

            bbox_str = f"{center_x:.5f},{center_y:.5f},{norm_width:.5f},{norm_height:.5f}"

            is_small = cropped_width < MIN_CROP_SIDE and cropped_height < MIN_CROP_SIDE
            # A zero-area crop cannot be resized by the classifier; label it 0 instead of crashing.
            is_degenerate = cropped_width == 0 or cropped_height == 0
            if is_small or is_degenerate:
                pred_det_class_encoded = 0
            else:
                swin_predicted_class = predict_swin(cropped_image, swin_model)
                pred_det_class_encoded = 1 if swin_predicted_class == 1 else 0

            result_rows.append({
                "Name": image_name,
                "Bbox": bbox_str,
                "Class": pred_det_class_encoded,
            })

# Passing `columns` keeps the CSV header even when nothing was detected.
df_results = pd.DataFrame(result_rows, columns=["Name", "Bbox", "Class"])
df_results.to_csv('predictions.csv', index=False)
print("Предсказания сохранены в файл predictions.csv")


Обрабатывается изображение: 1746607.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 257.2ms
Speed: 2.0ms preprocess, 257.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746607.jpg: 1
Размер обрезанного изображения для объекта 1: 1381x1043
Обрабатывается изображение: 1746821.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 1 animal, 33.0ms
Speed: 1.0ms preprocess, 33.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746821.jpg: 2
Размер обрезанного изображения для объекта 1: 132x117
Размер обрезанного изображения для объекта 2: 168x363
Обрабатывается изображение: 1747740.jpg, размер: 1920x1440

0: 480x640 1 animal, 35.0ms
Speed: 3.0ms preprocess, 35.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1747740.jpg: 1
Размер обрезанного изображения для объекта 1: 104x142
Обрабатывается изображение: 1747762.jpg, размер: 1920x1440

0: 4

# YOLO detection + YOLO segment

In [None]:
# Pipeline "YOLO detection + YOLO segmentation": a detection is labelled good (1)
# when the segmenter finds enough body parts in its crop, otherwise bad (0).
DET_IOU = 0.5        # IoU threshold handed to the detector
DET_CONF = 0.52      # confidence threshold handed to the detector
SEG_IOU = 0.5        # IoU threshold handed to the segmenter
SEG_CONF = 0.3       # confidence threshold handed to the segmenter
MIN_CROP_SIDE = 180  # a crop narrower AND shorter than this is labelled bad without segmentation

# Rows are gathered in a plain list and converted to a DataFrame once at the end;
# calling pd.concat for every detection copies the whole frame each time.
result_rows = []

for image_name in os.listdir(images_folder):
    if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_path = os.path.join(images_folder, image_name)

    # The context manager releases the file handle; convert('RGB') makes every
    # crop a three-channel array regardless of the file's colour mode.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert('RGB')
    image_width, image_height = image.size
    print(f"Обрабатывается изображение: {image_name}, размер: {image_width}x{image_height}")

    # Detection
    detection_results = model_det.predict(image, iou=DET_IOU, conf=DET_CONF)
    animal_count = sum(len(result.boxes) for result in detection_results)
    print(f"Количество объектов животных на {image_name}: {animal_count}")

    # Crop every detected box
    for result in detection_results:
        for i, box in enumerate(result.boxes):
            # Integer pixel corners, clamped to the image bounds.
            x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(image_width, x_max)
            y_max = min(image_height, y_max)

            cropped_image = image.crop((x_min, y_min, x_max, y_max))
            cropped_image_np = np.array(cropped_image)

            # numpy shape is (rows, cols, ...) i.e. (height, width, ...).
            cropped_height, cropped_width = cropped_image_np.shape[:2]
            print(f"Размер обрезанного изображения для объекта {i+1}: {cropped_width}x{cropped_height}")

            # Bbox column: centre x, centre y, width, height as fractions of the image size.
            center_x = (x_min + x_max) / 2 / image_width
            center_y = (y_min + y_max) / 2 / image_height
            norm_width = (x_max - x_min) / image_width
            norm_height = (y_max - y_min) / image_height
            bbox_str = f"{center_x:.5f},{center_y:.5f},{norm_width:.5f},{norm_height:.5f}"

            is_small = cropped_width < MIN_CROP_SIDE and cropped_height < MIN_CROP_SIDE
            # A zero-area crop has nothing to segment; label it bad instead of crashing.
            is_degenerate = cropped_width == 0 or cropped_height == 0
            if is_small or is_degenerate:
                pred_seg_class = "bad"
            else:
                # Segmentation
                # NOTE(review): Ultralytics documents numpy inputs as BGR, while this
                # array comes from an RGB PIL image - confirm the channel order matches
                # how the segmenter was validated before changing it.
                segmentation_results = model_seg.predict(source=cropped_image_np, iou=SEG_IOU, conf=SEG_CONF)

                # Number of detected instances per body-part class.
                class_counts = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                for segment in segmentation_results:
                    for label in segment.boxes.cls:
                        class_counts[model_seg.names[int(label)]] += 1

                # Good = head and body present, plus either (tail and a leg)
                # or (no tail but at least two legs).
                has_head_and_body = class_counts["head"] >= 1 and class_counts["body"] >= 1
                tail_with_leg = class_counts["tail"] >= 1 and class_counts["leg"] >= 1
                legs_without_tail = class_counts["tail"] == 0 and class_counts["leg"] >= 2
                if has_head_and_body and (tail_with_leg or legs_without_tail):
                    pred_seg_class = "good"
                else:
                    pred_seg_class = "bad"

            pred_seg_class_encoded = 1 if pred_seg_class == "good" else 0

            result_rows.append({
                "Name": image_name,
                "Bbox": bbox_str,
                "Class": pred_seg_class_encoded,
            })

# Passing `columns` keeps the CSV header even when nothing was detected.
df_results = pd.DataFrame(result_rows, columns=["Name", "Bbox", "Class"])
df_results.to_csv('predictions.csv', index=False)


Обрабатывается изображение: 1746607.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 207.0ms
Speed: 2.0ms preprocess, 207.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746607.jpg: 1
Размер обрезанного изображения для объекта 1: 1381x1043

0: 512x640 1 body, 1 head, 35.0ms
Speed: 1.0ms preprocess, 35.0ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 640)
Обрабатывается изображение: 1746821.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 1 animal, 47.0ms
Speed: 2.0ms preprocess, 47.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746821.jpg: 2
Размер обрезанного изображения для объекта 1: 132x117
Размер обрезанного изображения для объекта 2: 168x363

0: 640x320 1 head, 36.0ms
Speed: 1.0ms preprocess, 36.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 320)
Обрабатывается изображение: 1747740.jpg, размер: 1920x1440

0: 480x640 1 animal, 40.0ms
Speed:

# Hybrid RF

In [None]:
# Pipeline "Hybrid RF": per detection, combine crop size, the segmentation rule,
# the Swin prediction and body-part counts into a feature row scored by best_rf.
DET_IOU = 0.5        # IoU threshold handed to the detector
DET_CONF = 0.52      # confidence threshold handed to the detector
SEG_IOU = 0.5        # IoU threshold handed to the segmenter
SEG_CONF = 0.3       # confidence threshold handed to the segmenter
MIN_CROP_SIDE = 180  # a crop narrower AND shorter than this skips both models

RESULT_COLUMNS = ["photo_name", "orig_width", "orig_height", "crop_width", "crop_height",
                  "pred_seg_class", "pred_det_class", "is_head", "is_body", "is_legs",
                  "is_tail", "area_head", "area_body", "area_legs", "area_tail", "Bbox"]
# Columns recorded for inspection only; they are removed before prediction.
NON_FEATURE_COLUMNS = ['photo_name', 'orig_width', 'orig_height', 'area_head', 'area_body',
                       'area_legs', 'area_tail', 'Bbox']

# Rows are gathered in a plain list and converted to a DataFrame once at the end;
# calling pd.concat for every detection copies the whole frame each time.
feature_rows = []

for image_name in os.listdir(images_folder):
    if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_path = os.path.join(images_folder, image_name)

    # The context manager releases the file handle; convert('RGB') guarantees the
    # three channels that the Swin preprocessing (3-value mean/std) needs.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert('RGB')
    image_width, image_height = image.size
    print(f"Обрабатывается изображение: {image_name}, размер: {image_width}x{image_height}")

    # Detection
    detection_results = model_det.predict(image, iou=DET_IOU, conf=DET_CONF)
    animal_count = sum(len(result.boxes) for result in detection_results)
    print(f"Количество объектов животных на {image_name}: {animal_count}")

    # Crop every detected box
    for result in detection_results:
        for i, box in enumerate(result.boxes):
            # Integer pixel corners, clamped to the image bounds.
            x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(image_width, x_max)
            y_max = min(image_height, y_max)

            cropped_image = image.crop((x_min, y_min, x_max, y_max))
            cropped_image_np = np.array(cropped_image)

            # numpy shape is (rows, cols, ...) i.e. (height, width, ...).
            cropped_height, cropped_width = cropped_image_np.shape[:2]
            print(f"Размер обрезанного изображения для объекта {i+1}: {cropped_width}x{cropped_height}")

            # Bbox column: centre x, centre y, width, height as fractions of the image size.
            center_x = (x_min + x_max) / 2 / image_width
            center_y = (y_min + y_max) / 2 / image_height
            norm_width = (x_max - x_min) / image_width
            norm_height = (y_max - y_min) / image_height
            bbox_str = f"{center_x:.5f},{center_y:.5f},{norm_width:.5f},{norm_height:.5f}"

            # Defaults used when the crop is too small for either model.
            class_counts = {"head": 0, "body": 0, "leg": 0, "tail": 0}
            class_areas = {"head": 0, "body": 0, "leg": 0, "tail": 0}
            pred_seg_class_encoded = 0
            pred_det_class_encoded = 0

            is_small = cropped_width < MIN_CROP_SIDE and cropped_height < MIN_CROP_SIDE
            # A zero-area crop cannot be processed; treat it like a too-small crop.
            is_degenerate = cropped_width == 0 or cropped_height == 0
            if not (is_small or is_degenerate):
                # Segmentation
                # NOTE(review): Ultralytics documents numpy inputs as BGR, while this
                # array comes from an RGB PIL image - confirm the channel order matches
                # what the random forest's training features were built with.
                segmentation_results = model_seg.predict(source=cropped_image_np, iou=SEG_IOU, conf=SEG_CONF)

                for segment in segmentation_results:
                    for j, label in enumerate(segment.boxes.cls):
                        class_name = model_seg.names[int(label)]
                        class_counts[class_name] += 1

                        # Area of the instance's bounding box (not of its mask), in crop pixels.
                        part_x_min, part_y_min, part_x_max, part_y_max = map(int, segment.boxes.xyxy[j])
                        class_areas[class_name] += (part_x_max - part_x_min) * (part_y_max - part_y_min)

                # Good = head and body present, plus either (tail and a leg)
                # or (no tail but at least two legs).
                has_head_and_body = class_counts["head"] >= 1 and class_counts["body"] >= 1
                tail_with_leg = class_counts["tail"] >= 1 and class_counts["leg"] >= 1
                legs_without_tail = class_counts["tail"] == 0 and class_counts["leg"] >= 2
                if has_head_and_body and (tail_with_leg or legs_without_tail):
                    pred_seg_class_encoded = 1

                # Swin classification of the same crop
                swin_predicted_class = predict_swin(cropped_image, swin_model)
                pred_det_class_encoded = 1 if swin_predicted_class == 1 else 0

            feature_rows.append({
                "photo_name": image_name,
                "orig_width": image_width,
                "orig_height": image_height,
                "crop_width": cropped_width,
                "crop_height": cropped_height,
                "pred_seg_class": pred_seg_class_encoded,
                "pred_det_class": pred_det_class_encoded,
                "is_head": class_counts["head"],
                "is_body": class_counts["body"],
                "is_legs": class_counts["leg"],
                "is_tail": class_counts["tail"],
                "area_head": class_areas["head"],
                "area_body": class_areas["body"],
                "area_legs": class_areas["leg"],
                "area_tail": class_areas["tail"],
                "Bbox": bbox_str
            })

df_results = pd.DataFrame(feature_rows, columns=RESULT_COLUMNS)
final_result = df_results.drop(columns=NON_FEATURE_COLUMNS)

if final_result.empty:
    # StandardScaler cannot be fitted on zero rows; write a header-only file instead.
    predictions = []
else:
    # NOTE(review): the scaler is fitted on the rows being scored, so the scaling
    # depends on this particular set of images. If best_rf was trained on features
    # scaled by a training-time scaler, load that fitted scaler and call
    # .transform() here instead - confirm against the training code.
    scaler = StandardScaler()
    final_result_scaled = scaler.fit_transform(final_result)
    predictions = best_rf.predict(final_result_scaled)

# NOTE(review): this file uses a lowercase 'class' header while the other
# pipelines in this notebook write 'Class' - confirm which one is expected.
output = pd.DataFrame({
    'Name': df_results['photo_name'],
    'Bbox': df_results['Bbox'],
    'class': predictions
})

output.to_csv('predictions.csv', index=False)
print("Предсказания сохранены в файл predictions.csv")




Обрабатывается изображение: 1001958.jpg, размер: 1920x1440

0: 480x640 3 part_animals, 1 animal, 73.5ms
Speed: 3.0ms preprocess, 73.5ms inference, 426.2ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1001958.jpg: 4
Размер обрезанного изображения для объекта 1: 591x509

0: 576x640 1 body, 1 head, 4 legs, 32.0ms
Speed: 2.0ms preprocess, 32.0ms inference, 33.0ms postprocess per image at shape (1, 3, 576, 640)
Размер обрезанного изображения для объекта 2: 381x538

0: 640x480 1 body, 1 head, 3 legs, 193.7ms
Speed: 1.0ms preprocess, 193.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)
Размер обрезанного изображения для объекта 3: 495x270

0: 352x640 1 body, 98.0ms
Speed: 2.0ms preprocess, 98.0ms inference, 2.0ms postprocess per image at shape (1, 3, 352, 640)
Размер обрезанного изображения для объекта 4: 137x246

0: 640x384 1 head, 38.7ms
Speed: 1.0ms preprocess, 38.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)
Обр

In [4]:
# Batched "Hybrid RF" pipeline: the image list is split into chunks of
# `batch_size` files and each chunk is scored and saved to its own CSV.
DET_IOU = 0.5        # IoU threshold handed to the detector
DET_CONF = 0.52      # confidence threshold handed to the detector
SEG_IOU = 0.5        # IoU threshold handed to the segmenter
SEG_CONF = 0.3       # confidence threshold handed to the segmenter
MIN_CROP_SIDE = 180  # a crop narrower AND shorter than this skips both models

RESULT_COLUMNS = ["photo_name", "orig_width", "orig_height", "crop_width", "crop_height",
                  "pred_seg_class", "pred_det_class", "is_head", "is_body", "is_legs",
                  "is_tail", "area_head", "area_body", "area_legs", "area_tail", "Bbox"]
# Columns recorded for inspection only; they are removed before prediction.
NON_FEATURE_COLUMNS = ['photo_name', 'orig_width', 'orig_height', 'area_head', 'area_body',
                       'area_legs', 'area_tail', 'Bbox']

image_files = [f for f in os.listdir(images_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

# Split the file list into consecutive chunks of `batch_size` names.
batches = [image_files[i:i + batch_size] for i in range(0, len(image_files), batch_size)]

for batch_num, batch in enumerate(batches):
    print(f"Обработка батча {batch_num + 1} из {len(batches)}")
    # Rows are gathered in a plain list and converted to a DataFrame once per batch;
    # calling pd.concat for every detection copies the whole frame each time.
    feature_rows = []

    for image_name in batch:
        image_path = os.path.join(images_folder, image_name)

        # The context manager releases the file handle; convert('RGB') guarantees the
        # three channels that the Swin preprocessing (3-value mean/std) needs.
        with Image.open(image_path) as opened_image:
            image = opened_image.convert('RGB')
        image_width, image_height = image.size
        print(f"Обрабатывается изображение: {image_name}, размер: {image_width}x{image_height}")

        # Detection
        detection_results = model_det.predict(image, iou=DET_IOU, conf=DET_CONF)
        animal_count = sum(len(result.boxes) for result in detection_results)
        print(f"Количество объектов животных на {image_name}: {animal_count}")

        # Crop every detected box
        for result in detection_results:
            for i, box in enumerate(result.boxes):
                # Integer pixel corners, clamped to the image bounds.
                x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
                x_min = max(0, x_min)
                y_min = max(0, y_min)
                x_max = min(image_width, x_max)
                y_max = min(image_height, y_max)

                cropped_image = image.crop((x_min, y_min, x_max, y_max))
                cropped_image_np = np.array(cropped_image)

                # numpy shape is (rows, cols, ...) i.e. (height, width, ...).
                cropped_height, cropped_width = cropped_image_np.shape[:2]
                print(f"Размер обрезанного изображения для объекта {i+1}: {cropped_width}x{cropped_height}")

                # Bbox column: centre x, centre y, width, height as fractions of the image size.
                center_x = (x_min + x_max) / 2 / image_width
                center_y = (y_min + y_max) / 2 / image_height
                norm_width = (x_max - x_min) / image_width
                norm_height = (y_max - y_min) / image_height
                bbox_str = f"{center_x:.5f},{center_y:.5f},{norm_width:.5f},{norm_height:.5f}"

                # Defaults used when the crop is too small for either model.
                class_counts = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                class_areas = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                pred_seg_class_encoded = 0
                pred_det_class_encoded = 0

                is_small = cropped_width < MIN_CROP_SIDE and cropped_height < MIN_CROP_SIDE
                # A zero-area crop cannot be processed; treat it like a too-small crop.
                is_degenerate = cropped_width == 0 or cropped_height == 0
                if not (is_small or is_degenerate):
                    # Segmentation
                    # NOTE(review): Ultralytics documents numpy inputs as BGR, while this
                    # array comes from an RGB PIL image - confirm the channel order matches
                    # what the random forest's training features were built with.
                    segmentation_results = model_seg.predict(source=cropped_image_np, iou=SEG_IOU, conf=SEG_CONF)

                    for segment in segmentation_results:
                        for j, label in enumerate(segment.boxes.cls):
                            class_name = model_seg.names[int(label)]
                            class_counts[class_name] += 1

                            # Area of the instance's bounding box (not of its mask), in crop pixels.
                            part_x_min, part_y_min, part_x_max, part_y_max = map(int, segment.boxes.xyxy[j])
                            class_areas[class_name] += (part_x_max - part_x_min) * (part_y_max - part_y_min)

                    # Good = head and body present, plus either (tail and a leg)
                    # or (no tail but at least two legs).
                    has_head_and_body = class_counts["head"] >= 1 and class_counts["body"] >= 1
                    tail_with_leg = class_counts["tail"] >= 1 and class_counts["leg"] >= 1
                    legs_without_tail = class_counts["tail"] == 0 and class_counts["leg"] >= 2
                    if has_head_and_body and (tail_with_leg or legs_without_tail):
                        pred_seg_class_encoded = 1

                    # Swin classification of the same crop
                    swin_predicted_class = predict_swin(cropped_image, swin_model)
                    pred_det_class_encoded = 1 if swin_predicted_class == 1 else 0

                feature_rows.append({
                    "photo_name": image_name,
                    "orig_width": image_width,
                    "orig_height": image_height,
                    "crop_width": cropped_width,
                    "crop_height": cropped_height,
                    "pred_seg_class": pred_seg_class_encoded,
                    "pred_det_class": pred_det_class_encoded,
                    "is_head": class_counts["head"],
                    "is_body": class_counts["body"],
                    "is_legs": class_counts["leg"],
                    "is_tail": class_counts["tail"],
                    "area_head": class_areas["head"],
                    "area_body": class_areas["body"],
                    "area_legs": class_areas["leg"],
                    "area_tail": class_areas["tail"],
                    "Bbox": bbox_str
                })

    # Score and save the rows of the current batch
    df_results = pd.DataFrame(feature_rows, columns=RESULT_COLUMNS)
    final_result = df_results.drop(columns=NON_FEATURE_COLUMNS)

    if final_result.empty:
        # StandardScaler cannot be fitted on zero rows; write a header-only file instead.
        predictions = []
    else:
        # NOTE(review): the scaler is refitted on every batch, so identical feature
        # values can be scaled differently from one batch to the next. If best_rf was
        # trained on features scaled by a training-time scaler, load that fitted
        # scaler and call .transform() here instead - confirm against the training code.
        scaler = StandardScaler()
        final_result_scaled = scaler.fit_transform(final_result)
        predictions = best_rf.predict(final_result_scaled)

    # NOTE(review): this file uses a lowercase 'class' header while the non-hybrid
    # pipelines in this notebook write 'Class' - confirm which one is expected.
    output = pd.DataFrame({
        'Name': df_results['photo_name'],
        'Bbox': df_results['Bbox'],
        'class': predictions
    })

    output.to_csv(f'predictions_batch_{batch_num + 1}.csv', index=False)
    print(f"Предсказания для батча {batch_num + 1} сохранены в predictions_batch_{batch_num + 1}.csv")

print("Обработка всех батчей завершена")

Обработка батча 1 из 7
Обрабатывается изображение: 1001958.jpg, размер: 1920x1440

0: 480x640 3 part_animals, 1 animal, 453.9ms
Speed: 98.0ms preprocess, 453.9ms inference, 584.4ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1001958.jpg: 4
Размер обрезанного изображения для объекта 1: 591x509

0: 576x640 1 body, 1 head, 4 legs, 47.1ms
Speed: 2.0ms preprocess, 47.1ms inference, 107.2ms postprocess per image at shape (1, 3, 576, 640)
Размер обрезанного изображения для объекта 2: 381x538

0: 640x480 1 body, 1 head, 3 legs, 211.2ms
Speed: 2.0ms preprocess, 211.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 480)
Размер обрезанного изображения для объекта 3: 495x270

0: 352x640 1 body, 115.1ms
Speed: 1.0ms preprocess, 115.1ms inference, 2.0ms postprocess per image at shape (1, 3, 352, 640)
Размер обрезанного изображения для объекта 4: 137x246

0: 640x384 1 head, 36.1ms
Speed: 1.0ms preprocess, 36.1ms inference, 3.0ms postprocess per image 

# с классификатором

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import joblib
from torchvision.models import swin_v2_s
import albumentations as A
from albumentations.pytorch import ToTensorV2
from ultralytics import YOLO

# YOLO model run on detection crops by the segmentation step below.
model_seg = YOLO(model="finalseg.pt")
# YOLO model run on the full image to find boxes to crop.
model_det = YOLO(model="best.pt")
# Number of image files per chunk in the batched cell.
batch_size = 500

# Estimator whose .predict() produces the final class.
# NOTE(review): joblib.load unpickles the file - only load artifacts from a trusted source.
best_rf = joblib.load('best_rf_model.pkl')
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Preprocessing shared by both Swin models: resize to 224x224, normalise each of
# the three channels (these are the standard ImageNet statistics) and convert the
# HWC array into a CHW torch tensor.
test_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

# Class index -> readable name (not referenced by the visible notebook code).
class_labels = {1: 'good', 0: 'bad'}
# Output index of the whole-image classifier -> label returned by classify_image_empty_normal().
empty_normal_labels = {0: 'empty', 1: 'normal'}

def get_classify_model(checkpoint_path='images_class.pth'):
    """Build the two-class Swin V2-S whole-image classifier and load its weights.

    Args:
        checkpoint_path: File read with ``torch.load``. It must contain a dict
            whose ``'model_state_dict'`` entry is the state dict of the
            re-headed network. Defaults to ``'images_class.pth'``.

    Returns:
        The model, moved to the module-level ``device`` and switched to eval mode.
    """
    # `weights=None` is the supported way to request an uninitialised network;
    # torchvision has deprecated the older `pretrained=` keyword.
    model = swin_v2_s(weights=None)
    # Swap the stock classification head for a 2-output linear layer.
    model.head = nn.Linear(model.head.in_features, 2)
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model

# Build the whole-image classifier once; classify_image_empty_normal() reads this global.
classify_model = get_classify_model()

def tensor_from_images(image):
    """Turn one image into a single-item batch tensor on ``device``.

    Args:
        image: A PIL image or anything ``np.array`` can convert to an HWC array.
            PIL images in a mode other than RGB (grayscale, RGBA, palette, ...)
            are converted to RGB first, because ``test_transform`` normalises
            with three-channel mean/std values.

    Returns:
        The output of ``test_transform`` with a leading batch dimension added,
        moved to the module-level ``device``.
    """
    # Duck-typed PIL check: only objects exposing both `.mode` and `.convert`
    # are converted, so numpy arrays pass through unchanged.
    if hasattr(image, 'convert') and getattr(image, 'mode', 'RGB') != 'RGB':
        image = image.convert('RGB')
    pixels = np.array(image)
    tensor = test_transform(image=pixels)['image']
    return tensor.unsqueeze(0).to(device)

def classify_image_empty_normal(image):
    """Label a whole image with ``classify_model`` via ``empty_normal_labels``.

    The image is preprocessed with ``tensor_from_images``; the forward pass runs
    with gradient tracking disabled and the top-scoring index is mapped to its
    label.
    """
    batch = tensor_from_images(image)
    with torch.no_grad():
        logits = classify_model(batch)
        top = torch.max(logits, dim=1)
    # `top` is a (values, indices) pair; the batch holds a single image.
    winner = top.indices.item()
    return empty_normal_labels[winner]

def get_swin_model(checkpoint_path='best_swin.pth'):
    """Build the two-class Swin V2-S crop classifier and load its fine-tuned weights.

    Args:
        checkpoint_path: File read with ``torch.load``. It must contain a dict
            whose ``'model_state_dict'`` entry is the state dict of the
            re-headed network. Defaults to ``'best_swin.pth'``.

    Returns:
        The model, moved to the module-level ``device`` and switched to eval mode.
    """
    # `weights=None` is the supported way to request an uninitialised network;
    # torchvision has deprecated the older `pretrained=` keyword.
    model = swin_v2_s(weights=None)
    # Swap the stock classification head for a 2-output linear layer.
    model.head = nn.Linear(model.head.in_features, 2)
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model

# Build the crop classifier once; the batched cell passes it to predict_swin().
swin_model = get_swin_model()

def predict_swin(image, model):
    """Return the index of the highest-scoring class `model` assigns to `image`.

    The image is preprocessed with ``tensor_from_images`` and the forward pass
    runs with gradient tracking disabled.
    """
    batch = tensor_from_images(image)
    with torch.no_grad():
        logits = model(batch)
        top = torch.max(logits, dim=1)
    # `top` is a (values, indices) pair; the batch holds a single image.
    return top.indices.item()

# Folder with the images to score. Set the TEST_IMAGES_DIR environment variable to
# run the notebook on another machine; the original path remains the default.
images_folder = os.environ.get('TEST_IMAGES_DIR', 'D:/hackKalin/test_images_submit')

In [None]:

# Batched hybrid pipeline with a whole-image pre-filter: images that
# classify_image_empty_normal() labels 'empty' are skipped; for the rest every
# detection becomes a feature row scored by best_rf.
DET_IOU = 0.5        # IoU threshold handed to the detector
DET_CONF = 0.52      # confidence threshold handed to the detector
SEG_IOU = 0.5        # IoU threshold handed to the segmenter
SEG_CONF = 0.3       # confidence threshold handed to the segmenter
MIN_CROP_SIDE = 180  # a crop narrower AND shorter than this skips both models

RESULT_COLUMNS = ["photo_name", "orig_width", "orig_height", "crop_width", "crop_height",
                  "pred_seg_class", "pred_det_class", "is_head", "is_body", "is_legs",
                  "is_tail", "area_head", "area_body", "area_legs", "area_tail", "Bbox"]
# Columns recorded for inspection only; they are removed before prediction.
NON_FEATURE_COLUMNS = ['photo_name', 'orig_width', 'orig_height', 'area_head', 'area_body',
                       'area_legs', 'area_tail', 'Bbox']

image_files = [f for f in os.listdir(images_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
# Split the file list into consecutive chunks of `batch_size` names.
batches = [image_files[i:i + batch_size] for i in range(0, len(image_files), batch_size)]

for batch_num, batch in enumerate(batches):
    print(f"Processing batch {batch_num + 1} of {len(batches)}")
    # Rows are gathered in a plain list and converted to a DataFrame once per batch;
    # calling pd.concat for every detection copies the whole frame each time.
    feature_rows = []

    for image_name in batch:
        image_path = os.path.join(images_folder, image_name)
        # The context manager releases the file handle once the RGB copy exists.
        with Image.open(image_path) as opened_image:
            image = opened_image.convert('RGB')
        image_width, image_height = image.size

        classification = classify_image_empty_normal(image)
        if classification == 'empty':
            print(f"{image_name} classified as empty. Skipping detection.")
            continue

        print(f"Processing {image_name} classified as normal.")

        detection_results = model_det.predict(image, iou=DET_IOU, conf=DET_CONF)

        for result in detection_results:
            for box in result.boxes:
                # Integer pixel corners, clamped to the image bounds like in the
                # other pipelines of this notebook.
                x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
                x_min = max(0, x_min)
                y_min = max(0, y_min)
                x_max = min(image_width, x_max)
                y_max = min(image_height, y_max)

                cropped_image = image.crop((x_min, y_min, x_max, y_max))
                cropped_image_np = np.array(cropped_image)
                # numpy shape is (rows, cols, ...) i.e. (height, width, ...). The two
                # names used to be unpacked in the opposite order, which fed transposed
                # crop_width / crop_height features to the random forest.
                cropped_height, cropped_width = cropped_image_np.shape[:2]

                # Bbox column: centre x, centre y, width, height as fractions of the image size.
                bbox_str = f"{(x_min + x_max) / 2 / image_width:.5f},{(y_min + y_max) / 2 / image_height:.5f}," \
                           f"{(x_max - x_min) / image_width:.5f},{(y_max - y_min) / image_height:.5f}"

                # Defaults used when the crop is too small for either model.
                class_counts = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                class_areas = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                pred_seg_class_encoded = 0
                pred_det_class_encoded = 0

                is_small = cropped_width < MIN_CROP_SIDE and cropped_height < MIN_CROP_SIDE
                # A zero-area crop cannot be processed; treat it like a too-small crop.
                is_degenerate = cropped_width == 0 or cropped_height == 0
                if not (is_small or is_degenerate):
                    # NOTE(review): Ultralytics documents numpy inputs as BGR, while this
                    # array comes from an RGB PIL image - confirm the channel order matches
                    # what the random forest's training features were built with.
                    segmentation_results = model_seg.predict(source=cropped_image_np, iou=SEG_IOU, conf=SEG_CONF)

                    for segment in segmentation_results:
                        for j, label in enumerate(segment.boxes.cls):
                            class_name = model_seg.names[int(label)]
                            class_counts[class_name] += 1
                            # Area of the instance's bounding box (not of its mask), in crop pixels.
                            part_x_min, part_y_min, part_x_max, part_y_max = map(int, segment.boxes.xyxy[j])
                            class_areas[class_name] += (part_x_max - part_x_min) * (part_y_max - part_y_min)

                    # Good = head and body present plus at least one tail or leg.
                    # NOTE(review): this rule is looser than the one in the other hybrid
                    # cells (tail and leg, or two legs without a tail) - confirm it is intended.
                    if (class_counts["head"] >= 1 and class_counts["body"] >= 1 and
                            (class_counts["tail"] >= 1 or class_counts["leg"] >= 1)):
                        pred_seg_class_encoded = 1

                    # Swin classification of the same crop
                    swin_predicted_class = predict_swin(cropped_image, swin_model)
                    pred_det_class_encoded = 1 if swin_predicted_class == 1 else 0

                feature_rows.append({
                    "photo_name": image_name,
                    "orig_width": image_width,
                    "orig_height": image_height,
                    "crop_width": cropped_width,
                    "crop_height": cropped_height,
                    "pred_seg_class": pred_seg_class_encoded,
                    "pred_det_class": pred_det_class_encoded,
                    "is_head": class_counts["head"],
                    "is_body": class_counts["body"],
                    "is_legs": class_counts["leg"],
                    "is_tail": class_counts["tail"],
                    # The accumulated areas used to be computed and then discarded
                    # (the columns were always 0); they are diagnostic only and are
                    # dropped before prediction.
                    "area_head": class_areas["head"],
                    "area_body": class_areas["body"],
                    "area_legs": class_areas["leg"],
                    "area_tail": class_areas["tail"],
                    "Bbox": bbox_str
                })

    df_results = pd.DataFrame(feature_rows, columns=RESULT_COLUMNS)
    final_result = df_results.drop(columns=NON_FEATURE_COLUMNS)

    if final_result.empty:
        # Every image was 'empty' or had no detections: StandardScaler cannot be
        # fitted on zero rows, so write a header-only file instead of crashing.
        predictions = []
    else:
        # NOTE(review): the scaler is refitted on every batch, so identical feature
        # values can be scaled differently from one batch to the next. If best_rf was
        # trained on features scaled by a training-time scaler, load that fitted
        # scaler and call .transform() here instead - confirm against the training code.
        scaler = StandardScaler()
        final_result_scaled = scaler.fit_transform(final_result)
        predictions = best_rf.predict(final_result_scaled)

    # NOTE(review): this file uses a lowercase 'class' header while the non-hybrid
    # pipelines in this notebook write 'Class' - confirm which one is expected.
    output = pd.DataFrame({
        'Name': df_results['photo_name'],
        'Bbox': df_results['Bbox'],
        'class': predictions
    })

    output.to_csv(f'predictions_batch_{batch_num + 1}.csv', index=False)
    print(f"Predictions for batch {batch_num + 1} saved to predictions_batch_{batch_num + 1}.csv")

print("All batches processed.")
