In [2]:
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt
import ultralytics
from ultralytics import YOLO
from ultralytics.engine.results import Results
import dill
import os
import base64
from io import BytesIO
from math import ceil
import torch
import random
import textwrap
import io
from datetime import datetime
from urllib.parse import unquote
import time
import json
import pandas as pd

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import joblib
from torchvision.models import swin_v2_s
import albumentations as A
from albumentations.pytorch import ToTensorV2


images_folder = 'D:/hackKalin/test_data'

model_seg = YOLO(model="finalseg.pt")
model_det = YOLO(model="best.pt")


best_rf = joblib.load('best_rf_model.pkl')


device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_labels = {1: 'good', 0: 'bad'}

test_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

def get_swin_model():
    model = swin_v2_s(pretrained=False)
    model.head = nn.Linear(model.head.in_features, 2) 
    checkpoint = torch.load('best_swin.pth', map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model

swin_model = get_swin_model()

def tensor_from_images(image):
    image = test_transform(image=np.array(image))['image'].unsqueeze(0).to(device)
    return image

def predict_swin(image, model):
    tensor = tensor_from_images(image)
    with torch.no_grad():
        outputs = model(tensor)
        _, preds = torch.max(outputs, 1)
        return preds.item()

df_results = pd.DataFrame(columns=["photo_name", "orig_width", "orig_height", "crop_width", "crop_height", 
                                   "pred_seg_class", "pred_det_class", "pred_classif_class", "is_head", "is_body", "is_legs", 
                                   "is_tail", "area_head", "area_body", "area_legs", "area_tail", "Bbox"])

for image_name in os.listdir(images_folder):
    if image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        image_path = os.path.join(images_folder, image_name)
        
        image = Image.open(image_path)
        image_width, image_height = image.size
        print(f"Обрабатывается изображение: {image_name}, размер: {image_width}x{image_height}")

        # ДЕТЕКЦИЯ
        detection_results = model_det.predict(image, iou=0.5, conf=0.52)
        animal_count = sum(len(result.boxes) for result in detection_results)
        print(f"Количество объектов животных на {image_name}: {animal_count}")

        # КРОП ПО ДЕТЕКЦИИ
        for result in detection_results:
            for i, box in enumerate(result.boxes):
                x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
                x_min = max(0, x_min)
                y_min = max(0, y_min)
                x_max = min(image_width, x_max)
                y_max = min(image_height, y_max)
                
                # ВЫРЕЗАЕМ
                cropped_image = image.crop((x_min, y_min, x_max, y_max))
                cropped_image_np = np.array(cropped_image)
                
                cropped_height, cropped_width = cropped_image_np.shape[:2]
                print(f"Размер обрезанного изображения для объекта {i+1}: {cropped_width}x{cropped_height}")
                
                # Рассчитываем центр и размеры в нормализованной форме для Bbox
                center_x = (x_min + x_max) / 2 / image_width
                center_y = (y_min + y_max) / 2 / image_height
                norm_width = (x_max - x_min) / image_width
                norm_height = (y_max - y_min) / image_height
                bbox_str = f"{center_x:.5f},{center_y:.5f},{norm_width:.5f},{norm_height:.5f}"

                is_head, is_body, is_legs, is_tail = 0, 0, 0, 0
                area_head, area_body, area_legs, area_tail = 0, 0, 0, 0

                if cropped_width < 180 and cropped_height < 180:
                    pred_seg_class = "bad"
                else:
                    # СЕГМЕНТАЦИЯ
                    segmentation_results = model_seg.predict(source=cropped_image_np, iou=0.5, conf=0.3)

                    class_counts = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                    class_areas = {"head": 0, "body": 0, "leg": 0, "tail": 0}
                    
                    for segment in segmentation_results:
                        for j, label in enumerate(segment.boxes.cls):
                            class_name = model_seg.names[int(label)]
                            class_counts[class_name] += 1
                            
                            mask_x_min, mask_y_min, mask_x_max, mask_y_max = map(int, segment.boxes.xyxy[j])
                            mask_area = (mask_x_max - mask_x_min) * (mask_y_max - mask_y_min)
                            class_areas[class_name] += mask_area

                    head_count = class_counts["head"]
                    body_count = class_counts["body"]
                    leg_count = class_counts["leg"]
                    tail_count = class_counts["tail"]

                    area_head = class_areas["head"]
                    area_body = class_areas["body"]
                    area_legs = class_areas["leg"]
                    area_tail = class_areas["tail"]

                    if head_count >= 1 and body_count >= 1 and (tail_count >= 1 and leg_count >= 1):
                        pred_seg_class = "good"
                    elif head_count >= 1 and body_count >= 1 and (tail_count == 0 and leg_count >= 2):
                        pred_seg_class = "good"
                    else:
                        pred_seg_class = "bad"
                    
                    is_head, is_body, is_legs, is_tail = head_count, body_count, leg_count, tail_count

                pred_seg_class_encoded = 1 if pred_seg_class == "good" else 0

                if cropped_width < 180 and cropped_height < 180:
                    pred_det_class = "part_animal"
                else:
                    pred_det_class = model_det.names[int(box.cls[0])]
                    pred_det_class_encoded = 1 if pred_det_class == "animal" else 0

                if cropped_width < 180 and cropped_height < 180:
                    swin_predicted_class = "part_animal"
                else:
                    swin_predicted_class = predict_swin(cropped_image, swin_model)
                    pred_swin_encoded = 1 if swin_predicted_class == 1 else 0

                # Используем модель Swin для предсказания класса вместо YOLO
                swin_predicted_class = predict_swin(cropped_image, swin_model)
                pred_det_class_encoded = 1 if swin_predicted_class == 1 else 0

                new_row = pd.DataFrame([{
                    "photo_name": image_name,
                    "orig_width": image_width,
                    "orig_height": image_height,
                    "crop_width": cropped_width,
                    "crop_height": cropped_height,
                    "pred_seg_class": pred_seg_class_encoded,
                    "pred_det_class": pred_det_class_encoded,
                    "pred_classif_class": pred_swin_encoded,
                    "is_head": is_head,
                    "is_body": is_body,
                    "is_legs": is_legs,
                    "is_tail": is_tail,
                    "area_head": area_head,
                    "area_body": area_body,
                    "area_legs": area_legs,
                    "area_tail": area_tail,
                    "Bbox": bbox_str
                }])
                df_results = pd.concat([df_results, new_row], ignore_index=True)


Обрабатывается изображение: 1746607.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 46.0ms
Speed: 5.0ms preprocess, 46.0ms inference, 77.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746607.jpg: 1
Размер обрезанного изображения для объекта 1: 1381x1043

0: 512x640 1 body, 1 head, 52.0ms
Speed: 2.0ms preprocess, 52.0ms inference, 13.0ms postprocess per image at shape (1, 3, 512, 640)
Обрабатывается изображение: 1746821.jpg, размер: 1920x1440

0: 480x640 1 part_animal, 1 animal, 30.0ms
Speed: 2.0ms preprocess, 30.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)
Количество объектов животных на 1746821.jpg: 2
Размер обрезанного изображения для объекта 1: 132x117
Размер обрезанного изображения для объекта 2: 168x363

0: 640x320 1 head, 47.0ms
Speed: 1.0ms preprocess, 47.0ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 320)
Обрабатывается изображение: 1747740.jpg, размер: 1920x1440

0: 480x640 1 animal, 39.6ms
Speed:

ValueError: X has 9 features, but RandomForestClassifier is expecting 8 features as input.

In [6]:
df_results

Unnamed: 0,photo_name,orig_width,orig_height,crop_width,crop_height,pred_seg_class,pred_det_class,pred_classif_class,is_head,is_body,is_legs,is_tail,area_head,area_body,area_legs,area_tail,Bbox
0,1746607.jpg,1920,1440,1381,1043,0,0,0,1,1,0,0,1016862,1291634,0,0,"0.63776,0.63160,0.71927,0.72431"
1,1746821.jpg,1920,1440,132,117,0,1,0,0,0,0,0,0,0,0,0,"0.85313,0.46215,0.06875,0.08125"
2,1746821.jpg,1920,1440,168,363,0,0,0,1,0,0,0,35464,0,0,0,"0.95573,0.55035,0.08750,0.25208"
3,1747740.jpg,1920,1440,104,142,0,0,0,0,0,0,0,0,0,0,0,"0.54219,0.47708,0.05417,0.09861"
4,1747762.jpg,1920,1440,361,363,0,1,1,0,1,3,0,0,79952,55430,0,"0.35078,0.62951,0.18802,0.25208"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473,1997546.jpg,1920,1440,1195,793,1,1,1,2,1,3,0,143880,254176,113850,0,"0.34453,0.49618,0.62240,0.55069"
474,1997602.jpg,1920,1080,573,492,0,0,0,1,1,1,0,80496,74100,79310,0,"0.72161,0.22870,0.29844,0.45556"
475,1999067.jpg,1920,1440,1035,839,1,1,1,2,1,4,0,202312,272650,238437,0,"0.56276,0.51146,0.53906,0.58264"
476,1999067.jpg,1920,1440,192,807,0,0,0,0,0,0,0,0,0,0,0,"0.94948,0.57951,0.10000,0.56042"


In [7]:
df_to_concat = df_results.copy()
df_part = df_to_concat[:350]

In [8]:
df_test = df_to_concat[350:]
df_test

Unnamed: 0,photo_name,orig_width,orig_height,crop_width,crop_height,pred_seg_class,pred_det_class,pred_classif_class,is_head,is_body,is_legs,is_tail,area_head,area_body,area_legs,area_tail,Bbox
350,1939834.jpg,1920,1080,815,637,0,0,0,0,1,0,0,0,383898,0,0,"0.78724,0.67917,0.42448,0.58981"
351,1939834.jpg,1920,1080,915,446,0,0,0,0,1,0,0,0,319608,0,0,"0.33672,0.79352,0.47656,0.41296"
352,1939927.jpg,1920,1080,204,116,0,1,1,0,2,2,0,0,16300,1658,0,"0.22396,0.50000,0.10625,0.10741"
353,1940383.jpg,1920,1440,482,293,1,1,1,1,1,3,0,14076,45084,28551,0,"0.34635,0.61007,0.25104,0.20347"
354,1940383.jpg,1920,1440,502,288,1,1,1,1,1,5,0,14508,35190,53126,0,"0.71198,0.53819,0.26146,0.20000"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473,1997546.jpg,1920,1440,1195,793,1,1,1,2,1,3,0,143880,254176,113850,0,"0.34453,0.49618,0.62240,0.55069"
474,1997602.jpg,1920,1080,573,492,0,0,0,1,1,1,0,80496,74100,79310,0,"0.72161,0.22870,0.29844,0.45556"
475,1999067.jpg,1920,1440,1035,839,1,1,1,2,1,4,0,202312,272650,238437,0,"0.56276,0.51146,0.53906,0.58264"
476,1999067.jpg,1920,1440,192,807,0,0,0,0,0,0,0,0,0,0,0,"0.94948,0.57951,0.10000,0.56042"


In [9]:
df_part['label'] = [0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
                          1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                          0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,
                          1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 
                          1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
                          0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
                          0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_part['label'] = [0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,


In [11]:
df_part.to_csv('train_rf.csv', index=False)
df_test.to_csv('test_rf.csv', index=False)