In [None]:
import torch
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, Dataset,Subset, random_split
from sklearn.model_selection import KFold
import torchvision
from torchvision import datasets, models
from torchvision import transforms as T
import torchvision.transforms.functional as F
import torch.nn as nn
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import matplotlib.pyplot as plt
from IPython.display import display
import lightning as L
from lightning.pytorch import loggers as pl_loggers
from lightning.pytorch.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
import torchmetrics, argparse

from torchvision.datasets import ImageFolder

from PIL import Image, ImageOps
import os

import multiprocessing
num_workers = multiprocessing.cpu_count()
print(num_workers)
import timm
import wandb
import segmentation_models_pytorch as smp
from ultralytics import YOLO
import cv2

In [11]:
# Load the YOLOv8-nano weights used by the person-cropping cells below.
# NOTE: later cells rebind `model` to LangSAM / SAM objects; re-run this cell
# before re-running the cropping loops.
model = YOLO('yolov8n.pt')

In [12]:
# Source directory holding the test images, and the directory the crops go to
input_dir = "/root/signate_tecno/input/test/"
output_dir = "/root/signate_tecno/input/crop_test/"

# Full path of every file directly inside input_dir whose name ends with .jpg
image_files = [
    os.path.join(input_dir, file_name)
    for file_name in os.listdir(input_dir)
    if file_name.endswith('.jpg')
]


In [None]:
# Crop every test image around its most confident 'person' detection and save
# the result into output_dir under the original base name.
# Only the horizontal extent is cropped (box width plus padding); the full
# image height is always kept. Images without any person detection are saved
# unchanged.

# cv2.imwrite returns False instead of raising when the target directory is
# missing, so make sure it exists before anything is written.
os.makedirs(output_dir, exist_ok=True)

padding = 10  # extra pixels kept to the left and right of the detected box

for image_path in image_files:
    # Load the image first so unreadable files are skipped before inference
    # (cv2.imread returns None rather than raising on failure).
    img = cv2.imread(image_path)
    if img is None:
        print(f"skip (cannot read image): {image_path}")
        continue

    # Run detection
    results = model.predict(image_path, conf=0.5)
    detections = results[0]

    # Collect the box and confidence of every detection with class id 0 ('person')
    person_boxes = []
    confidences = []
    for result in detections.boxes:
        # .item() avoids NumPy's deprecated int() conversion of a 1-element array
        class_id = int(result.cls.item())
        if class_id == 0:  # 'person'
            person_boxes.append(result.xyxy.cpu().numpy()[0])
            confidences.append(float(result.conf.item()))

    if person_boxes:
        # Keep only the detection with the highest confidence
        max_conf_idx = int(np.argmax(confidences))
        x_min, _, x_max, _ = map(int, person_boxes[max_conf_idx])

        # Pad horizontally and clamp to the image; rows are left untouched
        width = img.shape[1]
        x_min = max(0, x_min - padding)
        x_max = min(width, x_max + padding)
        cropped_img = img[:, x_min:x_max]
    else:
        # No person detected: fall back to the original image
        cropped_img = img

    # Build the output file name from the input's base name
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, f"{base_name}.jpg")
    if not cv2.imwrite(output_path, cropped_img):
        print(f"failed to write: {output_path}")

In [None]:
# Crop every detected 'person' out of every test image and save each crop as
# its own file, "<base_name>_<idx>.jpg", inside output_dir. The index keeps the
# crops of one image from overwriting each other.
# Only the horizontal extent is cropped (box width plus padding); the full
# image height is always kept. Images without a person detection produce no
# output file.

# cv2.imwrite returns False instead of raising when the target directory is
# missing, so make sure it exists before anything is written.
os.makedirs(output_dir, exist_ok=True)

padding = 10  # extra pixels kept to the left and right of each detected box

for image_path in image_files:
    # Load the image first so unreadable files are skipped before inference
    # (cv2.imread returns None rather than raising on failure).
    img = cv2.imread(image_path)
    if img is None:
        print(f"skip (cannot read image): {image_path}")
        continue
    width = img.shape[1]

    # Run detection
    results = model.predict(image_path, conf=0.5)
    detections = results[0]

    # Collect the box of every detection with class id 0 ('person')
    person_boxes = []
    for result in detections.boxes:
        # .item() avoids NumPy's deprecated int() conversion of a 1-element array
        class_id = int(result.cls.item())
        if class_id == 0:  # 'person'
            person_boxes.append(result.xyxy.cpu().numpy()[0])

    base_name = os.path.splitext(os.path.basename(image_path))[0]

    # Crop and save each person separately
    for idx, bbox in enumerate(person_boxes):
        x_min, _, x_max, _ = map(int, bbox)

        # Pad horizontally and clamp to the image; rows are left untouched
        x_min = max(0, x_min - padding)
        x_max = min(width, x_max + padding)
        cropped_img = img[:, x_min:x_max]

        # One file per detection
        output_path = os.path.join(output_dir, f"{base_name}_{idx}.jpg")
        if not cv2.imwrite(output_path, cropped_img):
            print(f"failed to write: {output_path}")


## langsamによるtextプロンプト

In [None]:
from PIL import Image
from lang_sam import LangSAM

# Text-prompted prediction on a single training image with LangSAM.
# NOTE: this rebinds the notebook-global `model` (the YOLO detector used by the
# cropping cells above); re-run the YOLO cell before re-running those loops.
model = LangSAM()
# Open the sample image and convert it to RGB mode before prediction
image_pil = Image.open("/root/signate_tecno/input/train/hold/2a2PfZsZkYfYTX77zPjYep.jpg").convert("RGB")
text_prompt = "fan"
# predict() is unpacked into four values here: masks, boxes, phrases, logits
masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)

## samによる検出

In [None]:
from ultralytics import SAM

# Load a model
# NOTE: this rebinds the notebook-global `model` once more; the cells below
# rely on `model` being this SAM instance.
model = SAM("sam2_b.pt")

# Display model information (optional)
model.info()

# Run inference
# `result` is indexed as result[0] by the visualization cell further down.
result = model("/root/signate_tecno/input/test/AKtCNcoN3x3a6Avwjjf2tw.jpg")

In [None]:
# Re-run inference on the same test image with the current `model`
# (this is the same call that ends the previous cell).
result = model("/root/signate_tecno/input/test/AKtCNcoN3x3a6Avwjjf2tw.jpg")

In [None]:

# Overlay every mask in `result` (produced by the inference cell above) on the
# test image, one colour per mask, and display the blend.

# Load the image; cv2.imread returns None instead of raising on failure, so
# fail here with a clear message rather than inside cvtColor.
image_path = "/root/signate_tecno/input/test/AKtCNcoN3x3a6Avwjjf2tw.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"cannot read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Move the masks to the CPU as a NumPy array.
# `masks` can be None on an Ultralytics result without segmentation output;
# treat that as "zero masks" and show the plain image.
mask_result = result[0].masks
if mask_result is None:
    masks = np.zeros((0,) + image_rgb.shape[:2], dtype=bool)
else:
    masks = mask_result.data.cpu().numpy()

# One colour per mask. plt.get_cmap replaces plt.cm.get_cmap, which was
# deprecated in Matplotlib 3.7 and removed in 3.9.
num_masks = masks.shape[0]
colors = plt.get_cmap('hsv', max(num_masks, 1))

# Blend each mask colour into a copy of the image.
# NOTE(review): assumes each mask has the same height/width as the image — confirm.
overlay = image_rgb.copy()
for i in range(num_masks):
    mask = masks[i].astype(bool)
    # RGBA in [0, 1] -> RGB in [0, 255]
    color = (np.array(colors(i))[:3] * 255).astype(np.uint8)
    # 50/50 blend of the original pixels and the mask colour; cast back to
    # uint8 explicitly instead of relying on the implicit cast on assignment
    blended = overlay[mask] * 0.5 + color * 0.5
    overlay[mask] = blended.astype(np.uint8)

# Show the result
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(overlay)
ax.axis('off')
plt.show()
