In [None]:
import os
import json
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
from torchvision import transforms

# 카테고리별 인코딩
category_encodings = {
    # "카테고리": {
    #     "탑": 0, "블라우스": 1, "캐주얼상의": 2, "니트웨어": 3,
    #     "셔츠": 4, "베스트": 5, "코트": 6, "재킷": 7,
    #     "점퍼": 8, "패딩": 9, "청바지": 10, "팬츠": 11,
    #     "스커트": 12, "드레스": 13, "점프수트": 14, "수영복": 15
    # },
    "색상": {
        "블랙": 0, "화이트": 1, "그레이": 2, "레드": 3, "핑크": 4,
        "오렌지": 5, "베이지": 6, "브라운": 7, "옐로우": 8,
        "그린": 9, "카키": 10, "민트": 11, "블루": 12, "네이비": 13,
        "스카이블루": 14, "퍼플": 15, "라벤더": 16, "와인": 17, 
        "네온": 18, "골드": 19
    },
    "디테일": {
        "비즈": 0, "퍼트리밍": 1, "단추": 2, "글리터": 3, "니트꽈배기": 4,
        "체인": 5, "컷오프": 6, "더블브레스티드": 7, "드롭숄더": 8, 
        "자수": 9, "프릴": 10, "프린지": 11, "플레어": 12, "퀼팅": 13, 
        "리본": 14, "롤업": 15, "러플": 16, "셔링": 17, "슬릿": 18,
        "스팽글": 19, "스티치": 20, "스터드": 21, "폼폼": 22, "포켓": 23,
        "패치워크": 24, "페플럼": 25, "플리츠": 26, "집업": 27, 
        "디스트로이드": 28, "드롭웨이스트": 29, "버클": 30, "컷아웃": 31,
        "X스트랩": 32, "비대칭": 33
    },
    "프린트": {
        "체크": 0, "플로럴": 1, "스트라이프": 2, "레터링": 3, 
        "해골": 4, "타이다이": 5, "지브라": 6, "도트": 7, 
        "카무플라쥬": 8, "그래픽": 9, "페이즐리": 10, "하운즈 투스": 11, 
        "아가일": 12, "깅엄": 13
    },
    "소재": {
        "퍼": 0, "니트": 1, "무스탕": 2, "레이스": 3, "스웨이드": 4,
        "린넨": 5, "앙고라": 6, "메시": 7, "코듀로이": 8, "플리스": 9,
        "시퀸/글리터": 10, "네오프렌": 11, "데님": 12, "실크": 13,
        "저지": 14, "스판덱스": 15, "트위드": 16, "자카드": 17, 
        "벨벳": 18, "가죽": 19, "비닐/PVC": 20, "면": 21,
        "울/캐시미어": 22, "시폰": 23, "합성섬유": 24
    },
    "소매기장": {
        "민소매": 0, "7부소매": 1, "반팔": 2, "긴팔": 3, "캡": 4
    },
    "넥라인": {
        "라운드넥": 0, "스퀘어넥": 1, "유넥": 2, "노카라": 3, 
        "브이넥": 4, "후드": 5, "홀터넥": 6, "터틀넥": 7,
        "오프숄더": 8, "보트넥": 9, "원 숄더": 10, "스위트하트": 11
    },
    "카라": {
        "셔츠칼라": 0, "피터팬칼라": 1, "보우칼라": 2, "너치드칼라": 3,
        "세일러칼라": 4, "차이나칼라": 5, "숄칼라": 6, "테일러드칼라": 7,
        "폴로칼라": 8, "밴드칼라": 9
    },
    "핏": {
        "노멀": 0, "스키니": 1, "루즈": 2, "와이드": 3,
        "오버사이즈": 4, "타이트": 5
    },
    "세이프": {
        "페플럼": 0, "머메이드": 1, "비대칭": 2, "벨보텀/플레어": 3,
        "알수없음": 4, "부츠컷": 5, "펜슬": 6, "테이퍼드": 7,
        "A라인": 8, "스트레이트": 9, "H라인": 10
    },
    "실루엣": {
        "X": 0, "A": 1, "H": 2, "T/Y": 3, "O": 4
    }
}

# 데이터셋 클래스 정의
class FashionDataset(Dataset):
    def __init__(self, img_dir, label_dir, encodings, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.encodings = encodings
        self.transform = transform
        self.img_list = sorted(os.listdir(img_dir))
        self.label_list = sorted(os.listdir(label_dir))

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_list[idx])
        label_path = os.path.join(self.label_dir, self.label_list[idx])

        # 이미지 로드 및 전처리
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image)

        # 라벨 로드 및 인코딩
        with open(label_path, 'r', encoding='utf-8') as f:
            label_data = json.load(f)

        encoded_labels = {}
        for category, attributes in label_data.items():
            encoded_labels[category] = {}
            for attr, value in attributes.items():
                if isinstance(value, list):
                    encoded_labels[category][attr] = [self.encodings[attr][v] for v in value]
                else:
                    encoded_labels[category][attr] = self.encodings[attr].get(value, -1)  # -1 for missing values

        return image, encoded_labels

# 이미지 전처리 정의
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor()
])

# 데이터셋 인스턴스 생성
img_path = './dataset/images/train'
label_path = './dataset/reg_labels/train'
dataset = FashionDataset(img_path, label_path, category_encodings, transform=transform)

# 데이터로더 생성
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# 데이터 확인
for images, labels in dataloader:
    print(images.shape)
    print(labels)
    break

KeyError: '기장'

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from ultralytics import YOLO
from ultralytics.nn.modules.conv import Conv

class Classify(nn.Module):
    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, p, g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=0.0, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
        return x if self.training else x.softmax(1)

class SubHead(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(SubHead, self).__init__()
        self.conv1x1 = nn.Conv2d(input_channels, num_classes, kernel_size=1)
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        
    def forward(self, x):
        x = self.conv1x1(x)
        x = self.global_avg_pool(x)
        return x.squeeze(-1).squeeze(-1)  # (batch_size, num_classes)

class Head(nn.Module):
    def __init__(self, input_channels):
        super(Head, self).__init__()
        
        # 아우터, 상의, 하의에 따라 필요한 SubHead 정의
        self.outer_head = {
            "category": Classify(input_channels, 7),
            "length": Classify(input_channels, 5)
        }
        self.top_head = {
            "category": Classify(input_channels, 7),
            "length": Classify(input_channels, 3),
            "fit": Classify(input_channels, 4)
        }
        self.bottom_head = {
            "category": Classify(input_channels, 5),
            "length": Classify(input_channels, 5),
            "fit": Classify(input_channels, 5)
        }
        
        # 공통 출력층 (컬러와 소재 등)
        self.common_head = {
            "color": SubHead(input_channels, 15),
            "material": SubHead(input_channels, 10)
        }

    def forward(self, x):
        # 아우터 예측
        outer_outputs = {key: head(x) for key, head in self.outer_head.items()}
        
        # 상의 예측
        top_outputs = {key: head(x) for key, head in self.top_head.items()}
        
        # 하의 예측
        bottom_outputs = {key: head(x) for key, head in self.bottom_head.items()}
        
        # 공통 예측
        common_outputs = {key: head(x) for key, head in self.common_head.items()}
        
        return {
            'outer': outer_outputs,
            'top': top_outputs,
            'bottom': bottom_outputs,
            'common': common_outputs
        }

class Rolo(nn.Module):
    def __init__(self, yolo_dir='yolo11s.pt'):
        super().__init__()
        backbone_layers = list(YOLO(yolo_dir).model.model.children())[:11]
        self.backbone = nn.Sequential(*backbone_layers)
        self.head = Head(512)
        
    def forward(self, x):
        x = self.backbone(x)
        return self.head(x)

# 손실 함수 정의
def compute_loss(outputs, targets):
    criterion_ce = nn.CrossEntropyLoss()
    criterion_bce = nn.BCEWithLogitsLoss()

    # 아우터 손실
    loss_outer = sum(criterion_ce(outputs['outer'][key], targets['outer'][key]) for key in outputs['outer'])
    
    # 상의 손실
    loss_top = sum(criterion_ce(outputs['top'][key], targets['top'][key]) for key in outputs['top'])
    
    # 하의 손실
    loss_bottom = sum(criterion_ce(outputs['bottom'][key], targets['bottom'][key]) for key in outputs['bottom'])
    
    # 공통 손실
    loss_common = sum(criterion_ce(outputs['common']['color'], targets['common']['color']) +
                      criterion_bce(outputs['common']['material'], targets['common']['material']))

    total_loss = loss_outer + loss_top + loss_bottom + loss_common
    return total_loss

# 모델 초기화
model = Rolo()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# 옵티마이저 정의
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습 루프
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in dataloader:
        images = images.to(device)
        labels = {k: {kk: vv.to(device) for kk, vv in v.items()} for k, v in labels.items()}

        # 옵티마이저 초기화
        optimizer.zero_grad()

        # 순전파
        outputs = model(images)
        loss = compute_loss(outputs, labels)

        # 역전파 및 옵티마이저 스텝
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader)}')

print('학습 완료')

In [26]:
model = YOLO("yolo11n.pt")

# 백본 레이어 추출 (neck 레이어까지 제거)
# 예를 들어, neck 레이어가 10번째 레이어라면, 그 이후의 레이어들을 제거합니다.
backbone_layers = list(model.model.model.children())[:11]
backbone  = nn.Sequential(*backbone_layers)
x = torch.randn(1, 3, 640, 640)
output = backbone(x)
print(output.shape)
backbone

torch.Size([1, 256, 20, 20])


Sequential(
  (0): Conv(
    (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
    (act): SiLU(inplace=True)
  )
  (1): Conv(
    (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
    (act): SiLU(inplace=True)
  )
  (2): C3k2(
    (cv1): Conv(
      (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (cv2): Conv(
      (conv): Conv2d(48, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (m): ModuleList(
      (0): Bottleneck(
        (cv1): Conv(
       

In [9]:
import torch
import torch.nn as nn
from torchinfo import summary
from ultralytics import YOLO

# 모델 로드
model = YOLO("yolo11n.pt")

# 백본 레이어 추출 (neck 레이어까지 제거)
# 예를 들어, neck 레이어가 10번째 레이어라면, 그 이후의 레이어들을 제거합니다.
backbone_layers = list(model.model.model.children())[:10]
init_layer = backbone_layers[0]
backbone1 = nn.Sequential(*backbone_layers[1:4])
c3k2_1 = backbone_layers[4]
c3k2_2 = nn.Sequential(*backbone_layers[5:7])
backbone2 = nn.Sequential(*backbone_layers[7:])
backbone = nn.Sequential(*backbone_layers)

# 디바이스 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
backbone.to(device)


# 더미 입력 생성
input_tensor = torch.randn(1, 3, 640, 640).to(device)

# 모델 순전파 실행
with torch.no_grad():
    try:
        output = backbone(input_tensor)
        print(f'백본 출력 크기: {output.shape}')
    except TypeError as e:
        print(f'오류 발생: {e}')

# 모델 서머리 출력 (배치 크기를 포함)
summary(backbone, input_size=(1, 3, 640, 640))

백본 출력 크기: torch.Size([1, 256, 20, 20])


Layer (type:depth-idx)                                  Output Shape              Param #
Sequential                                              [1, 256, 20, 20]          --
├─Conv: 1-1                                             [1, 16, 320, 320]         --
│    └─Conv2d: 2-1                                      [1, 16, 320, 320]         (432)
│    └─BatchNorm2d: 2-2                                 [1, 16, 320, 320]         (32)
├─SPPF: 1-62                                            --                        (recursive)
│    └─Conv: 2-69                                       --                        (recursive)
│    │    └─SiLU: 3-1                                   [1, 16, 320, 320]         --
├─Conv: 1-3                                             [1, 32, 160, 160]         --
│    └─Conv2d: 2-4                                      [1, 32, 160, 160]         (4,608)
│    └─BatchNorm2d: 2-5                                 [1, 32, 160, 160]         (64)
├─SPPF: 1-62                  

In [None]:
model.model.head = CustomHead(nc=80, ch=[256])
model.loss_fn = CustomLoss()