In [3]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '4'




In [4]:
import torch
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from PIL import Image
from torchvision import transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification
import numpy as np
from sklearn.ensemble import IsolationForest
from tqdm.auto import tqdm



  from .autonotebook import tqdm as notebook_tqdm
2024-02-23 17:20:57.903695: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-23 17:20:58.039837: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-23 17:20:59.545437: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvrtc.so.11.1: cannot open shared object file: No such file or directory
2024-02-23 17:20:59.545731: W tensorflow/compiler/xla/

In [5]:
# 데이터 로딩 클래스 정의
class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df['img_path'].iloc[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image



In [6]:
# 이미지 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)), # ViT 입력 크기에 맞춰 조정
    transforms.ToTensor(),
])

# ViT 모델 및 피처 추출기 로딩
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224")
model.eval() # 추론 모드



Downloading (…)rocessor_config.json: 100%|█████| 160/160 [00:00<00:00, 12.6kB/s]
Downloading config.json: 100%|██████████████| 69.7k/69.7k [00:00<00:00, 381kB/s]
Downloading model.safetensors: 100%|██████████| 346M/346M [00:03<00:00, 112MB/s]


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0): ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_

In [7]:
# ViT 특성 추출을 위한 함수
def get_embeddings(dataloader, model, device):
    embeddings = []
    model.to(device)
    with torch.no_grad():
        for images in tqdm(dataloader):
            # Feature extractor가 받는 입력 형태로 변환
            inputs = feature_extractor(images=images, return_tensors="pt")
            inputs.to(device)
            outputs = model(**inputs)
            embeddings.append(outputs.last_hidden_state[:,0,:].cpu().numpy()) # [CLS] 토큰의 출력 사용
    return np.concatenate(embeddings, axis=0)



In [8]:
# 데이터 로더 준비
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_data = CustomDataset(csv_file='./bigdata/train.csv', transform=transform)
train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

# 임베딩 추출
train_embeddings = get_embeddings(train_loader, model, device)

# Isolation Forest 모델 학습
clf = IsolationForest(random_state=42)
clf.fit(train_embeddings)

# 테스트 데이터에 대해 이상 탐지 수행
test_data = CustomDataset(csv_file='./bigdata/test.csv', transform=transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

test_embeddings = get_embeddings(test_loader, model, device)
test_pred = clf.predict(test_embeddings)

# 결과 변환 및 저장
test_pred = np.where(test_pred == -1, 1, 0) # Isolation Forest의 예측 결과 변환
submit = pd.read_csv('./bigdata/sample_submission.csv')
submit['label'] = test_pred
submit.to_csv('./bigdata/vit_baseline_submit.csv', index=False)

  0%|                                                     | 0/7 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: './train/TRAIN_000.png'