In [8]:
import os

# Restrict this notebook to a single GPU. This must run before CUDA is
# initialised, which is why it sits above the torch import.
# setdefault keeps a device selection already made by the launcher/scheduler
# (e.g. `CUDA_VISIBLE_DEVICES=0 jupyter ...`) instead of overwriting it with
# the hard-coded index below.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '4')




In [9]:
import torch
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from PIL import Image
from torchvision import transforms
from transformers import ViTFeatureExtractor, ViTModel
import numpy as np
from sklearn.ensemble import IsolationForest
from tqdm.auto import tqdm

In [10]:
# Data-loading class definition
class CustomDataset(Dataset):
    """Image dataset driven by a CSV file.

    The CSV is read with pandas and must contain an ``img_path`` column; each
    row's path is opened as an RGB image when the item is requested.

    Args:
        csv_file: Path (or anything ``pd.read_csv`` accepts) of the CSV file.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, csv_file, transform=None):
        self.transform = transform
        self.df = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the CSV."""
        n_rows = self.df.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Load the image at row ``idx``, converted to RGB and transformed if a transform is set."""
        path_column = self.df['img_path']
        rgb_image = Image.open(path_column.iloc[idx]).convert("RGB")
        # Guard clause: without a (truthy) transform the RGB image is returned as-is.
        if not self.transform:
            return rgb_image
        return self.transform(rgb_image)

In [11]:
# Image preprocessing: resize to the input size ViT expects, then convert the
# PIL image to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# Load the ViT feature extractor and the backbone from the same checkpoint.
vit_checkpoint = "google/vit-base-patch16-224"
feature_extractor = ViTFeatureExtractor.from_pretrained(vit_checkpoint)
model = ViTModel.from_pretrained(vit_checkpoint)
model.eval()  # switch to inference mode

Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTModel(
  (embeddings): ViTEmbeddings(
    (patch_embeddings): ViTPatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): ViTEncoder(
    (layer): ModuleList(
      (0): ViTLayer(
        (attention): ViTAttention(
          (attention): ViTSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): ViTSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ViTIntermediate(
          (dense): Linear(in_features=768, out_features=3072, bias=True)
          (intermediate_act_fn): GELUActivation()
      

In [13]:
# Function for extracting ViT features
def get_embeddings(dataloader, model, device):
    """Compute one [CLS] embedding per image yielded by ``dataloader``.

    Relies on the module-level ``feature_extractor`` for normalisation.

    Args:
        dataloader: Iterable yielding batches of image tensors. The batches
            are expected to already be scaled to [0, 1] (e.g. produced by
            ``transforms.ToTensor()``), which is why rescaling is disabled
            below.
        model: ViT model; it is moved to ``device`` as a side effect.
        device: Torch device on which inference runs.

    Returns:
        A NumPy array with one row per image, in the order the dataloader
        yields them, holding the hidden state of the first ([CLS]) token.

    Raises:
        ValueError: If the dataloader yields no batches (raised by
            ``np.concatenate`` on an empty list).
    """
    embeddings = []
    model.to(device)
    with torch.no_grad():
        for images in tqdm(dataloader):
            # Process the images batch by batch.
            # do_rescale=False: the pixels are already floats in [0, 1];
            # letting the processor multiply by 1/255 again would squash the
            # input to ~[0, 0.004] before mean/std normalisation.
            inputs = feature_extractor(
                images=[image.numpy() for image in images],
                return_tensors="pt",
                do_rescale=False,
            )
            inputs = {k: v.to(device) for k, v in inputs.items()}
            outputs = model(**inputs)
            # Use the output at the [CLS] token position as the feature vector.
            embeddings.append(outputs.last_hidden_state[:, 0, :].cpu().numpy())
    return np.concatenate(embeddings, axis=0)

# Prepare the data loader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_data = CustomDataset(csv_file='./bigdata/train.csv', transform=transform)
# shuffle=False: this is an inference-only pass, so shuffling buys nothing and
# would make the rows of train_embeddings stop lining up with train.csv (and
# change their order on every run).
train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

# Extract the embeddings
train_embeddings = get_embeddings(train_loader, model, device)

100%|█████████████████████████████████████████████| 7/7 [00:04<00:00,  1.57it/s]


In [14]:
# Train the Isolation Forest model on the training embeddings
# (fit() returns the estimator itself, so construction and fitting are chained).
clf = IsolationForest(random_state=42).fit(train_embeddings)

# Build the test dataset and loader used for anomaly detection
test_data = CustomDataset(
    csv_file='./bigdata/test.csv',
    transform=transform,
)
test_loader = DataLoader(
    test_data,
    batch_size=32,
    shuffle=False,
)


In [15]:
test_embeddings = get_embeddings(test_loader, model, device)

# Convert the Isolation Forest output to submission labels:
# predict() marks outliers as -1, which become label 1; everything else is 0.
is_outlier = clf.predict(test_embeddings) == -1
test_pred = is_outlier.astype(int)

# Write the labels into the sample submission and save it
submit = pd.read_csv('./bigdata/sample_submission.csv')
submit['label'] = test_pred
submit.to_csv(
    './bigdata/vit_isolation_forest_submit.csv',
    index=False,
)

100%|█████████████████████████████████████████████| 4/4 [00:02<00:00,  1.85it/s]
