<a href="https://colab.research.google.com/github/rbdus0715/project/blob/main/aerial_cactus_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **항공 사진 내 선인장 식별**
- 문제 유형 : 이진분류
- 모델 : CNN

#### **input -> CNN -> linear -> 2진 -> 교차 엔트로피**

In [1]:
import pandas as pd

# Load the training labels and the sample submission template from the
# current working directory.
labels, submission = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'sample_submission.csv')
)
labels.head()

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1


**시드값 고정**

In [2]:
# 시드값 고정 및 GPU 장비 설정
import torch
import random
import numpy as np
import os

# Seed every random number generator the notebook relies on so that
# data shuffling, weight initialisation and augmentation are repeatable.
seed = 50
random.seed(seed)
np.random.seed(seed)  # numpy was imported for this purpose but was never seeded
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed the current CUDA device and every other visible one; both calls are
# harmless on a CPU-only machine.
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic kernels and disable its auto-tuner, which
# may otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Display the device selected above.
device

device(type='cuda')

**데이터 준비**

In [4]:
# 데이터 준비
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled rows for validation. The split is stratified
# on `has_cactus` and uses a fixed random state so it is repeatable.
train, valid = train_test_split(
    labels, test_size=0.1, random_state=50, stratify=labels['has_cactus']
)
print(len(train), len(valid))
train.head()

15750 1750


Unnamed: 0,id,has_cactus
10630,995e3ba502e55d8e469b0b4623893176.jpg,0
8638,7bc5c0e2a681b1f173c70899534dca7a.jpg,1
11167,a148ba511648e3975f0cf5e6b8bc5593.jpg,1
2398,21650f4673340621f3d508f8dee60059.jpg,0
17110,fa4411b08a8efff184ad4e71abbac8a5.jpg,1


In [8]:
# 데이터셋 클래스 정의
import cv2
from torch.utils.data import Dataset

class ImageDataset(Dataset):
    """Dataset that reads images listed in a DataFrame from a directory.

    Args:
        df: DataFrame whose first column holds the image file name and
            whose second column holds the label.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB image array
            before it is returned.
    """

    def __init__(self, df, img_dir='./', transform=None):
        super().__init__()  # initialise the parent Dataset
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at positional index ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded.
        """
        img_id = self.df.iloc[idx, 0]  # image file name
        # os.path.join also works when img_dir has no trailing separator.
        img_path = os.path.join(self.img_dir, img_id)

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image file: {img_path}')

        # OpenCV loads channels as BGR; convert to the usual RGB order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.df.iloc[idx, 1]

        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [9]:
# 데이터셋 생성
from torchvision import transforms
# Baseline preprocessing: only convert each image array to a tensor.
transform = transforms.ToTensor()

# Both splits are read from the same image directory; they differ only in
# which rows of the label table they cover.
dataset_train, dataset_valid = (
    ImageDataset(df=split_df, img_dir='train/', transform=transform)
    for split_df in (train, valid)
)

In [10]:
# 데이터 로더 생성
from torch.utils.data import DataLoader
# Training batches are reshuffled every epoch.
loader_train = DataLoader(dataset=dataset_train, batch_size=32, shuffle=True)
# Evaluation does not depend on sample order, so the validation loader is
# left unshuffled to keep its iteration order repeatable.
loader_valid = DataLoader(dataset=dataset_valid, batch_size=32, shuffle=False)

**모델 생성**

In [12]:
import torch.nn as nn
import torch.nn.functional as F

In [17]:
class Model(nn.Module):
    """Small two-stage CNN that maps 3x32x32 images to two class logits."""

    def __init__(self):
        super().__init__()
        # Two 3x3 convolutions; padding=2 grows each spatial side by 2.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=2)
        # Parameter-free pooling layers shared by the forward pass.
        self.max_pool = nn.MaxPool2d(kernel_size=2)
        self.avg_pool = nn.AvgPool2d(kernel_size=2)
        # Classifier over the flattened 64 x 4 x 4 feature map.
        self.fc = nn.Linear(64 * 4 * 4, 2)

    def forward(self, x):
        """Return logits of shape (N, 2) for a batch of shape (N, 3, 32, 32)."""
        # Spatial size per stage: 32 -> conv 34 -> pool 17 -> conv 19 -> pool 9.
        for conv in (self.conv1, self.conv2):
            x = self.max_pool(F.relu(conv(x)))
        # Average pooling halves 9 -> 4, giving (N, 64, 4, 4).
        x = self.avg_pool(x)
        # Flatten to (N, 1024) for the linear classifier.
        x = x.view(-1, self.fc.in_features)
        return self.fc(x)

In [18]:
# Build the baseline CNN, then move its parameters to the selected device.
model = Model()
model = model.to(device)

**모델 훈련**

In [19]:
# Plain SGD over all model parameters, paired with a cross-entropy loss on
# the two class logits.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [20]:
epochs = 10

# Make sure the model is in training mode even if an evaluation cell was
# run before this one.
model.train()

for epoch in range(epochs):
    epoch_loss = 0  # running sum of the per-batch losses

    # The batch labels are called `targets` so that the notebook-level
    # `labels` DataFrame is not overwritten by a tensor.
    for images, targets in loader_train:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()  # clear gradients left from the previous step
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()  # backpropagation
        optimizer.step()  # parameter update
        epoch_loss += loss.item()

    print(f'에폭 :[{epoch+1}/{epochs}] - 손실값: {epoch_loss/len(loader_train):.4f}')

에폭 :[1/10] - 손실값: 0.5271
에폭 :[2/10] - 손실값: 0.3634
에폭 :[3/10] - 손실값: 0.2510
에폭 :[4/10] - 손실값: 0.1949
에폭 :[5/10] - 손실값: 0.1747
에폭 :[6/10] - 손실값: 0.1584
에폭 :[7/10] - 손실값: 0.1463
에폭 :[8/10] - 손실값: 0.1422
에폭 :[9/10] - 손실값: 0.1306
에폭 :[10/10] - 손실값: 0.1269


**성능 검증**

In [22]:
from sklearn.metrics import roc_auc_score
# Buffers for the validation targets and the predicted probabilities.
true_list, preds_list = [], []

In [30]:
# Switch the model to evaluation mode.
model.eval()

# Start from empty buffers on every run of this cell; otherwise re-running
# it would append a second copy of every prediction.
true_list = []
preds_list = []

with torch.no_grad():  # no gradients are needed for evaluation
    # The batch labels are called `targets` so that the notebook-level
    # `labels` DataFrame is not overwritten by a tensor.
    for images, targets in loader_valid:
        images = images.to(device)

        outputs = model(images)
        # Probability of the positive class (column 1 of the softmax).
        preds = torch.softmax(outputs.cpu(), dim=1)[:, 1]
        # Store plain Python numbers rather than 0-d tensors.
        preds_list.extend(preds.tolist())
        true_list.extend(targets.tolist())

print(f'검증 데이터 ROC AUC : {roc_auc_score(true_list, preds_list):.4f}')

검증 데이터 ROC AUC : 0.9896


**예측 및 결과 제출**

In [25]:
# The submission template lists the test image ids. Order is kept fixed
# (no shuffling) so predictions line up with its rows.
dataset_test = ImageDataset(submission, 'test/', transform)
loader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

In [32]:
# Evaluation mode for inference.
model.eval()

# Predicted positive-class probability for every test image, in loader order.
preds = []

with torch.no_grad():
    for images, _ in loader_test:
        images = images.to(device)
        logits = model(images)
        # Column 1 of the softmax is the probability of class 1.
        cactus_probs = torch.softmax(logits.cpu(), dim=1)[:, 1]
        preds += cactus_probs.tolist()

In [33]:
# Store the predicted probabilities in the submission table and write it
# out without the index column.
submission['has_cactus'] = preds
submission.to_csv(path_or_buf='submission.csv', index=False)

**성능 개선**

In [35]:
# 데이터 준비 - 다양한 이미지 변환 수행
from torchvision import transforms

# Per-channel (R, G, B) statistics for Normalize: the first tuple holds the
# means and the second the standard deviations (not variances).
# NOTE(review): these look like the commonly used ImageNet statistics — confirm.
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

# Training pipeline: tensor conversion, padding, random augmentation and
# finally normalisation.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    # Add a 32-pixel border on every side, filled by mirroring the image.
    transforms.Pad(padding=32, padding_mode='symmetric'),
    # Random left-right flip.
    transforms.RandomHorizontalFlip(),
    # Random top-bottom flip.
    transforms.RandomVerticalFlip(),
    # Random rotation within +/-10 degrees.
    transforms.RandomRotation(degrees=10),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Evaluation pipeline: same padding and normalisation, no random augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Pad(padding=32, padding_mode='symmetric'),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

In [36]:
# Rebuild the datasets with the padded / normalised pipelines.
dataset_train = ImageDataset(df=train, img_dir='train/', transform=transform_train)
# The validation split must be stored in `dataset_valid`. If it is assigned
# to `dataset_test`, `dataset_valid` keeps the old un-padded 32x32 pipeline
# and `dataset_test` silently holds validation rows.
dataset_valid = ImageDataset(df=valid, img_dir='train/', transform=transform_test)
# The test images get the same deterministic preprocessing as validation.
dataset_test = ImageDataset(df=submission, img_dir='test/', transform=transform_test)

In [37]:
# Shuffle only the training batches; the validation order stays fixed.
loader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)
loader_valid = DataLoader(dataset_valid, batch_size=32, shuffle=False)

In [62]:
class Model_2(nn.Module):
    """Deeper CNN: five conv/batch-norm/LeakyReLU/max-pool stages and two linear layers.

    The final view to 512 features means the feature map must be 512 x 1 x 1
    after average pooling; a 3 x 96 x 96 input satisfies this
    (96 -> 49 -> 25 -> 13 -> 7 -> 4 -> 1).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one stage: 3x3 conv (padding 2), batch norm, LeakyReLU, 2x2 max pool."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def __init__(self):
        super().__init__()
        # The channel width doubles at every stage: 3 -> 32 -> ... -> 512.
        self.layer1 = self._conv_stage(3, 32)
        self.layer2 = self._conv_stage(32, 64)
        self.layer3 = self._conv_stage(64, 128)
        self.layer4 = self._conv_stage(128, 256)
        self.layer5 = self._conv_stage(256, 512)
        self.avg_pool = nn.AvgPool2d(kernel_size=4)
        # Classifier head: 512 -> 64 -> 2 with no activation in between.
        self.fc1 = nn.Linear(in_features=512 * 1 * 1, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=2)

    def forward(self, x):
        """Return two class logits per sample."""
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5):
            x = stage(x)
        x = self.avg_pool(x)
        # Flatten to (N, 512) for the linear head.
        x = x.view(-1, self.fc1.in_features)
        return self.fc2(self.fc1(x))

In [63]:
# Replace the baseline with the deeper network, then move it to the selected device.
model = Model_2()
model = model.to(device)

In [64]:
# Adamax with a small learning rate over the new model's parameters, paired
# with the same cross-entropy loss as before.
optimizer = torch.optim.Adamax(params=model.parameters(), lr=6e-5)
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 70

# Training mode matters for this model: its BatchNorm layers behave
# differently in eval mode, and an evaluation cell may have run before this.
model.train()

for epoch in range(epochs):
    epoch_loss = 0  # running sum of the per-batch losses

    # The batch labels are called `targets` so that the notebook-level
    # `labels` DataFrame is not overwritten by a tensor.
    for images, targets in loader_train:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()  # clear gradients left from the previous step
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()  # backpropagation
        optimizer.step()  # parameter update
        epoch_loss += loss.item()

    print(f'에폭 :[{epoch+1}/{epochs}] - 손실값: {epoch_loss/len(loader_train):.4f}')

에폭 :[1/70] - 손실값: 0.1233
에폭 :[2/70] - 손실값: 0.0641
에폭 :[3/70] - 손실값: 0.0515
에폭 :[4/70] - 손실값: 0.0422
에폭 :[5/70] - 손실값: 0.0375
에폭 :[6/70] - 손실값: 0.0325
에폭 :[7/70] - 손실값: 0.0286
