## 1. Data preparation: data loader

In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset

from torchvision import transforms
import torchvision.models as models

from PIL import Image
import os

import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
train_transforms = transforms.Compose([
    transforms.RandomRotation(20),          # 랜덤 회전
    transforms.RandomResizedCrop(224),      # 사이즈 맞추기
    transforms.RandomHorizontalFlip(),      # 좌우 뒤집기
    transforms.ToTensor(),                  # 텐서로 변환
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # 정규화
])

val_transforms = transforms.Compose([
    transforms.Resize(256),                # 이미지 크기 조정
    transforms.CenterCrop(224),            # 중앙 크롭
    transforms.ToTensor(),                 # 텐서로 변환
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # 정규화
])

In [3]:
class BoDataset(Dataset):
    def __init__(self, path, transforms=None):
        super().__init__()

        self.x = []
        self.y = []
        self.transforms = transforms

        for dir in ['bo', 'not_bo']:
            label = 1 if dir == 'bo' else 0
            dir_path = os.path.join(path, dir)
            for img in os.listdir(dir_path):
                self.x.append(os.path.join(dir_path, img))
                self.y.append(label)


    def __getitem__(self, idx):
        image = Image.open(self.x[idx]).convert("RGB")

        if self.transforms:
            x_tensor = self.transforms(image)
        else:
            x_tensor = transforms.ToTensor()(image)

        y_tensor = torch.tensor(self.y[idx])
        
        return x_tensor, y_tensor

    def __len__(self):
        return len(self.x)

In [4]:
class BoDataLoader:
    def __init__(self, dataset, batch_size=1, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(dataset))
        self.num_samples = len(dataset)
    
    def __iter__(self):
        if self.shuffle:
            np.random.shuffle(self.indices)  # 데이터 셔플링
        self.current_index = 0
        return self

    def __next__(self):
        if self.current_index >= self.num_samples:
            raise StopIteration
        
        batch_indices = self.indices[self.current_index:self.current_index+self.batch_size]
        batch = [self.dataset[i] for i in batch_indices]
        self.current_index += self.batch_size

        batch_x, batch_y = zip(*batch)
        return torch.stack(batch_x), torch.tensor(batch_y)

In [5]:
train_path = './hw3_bo_and_notbo/train'
valid_path = './hw3_bo_and_notbo/valid'

train_data = BoDataset(train_path, transforms=train_transforms)
val_data = BoDataset(valid_path, transforms=val_transforms)

train_loader = BoDataLoader(train_data, batch_size=20, shuffle=True)
val_loader = BoDataLoader(val_data, batch_size=20, shuffle=False)

## 2. Constructing a neural network architecture

In [6]:
resnet = models.resnet18(pretrained=True)

for param in resnet.parameters():
    param.requires_grad = False

resnet.fc = nn.Linear(in_features=resnet.fc.in_features, out_features=2)
resnet = resnet.to(device)



## 3. Loss function and optimization method

In [7]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=0.001)

## 4. Training of the neural network

In [8]:
para = []
for epoch in range(30):
    loss_val = 0
    for itr, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward
        pred = resnet(inputs)
        loss = loss_function(pred, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_val += loss.item()

    if epoch%5 == 0:
        para.append(resnet.state_dict())

    print("Epoch:", epoch+1, " , Loss:", loss_val)


Epoch: 1  , Loss: 3.49159674346447
Epoch: 2  , Loss: 2.8907951414585114
Epoch: 3  , Loss: 1.8670158237218857
Epoch: 4  , Loss: 1.9716014564037323
Epoch: 5  , Loss: 1.1646679937839508
Epoch: 6  , Loss: 1.0768968760967255
Epoch: 7  , Loss: 0.8807556256651878
Epoch: 8  , Loss: 0.6507934927940369
Epoch: 9  , Loss: 0.594440970569849
Epoch: 10  , Loss: 0.5289658010005951
Epoch: 11  , Loss: 0.5132788326591253
Epoch: 12  , Loss: 0.38628101721405983
Epoch: 13  , Loss: 0.39037197828292847
Epoch: 14  , Loss: 0.4037373121827841
Epoch: 15  , Loss: 0.41993197426199913
Epoch: 16  , Loss: 0.4821723308414221
Epoch: 17  , Loss: 0.3692601229995489
Epoch: 18  , Loss: 0.3765437211841345
Epoch: 19  , Loss: 0.2877541985362768
Epoch: 20  , Loss: 0.24791929125785828
Epoch: 21  , Loss: 0.26789115369319916
Epoch: 22  , Loss: 0.30350164137780666
Epoch: 23  , Loss: 0.24732948187738657
Epoch: 24  , Loss: 0.19429335556924343
Epoch: 25  , Loss: 0.27366623375564814
Epoch: 26  , Loss: 0.23788858205080032
Epoch: 27  , L

## 5. Prediction and Evaluation for test set

In [9]:
def cal_accuracy(net, loader):
  pred_list = []
  label_list = []
  for itr, data in enumerate(loader):
    inputs, labels = data

    pred_test = net(inputs)
    pred_category = torch.argmax(pred_test, dim=1)

    pred_list = pred_list + list(pred_category)
    label_list = label_list + list(labels)

  accu = np.mean(np.array(pred_list) == np.array(label_list))
  return accu

In [10]:
if device != 'cpu':
    rsenet = resnet.to('cpu')

max_accu = 0
best_net = None

for i, tmp_param in enumerate(para):
    resnet.load_state_dict(tmp_param)
    accu = cal_accuracy(resnet, val_loader)
    print(f"Test for model {i+1}: {accu}")

if accu > max_accu:
    max_accu = accu
    best_net = tmp_param

Test for model 1: 0.8333333333333334
Test for model 2: 0.8333333333333334
Test for model 3: 0.8333333333333334
Test for model 4: 0.8333333333333334
Test for model 5: 0.8333333333333334
Test for model 6: 0.8333333333333334


In [11]:
print("train accuracy: "+ str(cal_accuracy(resnet, train_loader)))
print("valid accuracy: "+ str(cal_accuracy(resnet, val_loader)))

train accuracy: 0.9928057553956835
valid accuracy: 0.8333333333333334


## 보고서 내용

### 데이터셋

1. 이미지 데이터를 처리하기 위한 데이터셋 클래스 정의
2. 각 이미지 경로를 찾아서 라벨링
```
for dir in ['bo', 'not_bo']:
    label = 1 if dir == 'bo' else 0
    dir_path = os.path.join(path, dir)
    for img in os.listdir(dir_path):
        self.x.append(os.path.join(dir_path, img))
        self.y.append(label)
```
3. 추가적인 데이터를 위해 데이터 증강
```python
train_transforms = transforms.Compose([
    transforms.RandomRotation(20),          # 랜덤 회전
    transforms.RandomResizedCrop(224),      # 사이즈 맞추기
    transforms.RandomHorizontalFlip(),      # 좌우 뒤집기
    transforms.ToTensor(),                  # 텐서로 변환
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # 정규화
])
```
4. 모델 사용 및 이미지 크기 통일을 위해 224x224로 사이즈 조정

### 데이터로더
1. 셔플 기능 구현
```
if self.shuffle:
    np.random.shuffle(self.indices)  # 데이터 셔플링
```
2. 배치 사이즈 별로 같은 배열에 포함하여 반환
3. 배치 크기 별로 batch_x, batch_y 형태로 반환
```
batch_indices = self.indices[self.current_index:self.current_index+self.batch_size]
batch = [self.dataset[i] for i in batch_indices]
self.current_index += self.batch_size

batch_x, batch_y = zip(*batch)
return torch.stack(batch_x), torch.tensor(batch_y)
```

### 네트워크 모델

1. ResNet 사용
2. pre-trained parameter 사용
3. 출력층만 학습에 이용
```
resnet = models.resnet18(pretrained=True)

for param in resnet.parameters():
    param.requires_grad = False

resnet.fc = nn.Linear(in_features=resnet.fc.in_features, out_features=2)
```

### 손실함수, 최적화

1. 손실함수: CrossEntropy
2. 최적화: Adam

### 훈련 과정
1. 5 epoch 마다 parameter 저장 → 최선의 파라미터 선택

### 파일 구조
HW3/  
├── train/  
│   ├── bo  
│   │   ├── img1.jpg  
│   │   ├── ...  
│   ├── not_bo  
│   │   ├── img2.jpg  
│   │   ├── ...  
├── valid/  
│   ├── bo  
│   │   ├── img3.jpg  
│   │   ├── ...  
│   ├── not_bo  
│   │   ├── img4.jpg  
│   │   ├── ...  
├── hw3_202011505.ipynb  