## 1. Data preparation: data loader

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import torchvision.transforms as transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([transforms.Resize(32), transforms.RandomCrop(28)])

import numpy as np

class ASL_Dataset(Dataset):
  def __init__(self, path, transform=None):
    super(ASL_Dataset, self).__init__()

    df = pd.read_csv(path)
    y = df['label']
    del df['label']
    x = df.values

    self.x = x
    self.y = y
    self.transform = transform

  def __getitem__(self, index):
    x_sample = np.uint8(self.x[index].reshape(28, 28))

    if self.transform:
      x_pil = Image.fromarray(x_sample)
      x_np = np.array(self.transform(x_pil))
      x_tensor = torch.tensor(x_np[np.newaxis,:,:]).float()
    else:
      x_tensor = torch.tensor(x_sample[np.newaxis,:,:]).float()

    y_tensor = torch.tensor(self.y[index])

    return x_tensor, y_tensor

  def __len__(self):
    return self.x.shape[0]

In [2]:
train_data = ASL_Dataset("/content/sign_mnist_train.csv", transform=transform)
valid_data = ASL_Dataset("/content/sign_mnist_valid.csv")

train_loader = DataLoader(dataset=train_data, batch_size=1000, shuffle=True)
validation_loader = DataLoader(dataset=valid_data, batch_size=1000, shuffle=False)

## 2. Constructing a neural network architecture

In [3]:
import torch.nn as nn

class Net(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
    self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
    self.conv3 = nn.Conv2d(32, 48, 3, padding=1)
    self.conv4 = nn.Conv2d(48, 64, 3, padding=1)
    self.conv5 = nn.Conv2d(64, 128, 3, padding=1)


    self.fc1 = nn.Linear(3*3*128, 800)
    self.fc2 = nn.Linear(800, 400)
    self.fc3 = nn.Linear(400, 24)

    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(0.5)
    self.pool = nn.MaxPool2d(2, 2)

  def forward(self, x):
    x = self.conv1(x)     # 28x28x16
    x = self.relu(x)
    x = self.pool(x)      # 14x14x16

    x = self.conv2(x)     # 14x14x32
    x = self.relu(x)
    x = self.pool(x)      # 7x7x32

    x = self.conv3(x)     # 7x7x48
    x = self.relu(x)
    x = self.pool(x)      # 3x3x48

    x = self.conv4(x)     # 3x3x64
    x = self.relu(x)
    x = self.conv5(x)     # 3x3x128

    x = x.view(-1, 3*3*128)
    x = self.fc1(x)
    x = self.relu(x)
    x = self.dropout(x)

    x = self.fc2(x)
    x = self.relu(x)
    x = self.dropout(x)

    x = self.fc3(x)

    return x

## 3. Loss function and optimization method

In [4]:
convnet = Net()
convnet.to(device)

loss_fucntion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(convnet.parameters(), lr=0.001)

## 4. Training of the neural network

In [5]:
para = []
for epoch in range(50):
  loss_val = 0
  for itr, data in enumerate(train_loader):
    optimizer.zero_grad()
    inputs, labels = data
    inputs = inputs.to(device)
    labels = labels.to(device)

    pred = convnet(inputs)
    loss = loss_fucntion(pred, labels)

    loss.backward()
    optimizer.step()

    loss_val += loss.item()

  if epoch%5 == 0:
    para.append(convnet.state_dict())

  print("Epoch:", epoch+1, " , Loss:", loss_val)

Epoch: 1  , Loss: 77.30523777008057
Epoch: 2  , Loss: 44.987425446510315
Epoch: 3  , Loss: 24.33303874731064
Epoch: 4  , Loss: 14.934046000242233
Epoch: 5  , Loss: 8.499658539891243
Epoch: 6  , Loss: 5.5991392731666565
Epoch: 7  , Loss: 3.500436209142208
Epoch: 8  , Loss: 2.6034172996878624
Epoch: 9  , Loss: 1.8052716292440891
Epoch: 10  , Loss: 1.5435235649347305
Epoch: 11  , Loss: 1.2780825719237328
Epoch: 12  , Loss: 1.0884171519428492
Epoch: 13  , Loss: 1.3481677267700434
Epoch: 14  , Loss: 0.9684214796870947
Epoch: 15  , Loss: 0.8869905341416597
Epoch: 16  , Loss: 0.6182541800662875
Epoch: 17  , Loss: 0.5467308862134814
Epoch: 18  , Loss: 0.5236349082551897
Epoch: 19  , Loss: 0.8368290374055505
Epoch: 20  , Loss: 0.7151383636519313
Epoch: 21  , Loss: 0.4568202761001885
Epoch: 22  , Loss: 0.5319659383967519
Epoch: 23  , Loss: 0.5443051503971219
Epoch: 24  , Loss: 0.9241000050678849
Epoch: 25  , Loss: 0.5165129201486707
Epoch: 26  , Loss: 0.5304880635812879
Epoch: 27  , Loss: 0.4166

## 5. Prediction and Evaluation for test set

In [6]:
def test_accuracy(net, validation_loader):
  pred_list = []
  label_list = []
  for itr, data in enumerate(validation_loader):
    inputs, labels = data

    pred_test = net(inputs)
    pred_category = torch.argmax(pred_test, dim=1)

    pred_list = pred_list + list(pred_category)
    label_list = label_list + list(labels)

  accu = np.mean(np.array(pred_list) == np.array(label_list))
  return accu

# print("Test accuracy", accu)

In [7]:
max_accu = 0
best_net = None
convnet = convnet.to('cpu')

for i, tmp_param in enumerate(para):
  convnet.load_state_dict(tmp_param)

  accu = test_accuracy(convnet, validation_loader)
  print(f"Test for model {i+1}: {accu}")

  if accu > max_accu:
    max_accu = accu
    best_net = tmp_param

convnet.load_state_dict(best_net)

Test for model 1: 0.9655605131065254
Test for model 2: 0.9683491355270496
Test for model 3: 0.9654210819854991
Test for model 4: 0.9644450641383157
Test for model 5: 0.9662576687116564
Test for model 6: 0.9655605131065254
Test for model 7: 0.9636084774121584
Test for model 8: 0.9647239263803681
Test for model 9: 0.9650027886224205
Test for model 10: 0.9665365309537088


<All keys matched successfully>

In [8]:
test_accuracy(convnet, validation_loader)

0.9680702732849972

## 보고서 내용

### 데이터셋

1. ASL 데이터를 처리하기 위한 데이터셋 클래스 정의
2. 추가적인 데이터를 위해 데이터 증강
```python
transform = transforms.Compose([transforms.Resize(32), transforms.RandomCrop(28)])
```

### 네트워크 모델

1. CNN 모델 → 필터: 5개, fcn layer: 3개
2. 활성화 함수: ReLU
3. pooling layer: 2x2
4. fcn layer에 dropout 적용

### 손실함수, 최적화

1. 손실함수: CrossEntropy
2. 최적화: Adam

### 훈련 과정
1. 5 epoch 마다 parameter 저장 → 최선의 파라미터 선택