In [1]:
import cv2 # opencv
import numpy as np
import torch
from torch import nn
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import albumentations as A
from sklearn.metrics import classification_report

from glob import glob
import os

# 1. Dataloader

In [2]:
class OKNGDataset(Dataset):
    def __init__(
        self,
        ok_img_path,
        ng_img_path,
        train_val_split_ratio,
        img_size,
        mode
    ):
        super().__init__()
        ok_img_path_list = sorted(glob(os.path.join(ok_img_path, '*')))
        ng_img_path_list = sorted(glob(os.path.join(ng_img_path, '*')))
        
        if mode == 'train':
            ok_img_path_list = ok_img_path_list[
                :int(train_val_split_ratio * len(ok_img_path_list))
            ]
            ng_img_path_list = ng_img_path_list[
                :int(train_val_split_ratio * len(ng_img_path_list))
            ]
        elif mode == 'val':
            ok_img_path_list = ok_img_path_list[
                int(train_val_split_ratio * len(ok_img_path_list)):
            ]
            ng_img_path_list = ng_img_path_list[
                int(train_val_split_ratio * len(ng_img_path_list)):
            ]
        else:
            raise

        ok_label_list = [0] * len(ok_img_path_list)
        ng_label_list = [1] * len(ng_img_path_list)

        self.img_list = ok_img_path_list + ng_img_path_list
        self.label_list = ok_label_list + ng_label_list
        
        self.tensor_transform = T.Compose([
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        self.resize_transform = A.Resize(img_size, img_size)
        self.augmentation_transform = A.Compose([
            A.RandomRotate90(),
            A.HorizontalFlip(),
            A.VerticalFlip(),
            A.RandomBrightnessContrast(p=0.5),
        ])
        self.mode = mode

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, index):
#         print(self.img_list[index])
        img = cv2.imread(self.img_list[index])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img = self.resize_transform(image=img)['image']
        if self.mode == 'train':
            img = self.augmentation_transform(image=img)['image']
        img = self.tensor_transform(img)
        
        label = self.label_list[index]
        return img, label

In [3]:
train_dataset = OKNGDataset(
    ok_img_path='data/ok_img/',
    ng_img_path='data/ng_img/',
    train_val_split_ratio=0.8,
    img_size=256,
    mode='train'
)
len(train_dataset)

1676

In [4]:
train_dataset[0]

(tensor([[[-0.9877, -0.2342, -0.8335,  ...,  0.4679,  0.3481, -1.5357],
          [-1.2103, -0.6109, -1.2103,  ..., -0.5424, -1.2959, -0.1314],
          [-0.5767, -0.8507,  0.1083,  ...,  1.4269, -0.4054, -0.9705],
          ...,
          [-1.5528, -0.0287,  0.2282,  ..., -0.3883, -1.6727, -1.7925],
          [-1.5699, -0.2171, -0.4739,  ...,  0.5364, -1.2274, -1.8439],
          [-1.7412,  0.0569, -1.3815,  ..., -0.9534, -1.4158, -1.7240]],
 
         [[-0.8803, -0.1099, -0.7227,  ...,  0.6078,  0.4853, -1.4405],
          [-1.1078, -0.4951, -1.1078,  ..., -0.4251, -1.1954, -0.0049],
          [-0.4601, -0.7402,  0.2402,  ...,  1.5882, -0.2850, -0.8627],
          ...,
          [-1.4580,  0.1001,  0.3627,  ..., -0.2675, -1.5805, -1.7031],
          [-1.4755, -0.0924, -0.3550,  ...,  0.6779, -1.1253, -1.7556],
          [-1.6506,  0.1877, -1.2829,  ..., -0.8452, -1.3179, -1.6331]],
 
         [[-0.6541,  0.1128, -0.4973,  ...,  0.8274,  0.7054, -1.2119],
          [-0.8807, -0.2707,

In [5]:
val_dataset = OKNGDataset(
    ok_img_path='data/ok_img/',
    ng_img_path='data/ng_img/',
    train_val_split_ratio=0.8,
    img_size=256,
    mode='val'
)
len(val_dataset)

420

In [6]:
val_dataset[0]

(tensor([[[-2.0323,  0.3652, -1.6384,  ..., -1.5357, -1.3130, -1.9980],
          [-1.7925, -1.2103, -0.0458,  ..., -0.8335, -1.2788, -1.4672],
          [-0.5767,  1.4269,  0.0056,  ...,  0.8447,  1.5639, -1.4329],
          ...,
          [ 0.0398,  1.8550,  1.4954,  ...,  1.1529, -1.4158, -1.7240],
          [-0.5253,  0.6734, -0.1314,  ...,  0.6734, -1.8782, -1.1589],
          [-0.4054,  0.6734,  0.5536,  ...,  1.4783, -1.6727, -2.0494]],
 
         [[-1.9482,  0.5028, -1.5455,  ..., -1.4405, -1.2129, -1.9132],
          [-1.7031, -1.1078,  0.0826,  ..., -0.7227, -1.1779, -1.3704],
          [-0.4601,  1.5882,  0.1352,  ...,  0.9930,  1.7283, -1.3354],
          ...,
          [ 0.1702,  2.0259,  1.6583,  ...,  1.3081, -1.3179, -1.6331],
          [-0.4076,  0.8179, -0.0049,  ...,  0.8179, -1.7906, -1.0553],
          [-0.2850,  0.8179,  0.6954,  ...,  1.6408, -1.5805, -1.9657]],
 
         [[-1.7173,  0.7228, -1.3164,  ..., -1.2119, -0.9853, -1.6824],
          [-1.4733, -0.8807,

In [7]:
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=2, num_workers=0)

In [8]:
for data, label in train_loader:
    print(data, data.shape)
    print(label, label.shape)
    break

tensor([[[[-1.4672, -0.5938, -0.5253,  ...,  0.1597, -1.4500, -2.0494],
          [-1.5870, -1.3987,  0.3994,  ..., -0.1314, -0.2856, -2.1179],
          [-1.2445,  0.0741,  0.7248,  ...,  0.7248, -0.0458, -2.1179],
          ...,
          [-2.1179, -0.0287, -0.4054,  ..., -1.2788, -1.6555, -1.7925],
          [-2.1179, -0.5767, -1.3130,  ..., -1.0048, -1.7583, -2.1179],
          [-2.1179, -1.7925, -1.0219,  ..., -0.2856, -2.1179, -2.1179]],

         [[-1.3704, -0.4776, -0.4076,  ...,  0.2927, -1.3529, -1.9657],
          [-1.4930, -1.3004,  0.5378,  ..., -0.0049, -0.1625, -2.0357],
          [-1.1429,  0.2052,  0.8704,  ...,  0.8704,  0.0826, -2.0357],
          ...,
          [-2.0357,  0.1001, -0.2850,  ..., -1.1779, -1.5630, -1.7031],
          [-2.0357, -0.4601, -1.2129,  ..., -0.8978, -1.6681, -2.0357],
          [-2.0357, -1.7031, -0.9153,  ..., -0.1625, -2.0357, -2.0357]],

         [[-1.1421, -0.2532, -0.1835,  ...,  0.5136, -1.1247, -1.7347],
          [-1.2641, -1.0724,  

# 2. Model

In [9]:
#     Conv2D(filters=8, kernel_size=3, activation='relu', padding='same', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
#     Conv2D(filters=8, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=32, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=32, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Flatten(),
#     Dense(units=256, activation='relu'),
#     Dropout(rate=0.3),
#     Dense(units=1, activation='sigmoid')

In [10]:
class ClassificationModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_1_1 = nn.Conv2d(3, 8, 3)
        self.relu_1_1 = nn.ReLU()
        self.conv_1_2 = nn.Conv2d(8, 8, 3)
        self.relu_1_2 = nn.ReLU()
        self.bn = nn.BatchNorm2d(8)
        self.max_pool_1 = nn.MaxPool2d(2, 2)
        
        self.flatten = nn.Flatten()
        self.linear_1 = nn.Linear(127008, 256)
        self.drop_out = nn.Dropout(0.3)
        self.linear_2 = nn.Linear(256, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv_1_1(x)
        x = self.relu_1_1(x)
        x = self.conv_1_2(x)
        x = self.relu_1_2(x)
        x = self.bn(x)
        x = self.max_pool_1(x)

        x = self.flatten(x)
        x = self.linear_1(x)
        x = self.drop_out(x)
        x = self.linear_2(x)
        x = self.sigmoid(x)

        return x

In [11]:
model = ClassificationModel()

In [12]:
for data, label in train_loader:
    print(model(data))
    break

tensor([[0.6622],
        [0.6335]], grad_fn=<SigmoidBackward0>)


# 3. Training pipeline

In [13]:
loss_func = nn.BCELoss()

In [14]:
optimizer = SGD(params=model.parameters(), lr=0.001)

In [15]:
# TRAINING
for data, label in train_loader:
    
    # Switch model into training mode
    model.train()

    # Free all gradient in model
    # gradient = 0
    optimizer.zero_grad()

    # Forward model
    prediction = model(data)

    # Calculate loss
    loss_value = loss_func(prediction, label.unsqueeze(1).float())
    print(loss_value)

    # Backward
    # gradient != 0
    loss_value.backward()
    
    # Optimize - Update weights
    optimizer.step()
    
    break

tensor(0.4965, grad_fn=<BinaryCrossEntropyBackward0>)


In [16]:
# VALIDATION

prediction_list, label_list = [], []
with torch.no_grad():
    for data, label in val_loader:
        # Switch model into evaluation mode
        model.eval()

        # Forward model
        prediction = model(data)
        
        # Calculate loss
        loss_value = loss_func(prediction, label.unsqueeze(1).float())
        print(loss_value)
        
        # Prepare predictions and labels for metric
        prediction_np = prediction.numpy()
        prediction_np[prediction_np >= 0.5] = 1
        prediction_np[prediction_np < 0.5] = 0

        prediction_list.append(prediction_np)
        label_list.append(label.numpy())
        
        break

tensor(2.4294)


In [17]:
# Calculate metric

all_prediction = np.concatenate(prediction_list)
all_prediction.shape

(2, 1)

In [18]:
all_label = np.concatenate(label_list)
all_label.shape

(2,)

In [19]:
print(classification_report(all_prediction, all_label))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       0.0
         1.0       0.00      0.00      0.00       2.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
