In [1]:
!nvidia-smi

zsh:1: command not found: nvidia-smi


In [3]:
!tree -d ../

[01;34m../[0m
├── [01;34mdataset[0m
│   ├── [01;34mIMAGES[0m
│   ├── [01;34moriginal_zip_file[0m
│   └── [01;34mtfrecord[0m
└── [01;34mscript[0m
    └── [01;34m__pycache__[0m

6 directories


In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import albumentations as A
import cv2
import math
import warnings
warnings.filterwarnings(action='ignore')

In [8]:
## 추후 Aguumentation을 위한 albumentations 
transforms = A.Compose([   
                            A.Normalize( mean = ( 0.485, 0.456, 0.406) , 
                                         std = (0.229, 0.224, 0.225) )
        ])

val_transforms = A.Compose([   
                            A.Normalize( mean = (  0.485, 0.456, 0.406) , 
                                         std = (0.229, 0.224, 0.225) )
        ])

def aug_fn(image ):
    data = {"image":image}
    aug_data = transforms(**data)
    aug_img = aug_data["image"]
    return aug_img

def process_data(image, label):
    aug_img = tf.numpy_function(func=aug_fn, inp=[image], Tout=tf.float32)
    return aug_img, label

In [9]:
class DataGenerator(keras.utils.Sequence):
    def __init__(self, batch_size, csv_path, fold, image_size, transforms , mode='train', shuffle=True):
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.fold = fold
        self.transforms = transforms
        self.mode = mode
        
        ## fold값과 file_name을 불러오기위한 csv
        self.df = pd.read_csv(csv_path)
        
        
        ## train set과 val set 나누기
        if self.mode == 'train':
            self.df = self.df[self.df['fold'] != self.fold]
        elif self.mode == 'val':
            self.df = self.df[self.df['fold'] == self.fold]
        
        self.on_epoch_end()
        
    def __len__(self):
        return math.ceil(len(self.df) / self.batch_size)
        
    def __getitem__(self, idx):
        strt = idx * self.batch_size
        fin = (idx + 1) * self.batch_size
        data = self.df.iloc[strt:fin]
        
        batch_x, batch_y = self.get_data(data)
        
        return np.array(batch_x), np.array(batch_y)
        
    ## csv파일을 통해 mini-batch data를 가져옴
    def get_data(self, data):
        batch_x = []
        batch_y = []
        
        for _, r in data.iterrows():
            file_name = r['image_id']
            
            image = cv2.imread(f'../dataset/IMAGES/{file_name}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (self.image_size, self.image_size)) / 255.
            
            ## transform
            image , label = process_data(image , r['target'])
            
            batch_x.append(image)
            batch_y.append(label)
        
        return batch_x, batch_y
    
    ## 한 epoch돌때마다 shuffle
    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

## Load Generator

In [10]:
csv_path = '../dataset/data_label.csv'

train_generator = DataGenerator(
    batch_size=128, 
    csv_path=csv_path,
    image_size=299,
    mode='train',
    shuffle=True, 
    fold = 3 , 
    transforms = transforms
)

val_generator = DataGenerator(
    batch_size=128, 
    csv_path=csv_path,
    image_size=299,
    mode='val',
    shuffle=False , 
    fold = 3 , 
        transforms = val_transforms
)

In [12]:
next(iter(train_generator
          ))

2022-07-14 10:37:59.872510: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


(array([[[[-2.117904 , -2.0357141, -1.8044444],
          [-2.117904 , -2.0357141, -1.8044444],
          [-2.117904 , -2.0357141, -1.8043077],
          ...,
          [-2.117904 , -2.0357141, -1.8044444],
          [-2.117904 , -2.0357141, -1.8044444],
          [-2.117904 , -2.0355768, -1.8044444]],
 
         [[-2.1178367, -2.0356455, -1.804376 ],
          [-2.1178367, -2.0356455, -1.804376 ],
          [-2.1178367, -2.0356455, -1.804376 ],
          ...,
          [-2.117904 , -2.0356455, -1.8043077],
          [-2.1178367, -2.0356455, -1.804376 ],
          [-2.117904 , -2.0356455, -1.8044444]],
 
         [[-2.117904 , -2.0355768, -1.804376 ],
          [-2.117904 , -2.0357141, -1.8044444],
          [-2.1178367, -2.0356455, -1.8044444],
          ...,
          [-2.117904 , -2.0356455, -1.804376 ],
          [-2.1178367, -2.0356455, -1.804376 ],
          [-2.117904 , -2.0357141, -1.8044444]],
 
         ...,
 
         [[-2.1178367, -2.0356455, -1.804376 ],
          [-2.1178

## Model 만들기

In [11]:
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Input , Dense , concatenate , Conv2D , MaxPooling2D , BatchNormalization , ReLU , GlobalAveragePooling2D
from tensorflow.keras.models import Model , Sequential
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Flatten , Dropout

In [8]:
class InceptionV3(tf.keras.Model):
    def __init__(self):
        super(InceptionV3 , self).__init__()
        self.model = tf.keras.applications.InceptionV3(
                                                    include_top=False,
                                                    weights="imagenet",
                                                    input_tensor=None,
                                                    input_shape=(299, 299 , 3),
                                                    pooling=None,
                                                    classes=1000,
                                                    classifier_activation="softmax",
                                                  )
        self.classifier = keras.Sequential([
                                            GlobalAveragePooling2D(),
                                            Dropout(0.5) , 
                                            Dense(2 , activation= 'softmax' , name = 'output')
                                            ])
        
    def call(self, inputs):
        x = self.model(inputs)
        x = self.classifier(x)
        return x
    
    def summary(self , input_shape):
        inputs = Input(input_shape)
        Model(inputs, self.call(inputs)).summary()

In [9]:
model = InceptionV3()
model.summary((299, 299 , 3))

2022-07-07 00:14:44.613706: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-07 00:14:44.964163: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 242 MB memory:  -> device: 0, name: GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 299, 299, 3)]     0         
                                                                 
 inception_v3 (Functional)   (None, 8, 8, 2048)        21802784  
                                                                 
 sequential (Sequential)     (None, 2)                 4098      
                                                                 
Total params: 21,806,882
Trainable params: 21,772,450
Non-trainable params: 34,432
_________________________________________________________________


# 학습 부분

In [13]:
optimizer = tf.keras.optimizers.SGD( )
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()

train_loss = tf.keras.metrics.Mean()
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

val_loss = tf.keras.metrics.Mean()
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

In [20]:
labels

array([0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

In [14]:
@tf.function
def train_step(images , labels):
    with tf.GradientTape() as tape:
        pred = model(images , training = True)
#         pred = tf.squeeze(pred)
        # 손실
        loss = loss_function(labels , pred)
    
    #미분 계산
    gradients = tape.gradient(loss , model.trainable_weights)
    
    #optimizer
    optimizer.apply_gradients(zip(gradients , model.trainable_weights) )
    
    ## loss값 계산
    train_loss(loss)
    train_accuracy(labels , pred)
    return loss

In [15]:
@tf.function
def val_step(images , labels):
    pred = model(images , training= False)
#     pred =tf.squeeze(pred)
    #손실
    loss = loss_function(labels , pred)
    
    ## loss값 계산
    val_loss(loss)
    val_accuracy(labels , pred)

In [16]:
print('start')
for epoch in range(50):
    
    for i , (images , labels) in enumerate(train_generator):
        train_step(images , labels)
        
    for images , labels in val_generator:
        val_step(images , labels)
        
    template = 'epoch: {} , loss: {:.3f} , acc: {:.3f} , val_loss: {:.3f} , vall_acc : {:.3f}'
    print(template.format(epoch+1 , train_loss.result() , train_accuracy.result() * 100 , 
                           val_loss.result() , val_accuracy.result() * 100))

start


ValueError: in user code:

    File "/tmp/ipykernel_3488443/149747007.py", line 7, in train_step  *
        loss = loss_function(labels , pred)
    File "/home/user304/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 139, in __call__  **
        losses = call_fn(y_true, y_pred)
    File "/home/user304/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 243, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/user304/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 1787, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/home/user304/anaconda3/lib/python3.9/site-packages/keras/backend.py", line 5119, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (128,) and (128, 2) are incompatible


In [51]:
print('start')
for epoch in range(50):
    
    for i , (images , labels) in enumerate(train_generator):
        train_step(images , labels)
        
    for images , labels in val_generator:
        val_step(images , labels)
        
    template = 'epoch: {} , loss: {:.3f} , acc: {:.3f} , val_loss: {:.3f} , vall_acc : {:.3f}'
    print(template.format(epoch+1 , train_loss.result() , train_accuracy.result() * 100 , 
                           val_loss.result() , val_accuracy.result() * 100))

start
epoch: 1 , loss: 0.024 , acc: 99.040 , val_loss: 0.703 , vall_acc : 80.412
epoch: 2 , loss: 0.024 , acc: 99.048 , val_loss: 0.703 , vall_acc : 80.451
epoch: 3 , loss: 0.024 , acc: 99.055 , val_loss: 0.702 , vall_acc : 80.490
epoch: 4 , loss: 0.024 , acc: 99.063 , val_loss: 0.702 , vall_acc : 80.528
epoch: 5 , loss: 0.024 , acc: 99.071 , val_loss: 0.702 , vall_acc : 80.565
epoch: 6 , loss: 0.024 , acc: 99.078 , val_loss: 0.702 , vall_acc : 80.604
epoch: 7 , loss: 0.023 , acc: 99.085 , val_loss: 0.702 , vall_acc : 80.642
epoch: 8 , loss: 0.023 , acc: 99.093 , val_loss: 0.702 , vall_acc : 80.675
epoch: 9 , loss: 0.023 , acc: 99.100 , val_loss: 0.702 , vall_acc : 80.708
epoch: 10 , loss: 0.023 , acc: 99.107 , val_loss: 0.701 , vall_acc : 80.742
epoch: 11 , loss: 0.023 , acc: 99.114 , val_loss: 0.701 , vall_acc : 80.773
epoch: 12 , loss: 0.022 , acc: 99.120 , val_loss: 0.701 , vall_acc : 80.804
epoch: 13 , loss: 0.022 , acc: 99.127 , val_loss: 0.701 , vall_acc : 80.839
epoch: 14 , los

In [52]:
pred_list = []
label_list = []
for images , labels in val_generator:
    pred = model(images , training = False)
    pred_list.extend(pred.numpy().tolist())
    label_list.extend(labels.tolist())

In [53]:
pred_list_test = np.array(pred_list).argmax(axis= 1)

In [54]:
import sklearn.metrics as metrics

In [55]:
metrics.confusion_matrix(label_list , pred_list_test  )

array([[242,  15],
       [ 37,  55]])

In [56]:
from sklearn.metrics import precision_score, recall_score, f1_score 

In [57]:
precision_score(label_list , pred_list_test)

0.7857142857142857

In [58]:
recall_score(label_list , pred_list_test)


0.5978260869565217