## 라이브러리 불러오기

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset files stored on Drive can be accessed through regular paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/dacon
!unzip -qq "/content/drive/MyDrive/dacon/open.zip"

/content/drive/MyDrive/dacon


In [None]:
from glob import glob
import tensorflow as tf
import pandas as pd
import random 
import cv2
from os.path import exists
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import models
from tensorflow.keras.layers import *
from tensorflow.keras.applications.efficientnet import EfficientNetB0, EfficientNetB3, preprocess_input

In [None]:
# Global configuration shared by the preprocessing and training cells.
IMG_SIZE = 224  # side length (pixels) images are resized to; also the model input size
EPOCHS = 10  # number of epochs passed to model.fit
BATCH_SIZE = 32  # batch size used by the train / validation / test datasets

## 데이터 불러오기


In [None]:
# Load the competition metadata tables from Drive.
# train.csv holds an id, an img_path and the label columns A..J (see the
# preview of `train` below); test.csv is read the same way and its img_path
# column is used later to locate the test images.
df = pd.read_csv('/content/drive/MyDrive/dacon/train.csv')
df_test = pd.read_csv('/content/drive/MyDrive/dacon/test.csv')

In [None]:
# Shuffle the rows with a fixed seed and split them 80/20 into train and
# validation sets, so the split is reproducible across kernel restarts.
#
# sort_index() first puts the rows back into the order they were loaded in
# (the index still carries the original row labels after a shuffle). This
# keeps the cell idempotent: re-running it on the already-shuffled `df`
# yields the same permutation instead of a new one.
SPLIT_SEED = 42
df = df.sort_index().sample(frac=1, random_state=SPLIT_SEED)
train_len = int(len(df) * 0.8)

# reset_index gives both splits a fresh 0..n-1 index, which the TFRecord
# writer cells rely on when they look rows up by position.
train = df.iloc[:train_len].reset_index(drop=True)
val = df.iloc[train_len:].reset_index(drop=True)

In [None]:
# Display the training split as a quick visual check of its columns and size.
train

Unnamed: 0,id,img_path,A,B,C,D,E,F,G,H,I,J
0,TRAIN_18293,./train/TRAIN_18293.jpg,0,1,0,1,1,0,1,1,1,0
1,TRAIN_05778,./train/TRAIN_05778.jpg,0,1,1,1,1,0,0,0,0,0
2,TRAIN_13304,./train/TRAIN_13304.jpg,0,0,1,1,0,1,0,0,1,1
3,TRAIN_21574,./train/TRAIN_21574.jpg,0,1,1,1,0,1,1,1,1,0
4,TRAIN_32207,./train/TRAIN_32207.jpg,0,1,0,1,0,0,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
26390,TRAIN_15651,./train/TRAIN_15651.jpg,1,1,0,0,1,1,1,0,0,1
26391,TRAIN_21946,./train/TRAIN_21946.jpg,0,1,0,1,1,0,1,1,1,1
26392,TRAIN_03212,./train/TRAIN_03212.jpg,0,0,1,0,0,1,0,0,1,0
26393,TRAIN_21663,./train/TRAIN_21663.jpg,0,1,1,1,0,1,0,1,1,1


In [None]:
# Working directory: base folder that is prefixed to the image paths and
# used as the location of the generated TFRecord files.
cwd = '/content/drive/MyDrive/dacon/'

In [None]:
# Build absolute image paths by prefixing the working directory to the
# relative `img_path` column of each table. The results are pandas Series
# aligned row-for-row with train / val / df_test.
path_train = cwd + train['img_path']
path_valid = cwd + val['img_path']
path_test = cwd + df_test['img_path']

In [None]:
# Display the resulting training image paths to confirm the prefix was applied.
path_train

0        /content/drive/MyDrive/dacon/./train/TRAIN_120...
1        /content/drive/MyDrive/dacon/./train/TRAIN_102...
2        /content/drive/MyDrive/dacon/./train/TRAIN_173...
3        /content/drive/MyDrive/dacon/./train/TRAIN_125...
4        /content/drive/MyDrive/dacon/./train/TRAIN_305...
                               ...                        
26390    /content/drive/MyDrive/dacon/./train/TRAIN_132...
26391    /content/drive/MyDrive/dacon/./train/TRAIN_082...
26392    /content/drive/MyDrive/dacon/./train/TRAIN_219...
26393    /content/drive/MyDrive/dacon/./train/TRAIN_082...
26394    /content/drive/MyDrive/dacon/./train/TRAIN_034...
Name: img_path, Length: 26395, dtype: object

## TFRecord 생성


In [None]:
def _bytes_feature(value):
    """Wrap a bytes / string value in a ``tf.train.Feature`` (bytes_list).

    If ``value`` is an eager tensor it is first converted with ``.numpy()``,
    because ``BytesList`` cannot unpack the string held by an EagerTensor.
    """
    eager_tensor_type = type(tf.constant(0))
    if isinstance(value, eager_tensor_type):
        value = value.numpy()
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

def _float_feature(value):
    """Wrap a single float / double in a ``tf.train.Feature`` (float_list)."""
    payload = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=payload)

def _int64_feature(value):
    """Wrap a single bool / enum / int / uint in a ``tf.train.Feature`` (int64_list)."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

In [None]:
## Write the training split to a TFRecord file.
# Each record stores the resized RGB image as raw uint8 bytes ('image') and
# the label columns A..J as raw uint8 bytes ('classes').

# Pull all label rows out once instead of doing a .loc lookup per image.
# Row i of this array matches path_train[i] because `train` has a 0..n-1 index.
train_labels = train.loc[:, 'A':'J'].to_numpy(dtype=np.uint8)

# The context manager closes (and flushes) the file even if an image fails.
with tf.io.TFRecordWriter(cwd + 'image_train.tfr') as writer_image:
    for path_, label_row in tqdm(zip(path_train, train_labels), total=len(path_train)):

        src = cv2.imread(path_)
        if src is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f'cv2.imread could not read image: {path_}')

        # OpenCV loads images as BGR; store them as RGB at the model input size.
        dst = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
        dst = cv2.resize(dst, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
        bimage = dst.tobytes()

        classes = label_row.tobytes()

        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    'image': _bytes_feature(bimage),
                    'classes': _bytes_feature(classes)
                }
            )
        )

        writer_image.write(example.SerializeToString())

26395it [03:03, 143.96it/s]


In [None]:
# Write the validation split to a TFRecord file, using the same record layout
# as the training file: raw uint8 RGB bytes under 'image' and the label
# columns A..J as raw uint8 bytes under 'classes'.

# Pull all label rows out once instead of doing a .loc lookup per image.
# Row i of this array matches path_valid[i] because `val` has a 0..n-1 index.
valid_labels = val.loc[:, 'A':'J'].to_numpy(dtype=np.uint8)

# The context manager closes (and flushes) the file even if an image fails.
with tf.io.TFRecordWriter(cwd + 'image_valid.tfr') as writer_image_valid:
    for path_, label_row in tqdm(zip(path_valid, valid_labels), total=len(path_valid)):

        src = cv2.imread(path_)
        if src is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f'cv2.imread could not read image: {path_}')

        # OpenCV loads images as BGR; store them as RGB at the model input size.
        dst = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
        dst = cv2.resize(dst, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
        bimage = dst.tobytes()

        classes = label_row.tobytes()

        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    'image': _bytes_feature(bimage),
                    'classes': _bytes_feature(classes)
                }
            )
        )

        writer_image_valid.write(example.SerializeToString())

6599it [00:44, 146.84it/s]


In [None]:
# Write the test images to a TFRecord file. Test records carry only the
# 'image' feature (raw uint8 RGB bytes); there are no labels.

# The context manager closes (and flushes) the file even if an image fails.
with tf.io.TFRecordWriter(cwd + 'image_test.tfr') as writer_image_test:
    for path_ in tqdm(path_test, total=len(path_test)):

        src = cv2.imread(path_)
        if src is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f'cv2.imread could not read image: {path_}')

        # OpenCV loads images as BGR; store them as RGB at the model input size.
        dst = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
        dst = cv2.resize(dst, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)

        bimage = dst.tobytes()

        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    'image': _bytes_feature(bimage)
                }
            )
        )

        writer_image_test.write(example.SerializeToString())

1460it [00:10, 138.93it/s]


In [None]:
## Function that parses one TFRecord entry back into usable tensors
def _parse_function(tfrecord_serialized):
    """Decode a serialized labelled example into ``(image, classes)``.

    The record is expected to hold two byte-string features, 'image' and
    'classes'. Both are decoded as raw uint8; the image is reshaped to
    (IMG_SIZE, IMG_SIZE, 3) and the labels are squeezed. Pixel values are
    returned as uint8 without any scaling.
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'classes': tf.io.FixedLenFeature([], tf.string),
    }
    record = tf.io.parse_single_example(tfrecord_serialized, feature_spec)

    pixels = tf.io.decode_raw(record['image'], tf.uint8)
    labels = tf.io.decode_raw(record['classes'], tf.uint8)

    image = tf.reshape(pixels, [IMG_SIZE, IMG_SIZE, 3])
    classes = tf.squeeze(labels)
    return image, classes

def _parse_function2(tfrecord_serialized):
    """Decode a serialized unlabelled example into an image tensor.

    The record is expected to hold a single byte-string feature, 'image',
    which is decoded as raw uint8 and reshaped to (IMG_SIZE, IMG_SIZE, 3).
    Pixel values are returned as uint8 without any scaling.
    """
    feature_spec = {'image': tf.io.FixedLenFeature([], tf.string)}
    record = tf.io.parse_single_example(tfrecord_serialized, feature_spec)

    pixels = tf.io.decode_raw(record['image'], tf.uint8)
    return tf.reshape(pixels, [IMG_SIZE, IMG_SIZE, 3])

In [None]:
## Build the training dataset
train_dataset = tf.data.TFRecordDataset(cwd + 'image_train.tfr')
train_dataset = train_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Shuffle individual examples (buffer of 2000), then batch, and prefetch
# last so that whole batches are prepared in the background while the model
# trains. Prefetching before batching would only buffer single examples and
# leave batch assembly on the critical path.
train_dataset = train_dataset.shuffle(2000).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
## Build the validation dataset
val_dataset = tf.data.TFRecordDataset(cwd + 'image_valid.tfr')
val_dataset = val_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# No shuffling for validation. Batch first and prefetch last so that whole
# batches (not single examples) are prepared in the background.
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
## Build the test dataset (images only, no labels; order is preserved)
test_records = tf.data.TFRecordDataset(cwd + 'image_test.tfr')
test_dataset = (
    test_records
    .map(_parse_function2, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
)

## Models

In [None]:
# Callbacks
# Stop training once val_loss has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen. The patience must stay below
# the epoch budget (EPOCHS is 10 in this notebook): with patience equal to
# the number of epochs the callback can never trigger.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Halve the learning rate after 3 epochs without val_loss improvement, down
# to a floor of 1e-6. Its patience is shorter than early stopping's, so the
# lower learning rate gets a chance to help before training is stopped.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                                 patience=3, min_lr=1e-06)


In [None]:
# Multi-label classifier: ImageNet-pretrained EfficientNetB0 backbone (without
# its classification head) followed by global average pooling and a small
# dense head that ends in 10 independent sigmoid outputs, one per label.
inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
backbone = EfficientNetB0(weights='imagenet', include_top=False)
# Do NOT pass training=True here. Hard-coding it keeps the backbone in
# training mode during validation and model.predict as well (batch statistics
# in BatchNormalization, stochastic layers active), which makes evaluation
# and predictions noisy and dependent on batch composition. Leaving the
# argument out lets Keras select the mode: training in fit(), inference in
# evaluate() / predict().
x = backbone(inputs)
x = GlobalAveragePooling2D()(x)
x = Dense(1000, activation='relu')(x)
x = Dropout(0.4, seed=1234)(x)
x = Dense(128, activation='relu')(x)
output = Dense(10, activation='sigmoid')(x)

model = models.Model(inputs, output)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 efficientnetb0 (Functional)  (None, None, None, 1280)  4049571  
                                                                 
 global_average_pooling2d_1   (None, 1280)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_3 (Dense)             (None, 1000)              1281000   
                                                                 
 dropout_1 (Dropout)         (None, 1000)              0         
                                                                 
 dense_4 (Dense)             (None, 128)               128128    
                                                           

In [None]:
# Compile for multi-label classification: the model already outputs sigmoid
# probabilities, so binary cross-entropy is computed with from_logits=False.
bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=adam, loss=bce_loss, metrics=['binary_accuracy'])

# Train with per-epoch validation; the callbacks handle learning-rate
# reduction and early stopping based on val_loss.
hist = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Prediction

In [None]:
# Run the trained model over the test dataset; the model's last layer is a
# 10-unit sigmoid, so each row of the result holds 10 values in [0, 1].
pred_ = model.predict(test_dataset)



In [None]:
# Fill every column after the first of the sample submission with the
# predictions thresholded at 0.5 (stored as 0/1 integers) and save the result.
df_subm = pd.read_csv('./sample_submission.csv')
binary_preds = (pred_ > 0.5).astype(int)
df_subm.iloc[:, 1:] = binary_preds
df_subm.to_csv('./subm.csv', index=False)