In [1]:
# Show the kernel's current working directory; the relative paths used by
# later cells (e.g. "train.csv") are resolved against it.
pwd

'C:\\Windows\\system32'

In [2]:
# Print the compute devices TensorFlow can see, to confirm that the GPU is
# visible before training.
# NOTE(review): `tensorflow.python.*` looks like an internal module path;
# `tf.config.list_physical_devices()` is presumably the public alternative --
# confirm before switching.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14660691197799822053
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 6904558400
locality {
  bus_id: 1
  links {
  }
}
incarnation: 1601745002740763326
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:2b:00.0, compute capability: 8.6"
]


In [2]:
import pandas as pd
import tensorflow as tf
import matplotlib.style as style
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG19
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

df = pd.read_csv("train.csv")

# Fix known label noise FIRST. train_test_split returns copies of the rows, so
# corrections applied to `df` after the split would never reach
# X_train / X_val or the artist lookup built below.
df.loc[df['id'] == 3896, 'artist'] = 'Titian'
df.loc[df['id'] == 3986, 'artist'] = 'Alfred Sisley'

# X: image rows (full dataframe), y: artist label
# A fixed random_state makes the split reproducible across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(
    df, df['artist'].values, test_size=0.2, random_state=42)
print("Number of posters for training: ", len(X_train))
print("Number of posters for validation: ", len(X_val))

# Build the lookup {class number -> artist name}.
# `artist_df` and `artist_train` were used here without ever being defined in
# the notebook (hidden kernel state). They are reconstructed explicitly:
# NOTE(review): this assumes `artist_train` was the LabelEncoder encoding of
# the artist column (sklearn `preprocessing` is imported but otherwise unused)
# -- confirm against the original intent.
artist_train = preprocessing.LabelEncoder().fit_transform(df['artist'].values)

# Keep only the label columns, keyed and sorted by class number.
# `df` itself is not modified (drop/assign return new frames).
artist_df = df.drop(columns=['id', 'img_path']).assign(num=artist_train)
artist_df = artist_df.set_index('num').sort_index()

# Rows sharing a class number carry the same artist name, so the duplicate
# index entries collapse to one name per class number.
# NOTE(review): presumably this alphabetical numbering matches the class order
# Keras infers in flow_from_dataframe -- verify against
# TRAIN_GENERATOR.class_indices before relying on it for decoding predictions.
artist_test_dic = artist_df['artist'].to_dict()

# Load the VGG19 convolutional base with ImageNet weights and no classifier top
base_model = VGG19(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(244, 244, 3),
    pooling=None,
)

# New classification head: global average pooling followed by a 50-way softmax
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(units=50, activation='softmax')(x)

# Assemble the full model from the base input to the new head
model = Model(inputs=base_model.input, outputs=predictions)


# The string literal below is disabled code (layer freezing). Because it is not
# executed, every layer of the pretrained base stays trainable.
"""
# 기존 모델 레이어 동결 (삭제하고 컴파일 해보기)
for layer in base_model.layers:
    layer.trainable = False
"""


LearningRate = 1e-3

# Compile the model with Nesterov-momentum SGD.
# Momentum values to experiment with: 0.9, 0.95, 0.99
model.compile(
    optimizer=SGD(learning_rate=LearningRate, momentum=0.95, nesterov=True),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# Create the training callbacks.
# Checkpoint: write the model to disk only when val_loss reaches a new minimum.
# NOTE(review): the file prefix says "VGG16-Sigmoid" although the model built
# in this notebook is VGG19 with a softmax head -- consider renaming.
CP = ModelCheckpoint(
    filepath='model/' +
             'VGG16-Sigmoid-{epoch:03d}-{loss:.4f}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 3 epochs without val_loss improvement,
# never going below min_lr.
LR = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.00005,
    verbose=1,
)

CALLBACK = [CP, LR]

Number of posters for training:  4728
Number of posters for validation:  1183


In [4]:
# Training-time generator: rescale to [0, 1] plus random shift/zoom augmentation.
# validation_split reserves 10% of the dataframe rows for validation.
DATAGEN_TRAIN = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    data_format="channels_last",
    validation_split=0.10
    )

# Validation-time generator: rescale ONLY. Validation images must not be
# randomly augmented, otherwise val_loss (which drives ModelCheckpoint and
# ReduceLROnPlateau) becomes noisy. It uses the same validation_split as
# DATAGEN_TRAIN so that, given the same dataframe, subset="validation" selects
# exactly the rows that subset="training" leaves out.
DATAGEN_VALID = ImageDataGenerator(
    rescale=1./255,
    data_format="channels_last",
    validation_split=0.10
    )

# Generator instance (train). shuffle=True re-shuffles the samples every epoch
# so each batch does not contain the same images in every epoch.
TRAIN_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(
                                        dataframe = X_train, x_col='img_path', y_col='artist',
                                        target_size=(244, 244),
                                        class_mode='categorical',
                                        batch_size=32, shuffle=True,
                                        subset = "training")

# Generator instance (validation): rescale only, fixed order.
VALID_GENERATOR = DATAGEN_VALID.flow_from_dataframe(
                                        dataframe = X_train, x_col='img_path', y_col='artist',
                                        target_size=(244, 244),
                                        class_mode='categorical',
                                        batch_size=32, shuffle=False,
                                        subset = "validation")

Found 4256 validated image filenames belonging to 50 classes.
Found 472 validated image filenames belonging to 50 classes.


In [5]:
# Train for up to 30 epochs; validation metrics drive the checkpoint and
# learning-rate callbacks.
history = model.fit(
    TRAIN_GENERATOR,
    validation_data=VALID_GENERATOR,
    epochs=30,
    shuffle=True,
    callbacks=CALLBACK,
)

Epoch 1/30

Epoch 00001: val_loss improved from 2.96339 to 2.17913, saving model to model\VGG16-Sigmoid-001-2.2355-2.1791.hdf5
Epoch 2/30

Epoch 00002: val_loss improved from 2.17913 to 2.10349, saving model to model\VGG16-Sigmoid-002-1.8785-2.1035.hdf5
Epoch 3/30

Epoch 00003: val_loss improved from 2.10349 to 1.91792, saving model to model\VGG16-Sigmoid-003-1.6152-1.9179.hdf5
Epoch 4/30

Epoch 00004: val_loss did not improve from 1.91792
Epoch 5/30

Epoch 00005: val_loss did not improve from 1.91792
Epoch 6/30

Epoch 00006: val_loss did not improve from 1.91792

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/30

Epoch 00007: val_loss improved from 1.91792 to 1.47507, saving model to model\VGG16-Sigmoid-007-0.7128-1.4751.hdf5
Epoch 8/30

Epoch 00008: val_loss did not improve from 1.47507
Epoch 9/30

Epoch 00009: val_loss did not improve from 1.47507
Epoch 10/30

Epoch 00010: val_loss did not improve from 1.47507

Epoch 00010: ReduceLROnPlateau 

In [6]:
import numpy as np
import pandas as pd

# Test-set metadata (image paths and ids)
X_test = pd.read_csv("test.csv")

# Test images are only rescaled -- no augmentation
DATAGEN_TEST = ImageDataGenerator(rescale=1./255, data_format="channels_last")

# class_mode='raw' passes the 'id' column through unchanged as the target;
# shuffle=False keeps the generator in dataframe order.
TEST_GENERATOR = DATAGEN_TEST.flow_from_dataframe(
    dataframe=X_test,
    x_col='img_path',
    y_col='id',
    class_mode='raw',
    target_size=(244, 244),
    batch_size=32,
    shuffle=False,
)

# Start iteration from the first batch
TEST_GENERATOR.reset()



Found 12670 validated image filenames.


In [7]:
# Per-class scores for every test image
TEST_Prediction = model.predict(TEST_GENERATOR, verbose=1)

# Highest-scoring class index per image, saved as a one-column CSV
artist_num = TEST_Prediction.argmax(axis=1)
num_list = pd.DataFrame({'artist': artist_num})
num_list.to_csv("ansTVGG16_split_50.csv", index=False)



In [8]:
# Decode predicted class indices into artist names and write the submission.
#
# The index -> name lookup is taken from the training generator itself:
# `class_indices` is the {artist name: index} mapping used to one-hot encode
# the training targets, so inverting it decodes the model output exactly.
# (The previous lookup, `artist_test_dic`, depended on variables that are never
# defined in this notebook.)
index_to_artist = {index: name for name, index in TRAIN_GENERATOR.class_indices.items()}

# Most likely class index for each test image
artist_num = np.argmax(TEST_Prediction, axis=1)
num_list = pd.DataFrame(artist_num, columns=['artist'])

# Raises KeyError if the model predicts an index with no known artist
artist_name = [index_to_artist[i] for i in num_list['artist']]

# Fill the sample submission's 'artist' column with the predicted names.
# Assigning a list of the wrong length raises ValueError, which guards against
# a row-count mismatch between predictions and the submission template.
last_ans = pd.read_csv("sample_submission.csv")
last_ans['artist'] = artist_name

last_ans.to_csv("VGG16_50.csv", index=False)