In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15338015631132111906
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15134713447
locality {
  bus_id: 1
  links {
  }
}
incarnation: 17889367360850721277
physical_device_desc: "device: 0, name: Quadro RTX 5000, pci bus id: 0000:65:00.0, compute capability: 7.5"
]


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import cv2
import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_df = pd.read_csv('./train.csv')
print("size of train_df:", len(train_df))

# 데이터 노이즈 수정
train_df.loc[train_df['id'] == 3896, 'artist'] = 'Titian'
train_df.loc[train_df['id'] == 3986, 'artist'] = 'Alfred Sisley'

DATAGEN_TRAIN = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
    validation_split=0.10
) # Train / Validation

TRAIN_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(
    dataframe=train_df,
    x_col='img_path',
    y_col='artist',
    target_size=(244, 244),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    subset="training"
)

VALID_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(
    dataframe=train_df,
    x_col='img_path',
    y_col='artist',
    target_size=(244, 244),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    subset="validation"
)

size of train_df: 5911
Found 5320 validated image filenames belonging to 50 classes.
Found 591 validated image filenames belonging to 50 classes.


In [4]:
# ResNet50 모델 로드
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(244, 244, 3), pooling=None, input_tensor=None)

# 새로운 Fully Connected Layer 추가
x = base_model.output
x = Flatten()(x)
x = Dropout(0.2)(x) # 드롭아웃 추가
predictions = Dense(50, activation='softmax')(x)

#전체 모델 구성
model = Model(inputs=base_model.input, outputs=predictions)

# 모델 컴파일
model.compile(optimizer=Adam(lr=3e-4), loss='categorical_crossentropy', metrics=['accuracy'])

In [5]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
CP = ModelCheckpoint(filepath='googlenet_model/' + 'GOOGLENET-Sigmoid-{epoch:03d}-{accuracy:.4f}-{val_loss:.4f}.hdf5',
                     monitor='val_loss', verbose=1, save_best_only=True, mode='min'
)

LR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1, min_lr=0.00005)
CALLBACK = [CP, LR]

In [6]:
# 모델 학습
history = model.fit(TRAIN_GENERATOR,
                    epochs=50,
                    validation_data=VALID_GENERATOR,
                    callbacks=CALLBACK,
)

Train for 167 steps, validate for 19 steps
Epoch 1/50
Epoch 00001: val_loss improved from inf to 3.23453, saving model to googlenet_model/GOOGLENET-Sigmoid-001-0.2628-3.2345.hdf5
Epoch 2/50
Epoch 00002: val_loss did not improve from 3.23453
Epoch 3/50
Epoch 00003: val_loss did not improve from 3.23453
Epoch 4/50
Epoch 00004: val_loss improved from 3.23453 to 2.29246, saving model to googlenet_model/GOOGLENET-Sigmoid-004-0.6720-2.2925.hdf5
Epoch 5/50
Epoch 00005: val_loss did not improve from 2.29246
Epoch 6/50
Epoch 00006: val_loss improved from 2.29246 to 1.88361, saving model to googlenet_model/GOOGLENET-Sigmoid-006-0.7671-1.8836.hdf5
Epoch 7/50
Epoch 00007: val_loss did not improve from 1.88361
Epoch 8/50
Epoch 00008: val_loss did not improve from 1.88361
Epoch 9/50
Epoch 00009: val_loss did not improve from 1.88361
Epoch 10/50
Epoch 00010: val_loss did not improve from 1.88361
Epoch 11/50
Epoch 00011: val_loss did not improve from 1.88361

Epoch 00011: ReduceLROnPlateau reducing le

Epoch 28/50
Epoch 00028: val_loss did not improve from 1.67243
Epoch 29/50
Epoch 00029: val_loss did not improve from 1.67243
Epoch 30/50
Epoch 00030: val_loss did not improve from 1.67243
Epoch 31/50
Epoch 00031: val_loss did not improve from 1.67243
Epoch 32/50
Epoch 00032: val_loss did not improve from 1.67243
Epoch 33/50
Epoch 00033: val_loss did not improve from 1.67243
Epoch 34/50
  1/167 [..............................] - ETA: 3:26:36

KeyError: 'accuracy'

In [7]:
# 손실 그래프
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Training Loss vs Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(['loss', 'val_loss'])
plt.show()

NameError: name 'history' is not defined

In [None]:
# 정확도 그래프
plt.plot([x * 100 for x in history.history['accuracy']])
plt.plot([x * 100 for x in history.history['val_accuracy']])
plt.title('Training Accuracy vs Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend(['accuracy', 'val_accuracy'])
plt.show()

In [None]:
# 최신 체크포인트 파일 찾기
checkpoint_files = glob.glob('googlenet_model/*.hdf5')
latest_checkpoint = max(checkpoint_files, key=os.path.getctime)

# 모델에 최신 가중치 로드
model.load_weights(latest_checkpoint)

# Validation 데이터에 대한 정확도 확인
val_loss, val_accuracy = model.evaluate(VALID_GENERATOR)
print('Validation Loss:', round(val_loss, 6))
print('Validation Accuracy:', round(val_accuracy, 3))

In [None]:
test_df = pd.read_csv('test.csv')

TEST_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(
    dataframe=test_df,
    x_col='img_path',
    y_col='id',
    target_size=(244, 244),
    class_mode='raw',
    batch_size=32,
    shuffle=False,
)

TEST_GENERATOR.reset()

# 최신 체크포인트 파일 찾기
checkpoint_files = glob.glob('googlenet_model/*.hdf5')
latest_checkpoint = max(checkpoint_files, key=os.path.getctime)

print(latest_checkpoint)

# 모델에 최신 가중치 로드
model.load_weights(latest_checkpoint)

test_prediction = model.predict(TEST_GENERATOR, verbose=1)
display(test_prediction)

In [None]:
test_prediction = np.argmax(test_prediction, axis = 1)
test_prediction

In [None]:
test_prediction_df = pd.DataFrame(test_prediction, columns = ['artist'])
test_prediction_df

In [None]:
label_encoder = LabelEncoder()
artist_df = train_df.copy()
artist_le = label_encoder.fit_transform(artist_df['artist'].values)
artist_df['num'] = artist_le
artist_df = artist_df.drop('id', axis=1)
artist_df = artist_df.drop('img_path', axis=1)
test_prediction_dic = test_prediction_df.to_dict()
test_prediction_values = test_prediction_dic['artist'].values()
test_prediction_list = list(test_prediction_values)
artist_df.set_index('num', inplace=True)
artist_df = artist_df.sort_index()
artist_info_dic = artist_df['artist'].to_dict()

In [None]:
artist_name = []
for i in test_prediction_list:
    artist_name.append(artist_info_dic[i])
artist_name[:5]

In [None]:
submission_df = pd.read_csv("./sample_submission.csv")
submission_df = submission_df.drop('artist', axis=1)
submission_df['artist'] = artist_name
submission_df.head()

In [None]:
submission_df.to_csv("googlenet_submission.csv", index=False)