In [None]:
import matplotlib.pyplot as plt
import matplotlib.style as style
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
%cd drive/MyDrive/Colab\ Notebooks

df = pd.read_csv("train.csv")

X_train, X_val, y_train, y_val = train_test_split(df, df['artist'].values, test_size=0.2)
print("Number of posters for training: ", len(X_train))
print("Number of posters for validation: ", len(X_val))

[Errno 2] No such file or directory: 'drive/MyDrive/Colab Notebooks'
/content/drive/MyDrive/Colab Notebooks
Number of posters for training:  4728
Number of posters for validation:  1183


In [None]:
# Load InceptionV3 (GoogLeNet family) with ImageNet weights and without its
# original classifier head.
# NOTE(review): the generators in this notebook rescale pixels to [0, 1], while
# Keras documents the InceptionV3 ImageNet weights as expecting inputs in
# [-1, 1] (inception_v3.preprocess_input) - consider aligning the two.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(244, 244, 3))

# New classification head. Global average pooling collapses each of the
# backbone's feature maps to a single value before the classifier; flattening
# the full spatial map instead gives the dense layer tens of thousands of
# inputs per image, which is far too many weights for a few thousand samples.
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
predictions = Dense(50, activation='softmax')(x)  # one output per artist class

# Assemble the full model: pretrained backbone + new head.
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every pretrained layer so that only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile after freezing so the trainable flags take effect.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks.
# Save a checkpoint whenever val_loss reaches a new minimum. The 'Sigmoid' tag
# in the file name is only a label; the head above uses softmax.
CP = ModelCheckpoint(filepath='model/' +
                     'InceptionV3-Sigmoid-{epoch:03d}-{loss:.4f}-{val_loss:.4f}.hdf5',
                     monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 5e-5.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1, min_lr=0.00005)
CALLBACK = [CP, LR]

In [None]:
# Augmenting generator used for the training subset only.
DATAGEN_TRAIN = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
    validation_split=0.10) # Train / Validation

# Validation must be measured on un-augmented images, otherwise val_loss (which
# drives checkpointing and the LR schedule) is computed on randomly distorted
# inputs. This generator only rescales; it uses the same validation_split on
# the same dataframe so the "validation" subset covers the same rows that the
# "training" subset leaves out.
DATAGEN_VALID = ImageDataGenerator(
    rescale=1./255,
    data_format="channels_last",
    validation_split=0.10)

# Arguments shared by both iterators, so the two cannot drift apart.
FLOW_KWARGS = dict(
    dataframe=X_train, x_col='img_path', y_col='artist',
    target_size=(244, 244),
    class_mode='categorical',
    batch_size=32, shuffle=True)

# Generator instance for training (augmented).
TRAIN_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(subset="training", **FLOW_KWARGS)

# Generator instance for validation (rescale only).
VALID_GENERATOR = DATAGEN_VALID.flow_from_dataframe(subset="validation", **FLOW_KWARGS)

Found 4256 validated image filenames belonging to 50 classes.
Found 472 validated image filenames belonging to 50 classes.


In [None]:
# Train for up to 20 epochs on the training generator, evaluating on the
# validation generator after each epoch; the callbacks handle checkpointing
# and learning-rate reduction. The returned History object is kept in `history`.
history = model.fit(
    TRAIN_GENERATOR,
    validation_data=VALID_GENERATOR,
    epochs=20,
    shuffle=True,
    callbacks=CALLBACK,
)

Epoch 1/20
Epoch 1: val_loss improved from inf to 14.36413, saving model to model/InceptionV3-Sigmoid-001-26.1936-14.3641.hdf5
Epoch 2/20
Epoch 2: val_loss improved from 14.36413 to 13.03530, saving model to model/InceptionV3-Sigmoid-002-15.1497-13.0353.hdf5
Epoch 3/20
Epoch 3: val_loss did not improve from 13.03530
Epoch 4/20
Epoch 4: val_loss did not improve from 13.03530
Epoch 5/20
Epoch 5: val_loss did not improve from 13.03530
Epoch 6/20
Epoch 6: val_loss did not improve from 13.03530
Epoch 7/20
Epoch 7: val_loss did not improve from 13.03530

Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/20
Epoch 8: val_loss did not improve from 13.03530
Epoch 9/20
Epoch 9: val_loss did not improve from 13.03530
Epoch 10/20
Epoch 10: val_loss did not improve from 13.03530
Epoch 11/20
Epoch 11: val_loss did not improve from 13.03530
Epoch 12/20
Epoch 12: val_loss did not improve from 13.03530

Epoch 12: ReduceLROnPlateau reducing learning rate to 0.00025000001

In [None]:
# Unlabelled test table; 'img_path' points at the image files.
X_test = pd.read_csv("test.csv")

# Test images get the same [0, 1] rescale as training, with no augmentation.
DATAGEN_TEST = ImageDataGenerator(
    rescale=1./255,
    data_format="channels_last")

# class_mode=None makes the iterator yield image batches only (no targets),
# which is what predict() needs. shuffle=False keeps the prediction order
# identical to the row order of test.csv.
TEST_GENERATOR = DATAGEN_TEST.flow_from_dataframe(
                                        dataframe=X_test, x_col='img_path',
                                        target_size=(244, 244), class_mode=None,
                                        batch_size=32, shuffle=False)

# Model.predict accepts generators directly; predict_generator is deprecated.
TEST_Prediction = model.predict(TEST_GENERATOR, verbose=1)

# Most probable class index per image, saved as a single 'artist' column.
artist_num = np.argmax(TEST_Prediction, axis=1)
num_list = pd.DataFrame(artist_num, columns=['artist'])
num_list.to_csv("ansTgooglenet_split_32.csv", index=False)

Found 12670 validated image filenames.


  TEST_Prediction = model.predict_generator(TEST_GENERATOR, verbose=1)




In [None]:
# Predicted class indices (one row per test image) written by the inference cell.
pred_df = pd.read_csv("ansTgooglenet_split_32.csv")
ans_num = pred_df['artist'].tolist()

# Decode indices with the mapping the training generator actually used.
# class_indices maps label -> output index, so it is inverted here. Relying on
# the row order of a separate file (e.g. artists_info.csv) is only correct if
# that file happens to be ordered exactly like the generator's classes.
# Requires TRAIN_GENERATOR from the generator cell to be present in the kernel.
index_to_artist = {idx: name for name, idx in TRAIN_GENERATOR.class_indices.items()}

# A KeyError here means a predicted index has no known label.
artist_name = [index_to_artist[i] for i in ans_num]

# Replace the placeholder 'artist' column of the submission template with the
# decoded labels; a length mismatch with the template raises on assignment.
last_ans = pd.read_csv("sample_submission.csv")
last_ans = last_ans.drop(columns='artist')
last_ans['artist'] = artist_name

last_ans.to_csv("answer_googlenet_split_32.csv", index=False)