In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2756921396493926592
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15116533760
locality {
  bus_id: 1
  links {
  }
}
incarnation: 5288381725030214843
physical_device_desc: "device: 0, name: Quadro RTX 5000, pci bus id: 0000:65:00.0, compute capability: 7.5"
]


In [3]:
pwd

'C:\\Windows\\system32'

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.preprocessing import LabelEncoder


def cutout(image, label, probability=0.5):
    if np.random.rand() < probability:
        h, w = image.shape[:2]
        size = np.random.randint(w // 2)
        x1 = np.random.randint(w)
        y1 = np.random.randint(h)
        x2 = np.clip(x1 + size, 0, w)
        y2 = np.clip(y1 + size, 0, h)
        image[y1:y2, x1:x2, :] = np.random.rand(y2 - y1, x2 - x1, 3)
    return image, label

df = pd.read_csv("train.csv")
label_encoder = LabelEncoder()
artist_df = df.copy()
artist_train = label_encoder.fit_transform(df['artist'].values)
print(artist_df.head(10))
print(artist_train[:5])

artist_df['num'] = artist_train
artist_df = artist_df.drop('id', axis=1)
artist_df = artist_df.drop('img_path', axis=1)
display(artist_df.head())

artist_df.set_index('num', inplace=True)
artist_df = artist_df.sort_index()
display(artist_df)

artist_test_dic = artist_df['artist'].to_dict()
print(artist_test_dic)

# 데이터 노이즈 수정
df.loc[df['id'] == 3896, 'artist'] = 'Titian'
df.loc[df['id'] == 3986, 'artist'] = 'Alfred Sisley'

X_train, X_val, y_train, y_val = train_test_split(df, df['artist'].values, test_size=0.1)
print("Number of posters for training: ", len(X_train))
print("Number of posters for validation: ", len(X_val))



   id          img_path                 artist
0   0  ./train/0000.jpg        Diego Velazquez
1   1  ./train/0001.jpg       Vincent van Gogh
2   2  ./train/0002.jpg           Claude Monet
3   3  ./train/0003.jpg            Edgar Degas
4   4  ./train/0004.jpg       Hieronymus Bosch
5   5  ./train/0005.jpg  Pierre-Auguste Renoir
6   6  ./train/0006.jpg          Rene Magritte
7   7  ./train/0007.jpg          Rene Magritte
8   8  ./train/0008.jpg           Michelangelo
9   9  ./train/0009.jpg      Peter Paul Rubens
[ 9 48  7 10 24]


Unnamed: 0,artist,num
0,Diego Velazquez,9
1,Vincent van Gogh,48
2,Claude Monet,7
3,Edgar Degas,10
4,Hieronymus Bosch,24


Unnamed: 0_level_0,artist
num,Unnamed: 1_level_1
0,Albrecht Du rer
0,Albrecht Du rer
0,Albrecht Du rer
0,Albrecht Du rer
0,Albrecht Du rer
...,...
49,William Turner
49,William Turner
49,William Turner
49,William Turner


{0: 'Albrecht Du rer', 1: 'Alfred Sisley', 2: 'Amedeo Modigliani', 3: 'Andrei Rublev', 4: 'Andy Warhol', 5: 'Camille Pissarro', 6: 'Caravaggio', 7: 'Claude Monet', 8: 'Diego Rivera', 9: 'Diego Velazquez', 10: 'Edgar Degas', 11: 'Edouard Manet', 12: 'Edvard Munch', 13: 'El Greco', 14: 'Eugene Delacroix', 15: 'Francisco Goya', 16: 'Frida Kahlo', 17: 'Georges Seurat', 18: 'Giotto di Bondone', 19: 'Gustav Klimt', 20: 'Gustave Courbet', 21: 'Henri Matisse', 22: 'Henri Rousseau', 23: 'Henri de Toulouse-Lautrec', 24: 'Hieronymus Bosch', 25: 'Jackson Pollock', 26: 'Jan van Eyck', 27: 'Joan Miro', 28: 'Kazimir Malevich', 29: 'Leonardo da Vinci', 30: 'Marc Chagall', 31: 'Michelangelo', 32: 'Mikhail Vrubel', 33: 'Pablo Picasso', 34: 'Paul Cezanne', 35: 'Paul Gauguin', 36: 'Paul Klee', 37: 'Peter Paul Rubens', 38: 'Pierre-Auguste Renoir', 39: 'Piet Mondrian', 40: 'Pieter Bruegel', 41: 'Raphael', 42: 'Rembrandt', 43: 'Rene Magritte', 44: 'Salvador Dali', 45: 'Sandro Botticelli', 46: 'Titian', 47: '

In [2]:
class_weights = class_weight.compute_class_weight('balanced', classes=pd.unique(y_train), y=y_train)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(244,244,3), input_tensor=None, pooling=None)

x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(50, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

LearningRate = 1e-3

model.compile(optimizer=SGD(learning_rate=LearningRate, momentum=0.9, nesterov=True), 
              loss='categorical_crossentropy',
              metrics=['acc'])

CP = ModelCheckpoint(filepath='model/' +
                     'ResNet50-Sigmoid-{epoch:03d}-{loss:.4f}-{val_loss:.4f}.hdf5',
                     monitor='val_loss', verbose=1, save_best_only=True, mode='min')

LR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1, min_lr=0.00005)
CALLBACK = [CP, LR]


DATAGEN_TRAIN = ImageDataGenerator(
    rescale=1/255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
    validation_split=0.10,
)


TRAIN_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(
    dataframe=X_train, x_col='img_path', y_col='artist',
    target_size=(244, 244), 
    class_mode='categorical',
    batch_size=64, shuffle=True,
    subset="training",
    preprocessing_function=cutout,
)

VALID_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(
    dataframe=X_train, x_col='img_path', y_col='artist',
    target_size=(244, 244), 
    class_mode='categorical',
    batch_size=64, shuffle=True,
    subset="validation",
    preprocessing_function=cutout,
)

Found 4788 validated image filenames belonging to 50 classes.
Found 531 validated image filenames belonging to 50 classes.


In [3]:
history = model.fit(
    TRAIN_GENERATOR,
    epochs=50,
    callbacks=CALLBACK,
    shuffle=True,
    validation_data=VALID_GENERATOR,
    class_weight=class_weights_dict
)

Train for 75 steps, validate for 9 steps
Epoch 1/50
Epoch 00001: val_loss improved from inf to 5.52397, saving model to model/ResNet50-Sigmoid-001-3.2223-5.5240.hdf5
Epoch 2/50
Epoch 00002: val_loss did not improve from 5.52397
Epoch 3/50
Epoch 00003: val_loss did not improve from 5.52397
Epoch 4/50
Epoch 00004: val_loss did not improve from 5.52397

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/50
Epoch 00005: val_loss did not improve from 5.52397
Epoch 6/50
Epoch 00006: val_loss did not improve from 5.52397
Epoch 7/50
Epoch 00007: val_loss did not improve from 5.52397

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 8/50
Epoch 00008: val_loss improved from 5.52397 to 5.48965, saving model to model/ResNet50-Sigmoid-008-0.8155-5.4896.hdf5
Epoch 9/50
Epoch 00009: val_loss improved from 5.48965 to 5.30172, saving model to model/ResNet50-Sigmoid-009-0.7748-5.3017.hdf5
Epoch 10/50
Epoch 00010: val_loss improved

In [4]:
X_test = pd.read_csv("test.csv")

DATAGEN_TEST = ImageDataGenerator(
    rescale=1./255,
    data_format="channels_last"
)

TEST_GENERATOR = DATAGEN_TEST.flow_from_dataframe(
    dataframe=X_test,
    x_col='img_path',
    y_col=None,
    target_size=(244, 244),
    color_mode='rgb',
    class_mode=None,
    batch_size=64,
    shuffle=False
)

TEST_Prediction = model.predict(TEST_GENERATOR, verbose=1)

Found 12670 validated image filenames.


In [5]:
artist_num = np.argmax(TEST_Prediction, axis=1)
num_list = pd.DataFrame(artist_num, columns=['artist'])
num_list.to_csv("ansRN50.csv", index=False)
df_ans = pd.read_csv("ansRN50.csv")

#df_ans(예측한 숫자가 담긴 파일)를 받아와서 처음에 만들었던 label_encoder 숫자를 보고 숫자를 작가로 바꾼다 
artist_name = []
for i in num_list['artist']:
    artist_name.append(artist_test_dic[i])

last_ans = pd.read_csv("sample_submission.csv")

last_ans.rename(columns={'artist':'artist_name'}, inplace=True)

last_ans['artist'] = artist_name
last_ans.drop('artist_name', axis=1, inplace=True)

last_ans.to_csv("ResNet50_50.csv", index=False)