In [None]:
import pandas as pd

In [None]:
# Load the training labels and preview the first five rows.
df_labels = pd.read_csv(
    filepath_or_buffer='../input/dog-breed-identification/labels.csv',
)
df_labels.head(n=5)

In [None]:
df_labels.shape
# 10,222 training files in total

In [None]:
# Distribution of breeds in the training labels (number of rows per breed).
df_labels['breed'].value_counts()

In [None]:
# Collect the distinct breed values in sorted order so we can check that
# all 120 breeds are present in the training labels.
label_name = pd.unique(df_labels['breed'].sort_values())

In [None]:
# Number of distinct breeds found in the training labels; 120 are expected.
len(label_name)

In [None]:
# Preview the labels while `breed` still holds the original values (before label encoding).
df_labels.head()

In [None]:
# Label-encode the `breed` column.
# The encoder learns the set of classes and replaces every value with its
# integer code in a single fit-and-transform step.

from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoded_breed = encoder.fit_transform(df_labels['breed'])
df_labels['breed'] = encoded_breed


In [None]:
# Preview after encoding: `breed` now holds the integer codes written by encoder.transform.
df_labels.head()

In [None]:
# Show the learned classes, then decode codes 1 and 2 back to their
# original labels as a sanity check of the mapping.
for decoded in (encoder.classes_, encoder.inverse_transform([1, 2])):
    print(decoded)

In [None]:
# Append the '.jpg' extension so that each id matches its image file name,
# e.g. 000bec180eb18c7604dcecc8fe0dba07.jpg.
# Ids that already end in '.jpg' are left untouched, so re-running this cell
# does not produce '.jpg.jpg' names that no longer match any file.
has_extension = df_labels['id'].str.endswith('.jpg')
df_labels['id'] = df_labels['id'].where(has_extension, df_labels['id'] + '.jpg')
df_labels.head()

In [None]:
# Strategy: CNN + data augmentation, feeding images via flow_from_dataframe.

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Options shared by both generators: pixel rescaling and the 80/20 split.
common_options = {
    'rescale': 1/255.,
    'validation_split': 0.2,
}

# Random augmentations applied to the training images only.
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.1,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}

train_datagen = ImageDataGenerator(**common_options, **augmentation_options)
val_datagen = ImageDataGenerator(**common_options)


In [None]:
# flow_from_dataframe raises
#   TypeError: If class_mode="categorical",
#   y_col="breed" column values must be type string, list or tuple.
# when the label column is numeric, so convert every encoded label to str.
# [Exercise]

breed_as_text = df_labels['breed'].map(str)
df_labels['breed'] = breed_as_text
df_labels.info()

In [None]:
# Directory passed as `directory` to flow_from_dataframe for the training images.
train_dir = '../input/dog-breed-identification/train'

In [None]:
# Where to read from, the source column, the target column, the target's
# class_mode and the batch size are identical for both generators; only the
# `subset` differs.
flow_options = dict(
    dataframe=df_labels,
    directory=train_dir,
    x_col='id',                 # file name
    y_col='breed',              # class
    target_size=(150, 150),
    batch_size=128,
    class_mode='categorical',   # e.g. cats & dogs would use 'binary'
    seed=7,
)

train_generator = train_datagen.flow_from_dataframe(subset='training', **flow_options)

val_generator = val_datagen.flow_from_dataframe(subset='validation', **flow_options)

In [None]:
# Re-inspect the frame fed to the generators: `id` carries the '.jpg' suffix
# and `breed` is the encoded label stored as a string.
df_labels.head()

0. Sequential 객체 생성
1. conv layer ( filter 32, kernel size (3,3), activation 'relu' , input_shape = ???? )
2. pooling layer ( pool_size ( 2,2 ))
3. conv layer ( filter 64, kernel size (3,3), activation 'relu')
4. pooling layer ( pool_size ( 2,2 ))
5. conv layer ( filter 128, kernel size (3,3), activation 'relu')
6. pooling layer ( pool_size ( 2,2 ))
7. conv layer ( filter 128, kernel size (3,3), activation 'relu')
8. pooling layer ( pool_size ( 2,2 ))
9. flatten layer
10. Dropout (0.5)
11. Dense layer 출력 뉴런 수 512개, 활성화 함수 'relu'
12. Dense layer 출력 뉴런 수 ?????, 활성화 함수 '????'

In [None]:
from tensorflow.keras import layers, models, optimizers

# Four Conv2D + MaxPooling2D stages (32, 64, 128, 128 filters) on 150x150 RGB
# input, followed by Flatten, Dropout(0.5), a 512-unit ReLU layer and a
# 120-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(120, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Compile for multi-class classification with one-hot targets
# (the generators use class_mode='categorical').
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Train for 3 epochs. The step counts are intentionally not passed: both
# generators know their own length, so Keras runs every batch (including the
# final, smaller one). Passing `len(generator) - 1` silently dropped one
# batch of training data and one batch of validation data per epoch.
history = model.fit(
    train_generator,
    epochs=3,
    validation_data=val_generator,
)

In [None]:
## Build the generator for the test images

base_dir = '../input/dog-breed-identification'

# Test images are only rescaled; no augmentation is applied.
test_datagen = ImageDataGenerator(
    rescale=1/255.
)

# `classes=['test']` makes the generator read images from base_dir/test only.
# `shuffle=False` keeps batches in the order of `test_generator.filenames`, so
# prediction rows can be matched to image ids; the default (shuffle=True)
# would return predictions in a random order.
# `class_mode=None` yields image batches only, since test images have no labels.
test_generator = test_datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=128,
    classes=['test'],
    class_mode=None,
    shuffle=False,
)

In [None]:
# Predict class probabilities for every test batch.
# `batch_size` must not be given when the input is a generator: the generator
# already produces batches (128 images each), and `len(test_generator) - 1` is
# a number of batches, not a batch size.
preds = model.predict(test_generator)

In [None]:
# Load the sample submission as the template to fill in, and preview it.
df_submission = pd.read_csv(
    filepath_or_buffer="../input/dog-breed-identification/sample_submission.csv",
)
df_submission.head(n=5)

In [None]:
# Shape of the prediction array; the second dimension should be 120,
# matching the 120-unit softmax output layer.
preds.shape

In [None]:
# NOTE: by this point `breed` has been label-encoded and cast to str, so this
# yields the encoded labels as strings in lexicographic order
# ('0', '1', '10', '100', ...), not the breed names.
label_name = df_labels['breed'].sort_values().unique()

In [None]:
# Write each breed's predicted probability into the submission column of the
# same breed name.
#
# `df_labels['breed']` holds label-encoded values cast to strings, so it cannot
# supply the column names. Instead:
#   * `train_generator.class_indices` maps each encoded label string to the
#     model output column it was trained on (e.g. '0' -> 0, '10' -> 2), and
#   * `encoder.inverse_transform` turns the encoded label back into the breed
#     name used as a column header (e.g. 0 -> affenpinscher).
class_to_num = train_generator.class_indices
encoded_labels = [int(label) for label in class_to_num]
breed_names = encoder.inverse_transform(encoded_labels)

for name, column_index in zip(breed_names, class_to_num.values()):
    df_submission[name] = preds[:, column_index]

In [None]:
# Preview the submission after the prediction columns have been written.
df_submission.head()

In [None]:
# Save the submission without the DataFrame index column.
df_submission.to_csv(path_or_buf='submission_dog_cnn.csv', index=False)