In [None]:
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Conv2D, MaxPooling2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.models import Model
from IPython.display import clear_output
from tensorflow.keras import optimizers
from keras_preprocessing import image
from keras.models import Sequential
import matplotlib.pyplot as plt  
from tqdm.notebook import tqdm
from pandas import read_csv
from os import walk
import numpy as np
import pandas as pd
import cv2

# Notebook-wide pandas display settings: do not truncate rows, columns, the
# overall width, or the text inside individual cells when rendering frames.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None means "no limit". The legacy -1 sentinel was deprecated in pandas 1.0
# and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

# Clear everything this cell has printed so far (e.g. import-time messages).
clear_output()

In [None]:
# Load the three competition tables in one pass: training labels, the
# attribute-id lookup table, and the sample submission (list of test ids).
train_data, labels_data, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/imet-2020-fgvc7/train.csv',
        '/kaggle/input/imet-2020-fgvc7/labels.csv',
        '/kaggle/input/imet-2020-fgvc7/sample_submission.csv',
    )
)

In [None]:
def _with_png_ext(name):
    """Return `name` with a '.png' suffix, adding it only when it is missing."""
    return name if name.endswith('.png') else name + '.png'


# Turn the bare image ids into file names so the generators can locate the
# images on disk. The guard keeps this cell idempotent: re-running it no
# longer produces 'xxx.png.png'.
train_data['id'] = train_data['id'].apply(_with_png_ext)
sample_submission['id'] = sample_submission['id'].apply(_with_png_ext)

In [None]:
# !unzip /content/drive/MyDrive/ML/imet-2020-fgvc7.zip -d /content/drive/MyDrive/ML/Data/
# clear_output()

In [None]:
# !ls /content/drive/MyDrive/ML/Data/train | wc -l

In [None]:
# Turn the whitespace-separated attribute string into a list of label tokens
# (one list per image), which is what the multi-label generator expects.
# Values that are already lists are left untouched so the cell can be re-run
# without raising AttributeError.
train_data['attribute_ids'] = train_data['attribute_ids'].apply(
    lambda ids: ids.split() if isinstance(ids, str) else ids
)

In [None]:
# Scale pixel values from [0, 255] to [0, 1] and reserve 20% of the rows for
# validation. Without `validation_split`, the generators built with
# subset='validation' select an empty slice and subset='training' silently
# covers 100% of the data.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

In [None]:
# Images per batch, and the side length (pixels) every image is resized to.
batch_size, size = 64, 32
# Height x width x RGB channels, as consumed by the first conv layer.
input_shape = (size, size, 3)

In [None]:
# Shuffled training batches: file names come from the `id` column and the
# multi-label targets from the `attribute_ids` lists.
train_ds = datagen.flow_from_dataframe(
    dataframe=train_data,
    directory="/kaggle/input/imet-2020-fgvc7/train",
    x_col='id',
    y_col='attribute_ids',
    subset='training',
    class_mode='categorical',
    target_size=(size, size),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Validation batches drawn from the same dataframe and image directory as the
# training generator, selected through subset='validation'.
# NOTE(review): this slice is only non-empty when `datagen` was created with a
# non-zero `validation_split`.
valid_ds = datagen.flow_from_dataframe(
    dataframe=train_data,
    directory="/kaggle/input/imet-2020-fgvc7/train",
    x_col='id',
    y_col='attribute_ids',
    subset='validation',
    class_mode='categorical',
    target_size=(size, size),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Unlabelled test batches (class_mode=None yields images only). Shuffling is
# disabled so predictions stay aligned with the generator's file order.
test_ds = datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory="/kaggle/input/imet-2020-fgvc7/test",
    x_col='id',
    class_mode=None,
    target_size=(size, size),
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Sanity check: pull a single batch and show the image / label tensor shapes.
image_batch, labels_batch = next(iter(train_ds))
print(image_batch.shape, labels_batch.shape, sep='\n')

In [None]:
# Width of the output layer = number of distinct attribute labels the training
# generator found, instead of a hard-coded count that can drift from the data.
num_classes = len(train_ds.class_indices)

model_2 = Sequential([
    # Convolutional feature extractor: three conv + max-pool stages.
    Conv2D(16, 3, padding='same', input_shape=input_shape, activation='relu'),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),

    # Flatten BEFORE the dense head. Previously Dense(512) came first and was
    # therefore applied independently at every spatial position of the 4-D
    # feature map, after which Flatten inflated the input of the next layer.
    Flatten(),
    Dropout(0.2),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    # One independent sigmoid per attribute: this is a multi-label problem.
    Dense(num_classes, activation='sigmoid'),
])

base_learning_rate = 0.001
adam = Adam(learning_rate=base_learning_rate)

# Binary cross-entropy scores every attribute as its own yes/no decision.
model_2.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

model_2.summary()

Old training run (10 epochs × 100 steps per epoch, kept for reference)

In [None]:
# epochs=10
# history=model_2.fit(train_ds,
#                   epochs = epochs,
#                   steps_per_epoch = 100,
#                   validation_data = valid_ds,
#                   validation_steps = 100,
#                   verbose = 1,
#                   callbacks = None,
#                   use_multiprocessing = True)

Latest training run (5 epochs × 1000 steps per epoch)

In [None]:
epochs = 5

# Each epoch trains on 1000 batches and is then scored on 100 validation
# batches; the returned History object keeps the per-epoch metrics.
history = model_2.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=epochs,
    steps_per_epoch=1000,
    validation_steps=100,
    callbacks=None,
    verbose=1,
    use_multiprocessing=True,
)

In [None]:
# Per-epoch metrics recorded by fit(). The validation entries only exist when
# validation actually ran, so they are looked up defensively.
metrics_log = history.history
accuracy = metrics_log['accuracy']
loss = metrics_log['loss']
val_accuracy = metrics_log.get('val_accuracy')
val_loss = metrics_log.get('val_loss')

# Use the number of epochs actually recorded rather than the configured count,
# so the plot still works if training stopped early.
epochs_range = range(len(accuracy))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, accuracy, label='Training Accuracy')
if val_accuracy:
    ax_acc.plot(range(len(val_accuracy)), val_accuracy, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and validation accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
if val_loss:
    ax_loss.plot(range(len(val_loss)), val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and validation loss')

fig.tight_layout()
plt.show()

In [None]:
# Score every test image; each row holds one sigmoid output per attribute.
predictions = model_2.predict(x=test_ds, verbose=1)

In [None]:
# An attribute is predicted for an image when its sigmoid score exceeds 0.2.
pred_boolean = predictions > 0.2

# class_indices maps label -> output column; invert it so that column
# positions can be decoded back into attribute ids.
labels = {column: label for label, column in train_ds.class_indices.items()}

# One space-separated string of predicted attribute ids per test image
# (an empty string when nothing clears the threshold).
result = [
    " ".join(labels[column] for column, hit in enumerate(row) if hit)
    for row in pred_boolean
]

# File names in the order the (unshuffled) test generator produced them.
imagenames = test_ds.filenames

submission = pd.DataFrame({
    # Strip the file extension that was added for loading images, so the ids
    # written to disk match the bare-id format of the sample submission.
    "id": [name.rsplit('.', 1)[0] for name in imagenames],
    "attribute_ids": result,
})
submission.to_csv('submission.csv', index=False)
submission.head()

In [None]:
# Rebuild the submission frame, keep only the part of each file name before
# the first '.', and overwrite submission.csv with the result.
submission = pd.DataFrame({"id": imagenames, "attribute_ids": result})
submission['id'] = submission['id'].str.split('.').str[0]
submission.to_csv('submission.csv', index=False)
submission.head()

ใช้ F1 score: `from sklearn.metrics import f1_score` แล้วเรียก `f1_score(y_true, y_pred, average='macro')`
 1. สร้าง list ที่เก็บคำตอบจริง ๆทั้งหมดของทุก data ไว้
 2. สร้าง list ที่เก็บคำ predict label จากข้อมูลใน sample submission

   2.1 ใช้ for ดึงชื่อรูปจากไฟล์ sample submission 

   2.2 นำชื่อรูปไปดึงรูปใน Dataset

   2.3 นำข้อมูลรูปที่ได้ไป predict