In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Dropout,BatchNormalization,Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Show which files and folders the competition's input directory contains.
os.listdir('../input/imet-2020-fgvc7')

In [None]:
# Load the three competition tables in one pass: the training annotations,
# the labels table and the sample submission (its ids are later used to
# enumerate the test images).
train_data, labels_data, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/imet-2020-fgvc7/train.csv',
        '../input/imet-2020-fgvc7/labels.csv',
        '../input/imet-2020-fgvc7/sample_submission.csv',
    )
)

In [None]:
# Print column dtypes, non-null counts and memory usage of the training table.
train_data.info()

In [None]:
# Preview the first rows of the training table.
# `head` must be *called*; the bare attribute only displays the bound-method repr.
train_data.head()

In [None]:
# Show the column names available in the training table.
train_data.columns

In [None]:
# Display the first three training rows.
train_data.head(3)

In [None]:
# Preview the first rows of the sample submission.
# `head` must be *called*; the bare attribute only displays the bound-method repr.
sample_submission.head()

In [None]:
# Print column dtypes and non-null counts of the sample submission.
# `info` must be *called*; the bare attribute only displays the bound-method repr.
sample_submission.info()

In [None]:
# The ids are used as image file names further down, so they need a '.png'
# suffix. Append it only where it is missing: a plain `+=` would produce
# 'name.png.png' if this cell is executed more than once.
train_data['id'] = train_data['id'].where(
    train_data['id'].str.endswith('.png'),
    train_data['id'] + '.png',
)
sample_submission['id'] = sample_submission['id'].where(
    sample_submission['id'].str.endswith('.png'),
    sample_submission['id'] + '.png',
)

In [None]:
# Turn the whitespace-separated attribute string into a list of label ids.
# Values that are no longer strings (i.e. this cell already ran) are passed
# through unchanged so a re-run does not raise AttributeError on a list.
train_data['attribute_ids'] = train_data['attribute_ids'].apply(
    lambda ids: ids.split() if isinstance(ids, str) else ids
)

In [None]:
# Check the result of the preprocessing above on the first five rows.
train_data.head(5)

**Image Preprocessing**
`ImageDataGenerator` provides an easy way to augment your images.

In [None]:
# Training generator: rescales pixel values by 1/255 and applies random
# shear, zoom and horizontal flips. `validation_split` reserves 20% of the
# rows so that `subset='training'` / `subset='validation'` can be requested.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Test generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Number of images per batch; passed to every flow_from_dataframe call below.
batch_size=32


**`flow_from_dataframe` is a method of the `ImageDataGenerator` class that reads image file names and their target values from a DataFrame and yields batches of augmented images directly.**

In [None]:
# Training iterator: file names come from the 'id' column and the label lists
# from 'attribute_ids'; only the 'training' subset of the split is used.
train_ds = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory="/kaggle/input/imet-2020-fgvc7/train",
    x_col='id',
    y_col='attribute_ids',
    subset='training',
    class_mode='categorical',
    target_size=(128,128),
    batch_size=batch_size,
    shuffle=True,
    seed=123,
)


In [None]:
# Validation images should be scored as-is, so use a generator that only
# rescales instead of the augmenting `train_datagen`.
# `validation_split` MUST equal the value given to `train_datagen` (0.2):
# Keras slices the dataframe by row position, so an identical fraction keeps
# this subset disjoint from the rows used for training.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

# Validation iterator over the 'validation' subset of the same dataframe.
# The original call spelled the argument `shuflle`, which was silently
# swallowed by **kwargs; it is spelled correctly here.
valid_ds = valid_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory="/kaggle/input/imet-2020-fgvc7/train",
    x_col='id',
    y_col='attribute_ids',
    class_mode='categorical',
    subset='validation',
    seed=123,
    shuffle=True,
    batch_size=batch_size,
    target_size=(128,128),
)

In [None]:
# Test iterator: no labels (class_mode=None) and no shuffling, so the order
# of the predictions matches the order of `test_ds.filenames`.
test_ds = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory="/kaggle/input/imet-2020-fgvc7/test",
    x_col='id',
    class_mode=None,
    shuffle=False,
    target_size=(128,128),
    batch_size=batch_size,
)

In [None]:
# Pull a single batch from the training iterator and report the array shapes
# of its images and labels as a sanity check.
image_batch, labels_batch = next(iter(train_ds))
print(image_batch.shape)
print(labels_batch.shape)

In [None]:
# Must match the `target_size` (plus 3 colour channels) used by the generators.
input_shape = (128, 128, 3)

# One output unit per label known to the training iterator, so the head always
# has the same width as the multi-hot target vectors the iterator yields.
num_classes = len(train_ds.class_indices)

# Small CNN: three conv/pool stages followed by a dense classifier head.
model = Sequential([
    Conv2D(16, 3, padding='same', input_shape=input_shape, activation='relu'),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Dropout(0.2),
    # NOTE(review): this Dense sits before Flatten, so it is applied along the
    # channel axis at every spatial position — confirm the ordering is intended.
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    # Sigmoid gives an independent probability per label (multi-label output).
    Dense(num_classes, activation='sigmoid'),
])

# Each image can carry several attributes, so the targets are multi-hot.
# Independent sigmoid outputs must be trained with binary cross-entropy;
# categorical cross-entropy assumes the outputs form a single distribution.
# The 'accuracy' metric name is kept so the history keys stay unchanged.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

**Training the model**

In [None]:
# Inspect which iterator class flow_from_dataframe returned.
type(train_ds)

In [None]:
# Train for a fixed number of epochs. Each epoch draws 200 training batches
# and evaluates on 80 validation batches rather than the full subsets.
epochs = 10
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=epochs,
    steps_per_epoch=200,
    validation_steps=80,
    callbacks=None,
    use_multiprocessing=False,
    verbose=1,
)

In [None]:
# Per-epoch metrics recorded by model.fit.
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x axis from what was actually recorded rather than from the
# global `epochs`, so the plot still works if training stopped early or
# `epochs` was changed after fitting.
epochs_range = range(len(accuracy))

# Two side-by-side panels: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, accuracy, label='Training Accuracy')
acc_ax.plot(epochs_range, val_accuracy, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and validation accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and validation loss')

plt.show()






In [None]:
# Run the trained model over the (unshuffled) test iterator to get one row of
# per-label sigmoid scores for each test image.
predictions=model.predict(test_ds,verbose=1)

In [None]:
# A label is predicted for an image when its score exceeds the 0.2 cut-off.
pred_boolean = predictions > 0.2

# `class_indices` maps label -> output column; invert it so that a column
# index can be translated back to its label.
labels = {column: label for label, column in train_ds.class_indices.items()}

# For every image, join the labels of all flagged columns (in column order)
# into one space-separated string.
result = [
    " ".join(labels[column] for column, flagged in enumerate(row) if flagged)
    for row in pred_boolean
]

# File names in the same order as the predictions (the iterator is unshuffled).
imagenames = test_ds.filenames

submission = pd.DataFrame({"id": imagenames, "attribute_ids": result})



In [None]:
# Preview the submission while the ids still carry the file extension.
submission.head(5)

In [None]:
# Drop the file extension so the ids match the submission format again.
# Splitting once from the right removes only the final '.ext'; splitting on
# every dot and keeping the first piece would truncate an id that itself
# contains a dot. Ids without any dot are returned unchanged.
submission['id'] = submission['id'].str.rsplit('.', n=1).str[0]

In [None]:
# Confirm that the ids no longer carry the file extension.
submission.head(5)

In [None]:
# Write the submission file to the working directory without the index column.
submission.to_csv('submission.csv',index=False)