In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import pandas as pd

In [2]:
# Load the submission template; its image_name column is used further down
# to decide which test images get predictions.
sample_submission = pd.read_csv(
    '../input/planets-dataset/planet/planet/sample_submission.csv'
)
sample_submission.head(n=2)

In [3]:
# Read the training labels, forcing every column to be parsed as a string.
train_file = pd.read_csv(
    '../input/planets-dataset/planet/planet/train_classes.csv',
    dtype=str,
)
train_file.head()

In [4]:
from skimage import io
%matplotlib inline

In [5]:
# Read a single training image from disk and show the shape of its array.
sample_image = io.imread(
    '../input/planets-dataset/planet/planet/train-jpg/train_20.jpg'
)
sample_image.shape

In [6]:
# Render the sample image that was read with io.imread.
plt.imshow(sample_image)

In [7]:
# Show the label row that belongs to the sample image.
train_file.loc[train_file['image_name'] == 'train_20']

In [8]:
unique_labels = set()
def append_labels(tags):
    """Add every whitespace-separated tag in ``tags`` to the global ``unique_labels`` set.

    Parameters
    ----------
    tags : str
        Tag string of a single row, tags separated by whitespace.
    """
    unique_labels.update(tags.split())

train_df = train_file.copy()
# apply() is used only for the helper's side effect; its return value is ignored.
train_df['tags'].apply(append_labels)
# Sort before freezing into a list. Iteration order of a set of strings can
# differ between interpreter sessions (string hash randomisation), and this
# order later defines which model output column corresponds to which label.
# A fixed order keeps saved weights and decoded predictions consistent across
# kernel restarts.
unique_labels = sorted(unique_labels)
print(unique_labels)

In [9]:
# Sanity check: no image name may appear on more than one row.
assert train_df['image_name'].is_unique

In [10]:
# Add one 0/1 indicator column per label, named after the label itself.
for tag in unique_labels:
    train_df[tag] = train_df['tags'].apply(
        lambda tag_string: int(tag in tag_string.split())
    )
train_df.head()

In [11]:
# Append the '.jpg' extension to every image name. Names that already carry
# the extension are left untouched, so re-running this cell cannot turn
# 'name.jpg' into 'name.jpg.jpg'.
train_df['image_name'] = train_df['image_name'].apply(
    lambda name: name if name.endswith('.jpg') else '{}.jpg'.format(name)
)
train_df.head()

In [12]:
# Every column from position 2 onward is treated as a label column.
# NOTE(review): assumes the first two columns are image_name and tags - confirm.
labels = train_df.columns[2:].tolist()
labels

In [13]:
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator

In [14]:
# Generator that multiplies pixel values by 1/255 and reserves 20% of the
# rows for the 'validation' subset.
image_gen = ImageDataGenerator(
    rescale=1 / 255,
    validation_split=0.20,
)

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [16]:
def fbeta(ytrue, ypred, beta = 2, epsilon = 1e-4):
    """Row-wise F-beta score for multi-label predictions.

    Predictions are binarised at a 0.5 threshold, then true/false positives
    and false negatives are counted along axis 1, giving one score per row.

    Parameters
    ----------
    ytrue : tensor-like
        Ground-truth indicators; cast to float32.
    ypred : tensor-like
        Predicted scores; values above 0.5 count as positive.
    beta : number
        Weight of recall relative to precision.
    epsilon : float
        Small constant added to each denominator to avoid division by zero.

    Returns
    -------
    Tensor with one F-beta value per row.
    """
    targets = tf.cast(ytrue, tf.float32)
    scores = tf.cast(ypred, tf.float32)
    hard_preds = tf.cast(tf.math.greater(scores, 0.5), tf.float32)

    tp = tf.reduce_sum(targets * hard_preds, axis=1)
    fp = tf.reduce_sum(hard_preds, axis=1) - tp
    fn = tf.reduce_sum(targets, axis=1) - tp

    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)

    weight = beta ** 2
    numerator = (1 + weight) * precision * recall
    denominator = weight * precision + recall + epsilon
    return numerator / denominator

In [17]:
def multi_label_acc(ytrue, ypred, epsilon = 1e-4):
    """Row-wise accuracy over all label slots of a multi-label prediction.

    Predictions are binarised at 0.5. For each row the number of correctly
    predicted slots (true positives plus true negatives) is divided by the
    total of all four confusion counts plus ``epsilon``.

    Parameters
    ----------
    ytrue : tensor-like
        Ground-truth indicators; cast to float32.
    ypred : tensor-like
        Predicted scores; values above 0.5 count as positive.
    epsilon : float
        Small constant added to the denominator.

    Returns
    -------
    Tensor with one accuracy value per row.
    """
    targets = tf.cast(ytrue, tf.float32)
    scores = tf.cast(ypred, tf.float32)
    hard_preds = tf.cast(tf.math.greater(scores, 0.5), tf.float32)

    tp = tf.reduce_sum(targets * hard_preds, axis=1)
    fp = tf.reduce_sum(hard_preds, axis=1) - tp
    fn = tf.reduce_sum(targets, axis=1) - tp

    # Negatives are the logical complement of the boolean-cast indicators.
    target_absent = tf.cast(tf.logical_not(tf.cast(targets, tf.bool)), tf.float32)
    pred_absent = tf.cast(tf.logical_not(tf.cast(hard_preds, tf.bool)), tf.float32)
    tn = tf.reduce_sum(target_absent * pred_absent, axis=1)

    correct = tp + tn
    return correct / (correct + fp + fn + epsilon)

In [18]:
def build_model(input_shape = (128, 128, 3), n_classes = 17, learning_rate = 1e-2):
    """Build and compile the multi-label image classifier.

    The network is a single 3x3 convolution with 128 filters, flattened and
    fed into a sigmoid dense layer with one unit per label. It is compiled
    with binary cross-entropy and reports ``multi_label_acc`` and ``fbeta``.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input image; defaults to the original (128, 128, 3).
    n_classes : int
        Number of output units; defaults to the original 17.
    learning_rate : float
        Adam learning rate; defaults to the original 1e-2.

    Returns
    -------
    The compiled Sequential model.
    """
    model = Sequential()
    # NOTE(review): the convolution has no activation argument, so it is
    # linear - confirm this is intended.
    model.add(Conv2D(filters = 128, kernel_size = 3, input_shape = input_shape))
    model.add(Flatten())
    model.add(Dense(n_classes, activation = 'sigmoid'))
    # `learning_rate` is the supported argument name; `lr` is a deprecated
    # alias that newer Keras releases reject.
    opt = Adam(learning_rate = learning_rate)
    model.compile(loss = 'binary_crossentropy', optimizer = opt, metrics = [multi_label_acc, fbeta])

    return model

In [19]:
# Checkpoint callback: monitors 'val_fbeta' in 'max' mode and writes weights
# only, keeping just the best epoch.
save_best_checkpoint = ModelCheckpoint(
    filepath='best_model.hdf5',
    monitor='val_fbeta',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

In [20]:
# Arguments shared by the training and validation iterators; only `subset`
# differs between the two calls.
_train_flow_args = dict(
    dataframe=train_df,
    directory='../input/planets-dataset/planet/planet/train-jpg',
    x_col='image_name',
    y_col=labels,
    target_size=(128, 128),
    class_mode='raw',
    seed=2,
    batch_size=128,
)

train_gen = image_gen.flow_from_dataframe(subset='training', **_train_flow_args)

val_gen = image_gen.flow_from_dataframe(subset='validation', **_train_flow_args)


In [21]:

# Batches per epoch: sample count divided by batch size, rounded up
# (integer ceiling division).
step_train_size = -(-train_gen.samples // train_gen.batch_size)

step_val_size = -(-val_gen.samples // val_gen.batch_size)

In [22]:
# Train a freshly built model for 12 epochs; the checkpoint callback writes
# the best weights to disk while training runs.
model1 = build_model()

model1.fit(
    x=train_gen,
    steps_per_epoch=step_train_size,
    validation_data=val_gen,
    validation_steps=step_val_size,
    epochs=12,
    callbacks=[save_best_checkpoint],
)

In [23]:
# Build a second, untrained model with the same architecture ...
model2 = build_model()

# ... and load the weights stored in 'best_model.hdf5' (the file the
# checkpoint callback writes to).
model2.load_weights('best_model.hdf5')

In [24]:
# Work on a copy of the template and append the '.jpg' extension to each name.
sample_submission_df = sample_submission.copy()
sample_submission_df['image_name'] = sample_submission_df['image_name'].map('{}.jpg'.format)
sample_submission_df.shape[0]

In [25]:
# First 40669 rows of the submission template.
# NOTE(review): 40669 is presumably the number of images in test-jpg - confirm.
# reset_index(drop=True) renumbers the rows without keeping the old index as
# a column. The previous `drop('index', axis=1)` call discarded its result,
# so the stray 'index' column was never actually removed.
test1_df = sample_submission_df.iloc[:40669][['image_name']].reset_index(drop=True)
test1_df.head(2)

In [26]:
# Unlabelled, unshuffled iterator over the first test directory, so the
# prediction order follows the dataframe order.
test1_gen = image_gen.flow_from_dataframe(
    dataframe=test1_df,
    directory='../input/planets-dataset/planet/planet/test-jpg',
    x_col='image_name',
    y_col=None,
    target_size=(128, 128),
    class_mode=None,
    shuffle=False,
    batch_size=128,
)

In [27]:
# Number of batches needed to cover every test sample (ceiling division).
step_test1_size = -(-test1_gen.samples // test1_gen.batch_size)

In [28]:
# Reset the iterator before predicting so batches start from the first row.
test1_gen.reset()
pred1 = model2.predict(
    test1_gen,
    steps=step_test1_size,
    verbose=1,
)

In [29]:
test1_file_names = test1_gen.filenames

# For each prediction row, keep the labels whose score exceeds 0.5 and join
# them into one space-separated string.
_label_array = np.array(unique_labels)
pred1_tags = pd.Series([' '.join(_label_array[row > 0.5]) for row in pred1])

In [30]:
# Pair each file name with its predicted tag string.
result1 = pd.DataFrame(
    {
        'image_name': test1_file_names,
        'tags': pred1_tags,
    }
)
result1.head(n=2)

In [31]:
# Remaining rows of the submission template, from position 40669 onward.
# NOTE(review): these presumably correspond to test-jpg-additional - confirm.
# reset_index(drop=True) renumbers the rows without keeping the old index as
# a column. The previous `drop('index', axis=1)` call discarded its result,
# so the stray 'index' column was never actually removed.
test2_df = sample_submission_df.iloc[40669:][['image_name']].reset_index(drop=True)
test2_df.head(2)

In [32]:
# Unlabelled, unshuffled iterator over the additional test directory.
test2_gen = image_gen.flow_from_dataframe(
    dataframe=test2_df,
    directory='../input/planets-dataset/test-jpg-additional/test-jpg-additional',
    x_col='image_name',
    y_col=None,
    target_size=(128, 128),
    class_mode=None,
    shuffle=False,
    batch_size=128,
)

# Number of batches needed to cover every sample (ceiling division).
step_test2_size = -(-test2_gen.samples // test2_gen.batch_size)

test2_gen.reset()
pred2 = model2.predict(
    test2_gen,
    steps=step_test2_size,
    verbose=1,
)

test2_file_names = test2_gen.filenames

# Keep the labels whose score exceeds 0.5 and join them with spaces.
_label_array = np.array(unique_labels)
pred2_tags = pd.Series([' '.join(_label_array[row > 0.5]) for row in pred2])

result2 = pd.DataFrame(
    {
        'image_name': test2_file_names,
        'tags': pred2_tags,
    }
)
result2.head(n=2)

In [33]:
# Stack both prediction tables and renumber the rows 0..n-1.
final_result = pd.concat([result1, result2])

final_result = final_result.reset_index(drop=True)

final_result.head(n=2)

In [34]:
# Every one of the 61191 rows must carry the same image name, in the same
# position, as the submission template.
assert (sample_submission_df['image_name'] == final_result['image_name']).sum() == 61191

In [35]:

# Remove the '.jpg' extension again. Only a trailing '.jpg' is stripped, so
# re-running this cell cannot chop further characters off the image ids
# (the previous unconditional x[:-4] did).
final_result['image_name'] = final_result['image_name'].apply(
    lambda name: name[:-len('.jpg')] if name.endswith('.jpg') else name
)

In [36]:
# Write the predictions to 'Planet Submission.csv' without the row index column.
final_result.to_csv('Planet Submission.csv', index = False)