In [None]:
#Necessary imports
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras.applications.vgg19 import VGG19
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [None]:
#Storing dataset containg names into a dataframe
df=pd.read_csv("../input/train_v2.csv")
df['tags']=df['tags'].map(lambda x: x.split(' '))

In [None]:
#Seventeen labels stored in list “classes”
classes=['haze','primary','agriculture','clear','water','road','habitation','cultivation',
'slash_burn','cloudy','partly_cloudy','blow_down','conventional_mine','artisinal_mine',
'blooming','selective_logging','bare_ground']
new_df=pd.DataFrame(0,columns=classes,index=range(40479))

In [None]:
#Creation of concatenated dataframe
concat_df=pd.concat([df,new_df], axis=1,join='inner')
concat_df.head()
l=0
for i in concat_df['tags']:
    for k in i:
        concat_df.loc[l,k]=1
    l=l+1

In [None]:
#Splitting labels into train and validation sets
train_labels,validation_labels=train_test_split(concat_df,test_size=0.2)
file_path="../input/train-jpg"
trainfile_paths=[]
validation_paths=[]
for i in train_labels['image_name']:
    trainfile_paths.append(file_path+"/"+i+".jpg")
for i in validation_labels['image_name']:
    validation_paths.append(file_path+"/"+i+".jpg")

In [None]:
#Getting generators
def get_data_iter(data_gen, img_size, labels_df, img_list, batch_size, shuffle):
    generator = data_gen.flow_from_directory(directory = file_path, target_size=(img_size,img_size),
                                            class_mode = 'sparse', batch_size=batch_size, shuffle=shuffle)
    generator.filenames = img_list
    generator.classes = labels_df.iloc[:,2:].values
    generator.samples = labels_df.iloc[:,2:].values.shape[0]
    generator.n = labels_df.iloc[:,2:].values.shape[0]
    generator.directory = ''
    generator._set_index_array()
    return generator

data_generator_aug = ImageDataGenerator(rescale = 1./255, vertical_flip = True,
                                        horizontal_flip = True, height_shift_range = 0.1,
                                        width_shift_range = 0.1, rotation_range =10)
data_generator_no_aug = ImageDataGenerator(rescale = 1./255)
img_size = 64

In [None]:
#Creating train and validation generators
train_gen = get_data_iter(data_generator_aug,img_size,train_labels,trainfile_paths,64,shuffle=False)
valid_gen = get_data_iter(data_generator_no_aug,img_size,validation_labels,validation_paths,128,shuffle=False)
imgs,label_values= next(train_gen)

In [None]:
#Creating a new model
my_new_model=Sequential()
vgg_weights_path = '../input/vgg19-weights/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'

In [None]:
#Defining model layers
vgg19 = VGG19(include_top=False,input_shape=(img_size,img_size,3), weights=vgg_weights_path)
base_model = vgg19
x = base_model.output
x = Flatten()(x)
predictions = Dense(17, activation = 'sigmoid')(x)
my_new_model = Model(inputs = base_model.input, outputs = predictions)
for i in range(0,17) :
    my_new_model.layers[i].trainable = False
for i, layer in enumerate(my_new_model.layers):
    print(i, layer.name, layer.trainable)

In [None]:
#Generating F2 score
def f2_score(y_true, y_pred):
    y_true = tf.cast(y_true, "int32")
    y_pred = tf.cast(tf.round(y_pred), "int32") # implicit 0.5 threshold via tf.round
    y_correct = y_true * y_pred
    sum_true = tf.reduce_sum(y_true, axis=1)
    sum_pred = tf.reduce_sum(y_pred, axis=1)
    sum_correct = tf.reduce_sum(y_correct, axis=1)
    precision = sum_correct / sum_pred
    recall = sum_correct / sum_true
    f_score = 5 * precision * recall / (4 * precision + recall)
    f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)
    return tf.reduce_mean(f_score)

In [None]:
#Compiling the model
my_new_model.compile(optimizer='adam',loss='binary_crossentropy', metrics =[f2_score])

In [None]:
#Fitting the model
my_new_model.fit_generator(train_gen,steps_per_epoch=506,epochs=20,verbose=1,
                            validation_data=valid_gen,validation_steps=127,workers=4)

In [None]:
#Checking on test file
test_dir_path = '..input/test-jpg-v2'
test_images_names = os.listdir(test_dir_path)
test_images_paths = [os.path.join(test_dir_path, img_name) for img_name in test_images_names]
test_data_gen = ImageDataGenerator(rescale = 1./255)
test_generator = data_gen.flow_from_directory(directory=file_path,target_size=(img_size,img_size),
                                            class_mode='sparse',batch_size=batch_size,shuffle=shuffle)
test_generator.filenames = test_images_paths
test_generator.n = len(test_images_paths)
test_generator.directory = ''
test_generator._set_index_array()
predictions = my_new_model.predict_generator(test_generator, verbose = 1)
predictions = np.rint(predictions)

In [None]:
#Creation of a csv file containg labels for test set images
test_images_series = pd.Series([test_image_name.split('.')[0] for test_image_name in test_images_names])
print(test_images_series.head())