In [None]:
import numpy as np
import pandas as pd

In [None]:
# Try to activate the "tensorflow" environment from a shell command.
# NOTE(review): `!` commands run in their own shell process, so this presumably does not
# change the environment of the already-running kernel — confirm.
!activate tensorflow

In [None]:
# Print the current working directory.
!pwd

In [None]:
# Print the Python version that the shell resolves.
!python --version

In [None]:
# List the contents of the working directory.
!ls -la

In [None]:
# Count the .jpg files present in the training image folder.
!ls -la ./data/train-jpg/*.jpg | wc -l

In [None]:
df_labels = pd.read_csv('./data/train-csv/train.csv')
df_labels.head(10)

In [None]:
labels = df_labels.tags.values
labels

In [None]:
labels_list = []
for lbl in labels:
    labels_list.extend(lbl.split(' '))
labels_set = set(labels_list)
labels_set

In [None]:
for i, label in enumerate(labels_set):
    print('label:%s  %d/%d' %(label, i+1, len(labels_set)))
    %time df_labels[label] = df_labels.tags.apply(lambda x : 1 if label in x.split(' ') else 0)


In [None]:
df_labels

In [None]:
df = df_labels.tags.str.get_dummies(' ')
df.insert(0, 'image_name', df_labels.image_name)
df.head(10)

In [None]:
%matplotlib inline

In [None]:
df[list(labels_set)].sum().sort_values()

In [None]:
df[list(labels_set)].sum().sort_values().plot(kind='bar')

In [None]:
ordered_labels = df[list(labels_set)].sum().sort_values(ascending=False).index
type(ordered_labels)

In [None]:
df[ordered_labels].corr()

In [None]:
import seaborn as sns

In [None]:
df_occurence = df[ordered_labels].T.dot(df[ordered_labels])
df_occurence

In [None]:
sns.heatmap(df_occurence)

In [None]:
df_percentage = df[ordered_labels].sum() / df[ordered_labels].count() * 100
df_percentage

In [None]:
df_percentage.plot(kind='bar')

In [None]:
# Folder holding the training JPEG images; image file names are joined onto this path.
TRAIN_PATH = './data/train-jpg/'

In [None]:
import os

In [None]:
from PIL import Image
import random
import matplotlib.pyplot as plt

In [None]:
def plot_graph(label):
    """Show a 3x3 grid of randomly chosen training images tagged with ``label``.

    Images are drawn (with replacement) from the rows of ``df`` whose ``label``
    column equals 1 and are read from ``TRAIN_PATH``. Each panel is titled with
    the image name and its height / width in pixels.

    Args:
        label: name of a tag column of ``df``.
    """
    images = df[df[label] == 1].image_name.values

    fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 8))

    for axis in ax.flatten():
        f = random.choice(images)
        # Open the file in a context manager so the handle is released as soon
        # as the pixels have been handed to matplotlib.
        with Image.open(os.path.join(TRAIN_PATH, f + '.jpg')) as img:
            axis.imshow(img)
            axis.set_title("%s h:%s w:%s" % (f, img.height, img.width))
        axis.set_xticks([])
        axis.set_yticks([])
    plt.tight_layout()

In [None]:
# Preview nine random training images for a few individual tags (each call is timed).
%time plot_graph('primary')

In [None]:
%time plot_graph('clear')

In [None]:
%time plot_graph('agriculture')

In [None]:
%time plot_graph('habitation')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
df_train, df_test, df_y_train, df_y_test = train_test_split(df.image_name, df[ordered_labels], test_size=60, random_state=42)

In [None]:
df_train = pd.concat([df_train, df_y_train], axis = 1)

In [None]:
df_train.set_index(np.arange(df_train.shape[0]), inplace=True)

In [None]:
df_train = df_train.reindex(index=np.arange(df_train.shape[0]))

In [None]:
df_test = pd.concat([df_test, df_y_test], axis = 1)

In [None]:
df_test.set_index(np.arange(df_test.shape[0]), inplace=True)

In [None]:
%time df_train['image_name_w_ext'] = df_train['image_name'] + '.jpg'

In [None]:
%time df_test['image_name_w_ext'] =  df_test['image_name'] + '.jpg'

In [None]:
#if not 'image_name_w_ext' in df_train.columns:
#    %time df_train = df_train[['image_name', 'image_name_w_ext'] + list(df_train.columns.values[1:-1])]
#    %time df_test = df_test[['image_name', 'image_name_w_ext'] + list(df_test.columns.values[1:-1])]
#df_train.head(10)

In [None]:
#for f in df_train.image_name_w_ext:
#    if os.path.exists(os.path.join(TRAIN_PATH, f)) == False:
#        print("%s is missing" % f)

In [None]:
%time shapes = [Image.open(os.path.join(TRAIN_PATH, f)).size for f in df_train.image_name_w_ext[:1000]]

In [None]:
%time pd.Series(shapes).value_counts()

In [None]:
MINI_BATCH_SIZE = 9
NB_MINI_BATCH = df_train.shape[0] / MINI_BATCH_SIZE
SHUFFLE = False
BATCH_SIZE = df_train.shape[0]
#NB_CATEGORIES = len(ordered_labels)

In [None]:
%time idx = np.random.permutation(len(df_train)) if SHUFFLE == True else np.arange(BATCH_SIZE)
idx

In [None]:
%time batches = np.array_split(idx, NB_MINI_BATCH)

In [None]:
df_train

## Build Graph

In [None]:
import tensorflow as tf
print(tf.__version__)

In [None]:
# Clear the default graph before the model is (re)built in the cells below.
tf.reset_default_graph()

In [None]:
config = tf.ConfigProto()

In [None]:
# Ask TensorFlow to log the device each operation is placed on.
config.log_device_placement = True

In [None]:
# An InteractiveSession installs itself as the default session, which is what lets
# later cells call .run() / .eval() on ops and tensors without passing a session.
sess = tf.InteractiveSession(config=config)

In [None]:
# X: batch of 128x128 images with 3 channels; y: batch of 17-element target vectors.
# The leading None leaves the batch size unspecified.
X = tf.placeholder(dtype=tf.float32,shape=(None, 128,128,3), name="X")
y = tf.placeholder(dtype=tf.float32,shape=(None,17), name="y")

In [None]:
# First convolutional stage: 5x5 convolution (3 -> 64 channels), bias + ELU, 2x2 max-pooling.
# The trailing shape comments give height * width * channels for one image, starting
# from the 128 * 128 * 3 input declared for X.
with tf.name_scope('CONV1'):
    W_conv1 = tf.get_variable(name="W_conv1", shape=[5,5,3,64], dtype=tf.float32, initializer= tf.contrib.layers.xavier_initializer())
    # NOTE(review): the bias also uses the Xavier initializer rather than a constant — confirm this is intended.
    b_conv1 = tf.get_variable(name="b_conv1", shape=[64], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    
    conv1 = tf.nn.conv2d(
        input=X,
        filter=W_conv1,
        strides=[1,1,1,1],
        padding='SAME',
        name='conv1'
    ) #128 * 128 * 64 (stride 1 with 'SAME' padding keeps height and width)

    # The op is named 'relu1' but the activation applied is ELU.
    relu1 = tf.nn.elu(
        features = conv1 + b_conv1,
        name='relu1'
    ) #128 * 128 * 64

    pool1 = tf.nn.max_pool(
        value = relu1,
        ksize=[1,2,2,1],
        strides=[1,2,2,1],
        padding='SAME',
        name='pool1'
    ) #64 * 64 * 64 (stride-2 pooling halves height and width)
    
    # Record the distribution of the filter weights for TensorBoard.
    tf.summary.histogram('W_conv1', W_conv1)

In [None]:
# Second convolutional stage: 5x5 convolution (64 -> 64 channels), bias + ELU, 2x2 max-pooling.
# The trailing shape comments give height * width * channels for one image; the input
# pool1 is 64 * 64 * 64 (128 * 128 input halved once by the first pooling).
with tf.name_scope('CONV2'):
    W_conv2 = tf.get_variable(name="W_conv2", shape=[5,5,64,64], dtype=tf.float32, initializer= tf.contrib.layers.xavier_initializer())
    b_conv2 = tf.get_variable(name="b_conv2", shape=[64], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    conv2 = tf.nn.conv2d(
        input=pool1,
        filter=W_conv2,
        strides=[1,1,1,1],
        padding='SAME',
        name='conv2'
    ) #64 * 64 * 64 (stride 1 with 'SAME' padding keeps height and width)

    # The op is named 'relu2' but the activation applied is ELU.
    relu2 = tf.nn.elu(
        features = conv2 + b_conv2,
        name='relu2'
    ) #64 * 64 * 64

    pool2 = tf.nn.max_pool(
        value = relu2,
        ksize=[1,2,2,1],
        strides=[1,2,2,1],
        padding='SAME',
        name='pool2'
    ) #32 * 32 * 64 (stride-2 pooling halves height and width)
    
            
    # Record the distribution of the filter weights for TensorBoard.
    tf.summary.histogram('W_conv2', W_conv2)

In [None]:
# First fully connected layer: flattens pool2 and maps it to 256 ELU units.
with tf.name_scope('FC1'):
    # 32*32*64 input features = the 32 * 32 * 64 values produced per image by pool2.
    W_fc1 = tf.get_variable(name="W_fc1", shape=[32*32*64,256], dtype=tf.float32, initializer= tf.contrib.layers.xavier_initializer())
    b_fc1 = tf.get_variable(name="b_fc1", shape=[256], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # One row per image; -1 lets TensorFlow infer the batch dimension.
    pool_2_flat = tf.reshape(
        tensor=pool2,
        shape=(-1,32*32*64),
        name='pool_2-flat'
    )

    fc1 = tf.nn.elu(tf.matmul(pool_2_flat,W_fc1) + b_fc1)
    
    # Record the distribution of the weights for TensorBoard.
    tf.summary.histogram('W_fc1', W_fc1)

In [None]:
# Dropout applied to the output of fc1.
with tf.name_scope('DROPOUT'):
    # keep_prob is the probability of KEEPING a unit (not of dropping it); it has no
    # default, so it must be fed on every run that depends on this layer.
    keep_prob = tf.placeholder(dtype=tf.float32, name="keep_prob")
    
    dropout = tf.nn.dropout(
        x=fc1,
        keep_prob=keep_prob
    )

In [None]:
# Output layer: maps the 256 dropout-regularised features to 17 logits, one per target column of y.
with tf.name_scope('FC2'):
    W_fc2 = tf.get_variable(name="W_fc2", shape=[256, 17], dtype=tf.float32, initializer= tf.contrib.layers.xavier_initializer())
    b_fc2 = tf.get_variable(name="b_fc2", shape=[17], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    
    fc2 = tf.matmul(dropout, W_fc2) + b_fc2 #logits: 17 values per image (W_fc2 is 256 * 17)
    
    # Independent per-label probabilities (sigmoid, not softmax: labels are not mutually exclusive here).
    probabilities = tf.nn.sigmoid(fc2)
    
    # Record the distribution of the weights for TensorBoard.
    tf.summary.histogram('W_fc2', W_fc2)

## Feed data

In [None]:
# Element-wise sigmoid cross-entropy between the targets y and the logits fc2.
# The result is NOT reduced: it holds one value per (image, label) pair.
with tf.name_scope('CROSS_ENTROPY'):
    cross_entropy =  tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits= fc2, name='cross_entropy')

In [None]:
# Adam update op with a fixed learning rate of 1e-4.
# NOTE(review): minimize() receives the unreduced tensor; TensorFlow presumably sums the
# gradients over all of its elements, i.e. this minimises the summed loss — confirm.
train_step = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cross_entropy)

In [None]:
# Register the update op in the graph collection named 'train_step'
# (presumably so it can be looked up again after the graph is restored — confirm).
tf.add_to_collection(name='train_step', value=train_step)

In [None]:
# Variables holding the latest evaluation metrics. Their values are assigned from the
# training loop; they exist so that the metrics can be exported as scalar summaries.
f_beta_score = tf.Variable(0, name='f_beta_score', dtype=tf.float32)
test_accuracy =  tf.Variable(0, name='test_accuracy',dtype=tf.float32)
loss = tf.Variable(0, name='loss', dtype=tf.float32)

# Scalar summaries reading the current value of each metric variable.
tf.summary.scalar('f_beta_score', f_beta_score)
tf.summary.scalar('test_accuracy', test_accuracy)
tf.summary.scalar('loss', loss)

In [None]:
import cv2

In [None]:
import datetime as dt
import os

In [None]:
from sklearn.metrics import fbeta_score

In [None]:
# Saver used by the training loop to write checkpoints.
saver = tf.train.Saver()

In [None]:
# Initialise every variable defined in the graph so far.
%time sess.run(tf.global_variables_initializer())

In [None]:
# Hyper-parameters read by the training loop.
# Number of passes over the training mini-batches.
NB_EPOCHS = 1
# Cut-off used when turning the model output into 0/1 predictions.
THRESHOLD = 0.2
# Dropout setting from which the keep_prob fed during training steps is taken.
DROPOuT = 0.1

In [None]:
# One TensorBoard log directory per run, named after the start time.
folder = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
log_dir = os.path.join('./logs', folder)
# Create the directory the writer actually logs to (./logs/<folder>) instead of a
# stray <folder> directory in the working directory.
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

# Single op evaluating every summary registered so far.
merged_summary = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)

In [None]:
def load_image_batch(file_names, size=(128, 128)):
    """Load images from TRAIN_PATH and stack them into one network-ready array.

    Each file is read as a 3-channel image, histogram-equalised channel by
    channel, resized to ``size`` with bicubic interpolation and scaled to [0, 1].

    Args:
        file_names: iterable of file names (with extension) relative to TRAIN_PATH.
        size: target (width, height) handed to ``cv2.resize``.

    Returns:
        numpy array with one image per entry of ``file_names``.

    Raises:
        IOError: if OpenCV cannot read one of the files.
    """
    images = []
    for file_name in file_names:
        path = os.path.join(TRAIN_PATH, file_name)
        im = cv2.imread(path, cv2.IMREAD_COLOR)
        if im is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError("Unable to read image: %s" % path)
        im = np.array(im, dtype=np.uint8)
        for channel in range(3):
            im[:, :, channel] = cv2.equalizeHist(im[:, :, channel])
        im = cv2.resize(im, dsize=size, interpolation=cv2.INTER_CUBIC)
        # 255.0 forces true division regardless of the Python version.
        images.append(im / 255.0)
    return np.asarray(images)


# Checkpoints are written below ./ckpt; make sure the directory exists.
ckpt_dir = './ckpt'
if not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)

# Loop-invariant data: label matrices, file names and the (fixed) evaluation images.
train_file_names = df_train.image_name_w_ext.values
train_labels = df_train[ordered_labels].values
test_labels = df_test[ordered_labels].values
test_images = load_image_batch(df_test.image_name_w_ext)

# Build the metric-update ops ONCE. Creating assign/reduce ops inside the loop
# would add new nodes to the graph at every step and make it grow without bound.
test_accuracy_value = tf.placeholder(tf.float32, shape=(), name='test_accuracy_value')
f_beta_score_value = tf.placeholder(tf.float32, shape=(), name='f_beta_score_value')
update_test_accuracy = test_accuracy.assign(test_accuracy_value)
update_f_beta_score = f_beta_score.assign(f_beta_score_value)
update_loss = loss.assign(tf.reduce_mean(tf.reduce_sum(cross_entropy, axis=1), axis=0))

model_name = 'mymodel_' + dt.datetime.now().strftime('%Y%m%d')
nb_batches = len(batches)

training_start_time = dt.datetime.now()
print('Starting training....')

for n in range(NB_EPOCHS):

    epoch_start_time = dt.datetime.now()
    avg_accuracy = 0

    for step, batch in enumerate(batches):
        batch_start_time = dt.datetime.now()
        # Step counter that keeps increasing across epochs, so summaries and
        # checkpoints of later epochs do not overwrite those of earlier ones.
        global_step = n * nb_batches + step

        train_images = load_image_batch(train_file_names[batch])
        batch_labels = train_labels[batch, :]

        # DROPOuT is a drop rate, whereas the graph expects the probability of
        # KEEPING a unit, hence the conversion.
        sess.run(train_step, feed_dict={
                X: train_images,
                y: batch_labels,
                keep_prob: 1.0 - DROPOuT})

        # Evaluate on the held-out images with dropout disabled. THRESHOLD is
        # applied to the raw probabilities (rounding them first would make any
        # threshold below 1 equivalent to a 0.5 cut-off).
        probabilities_array = sess.run(probabilities, feed_dict={
            X: test_images,
            keep_prob: 1})
        predictions_array = np.asarray(probabilities_array > THRESHOLD, dtype=np.int8)

        # Percentage of test images for which every label is predicted correctly.
        accuracy_value = 100 * np.mean(np.all(np.equal(predictions_array, test_labels), axis=1))
        f_beta_value = fbeta_score(test_labels, predictions_array, beta=2, average='samples')

        sess.run([update_test_accuracy, update_f_beta_score, update_loss], feed_dict={
            X: test_images,
            y: test_labels,
            keep_prob: 1,
            test_accuracy_value: accuracy_value,
            f_beta_score_value: f_beta_value})

        avg_accuracy += accuracy_value / nb_batches

        # Summaries are evaluated AFTER the metric variables were updated so that
        # the values logged for this step are the ones just computed.
        s = sess.run(merged_summary, feed_dict={
                X: train_images,
                y: batch_labels,
                keep_prob: 1})
        train_writer.add_summary(s, global_step)

        if step%25 == 0:
            print (predictions_array)
            print ("epoch: %d step: %d avg_accuracy: %.2f test_accuracy: %.2f fbeta_score: %.2f duration: %s nbTestImages: %d" % (n,step,avg_accuracy,accuracy_value,f_beta_value,str(dt.datetime.now()-batch_start_time),len(test_images)))
            saver.save(sess, os.path.join(ckpt_dir, model_name), global_step=global_step)

    saver.save(sess, os.path.join(ckpt_dir, 'mymodel_' + dt.datetime.now().strftime('%Y%m%d') + '_final.ckpt'))
    print("epoch: %d avg_accuracy: %.2f batch_accuracy: %.2f fbeta_score: %.2f duration: %s" % (n, avg_accuracy,test_accuracy.eval(), f_beta_score.eval(),str(dt.datetime.now()-epoch_start_time)))

# Make sure every summary written during training reaches the event file.
train_writer.flush()
print('End of training: %s' % str(dt.datetime.now() - training_start_time))

The test set must not be too large, otherwise we get an OOM (out-of-memory) exception.
The same applies to the batch size: its dimension needs to be calibrated properly.
Initially we started with eta=0.001 and the ReLU function.
Afterwards: eta=0.0001 and the ELU function => the weights were not learning fast enough.
Switched dropout to 0.1.
Number of filters multiplied by 2 => NaN weights during backpropagation => increased dropout from 0 to 0.2.

In [None]:
# Close the summary writer first so that buffered events are written to disk,
# then release the resources held by the session.
train_writer.close()
sess.close()