In [1]:
import tensorflow as tf
#tf.enable_eager_execution()
#tf.VERSION
import pandas as pd
import numpy as np
import cv2
from PIL import Image

  from ._conv import register_converters as _register_converters


In [2]:
import seaborn as sns
import matplotlib.pyplot as plt 
from glob import glob
%matplotlib inline

In [3]:
train = pd.read_csv('/home/sneha/personal_stuff/dl_begin/dl_beginner/meta-data/train.csv')
test = pd.read_csv('/home/sneha/personal_stuff/dl_begin/dl_beginner/meta-data/test.csv')
TRAIN_PATH = '/home/sneha/personal_stuff/dl_begin/dl_beginner/train/' 
TEST_PATH = '/home/sneha/personal_stuff/dl_begin/dl_beginner/test/'

In [4]:
train.head()

Unnamed: 0,Image_id,Animal
0,Img-1.jpg,hippopotamus
1,Img-2.jpg,squirrel
2,Img-3.jpg,grizzly+bear
3,Img-4.jpg,ox
4,Img-5.jpg,german+shepherd


In [5]:
print('There are {} images in the train dataset.'.format(train.shape[0]))
print('There are {} images in the test datasets.'.format(test.shape[0]))

There are 13000 images in the train dataset.
There are 6000 images in the test datasets.


In [6]:
print('The dataset contain images of the following animals:\n\n', train.Animal.unique())

The dataset contain images of the following animals:

 ['hippopotamus' 'squirrel' 'grizzly+bear' 'ox' 'german+shepherd' 'buffalo'
 'otter' 'bobcat' 'wolf' 'persian+cat' 'collie' 'antelope' 'seal'
 'dalmatian' 'siamese+cat' 'moose' 'horse' 'killer+whale' 'mouse' 'walrus'
 'beaver' 'rhinoceros' 'chimpanzee' 'weasel' 'spider+monkey' 'raccoon'
 'rat' 'chihuahua' 'mole' 'bat']


In [7]:
def preprocess_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize_images(image, [224, 224])
    image /= 255.0  # normalize to [0,1] range
    return image

def load_and_preprocess_image(path):
    image = tf.read_file(path)
    return preprocess_image(image)

In [8]:
train.Image_id = TRAIN_PATH+train.Image_id

In [43]:
test.Image_id = TEST_PATH+test.Image_id

path_ds = tf.data.Dataset.from_tensor_slices(train.Image_id)

print('shape: ', repr(path_ds.output_shapes))
print('type: ', path_ds.output_types)
print()
print(path_ds)

image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=8)

label_to_index = dict((name, index) for index,name in enumerate(train.Animal.unique()))
print(label_to_index)

label_ds = tf.data.Dataset.from_tensor_slices(tf.cast([label_to_index[i] for i in train.Animal], tf.int64))

for label in label_ds.take(10):
    print(train.Animal.iloc[label.numpy()])

image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

print('image shape: ', image_label_ds.output_shapes[0])
print('label shape: ', image_label_ds.output_shapes[1])
print('types: ', image_label_ds.output_types)
print()
print(image_label_ds)

In [9]:
BATCH_SIZE = 32
num_epochs = 10

def train_input_fn():
    path_ds = tf.data.Dataset.from_tensor_slices(train.Image_id)
    image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=8)

    label_to_index = dict((name, index) for index,name in enumerate(train.Animal.unique()))
    label_ds = tf.data.Dataset.from_tensor_slices(tf.cast([label_to_index[i] for i in train.Animal], tf.int64))
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

    # Setting a shuffle buffer size as large as the dataset ensures that the data is
    # completely shuffled.
    ds = image_label_ds.shuffle(buffer_size=train.shape[0])
    ds = ds.repeat(num_epochs)
    ds = ds.batch(BATCH_SIZE)
    # `prefetch` lets the dataset fetch batches, in the background while the model is training.
    ds = ds.prefetch(buffer_size=BATCH_SIZE)
    return ds.make_one_shot_iterator().get_next()    

In [45]:
BATCH_SIZE = 32
num_epochs = 10

def test_input_fn():
    path_ds = tf.data.Dataset.from_tensor_slices(test.Image_id)
    image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=8)
    ds=image_ds
    ds = ds.batch(BATCH_SIZE)
    # `prefetch` lets the dataset fetch batches, in the background while the model is training.
    ds = ds.prefetch(buffer_size=BATCH_SIZE)
    return ds.make_one_shot_iterator().get_next()    

In [11]:
def cnn_model_fn(features, labels, mode):
    """Model function for CNN."""
    # Input Layer
    input_layer = tf.reshape(features, [-1, 224, 224, 3])

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(
      inputs=input_layer,
      filters=32,
      kernel_size=[3, 3],
      padding="same",
      activation=tf.nn.relu)
    
    conv2 = tf.layers.conv2d(
      inputs=conv1,
      filters=64,
      kernel_size=[3, 3],
      padding="same",
      activation=tf.nn.relu)
    
    conv3 = tf.layers.conv2d(
      inputs=conv2,
      filters=64,
      kernel_size=[3, 3],
      padding="same",
      activation=tf.nn.relu)

    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    
    dropout1 = tf.layers.dropout(
      inputs=pool1, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    conv4 = tf.layers.conv2d(
      inputs=dropout1,
      filters=128,
      kernel_size=[3, 3],
      padding="same",
      activation=tf.nn.relu)
    
    conv5 = tf.layers.conv2d(
      inputs=conv4,
      filters=128,
      kernel_size=[3, 3],
      padding="same",
      activation=tf.nn.relu)
    
    conv6 = tf.layers.conv2d(
      inputs=conv5,
      filters=128,
      kernel_size=[3, 3],
      padding="same",
      activation=tf.nn.relu)

    pool2 = tf.layers.max_pooling2d(inputs=conv6, pool_size=[2, 2], strides=2)
    print()

    # Dense Layer
    pool2_flat = tf.reshape(pool2, [-1,56*56*128])
    dropout2_flat = tf.layers.dropout(
      inputs=pool2_flat, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    dense = tf.layers.dense(inputs=dropout2_flat, units=100, activation=tf.nn.relu)
    dropout = tf.layers.dropout(
      inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=30)

    predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "classes": tf.argmax(input=logits, axis=1),
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      # `logging_hook`.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    tf.summary.scalar('accuracy', eval_metric_ops['accuracy'])
    return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [12]:
import os, datetime
modeldir = os.path.join(os.getcwd(), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
os.makedirs(modeldir)

In [13]:
# Create the Estimator
animal_classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir=modeldir)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/home/sneha/personal_stuff/dl_begin/2019-01-27_23-32-10', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f5a67e730b8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [14]:
# Set up logging for predictions
tensors_to_log = {"probabilities": "softmax_tensor","loss" : "loss"}

logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=5)

In [15]:
# train one step and display the probabilties
animal_classifier.train(
    input_fn=train_input_fn,
    steps=1,
    hooks=[logging_hook])

INFO:tensorflow:Calling model_fn.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /home/sneha/personal_stuff/dl_begin/2019-01-27_23-32-10/model.ckpt.
INFO:tensorflow:loss = b'\n\x0b\n\x04loss\x15\xf1\xe3Y@', probabilities = [[0.03282968 0.03134601 0.03300159 0.03403704 0.03221723 0.03502687
  0.03392608 0.03190167 0.03563409 0.03275656 0.0345325  0.03535771
  0.03323707 0.03380083 0.03445099 0.03452428 0.03334696 0.03428003
  0.03464954 0.03110941 0.03107699 0.03267748 0.03302389 0.0318406
  0.03389145 0.032053   0.03334408 0.03221411 0.03343787 0.03447441]
 [0.03109831 0.03265173 0.03288491 0.03262954 0.03241358 0.03616821
  0.03405002 0.0348274  0.03329391 0.0341601  0.03242457 0.03563067
  0.03374429 0.03557314 0.03202108 0.0347045  0.03324044 0.03298395
  0.03298784 0.03269177 0.0321

INFO:tensorflow:loss = 3.4045374, step = 1
INFO:tensorflow:Saving checkpoints for 1 into /home/sneha/personal_stuff/dl_begin/2019-01-27_23-32-10/model.ckpt.
INFO:tensorflow:Loss for final step: 3.4045374.


<tensorflow.python.estimator.estimator.Estimator at 0x7f5a67b21860>

In [100]:
results = animal_classifier.predict(
    input_fn=test_input_fn,
    predict_keys=None,
    hooks=None,
    checkpoint_path=None,
    yield_single_examples=True)
print('evaluate done')
#df = pd.DataFrame(np.zeros([1,30]))
appended_data = []

evaluate done


In [101]:
for result in results:
    #print('result: {}'.format(result['probabilities']))
    df2 = pd.DataFrame([result['probabilities']])
    #df.append(df2,ignore_index=True)
    appended_data.append(df2)
appended_data = pd.concat(appended_data, axis=0)
# write DataFrame to an excel sheet 
appended_data.to_excel('appended.xlsx')

INFO:tensorflow:Calling model_fn.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /home/sneha/personal_stuff/dl_begin/2019-01-27_23-32-10/model.ckpt-1
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


KeyboardInterrupt: 

In [102]:
appended_data = pd.concat(appended_data, axis=0)
# write DataFrame to an excel sheet 
appended_data.to_excel('appended.xlsx')

In [103]:
appended_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.033262,0.032823,0.033145,0.033233,0.033209,0.033913,0.033254,0.033426,0.033702,0.032676,...,0.032799,0.033343,0.032273,0.032708,0.032900,0.033168,0.033620,0.033144,0.033960,0.033615
0,0.033013,0.032734,0.033213,0.033315,0.033145,0.033970,0.033404,0.033361,0.033364,0.032927,...,0.032467,0.033281,0.032655,0.032496,0.032889,0.033744,0.033812,0.033280,0.034002,0.033667
0,0.032907,0.033005,0.033122,0.033301,0.033145,0.033925,0.033033,0.033193,0.033633,0.032826,...,0.032758,0.033479,0.032593,0.032654,0.032883,0.033431,0.033545,0.033342,0.034032,0.033689
0,0.032985,0.032598,0.033162,0.033320,0.033124,0.033880,0.033139,0.033619,0.033772,0.032825,...,0.032703,0.033665,0.032736,0.032393,0.032971,0.033410,0.033707,0.033168,0.033798,0.033470
0,0.032996,0.032830,0.033143,0.033293,0.033244,0.033795,0.033262,0.033338,0.033579,0.032848,...,0.032793,0.033412,0.032796,0.032894,0.032852,0.033465,0.033699,0.033268,0.034034,0.033587
0,0.032928,0.032766,0.032983,0.033501,0.033221,0.034060,0.033322,0.033295,0.033675,0.033070,...,0.032488,0.032974,0.032532,0.032193,0.032978,0.033566,0.033556,0.033072,0.034136,0.033647
0,0.032964,0.032489,0.032678,0.033230,0.033234,0.034558,0.033250,0.032991,0.033736,0.032875,...,0.032320,0.033269,0.032247,0.032545,0.032726,0.033869,0.033384,0.033059,0.034323,0.033889
0,0.033086,0.032923,0.033194,0.033511,0.032937,0.033788,0.033486,0.033513,0.033488,0.033024,...,0.032689,0.033398,0.032815,0.032945,0.033061,0.033794,0.033285,0.033424,0.033861,0.033353
0,0.033161,0.032528,0.033177,0.033398,0.032914,0.033946,0.033987,0.033284,0.033322,0.033231,...,0.032600,0.033327,0.032658,0.032872,0.033149,0.033420,0.033400,0.033513,0.033924,0.033613
0,0.033061,0.032829,0.033143,0.033170,0.033299,0.033925,0.033158,0.032985,0.033841,0.032852,...,0.032856,0.033836,0.032516,0.032441,0.032666,0.033418,0.033619,0.033298,0.033935,0.033341
