## Install TensorFlow 2.0 (GPU variant) and W&B

In [0]:
!pip install -q tensorflow-gpu
!pip install wandb

## Imports and installation verification

In [0]:
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from wandb.keras import WandbCallback
import tensorflow as tf
import numpy as np
import wandb
import time

In [2]:
# Installation check: display the TensorFlow version that was actually imported
# (this notebook targets the 2.0 release).
tf.__version__

'2.0.0'

## W&B setup

In [0]:
# Authenticate with Weights & Biases through its CLI (run this before the
# wandb.init() call in the next cell).
!wandb login

In [0]:
# Start a W&B run and grab its config object; the training cell reads its
# hyperparameters back from `config`.
wandb.init()
config = wandb.config

# Training hyperparameters.
config.epochs = 5
config.batch_size = 128

## Data collection and preprocessing

In [0]:
# Keras' Fashion-MNIST dataset module; `mnist` is a second name bound to the
# same module object.
fashion_mnist = tf.keras.datasets.fashion_mnist
mnist = fashion_mnist

# load_data() returns the train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Divide by 255 so pixel intensities (presumably 0-255) land in [0, 1].
x_train = x_train / 255.0
x_test = x_test / 255.0

In [0]:
# Add the trailing channel axis so each image matches the (28, 28, 1) input
# shape declared on the first Conv2D layer.
# -1 lets NumPy infer the number of samples, so this cell no longer hard-codes
# the 60000/10000 split sizes and keeps working on a subset of the data.
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

In [7]:
# Display the shapes of all four arrays to confirm the reshape and split sizes.
tuple(arr.shape for arr in (x_train, y_train, x_test, y_test))

((60000, 28, 28, 1), (60000,), (10000, 28, 28, 1), (10000,))

## Model building and summarization

In [0]:
def create_model(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile a small convolutional image classifier.

    The network stacks three 3x3 ReLU convolutions (32, 64 and 64 filters),
    with 2x2 max pooling after the first two, followed by a 64-unit ReLU
    dense layer and a softmax output layer.

    Args:
        input_shape: Shape of a single input image as
            (height, width, channels). Defaults to (28, 28, 1), the shape
            the notebook reshapes its images to.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.

    Returns:
        A Keras ``Sequential`` model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
        The sparse loss means labels are passed as integer class ids,
        not one-hot vectors.
    """
    # The whole stack is declared in one list so the architecture reads top
    # to bottom in a single place.
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [9]:
# Build the compiled CNN and print its per-layer summary (output shapes and
# parameter counts).
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dense (Dense)                (None, 64)                3

## Model training profiling

In [10]:
%%prun
model.fit(x_train, y_train, validation_data=(x_test, y_test),
          epochs=config.epochs, batch_size=config.batch_size, 
          callbacks=[WandbCallback()], verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 

Here are the top entries from the profiler output, ordered by internal time:

```

         4766351 function calls (4456966 primitive calls) in 31.667 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2755   16.461    0.006   16.461    0.006 {built-in method _pywrap_tensorflow_internal.TFE_Py_Execute}
     8396    8.319    0.001    8.319    0.001 {method '_numpy' of 'tensorflow.python.framework.ops.EagerTensor' objects}
    10452    0.724    0.000    0.724    0.000 {method 'acquire' of '_thread.lock' objects}
     3268    0.345    0.000    0.345    0.000 socket.py:333(send)
598631/598365    0.209    0.000    0.425    0.000 {built-in method builtins.isinstance}
      721    0.193    0.000    0.204    0.000 tf_stack.py:141(extract_stack)
   116188    0.126    0.000    0.233    0.000 abc.py:180(__instancecheck__)
    22054    0.122    0.000    0.221    0.000 nest.py:117(_sequence_like)
87712/21928    0.117    0.000    0.363    0.000 type_spec.py:311(__make_cmp_key)
    23516    0.099    0.000    0.099    0.000 {built-in method numpy.array}
    14829    0.099    0.000    0.113    0.000 {built-in method _pywrap_tensorflow_internal.Flatten}
22054/5593    0.093    0.000    0.589    0.000 nest.py:384(_packed_nest_with_indices)
      369    0.089    0.000    0.089    0.000 {built-in method _pywrap_tensorflow_internal.TFE_Py_FastPathExecute}
   187793    0.086    0.000    0.086    0.000 _weakrefset.py:70(__contains__)
     2741    0.085    0.000   17.175    0.006 function.py:1143(_call_flat)
     2345    0.082    0.000    0.930    0.000 generic_utils.py:351(update)
 ```

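CPU and memory profiles for this run can be found [here](https://app.wandb.ai/sayakpaul/uncategorized/runs/xukoozma). TensorFlow 2.0 is indeed faster, because eager execution is enabled by default. In the profile above, most of the 31.7 seconds is spent in `TFE_Py_Execute` (about 16.5 s) and in `EagerTensor._numpy` (about 8.3 s).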