<a href="https://colab.research.google.com/github/mnansary/pyF2O/blob/master/colab_gen_unet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# colab specific task
*   mount google drive
*   TPU check
*   Check TF version

In [1]:
!pip3 install tensorflow==1.13.1



In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:

# tpu check
import os
import pprint
import tensorflow as tf
if 'COLAB_TPU_ADDR' not in os.environ:
  print('ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!')
else:
  TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
  print ('TPU address is', TPU_ADDRESS)

  with tf.Session(TPU_ADDRESS) as session:
    devices = session.list_devices()
    
  print('TPU devices:')
  pprint.pprint(devices)



  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


TPU address is grpc://10.16.101.162:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 7858302473141818004),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 12623185630599305096),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 4937265352828163642),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 5445691478193049863),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 5494180979706028767),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 10769982321566407128),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 8558279051891193923),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 15998393232634022417),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 102934379519

# GCS specific task 
* **auth user**
* **save** and **upload** credentials to **tpu**
* set project information


In [0]:
# auth user for cloud SDK
from google.colab import auth
auth.authenticate_user()

In [5]:
# Save credentials
import json
SERVICE_KEY_PATH='/content/adc.json' # @param
# Upload credentials to TPU.
with tf.Session(TPU_ADDRESS) as sess:    
    with open(SERVICE_KEY_PATH, 'r') as f:
        auth_info = json.load(f)
        tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)
# set service_account
JSON_DATA=json.load(open(SERVICE_KEY_PATH))
SERVICE_ACCOUNT=str(JSON_DATA['client_id']).split('.')[0]
print('Service Account:',SERVICE_ACCOUNT)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Service Account: 32555940559


#### SET PROJECT INFORMATION 

In [6]:
PROJECT_ID    ='record-1106154'        # @param 
BUCKET        ='tfalldata'             # @param 
RECORD_IDEN  = 'TFRECORD'             # @param
# LIST FILES
TFRECORDS_DIR= 'gs://{}/{}/'.format(BUCKET,RECORD_IDEN)
#
# change TFRECORDS_DIR specific to structre
#
!gcloud config set project {PROJECT_ID}
!gsutil ls {TFRECORDS_DIR}

Updated property [core/project].
gs://tfalldata/TFRECORD/Eval/
gs://tfalldata/TFRECORD/Train/


# ConvNet3D Model Training

## Data Pipeline
* set **FLAGS** and **PARAMS**
* For TRAIN and EVAL data size check **info.json** in DataSet Dir
* define input functions

## FLAGS AND PARAMS

In [7]:


class FLAGS:
    IMAGE_DIM       = 128 # @param
    NB_CHANNELS     = 1   # @param
    BATCH_SIZE      = 8 # @param
    SHUFFLE_BUFFER  = 1000 # @param
    NB_CLASSES      = 17   # @param
    MIN_SEQ_LEN     = 6    # @param
    
NB_TRAIN_DATA       = 49920 # @param
NB_EVAL_DATA        = 3456  # @param
EPOCHS            = 10   # @param
NB_TOTAL_DATA= NB_TRAIN_DATA+ NB_EVAL_DATA
STEPS_PER_EPOCH     =  NB_TOTAL_DATA // FLAGS.BATCH_SIZE 
VALIDATION_STEPS    =  NB_EVAL_DATA  // FLAGS.BATCH_SIZE 

print('Steps Per epoch:',STEPS_PER_EPOCH)
print('Validation Steps:', VALIDATION_STEPS)



Steps Per epoch: 6672
Validation Steps: 432


## Data Input Functions
* get **bucket** 
* define **train_in_fn()** and **eval_in_fn()**
* **NOTE:AVOID USING PARTIALS** 
 

In [8]:
from google.cloud import storage
client = storage.Client(PROJECT_ID)
# get bucket from the project
bucket=client.get_bucket(BUCKET)
print(bucket)
def data_input_fn(FLAGS,mode): 
    
    def _parser(example):
        data  ={ 'feats':tf.io.FixedLenFeature((FLAGS.MIN_SEQ_LEN,FLAGS.IMAGE_DIM,FLAGS.IMAGE_DIM,FLAGS.NB_CHANNELS),tf.int64),
                 'label':tf.io.FixedLenFeature((),tf.int64)
        }    
        parsed_example=tf.io.parse_single_example(example,data)
        feats=tf.cast(parsed_example['feats'],tf.float32)/255.0
        feats=tf.reshape(feats,(FLAGS.MIN_SEQ_LEN,FLAGS.IMAGE_DIM,FLAGS.IMAGE_DIM,FLAGS.NB_CHANNELS))
        
        idx = tf.cast(parsed_example['label'], tf.int64)
        label=tf.one_hot(idx,FLAGS.NB_CLASSES,dtype=tf.int64)
        return feats,label

    dataset = tf.data.TFRecordDataset([os.path.join('gs://{}/'.format(BUCKET), f.name) for f in bucket.list_blobs(prefix='{}/{}'.format(RECORD_IDEN,mode))])
    dataset = dataset.shuffle(FLAGS.SHUFFLE_BUFFER,reshuffle_each_iteration=True)
    dataset = dataset.map(_parser)
    dataset = dataset.repeat()
    dataset = dataset.batch(FLAGS.BATCH_SIZE,drop_remainder=True)
    iterator = dataset.make_one_shot_iterator()
    X, y = iterator.get_next()
    sess = tf.Session()
    with sess.as_default():
      feats=X.eval()
      labels=y.eval()
      yield feats,labels

def train_in_fn():
  return data_input_fn(FLAGS,'Train')
  
  return data_input_fn(FLAGS,'Train')
def eval_in_fn():
  return data_input_fn(FLAGS,'Eval')



<Bucket: tfalldata>


## Change to git repo dir

In [9]:
cd /content/gdrive/My\ Drive/PROJECTS/HACT/pyACTRECOG/

/content/gdrive/My Drive/PROJECTS/HACT/pyACTRECOG


## COMPILE MODEL




In [10]:
import sys
sys.path.append('.')
from tensorflow.keras.losses import categorical_crossentropy
from coreLib.model import convNet3D

model=convNet3D(seq_len=FLAGS.MIN_SEQ_LEN,
                img_dim=FLAGS.IMAGE_DIM,
                nb_channels=FLAGS.NB_CHANNELS,
                nb_classes=FLAGS.NB_CLASSES)
model.summary()
model.compile(optimizer=tf.train.RMSPropOptimizer(1e-2), 
              loss=categorical_crossentropy,
              metrics=['accuracy'])


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 6, 128, 128, 1)    0         
_________________________________________________________________
INITIAL_CONV3D (Conv3D)      (None, 6, 128, 128, 64)   1792      
_________________________________________________________________
INITIAL_POOL3D (MaxPooling3D (None, 6, 64, 64, 64)     0         
_________________________________________________________________
CONV3D_1_C1 (Conv3D)         (None, 6, 64, 64, 128)    221312    
_________________________________________________________________
POOL3D_1 (MaxPooling3D)      (None, 6, 32, 32, 128)    0         
_________________________________________________________________
CONV3D_2_C1 (Conv3D) 

## EXPORT TO TPU MODEL

In [11]:
TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
tf.logging.set_verbosity(tf.logging.INFO)
cluster_resolver=tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)
strategy=tf.contrib.tpu.TPUDistributionStrategy(cluster_resolver)
model = tf.contrib.tpu.keras_to_tpu_model(model,strategy=strategy)


INFO:tensorflow:Querying Tensorflow master (grpc://10.16.101.162:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 7858302473141818004)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 12623185630599305096)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 4937265352828163642)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 5445691478193049863)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 5494180979706028767)
INFO:tensorflow:*** Available Device: _DeviceAttributes(

## Train
* define **checkpoints** and **callbacks** (tensorboard avoided)
* train model

In [12]:
MODEL_DIR=os.path.join(os.getcwd(),'info')
MODEL_NAME='convNet3D' # @param

history =  model.fit_generator(
              train_in_fn(),
              epochs= EPOCHS,
              steps_per_epoch= STEPS_PER_EPOCH,
              validation_data=eval_in_fn(),
              validation_steps= VALIDATION_STEPS,
              verbose=1
            )


Epoch 1/10
INFO:tensorflow:New input shapes; (re-)compiling: mode=train (# of cores 8), [TensorSpec(shape=(1,), dtype=tf.int32, name='core_id0'), TensorSpec(shape=(1, 6, 128, 128, 1), dtype=tf.float32, name='input_1_10'), TensorSpec(shape=(1, 17), dtype=tf.float32, name='DENSE_CLASS_target_30')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for input_1
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 28.62976050376892 secs
INFO:tensorflow:Setting weights on TPU model.


ResourceExhaustedError: ignored

## Save Model



In [0]:
model.save_weights(os.path.join(MODEL_DIR,'{}.h5'.format(MODEL_NAME)))

#### Plot Training Histoty

In [0]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('LOSS History')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.savefig(os.path.join(MODEL_DIR,'{}_history.png'.format(model_name)))