<a href="https://colab.research.google.com/github/mnansary/pyF2O/blob/master/colab_gen_unet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# colab specific task
*   mount google drive
*   TPU check
*   Check TF version
*   Change to git repo 

In [1]:
# Mount Google Drive at /content/gdrive; the project checkout and MODEL_DIR used
# by later cells both live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:

# tpu check
import os
import pprint
import tensorflow as tf

# A missing COLAB_TPU_ADDR is treated as "no TPU runtime attached".
# Later cells use TPU_ADDRESS and COLAB_TPU_ADDR, so stop here with a clear
# error instead of printing a message and failing further down with a
# NameError/KeyError.
if 'COLAB_TPU_ADDR' not in os.environ:
  raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!')

TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
print ('TPU address is', TPU_ADDRESS)

# Open a short-lived session against the TPU address only to enumerate devices.
with tf.Session(TPU_ADDRESS) as session:
  devices = session.list_devices()

print('TPU devices:')
pprint.pprint(devices)

# Last expression of the cell: displays the TensorFlow version.
tf.__version__

TPU address is grpc://10.53.190.234:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 6228487531686393789),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 7530780843371562102),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 18156448597659050412),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 15532005598923809813),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 6061655270171357145),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 17848330891640839161),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 11052733503231459291),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 6169043040051159501),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 15684053147

'1.15.0'

In [3]:
cd /content/gdrive/My\ Drive/PROJECTS/HACT/pyACTRECOG/

/content/gdrive/My Drive/PROJECTS/HACT/pyACTRECOG


# GCS specific task 
* **auth user**
* **save** and **upload** credentials to **tpu**
* set project information

In [0]:
# auth user for cloud SDK
# NOTE(review): the next cell reads SERVICE_KEY_PATH ('/content/adc.json');
# presumably this call is what creates that file - confirm.
from google.colab import auth
auth.authenticate_user()

In [5]:
# Save credentials
import json
SERVICE_KEY_PATH='/content/adc.json' # @param

# Read the credentials file exactly once; the `with` block closes the handle as
# soon as parsing finishes (the file used to be opened a second time and never
# closed).
with open(SERVICE_KEY_PATH, 'r') as f:
    auth_info = json.load(f)
# Both names are kept so that anything referring to either one still works.
JSON_DATA = auth_info

# Upload credentials to TPU.
with tf.Session(TPU_ADDRESS) as sess:
    tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)

# set service_account
# The printed identifier is whatever precedes the first '.' in 'client_id'.
SERVICE_ACCOUNT=str(JSON_DATA['client_id']).split('.')[0]
print('Service Account:',SERVICE_ACCOUNT)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Service Account: 32555940559


#### SET PROJECT INFORMATION 

In [6]:
# Project, bucket and folder names. BUCKET and TFIDEN are also used by
# data_input_fn to locate the record files.
PROJECT_ID    ='record-1106154'     # @param 
BUCKET        ='tfalldata'          # @param 
TFIDEN        ='TFRECORD'            # @param
# LIST FILES
# TFRECORDS_DIR is the gs:// URL of the TFIDEN folder inside BUCKET.
TFRECORDS_DIR= 'gs://{}/{}/'.format(BUCKET,TFIDEN)
# Point the Cloud SDK at the project, then list the folder's contents.
!gcloud config set project {PROJECT_ID}
!gsutil ls {TFRECORDS_DIR}


Updated property [core/project].
gs://tfalldata/TFRECORD/Eval/
gs://tfalldata/TFRECORD/Train/


# ConvNet3D Model Training

#### Data 
* Set **FLAGS** and **PARAMS**
* Create **Train** and **Eval** Data Generator


In [0]:
import sys
sys.path.append('.')
import numpy as np 

class FLAGS:
    # Settings read by data_input_fn when it builds the input pipelines:
    # batch size, per-example feature shape (MIN_SEQ_LEN, IMAGE_DIM, IMAGE_DIM,
    # NB_CHANNELS), one-hot depth (NB_CLASSES) and shuffle buffer size.
    BATCH_SIZE      = 32  #@param
    IMAGE_DIM       = 64   #@param
    NB_CHANNELS     = 3    #@param
    MIN_SEQ_LEN     = 6    #@param
    NB_CLASSES      = 17   #@param
    SHUFFLE_BUFFER  = 100 #@param

# Where checkpoints, final weights and the history plot are written, and the
# base file name they share.
MODEL_DIR           = '/content/gdrive/My Drive/PROJECTS/HACT/Model/' # @param
MODEL_NAME          = 'convNet3D' # @param
EPOCHS              =  250           # @param
# Number of examples in each split; used only to derive the step counts below.
NB_TRAIN_DATA       =  49920       # @param
NB_EVAL_DATA        =  3456        # @param
NB_TOTAL_DATA       =  NB_TRAIN_DATA + NB_EVAL_DATA 
# The training pipeline reads only the 'Train' records, so one epoch is one
# pass over NB_TRAIN_DATA examples. Using NB_TOTAL_DATA here made every
# "epoch" run past the end of the training set into the repeated data.
STEPS_PER_EPOCH     =  NB_TRAIN_DATA // FLAGS.BATCH_SIZE 
VALIDATION_STEPS    =  NB_EVAL_DATA  // FLAGS.BATCH_SIZE 
CHECK_DATA          =  False
# Step size handed to the optimizer when the model is compiled.
LEARNING_RATE       = 1e-4 #@param


#### Data Generator

In [8]:
from google.cloud import storage
from functools import partial

# Storage client created with the configured project id.
client = storage.Client(PROJECT_ID)
# get bucket from the project
bucket=client.get_bucket(BUCKET)
# Echo the bucket as a quick check; data_input_fn lists its blobs later on.
print(bucket)

def data_input_fn(FLAGS,mode): 
    """Build the batched, endlessly repeating input pipeline for one data split.

    Args:
        FLAGS: settings object providing MIN_SEQ_LEN, IMAGE_DIM, NB_CHANNELS,
            NB_CLASSES, SHUFFLE_BUFFER and BATCH_SIZE.
        mode: split name; every blob of the module-level ``bucket`` whose name
            starts with '<TFIDEN>/<mode>' is read as a TFRecord file.

    Returns:
        A ``tf.data.Dataset`` of ``(feats, label)`` batches. ``feats`` is
        float32 with shape (BATCH_SIZE, MIN_SEQ_LEN, IMAGE_DIM, IMAGE_DIM,
        NB_CHANNELS); ``label`` is a one-hot int64 tensor with shape
        (BATCH_SIZE, NB_CLASSES). Incomplete final batches are dropped.

    Raises:
        ValueError: if no record files are found for ``mode``.
    """
    feats_shape = (FLAGS.MIN_SEQ_LEN, FLAGS.IMAGE_DIM, FLAGS.IMAGE_DIM, FLAGS.NB_CHANNELS)

    def _parser(example):
        """Decode one serialized example into ``(feats, one-hot label)``."""
        schema = {
            'feats': tf.io.FixedLenFeature(feats_shape, tf.float32),
            'label': tf.io.FixedLenFeature((), tf.int64),
        }
        parsed_example = tf.io.parse_single_example(example, schema)

        feats = tf.cast(parsed_example['feats'], tf.float32)
        feats = tf.reshape(feats, feats_shape)

        idx = tf.cast(parsed_example['label'], tf.int64)
        label = tf.one_hot(idx, FLAGS.NB_CLASSES, dtype=tf.int64)

        return feats, label

    prefix = '{}/{}'.format(TFIDEN, mode)
    # Names ending in '/' are folder placeholder objects, not record files.
    record_files = [
        os.path.join('gs://{}/'.format(BUCKET), blob.name)
        for blob in bucket.list_blobs(prefix=prefix)
        if not blob.name.endswith('/')
    ]
    if not record_files:
        # Fail early instead of handing an empty file list to a pipeline that
        # is repeated forever.
        raise ValueError(
            'No TFRecord files found under gs://{}/{}'.format(BUCKET, prefix))

    dataset = tf.data.TFRecordDataset(record_files)
    # Cache the raw serialized records; parsing is redone on every pass.
    dataset = dataset.cache()
    dataset = dataset.map(_parser)
    dataset = dataset.shuffle(FLAGS.SHUFFLE_BUFFER,reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(FLAGS.BATCH_SIZE,drop_remainder=True)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) # autotune
    return dataset

def train_in_fn():
    """Return the input pipeline for the 'Train' split.

    Thin wrapper that calls ``data_input_fn`` with the module-level ``FLAGS``.
    """
    train_dataset = data_input_fn(FLAGS, mode='Train')
    return train_dataset

def eval_in_fn():
    """Return the input pipeline for the 'Eval' split.

    Thin wrapper that calls ``data_input_fn`` with the module-level ``FLAGS``.
    """
    eval_dataset = data_input_fn(FLAGS, mode='Eval')
    return eval_dataset


<Bucket: tfalldata>


#### COMPILE MODEL




In [9]:
import sys
sys.path.append('.')
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from coreLib.model import convNet3D

tf.logging.set_verbosity(tf.logging.INFO)

# Resolve the Colab TPU, initialise it, and create the distribution strategy
# under which the model is built.
resolver = tf.contrib.cluster_resolver.TPUClusterResolver('grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.contrib.distribute.initialize_tpu_system(resolver)
strategy = tf.contrib.distribute.TPUStrategy(resolver)

# Model creation and compilation both happen inside the strategy scope.
with strategy.scope():
  model=convNet3D(seq_len=FLAGS.MIN_SEQ_LEN,
                  img_dim=FLAGS.IMAGE_DIM,
                  nb_channels=FLAGS.NB_CHANNELS,
                  nb_classes=FLAGS.NB_CLASSES)
  model.summary()
  # Use the configured LEARNING_RATE; a bare Adam() silently ignored it and
  # trained with the optimizer's own default instead.
  model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
                loss=categorical_crossentropy,
                metrics=['accuracy'])



INFO:tensorflow:Initializing the TPU system: 10.53.190.234:8470
INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Querying Tensorflow master (grpc://10.53.190.234:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 6228487531686393789)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 18156448597659050412)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 15532005598923809813)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 6061655270171357145)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/de

#### Train


In [0]:
from tensorflow.keras.callbacks import ModelCheckpoint
# Checkpoint target: <MODEL_DIR>/<MODEL_NAME>.h5, written with save_best_only=True.
best_model_path = os.path.join(MODEL_DIR, '{}.h5'.format(MODEL_NAME))
checkpoint = ModelCheckpoint(filepath=best_model_path, verbose=1, save_best_only=True)

# Both input functions return repeating datasets, so the explicit step counts
# are what bound each training epoch and each validation run.
train_dataset = train_in_fn()
eval_dataset = eval_in_fn()
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=eval_dataset,
    validation_steps=VALIDATION_STEPS,
    callbacks=[checkpoint],
    verbose=1,
)

Epoch 1/250

#### Save Model



In [0]:
# Write the model's current weights to <MODEL_DIR>/<MODEL_NAME>_final.h5, a
# separate file from the <MODEL_NAME>.h5 checkpoint written during training.
model.save_weights(os.path.join(MODEL_DIR,'{}_final.h5'.format(MODEL_NAME)))

#### Plot Training History

In [0]:
import matplotlib.pyplot as plt
%matplotlib inline
# Draw training and validation loss on one set of axes, then save the figure
# into MODEL_DIR next to the model files.
fig, ax = plt.subplots()
for series_key in ('loss', 'val_loss'):
    ax.plot(history.history[series_key])
ax.set_title('LOSS History')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
fig.savefig(os.path.join(MODEL_DIR, '{}_history.png'.format(MODEL_NAME)))