<a href="https://colab.research.google.com/github/mnansary/pyHOCR/blob/master/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive

# Location inside the Colab VM where Google Drive gets attached.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
cd /content/gdrive/My\ Drive/OCR/pyHOCR/

/content/gdrive/My Drive/OCR/pyHOCR


In [3]:
# Pin TensorFlow to 1.13.1: the cells below use the 1.x-only tf.Session,
# tf.logging and tf.contrib.tpu APIs.
!pip3 install tensorflow==1.13.1



# Colab-specific tasks

```
# mount google drive 
# change working directory to git repo
# update repo (if needed)
# TPU check
```

### TPU check

In [4]:
import os
import pprint
import tensorflow as tf

# The TPU endpoint is read from the COLAB_TPU_ADDR environment variable; when
# it is absent only the error message below is printed.
colab_tpu_addr = os.environ.get('COLAB_TPU_ADDR')
if colab_tpu_addr is not None:
  tpu_address = 'grpc://' + colab_tpu_addr
  print('TPU address is', tpu_address)

  # Open a short-lived session against that address just to enumerate devices.
  with tf.Session(tpu_address) as session:
    devices = session.list_devices()

  print('TPU devices:')
  pprint.pprint(devices)
else:
  print('ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!')

# Last expression of the cell: echoes the TensorFlow version in use.
tf.__version__

TPU address is grpc://10.123.182.226:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 13353127054585782585),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3960466896851279111),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 10083872595087331646),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 6795121186322065703),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 486304870448692684),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 17308647051292267530),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 12042201856240171132),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 2814125367760688655),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 16974759967

'1.13.1'

# DenseNet Training


### Load Dataset

In [5]:
"""
@author: MD.Nazmuddoha Ansary
"""
from __future__ import print_function
import numpy as np
from DenseNet.utils import readh5
# Folder with the HDF5 dataset files, resolved against the current working directory
dset_dir=os.path.join(os.getcwd(),'DataSet')

# Paths of the training (t) and validation (v) inputs (X) and labels (Y)
Xt_p,Yt_p,Xv_p,Yv_p=[os.path.join(dset_dir,h5_name)
                     for h5_name in ('Xt.h5','Yt.h5','Xv.h5','Yv.h5')]

# Read the four files in the same order: train X, train Y, valid X, valid Y
Xt,Yt,Xv,Yv=[readh5(h5_path) for h5_path in (Xt_p,Yt_p,Xv_p,Yv_p)]

# Report the shape of every split
for template,array in (('X-Train:{}',Xt),
                       ('Y-Train:{}',Yt),
                       ('X-Valid:{}',Xv),
                       ('Y-Valid:{}',Yv)):
    print(template.format(array.shape))

Using TensorFlow backend.


X-Train:(9600, 32, 32, 1)
Y-Train:(9600, 50)
X-Valid:(2400, 32, 32, 1)
Y-Valid:(2400, 50)


### Build Model


*   The original Keras model needs to be converted to a TF model
*   Compile it with an optimizer and a loss function from TF



In [6]:
from DenseNet.models import denseNet
from tensorflow.keras.optimizers import Adam

# denseNet() returns two values: the network and the name used for its weight file
model,model_name=denseNet()

# Print the layer table followed by the model name
model.summary()
print(model_name)

# Compile with the TF Keras Adam optimizer (default settings) and
# categorical cross-entropy, tracking accuracy as the metric
model.compile(
    optimizer=Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 1)    0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 24)   216         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_v1 (BatchNo (None, 32, 32, 24)   96          conv2d[0][0]                     
__________________________________________________________________________________________________
batch_normalization_v1_1 (Batch (None, 32, 32, 24)   96          batch_normalization_v1[0][0]     
_____________________________________

### Convert Keras Model to TPU Model

In [7]:
# gRPC endpoint of the Colab TPU worker, used when configuring TensorFlow.
TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
# Emit INFO-level TensorFlow logs so the TPU setup steps are visible.
tf.logging.set_verbosity(tf.logging.INFO)

# Build the pieces one at a time, then rebind `model` to its TPU counterpart.
tpu_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)
tpu_strategy = tf.contrib.tpu.TPUDistributionStrategy(tpu_resolver)
model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpu_strategy)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Querying Tensorflow master (grpc://10.123.182.226:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13353127054585782585)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3960466896851279111)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 10083872595087331646)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TP

### Training Parameters and Callbacks

In [8]:
from tensorflow.keras.callbacks import ModelCheckpoint
# NOTE(review): h5py is not referenced directly in this cell; presumably it is
# imported so a missing HDF5 backend fails here instead of at save time — confirm.
import h5py

# Folder that receives the checkpoint file
c_path=os.path.join(os.getcwd(),'DenseNet','model_weights')
# Create the folder up front: git does not track empty directories, so a fresh
# clone may not have it, and the checkpoint would otherwise fail to save only
# after the first epoch has already been trained.
os.makedirs(c_path, exist_ok=True)
print(c_path)

# Save to <model_name>.h5 and keep only the best model seen so far
# (ModelCheckpoint monitors val_loss unless told otherwise).
checkpoint = ModelCheckpoint(filepath=os.path.join(c_path,'{}.h5'.format(model_name)), verbose=1, save_best_only=True)

# Training hyper-parameters consumed by the fit cell
epochs = 150
batch_size =30

/content/gdrive/My Drive/OCR/pyHOCR/DenseNet/model_weights


### Fit the data

In [9]:
# Train on the training split and validate on the validation split, with the
# checkpoint callback attached; the returned object is kept in `history`.
history=model.fit(
    Xt,
    Yt,
    validation_data=(Xv,Yv),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint],
    verbose=1,
)


Train on 9600 samples, validate on 2400 samples
Epoch 1/150
INFO:tensorflow:New input shapes; (re-)compiling: mode=train (# of cores 8), [TensorSpec(shape=(3,), dtype=tf.int32, name='core_id0'), TensorSpec(shape=(3, 32, 32, 1), dtype=tf.float32, name='input_1_10'), TensorSpec(shape=(3, 50), dtype=tf.float32, name='dense_target_30')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Cloning Adam {'lr': 0.0010000000474974513, 'beta_1': 0.8999999761581421, 'beta_2': 0.9990000128746033, 'decay': 0.0, 'epsilon': 1e-07, 'amsgrad': False}
INFO:tensorflow:Remapping placeholder for input_1
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:KerasCrossShard: <tensorflow.python.keras.optimizers.Adam object at 0x7f24be4b4080> []
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 58.30991506576538 secs
INFO:tensorflow:Setting weights on TPU model.
INFO:tensorflow:CPU -> TPU lr: 0.0010000000474

### Load Best Model For testing

# Testing the model

In [12]:
from sklearn import metrics
from keras.models import load_model
from termcolor import colored

# Rebuild the network and restore the weights written by the checkpoint callback
model,model_name=denseNet()
weights_file='{}.h5'.format(model_name)
model.load_weights(os.path.join(c_path,weights_file))

# Paths of the testing inputs (X) and labels (Y)
Xtt_p,Ytt_p=[os.path.join(dset_dir,h5_name) for h5_name in ('Xtt.h5','Ytt.h5')]

# Testing data
Xtt,Ytt=[readh5(h5_path) for h5_path in (Xtt_p,Ytt_p)]

# Report the shape of the test split
for template,array in (('X-Test:{}',Xtt),('Y-Test:{}',Ytt)):
    print(template.format(array.shape))



X-Test:(3000, 32, 32, 1)
Y-Test:(3000, 50)


### Get predictions and Accuracy

In [13]:
print(colored('# Generating Predictions','blue'))
# One batched forward pass over the whole test set instead of a separate
# model.predict call per image. argmax over axis 1 picks the predicted class
# index of each row. list(...) keeps the result a list of NumPy integers.
predictions = list(np.argmax(model.predict(Xtt), axis=1))

print(colored('# Getting Ground Truth','blue'))
# Labels are one-hot rows (Y-Test is printed as (3000, 50) above), so the
# row-wise argmax recovers the class index.
ground_truth = list(np.argmax(Ytt, axis=1))

print(colored('# Calculating Accuracy','blue'))
# Micro-averaged F1, scaled to a percentage
prediction_accuracy = 100* metrics.f1_score(ground_truth,predictions, average = 'micro')
print(colored('Test data Prediction Accuracy [F1 accuracy]: {}'.format(prediction_accuracy),'green'))




[34m# Generating Predictions[0m
[34m# Getting Ground Truth[0m
[34m# Calculating Accuracy[0m
[32mTest data Prediction Accuracy [F1 accuracy]: 98.36666666666667[0m
