In [0]:
import shutil
from termcolor import colored
from google.colab import auth
from googleapiclient.discovery import build
import io , requests, os
import sys
# Authenticate the current Colab user so the Drive client below can act on their behalf.
auth.authenticate_user()
# Google Drive v3 API client; the helper functions in this cell read it as a module-level global.
drive_service = build('drive', 'v3')

def get_parent_folder(folder_name):
  """Look up Google Drive folders whose name is exactly ``folder_name``.

  Uses the module-level ``drive_service`` client and follows ``nextPageToken``
  until every result page has been read.

  Args:
    folder_name: Name of the folder to search for. It may contain single
      quotes or backslashes; both are escaped before being put in the query.

  Returns:
    A list of ``{"name": ..., "id": ...}`` dicts, one per matching folder, in
    the order the API returned them. Empty when nothing matches.
  """
  # Drive query values are wrapped in single quotes, so a quote (or a
  # backslash) inside the name must be backslash-escaped; otherwise the query
  # is malformed or matches something other than the requested name.
  escaped_name = str(folder_name).replace('\\', '\\\\').replace("'", "\\'")
  query = "name='%s' and mimeType='application/vnd.google-apps.folder'" % escaped_name

  folder_array = []
  page_token = None
  while True:
      response = drive_service.files().list(q=query,
                                          spaces='drive',
                                          fields='nextPageToken, files(id, name)',
                                          pageToken=page_token).execute()
      folder_array.extend(
          {"name": file.get('name'), "id": file.get('id')}
          for file in response.get('files', [])
      )
      page_token = response.get('nextPageToken', None)
      if page_token is None:
          break
  return folder_array
#get_parent_folder('Colab Notebooks')

def get_files_from_parent(parent_id):
  """List the direct children of a Drive folder.

  Queries the module-level ``drive_service`` for every item whose parents
  include ``parent_id`` and walks all result pages.

  Args:
    parent_id: Drive ID of the folder whose children are listed.

  Returns:
    A dict mapping each child's name to its Drive ID. When two children share
    a name, the one returned later by the API wins.
  """
  children_by_name = {}
  drive_query = "'%s' in parents" % parent_id
  next_token = None
  has_more_pages = True
  while has_more_pages:
      listing = drive_service.files().list(
          q=drive_query,
          spaces='drive',
          fields='nextPageToken, files(id, name)',
          pageToken=next_token,
      ).execute()
      for entry in listing.get('files', []):
          children_by_name[entry.get('name')] = entry.get('id')
      next_token = listing.get('nextPageToken', None)
      has_more_pages = next_token is not None
  return children_by_name
#get_files_from_parent('1ANJZJ0XnUFGz49L5my34aps5sGECq7o3')

def get_file_buffer(file_id, verbose=0):
  """Download a Drive file into an in-memory buffer.

  Uses the module-level ``drive_service`` client and downloads the file chunk
  by chunk until the downloader reports completion.

  Args:
    file_id: Drive ID of the file to download.
    verbose: When truthy, redraw a one-line progress bar on stdout after
      every chunk.

  Returns:
    An ``io.BytesIO`` holding the file content, rewound to position 0.
  """
  from googleapiclient.http import MediaIoBaseDownload
  request = drive_service.files().get_media(fileId=file_id)
  downloaded = io.BytesIO()
  downloader = MediaIoBaseDownload(downloaded, request)
  done = False
  while not done:
    progress, done = downloader.next_chunk()
    if verbose:
      total_size = progress.total_size
      if total_size:
        percentage_done = int(progress.resumable_progress * 100 / total_size)
      else:
        # Empty file or size not reported: avoid dividing by 0 / None and
        # only show a full bar once the download has actually finished.
        percentage_done = 100 if done else 0
      # '\r' returns to the start of the line so the bar is redrawn in place;
      # flush after writing so the update is visible immediately.
      sys.stdout.write("\r[%-100s] %d%%" % ('=' * percentage_done, percentage_done))
      sys.stdout.flush()
  downloaded.seek(0)
  return downloaded
#get_file_buffer('1_FLa_5dVDnE3r_DkjWvEH3lsiNxAg5vs')

def _save_drive_file(file_id, dest_path):
  """Download one Drive file and write its bytes to ``dest_path``.

  Returns True when the file was written, False when the download raised
  ``ValueError`` (the error is reported and nothing is written).
  """
  try:
    buffer = get_file_buffer(file_id, verbose=1)
  except ValueError as err:
    # Report the actual error instance, not the exception class.
    print('SOME_THING_WENT_WRONG', err)
    return False
  with open(dest_path, "wb") as out:
    out.write(buffer.read())
  return True


def downloads_files_and_data(input_file_meta, source_folder='/content/'):
  """Download every entry of a Drive folder listing to local disk.

  Each ``name -> Drive ID`` entry is saved as ``<source_folder>/<name>``.
  The entry named ``'data'`` is treated as a sub-folder: its children are
  listed with ``get_files_from_parent`` and saved under
  ``<source_folder>/data``, which is deleted and recreated first.

  Args:
    input_file_meta: Dict mapping file name to Drive ID, as returned by
      ``get_files_from_parent``.
    source_folder: Local destination directory. Defaults to ``'/content/'``.

  Returns:
    None. Files are written to disk and progress is printed.
  """
  data_folder = os.path.join(source_folder, 'data')
  print('********** Download Logics Files **********')
  for file_name, file_id in input_file_meta.items():
    if file_name == 'data':
      data_files = get_files_from_parent(file_id)
      # Recreate the same absolute folder the files are written to, rather
      # than a path relative to the current working directory.
      if os.path.exists(data_folder):
        shutil.rmtree(data_folder, ignore_errors=True)
      # exist_ok: rmtree(ignore_errors=True) may leave the directory behind.
      os.makedirs(data_folder, exist_ok=True)
      print('********** Download Data Files **********')
      for data_name, data_id in data_files.items():
        dest_data = os.path.join(data_folder, data_name)
        print("processing %s data" % data_name)
        # Each data file is written from its own download buffer.
        if _save_drive_file(data_id, dest_data):
          print("Done data %s" % dest_data)
    else:
      dest_file = os.path.join(source_folder, file_name)
      print("processing %s data" % file_name)
      # A failed download is skipped instead of writing stale or missing bytes.
      if _save_drive_file(file_id, dest_file):
        print("Done logic file %s" % dest_file)
        
# Name of the Drive folder that holds the project files and the 'data' sub-folder.
folder_name = 'googlecolab'
#folder_name = 'test'
parent_folder = get_parent_folder(folder_name)
print('parent_folder: ', parent_folder)

# An empty list means no Drive folder has that name.
if parent_folder:
  # The colour names were previously passed to print() as extra positional
  # arguments, so the word "green"/"red" was printed instead of colouring the
  # message; termcolor's colored() applies the colour.
  # Message: "folder information retrieved successfully".
  print(colored('Lay thong tin folder hoan thanh', 'green'))
  # When several folders share the name, the first match is used.
  print('folder name: %s' % (parent_folder[0]['name']))
  print('folder id: %s' % (parent_folder[0]['id']))
  input_file_meta = get_files_from_parent(parent_folder[0]['id'])
else:
  # Messages: "folder does not exist" / "closing the application".
  print(colored('folder khong ton tai', 'red'))
  print(colored('dong ung dung', 'red'))
  sys.exit()

downloads_files_and_data(input_file_meta)

In [0]:
# Extract the project archive (shell command run through IPython's `!` syntax).
!unzip './OCR_demo-master - OK - 1.zip'

# Second archive, currently disabled (the file name suggests a training data set; unverified).
#!unzip './Bo_du_lieu_huan_luyen.zip'

In [0]:
# Train the model (tiến hành huấn luyện)

from utils_CNN_2BLSTM_CTC import *
from keras.optimizers import Adam
from keras.callbacks import *
from sklearn.utils import shuffle


# Build the character set and the label encoder; both helpers come from the
# star import of utils_CNN_2BLSTM_CTC.
all_character_list = get_all_character()
create_label_encoder(all_character_list)

model = create_model()
# ModelCheckpoint fills in {val_loss} when it saves a checkpoint.
MODEL_PATH = 'model/xception_model_{val_loss:.5f}.h5'

# Make sure the checkpoint directory exists.
os.makedirs('model', exist_ok=True)

# Shuffle with a fixed seed so the train/validation split is reproducible.
# DATA_FOLDER and TRAIN_VAL_SPLIT are presumably constants exported by
# utils_CNN_2BLSTM_CTC; verify against that module.
all_image_list = get_image_list(DATA_FOLDER)
all_image_list = shuffle(all_image_list, random_state=123)
no_train_images = int(TRAIN_VAL_SPLIT * len(all_image_list))
train_image_list = all_image_list[:no_train_images]
val_image_list = all_image_list[no_train_images:]

print("Train on  ", no_train_images, 'images')
print ("Validate on", len(all_image_list) - no_train_images, 'images')

optimizer = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-9,
                 decay=1e-9, amsgrad=True, clipnorm=5., clipvalue=0.5)
model.compile(loss=fake_loss, optimizer=optimizer)
data_gen = DataGenerator(train_image_list, val_image_list)
step_val = len(data_gen.val_image_list) // BATCH_SIZE
# The extra "// 2" makes one epoch cover about half of the training batches.
step_train = len(data_gen.train_image_list) // BATCH_SIZE // 2

checkpointer = ModelCheckpoint(
    filepath=MODEL_PATH, save_best_only=True, verbose=1)
# The keyword was misspelled as "facttor", so the intended 0.75 reduction
# factor never reached the callback.
reduce_lr = ReduceLROnPlateau(patience=6, verbose=1, factor=0.75)
model.fit_generator(generator=data_gen.next_train(), steps_per_epoch=step_train, epochs=150, verbose=1,
    callbacks=[checkpointer, reduce_lr], validation_data=data_gen.next_val(), validation_steps=step_val)


In [5]:
# Run recognition on the test images (kiểm tra nhận dạng)

from keras.models import load_model, Model
from utils_CNN_2BLSTM_CTC import *
import os
import itertools
from keras import backend as K


# Checkpoint to evaluate and the folder of test images.
MODEL_PATH = './model/xception_model_9.71246.h5'
test_folder = './image/'
# The saved model references custom objects that must be supplied on load.
model = load_model(MODEL_PATH,
                   custom_objects={'squeeze_layer': squeeze_layer,
                                   'ctc_loss': ctc_loss,
                                   'fake_loss': fake_loss})
input_layer = model.inputs[0]
# Predict from the output of the fifth layer from the end instead of the
# model's final output (presumably the per-timestep class scores that feed
# the CTC loss; verify against create_model).
output_layer = model.layers[-5].output

model_new = Model(input_layer, output_layer)
# print (model_new.summary())

test_image_list = os.listdir(test_folder)
le = load_label_encoder()

for t in test_image_list:
    # Flag 0 loads the image as single-channel grayscale.
    image = cv2.imread(os.path.join(test_folder, t), 0)
    if image is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # instead of crashing in cv2.resize.
        print("Skipping unreadable image:", t)
        continue
    image = cv2.resize(image, (128, 64))
    # Add a leading batch axis and a trailing channel axis; scale pixels by 1/255.
    image = np.expand_dims(image, axis=0)
    image = np.expand_dims(image / 255., axis=-1)
    result = model_new.predict(image)
    # Drop the first two rows of the squeezed prediction, then take the
    # highest-scoring class in each remaining row.
    result = np.squeeze(result)[2:, :]
    result = np.argmax(result, 1)
    # Collapse runs of identical consecutive classes and drop class
    # NO_CLASSES-1 (presumably the CTC blank; verify in utils_CNN_2BLSTM_CTC).
    result = [k for k, _ in itertools.groupby(result) if k != NO_CLASSES-1]
    result = le.inverse_transform(result)
    result = ''.join(result)

    # Despite its name, this is the ground-truth label file: './label/<image name>.txt'.
    image_path = './label/'+t.split('/')[-1].split('.png')[0]+'.txt'
    # Context manager so the label file is closed on every iteration.
    with open(image_path, encoding="utf8") as f:
        s = f.read()

    #print ("True label:", t.split('_')[0])
    print ("True label:", s)
    # The prediction is truncated to the length of the true label before printing.
    print ("Predicted :", result[:len(s)])
    print ('_'*150)



True label: Đông
Predicted : Đông
______________________________________________________________________________________________________________________________________________________
True label: đảo
Predicted : đảo
______________________________________________________________________________________________________________________________________________________
True label: Thành
Predicted : hinhế
______________________________________________________________________________________________________________________________________________________
True label: thi
Predicted : bịu
______________________________________________________________________________________________________________________________________________________
True label: bảo
Predicted : bảo
______________________________________________________________________________________________________________________________________________________
True label: hai
Predicted : hai
_______________________________________________