In [1]:
import argparse
from google.oauth2 import service_account
from google.cloud import vision
import matplotlib.pyplot as plt
import cv2
import io
import re
import numpy as np
import os
import xml.etree.ElementTree as ET 
from utilities.score_computors import are_sentences_alike

In [13]:
dir_path = 'datasets/KAIST/English/'
MAX_TRAINING = 1180 #5

In [14]:
def extract_clean_str(original_str):
    return re.sub('[^a-zA-Z0-9]+', ' ', original_str, flags=re.UNICODE).lower().strip().replace(' ', '')

def get_mapped(path):
    key_map, dict_idx = {}, 0
    for r, _, f in os.walk(dir_path):
        for file in f:
            if '.JPG' in file or '.jpg' in file:
                key_map[dict_idx] = file
                dict_idx += 1
    idxs = list(key_map.keys())          
    shuffled_idxs = np.random.randint(0, len(idxs), len(idxs))
    return key_map, shuffled_idxs

def fetch_from_xml(path, file):
    tree = ET.parse(os.path.join(path, file)).getroot()  
    return extract_clean_str(''.join([e.attrib['char'] for e in tree.iter(tag='character')]))

In [19]:
dict_files, files_idx = get_mapped(dir_path)
shuffled_idx = np.random.randint(0, len(files_idx), len(files_idx))
vision_client = vision.ImageAnnotatorClient()
correct_pred, total_pred = 0, 0

for idx in shuffled_idx:
    file = dict_files.get(idx)
    print ("Iterating for file = {}".format(file))
    xml_file = file[:len(file)-4] + '.xml' 
    
    if not os.path.exists(os.path.join(dir_path, xml_file)):
        continue
    ground_truth_str = fetch_from_xml(dir_path, xml_file)
    
    if len(ground_truth_str) == 0:
        continue
    with io.open(os.path.join(dir_path, file), 'rb') as image_file:
        content = (image_file.read())
    image = vision.types.Image(content=content)
    text_detection_response = vision_client.text_detection(image=image)
    detected_str = extract_clean_str(text_detection_response.full_text_annotation.text)
    
    if are_sentences_alike(detected_str, ground_truth_str, .6) and len(detected_str) != 0:
        correct_pred += 1
    
    total_pred += 1
    if total_pred >= MAX_TRAINING:
        break

Iterating for file = 44.JPG
Iterating for file = 090.jpg
Iterating for file = DSC02382.JPG
Iterating for file = DSC03631.JPG
Iterating for file = 054.JPG
Iterating for file = P1010113.JPG
Iterating for file = DSC03135.JPG
Iterating for file = P090912050.jpg
Iterating for file = DSC03613.JPG
Iterating for file = 086.jpg
Iterating for file = 2007-12-09-day-exterior-032.jpg
Iterating for file = DSC03176.JPG
Iterating for file = DSC03294.JPG
Iterating for file = DSC02429.JPG
Iterating for file = DSC04451.JPG
Iterating for file = DSC02379.JPG
Iterating for file = DSC04057.JPG
Iterating for file = DSC03519.JPG
Iterating for file = DSC02919.JPG
Iterating for file = P090831012.jpg
Iterating for file = DSC04246.JPG
Iterating for file = DSC02937.JPG
Iterating for file = 001.jpg
Iterating for file = DSC02624.JPG
Iterating for file = DSC03081.JPG
Iterating for file = DSC03167.JPG
Iterating for file = DSC04104.JPG
Iterating for file = DSC04431.JPG
Iterating for file = DSC03432.JPG
Iterating for fil

Iterating for file = DSC02775.JPG
Iterating for file = DSC02884.JPG
Iterating for file = P090831019.jpg
Iterating for file = DSC03839.JPG
Iterating for file = DSC02425.JPG
Iterating for file = DSC03829.JPG
Iterating for file = DSC03274.JPG
Iterating for file = P090911064.jpg
Iterating for file = P1010088.JPG
Iterating for file = DSC03052.JPG
Iterating for file = P1010009.JPG
Iterating for file = DSC03621.JPG
Iterating for file = DSC03656.JPG
Iterating for file = DSC03250.JPG
Iterating for file = DSC03300.JPG
Iterating for file = DSC04465.JPG
Iterating for file = DSC04139.JPG
Iterating for file = DSC02940.JPG
Iterating for file = DSC04409.JPG
Iterating for file = P1010062.JPG
Iterating for file = P090905002.jpg
Iterating for file = DSC03430.JPG
Iterating for file = 038.JPG
Iterating for file = 080119-0016.jpg
Iterating for file = 43.JPG
Iterating for file = DSC04151.JPG
Iterating for file = 080119-0012.jpg
Iterating for file = DSC02611.JPG
Iterating for file = DSC04197.JPG
Iterating for

Iterating for file = DSC03743.JPG
Iterating for file = DSC04029.JPG
Iterating for file = P090903092.jpg
Iterating for file = P090903024.jpg
Iterating for file = P1010048.JPG
Iterating for file = P090911016.jpg
Iterating for file = DSC02398.JPG
Iterating for file = DSC02450.JPG
Iterating for file = P1010094.JPG
Iterating for file = DSC03229.JPG
Iterating for file = 8.jpg
Iterating for file = DSC03539.JPG
Iterating for file = P1010133.JPG
Iterating for file = DSC02676.JPG
Iterating for file = DSC02903.JPG
Iterating for file = DSC02519.JPG
Iterating for file = DSC02867.JPG
Iterating for file = 2007-12-08-day-106.jpg
Iterating for file = P090831017.jpg
Iterating for file = DSC02394.JPG
Iterating for file = 032.jpg
Iterating for file = P090912066.jpg
Iterating for file = DSC03883.JPG
Iterating for file = DSC03609.JPG
Iterating for file = DSC02842.JPG
Iterating for file = DSC02317.JPG
Iterating for file = P1010060.JPG
Iterating for file = DSC03015.JPG
Iterating for file = DSC02527.JPG
Iterat

Iterating for file = DSC03579.JPG
Iterating for file = 063.JPG
Iterating for file = DSC03340.JPG
Iterating for file = 134.JPG
Iterating for file = P090831125.jpg
Iterating for file = P090911067.jpg
Iterating for file = DSC02355.JPG
Iterating for file = DSC02460.JPG
Iterating for file = DSC02411.JPG
Iterating for file = 6.jpg
Iterating for file = DSC02513.JPG
Iterating for file = 032.jpg
Iterating for file = DSC02398.JPG
Iterating for file = DSC03549.JPG
Iterating for file = 080119-0013.jpg
Iterating for file = 031.JPG
Iterating for file = DSC03185.JPG
Iterating for file = P1010099.JPG
Iterating for file = DSC02477.JPG
Iterating for file = P1010025.JPG
Iterating for file = DSC03273.JPG
Iterating for file = DSC03743.JPG
Iterating for file = DSC02483.JPG
Iterating for file = DSC03467.JPG
Iterating for file = DSC03785.JPG
Iterating for file = DSC02725.JPG
Iterating for file = 2007-12-08-day-103.jpg
Iterating for file = DSC03887.JPG
Iterating for file = DSC03063.JPG
Iterating for file = DSC

ParseError: XML or text declaration not at start of entity: line 1, column 4 (<string>)

In [18]:
os.path.exists("datasets/KAIST/English/36.xml")

False

In [20]:
print ("Accuracy = {}".format(correct_pred/total_pred))

Accuracy = 0.7703016241299304


In [None]:
vision_client = vision.ImageAnnotatorClient()
correct_pred, total_pred = 0, 0
idx = 20
file = dict_files.get(idx)
xml_file = file[:len(file)-4] + '.xml' 

ground_truth_str = fetch_from_xml(dir_path, xml_file)
if len(ground_truth_str) == 0:
    print ("A")
    #continue


with io.open(os.path.join(dir_path, file), 'rb') as image_file:
        content = (image_file.read())
image = vision.types.Image(content=content)
text_detection_response = vision_client.text_detection(image=image)
detected_str = extract_clean_str(text_detection_response.full_text_annotation.text)
 

if are_sentences_alike(detected_str, ground_truth_str, .3):
    correct_pred += 1

In [None]:
f = open(os.path.join(dir_path, 'DSC02629.xml'), "r")
print(f.read())

In [None]:
len(files_idx)

In [None]:
ground_truth

In [None]:
import xml.etree.ElementTree as ET 
tree = ET.parse(os.path.join(dir_path, 'DSC02629.xml')) 
root = tree.getroot() 
ground_truth = ''
for elem in tree.iter(tag='character'):
    ground_truth += (elem.attrib['char'])

In [None]:
vision_client = vision.ImageAnnotatorClient()
curr_training = 0
for idx in shuffled_idx:
    file = dict_files.get(idx)
    
    with io.open(dir_path, 'rb') as image_file:
        content = (image_file.read())
    image = vision.types.Image(content=content)
    text_detection_response = vision_client.text_detection(image=image)
    detected_str = text_detection_response.full_text_annotation.text
    
    
    curr_training += 1
    if curr_training > MAX_TRAINING:
        break

In [None]:

root = ET.parse(os.path.join(dir_path, 'DSC02629.xml')).getroot()  
ground_truth = extract_clean_str(''.join([e.attrib['char'] for e in tree.iter(tag='character')]))
ground_truth

In [None]:
dir_path, xml_file

In [None]:
are_sentences_alike(detected_str, ground_truth_str, .3)

In [None]:
len(ground_truth_str)

In [None]:
detected_str, ground_truth_str