-
Notifications
You must be signed in to change notification settings - Fork 1
/
tesseract.py
229 lines (201 loc) · 7.95 KB
/
tesseract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
import hgtk
from ppadb.client import Client
import os
import cv2
import numpy as np
import pytesseract
import mss
import re
# Connect to the ADB server on localhost:5037 and use the first device it reports.
adb = Client(host='127.0.0.1', port=5037)
devices = adb.devices()
if not devices:
    # Fail with an actionable message instead of a bare IndexError on devices[0].
    raise RuntimeError(
        'No ADB devices found on 127.0.0.1:5037; connect a device and check `adb devices`.'
    )
device = devices[0]
def decompose_into_jamo(input_string):
    """Return ``input_string`` with every Hangul character split into its jamo.

    Characters accepted by ``hgtk.checker.is_hangul`` are replaced by the
    non-empty components returned by ``hgtk.letter.decompose``, in the order
    hgtk returns them. Every other character is copied through unchanged.

    Args:
        input_string: Text to decompose; may be empty.

    Returns:
        The decomposed string (``""`` for empty input).
    """
    pieces = []
    for char in input_string:
        if hgtk.checker.is_hangul(char):
            # decompose() can return empty-string components; skip those.
            pieces.extend(
                component for component in hgtk.letter.decompose(char) if component
            )
        else:
            pieces.append(char)
    # Join once at the end rather than growing a string with += per component.
    return "".join(pieces)
# -----------------------------------------------------------------------------
# Keyboard Config
# Tap coordinates of the on-screen keyboard, as passed to `input touchscreen tap`.
# Each letter row is described by its first key's (x, y) and its last key's x.
q_x = 60        # 'Q', first key of the top row
q_y = 1700
p_x = 1020      # 'P', last key of the top row
a_x = 100       # 'A', first key of the middle row
a_y = 1830
l_x = 975       # 'L', last key of the middle row
z_x = 208       # 'Z', first key of the bottom row
z_y = 1960
m_x = 863       # 'M', last key of the bottom row
spce_y = 2080   # y of the row holding the language / space / enter keys
# -----------------------------------------------------------------------------
# Special keys.
shft = (q_x, z_y)
engl = (q_x, spce_y)
# NOTE(review): x = p_x - q_x = 960 falls between the 'O' and 'P' columns;
# confirm this really hits the space bar on the target keyboard.
spce = (p_x - q_x, spce_y)
entr = (p_x, spce_y)


def _key_row(letters, first_x, last_x, y):
    """Map the letters of one keyboard row to (x, y) tap coordinates.

    The first and last keys use ``first_x`` / ``last_x`` exactly as given;
    the keys in between are spaced evenly between those two x values.
    """
    steps = len(letters) - 1
    row = {letters[0]: (first_x, y)}
    for index in range(1, steps):
        row[letters[index]] = (first_x + (last_x - first_x) * index / steps, y)
    row[letters[steps]] = (last_x, y)
    return row


# Uppercase Latin letter -> tap coordinate, in QWERTY row order.
alphabet_to_coord = {
    **_key_row('QWERTYUIOP', q_x, p_x, q_y),
    **_key_row('ASDFGHJKL', a_x, l_x, a_y),
    **_key_row('ZXCVBNM', z_x, m_x, z_y),
}

# Basic jamo -> coordinate of the key that carries it (listed in QWERTY key order).
jamo_to_coord = {
    jamo: alphabet_to_coord[key]
    for jamo, key in zip(
        'ㅂㅈㄷㄱㅅㅛㅕㅑㅐㅔ' 'ㅁㄴㅇㄹㅎㅗㅓㅏㅣ' 'ㅋㅌㅊㅍㅠㅜㅡ',
        'QWERTYUIOP' 'ASDFGHJKL' 'ZXCVBNM',
    )
}

# Jamo typed with shift -> coordinate of the unshifted jamo's key.
shift_jamo_to_coord = {
    shifted: jamo_to_coord[base]
    for shifted, base in zip('ㅃㅉㄸㄲㅆㅒㅖ', 'ㅂㅈㄷㄱㅅㅐㅔ')
}

# Compound jamo -> the two basic jamo tapped in sequence to produce it.
_COMPOUND_PARTS = {
    'ㅘ': 'ㅗㅏ',
    'ㅙ': 'ㅗㅐ',
    'ㅚ': 'ㅗㅣ',
    'ㅝ': 'ㅜㅓ',
    'ㅞ': 'ㅜㅔ',
    'ㅟ': 'ㅜㅣ',
    'ㅢ': 'ㅡㅣ',
    'ㄳ': 'ㄱㅅ',
    'ㄵ': 'ㄴㅈ',
    'ㄶ': 'ㄴㅎ',
    'ㄺ': 'ㄹㄱ',
    'ㄻ': 'ㄹㅁ',
    'ㄼ': 'ㄹㅂ',
    'ㄽ': 'ㄹㅅ',
    'ㄾ': 'ㄹㅌ',
    'ㄿ': 'ㄹㅍ',
    'ㅀ': 'ㄹㅎ',
    'ㅄ': 'ㅂㅅ',
}
combined_jamo_to_coord = {
    compound: (jamo_to_coord[first], jamo_to_coord[second])
    for compound, (first, second) in _COMPOUND_PARTS.items()
}
# We know for sure that there are no lower case alphabets nor special characters.
def type_on_phone(input_string):
    """Type ``input_string`` on the device by tapping keyboard keys, then tap enter.

    The text is first decomposed into jamo with ``decompose_into_jamo``. Each
    resulting character is then looked up in the coordinate tables:

    * basic jamo: one tap on its key;
    * shifted jamo: shift, the key, shift again;
    * compound jamo: its two component keys in order;
    * Latin letters: language key, the letter key, language key again;
    * a space: one tap on the space key.

    Characters found in none of the tables produce no taps. After all
    characters have been processed, the enter key is tapped once.
    """

    def tap(point):
        # Send one tap at the given (x, y) coordinate through the ADB shell.
        x, y = point
        device.shell(f'input touchscreen tap {x} {y}')

    for char in decompose_into_jamo(input_string):
        if char in jamo_to_coord:
            tap(jamo_to_coord[char])
        if char in shift_jamo_to_coord:
            for point in (shft, shift_jamo_to_coord[char], shft):
                tap(point)
        if char in combined_jamo_to_coord:
            for point in combined_jamo_to_coord[char]:
                tap(point)
        if char in alphabet_to_coord:
            for point in (engl, alphabet_to_coord[char], engl):
                tap(point)
        if char == ' ':
            tap(spce)
    tap(entr)
# -----------------------------------------------------------------------------
# Model Config
# Setup used for the paths below:
#   1. install tesseract: `brew install tesseract`
#   2. check install location: `brew info tesseract` > `/opt/homebrew/Cellar/tesseract/5.3.1`
#   3. download `kor.traineddata` from
#      https://github.com/tesseract-ocr/tessdata_best/blob/main/kor.traineddata
#      and place it at `/opt/homebrew/share/tessdata`
# NOTE: min_confidence is defined here but not referenced in the visible code.
min_confidence = 95
# Tesseract options handed to pytesseract via `config=`.
custom_config = r'--oem 3 --psm 6 -l kor+eng'
# Path of the tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/Cellar/tesseract/5.3.1/bin/tesseract'
# Directory containing the traineddata files.
os.environ['TESSDATA_PREFIX'] = '/opt/homebrew/share/tessdata/'
# -----------------------------------------------------------------------------
# Image Config
# Top-left and bottom-right corners of the screen region to capture.
top_left_x = 35
top_left_y = 257
bottom_right_x = 432
bottom_right_y = 678
# Region description passed to the screen grabber.
capture_region = dict(
    top=top_left_y,
    left=top_left_x,
    width=bottom_right_x - top_left_x,
    height=bottom_right_y - top_left_y,
)
# Pixels below brightness_threshold are multiplied by darkening_factor before OCR.
brightness_threshold = 191
darkening_factor = 0.5
# -----------------------------------------------------------------------------
def custom_threshold(img, threshold, factor):
    """Return a copy of ``img`` with every pixel below ``threshold`` scaled by ``factor``.

    Args:
        img: NumPy array of pixel values (the caller passes a grayscale image).
        threshold: Pixels strictly less than this value are scaled.
        factor: Multiplier applied to those pixels.

    Returns:
        A new array with the same shape and dtype as ``img``. Pixels at or
        above ``threshold`` are unchanged; scaled values are cast back to
        ``img``'s dtype on assignment. ``img`` itself is not modified.
    """
    img_copy = img.copy()
    # Vectorised replacement for a per-pixel Python loop: select the dark
    # pixels with a boolean mask and scale them in one NumPy operation.
    dark = img < threshold
    img_copy[dark] = img[dark] * factor
    return img_copy
# Matches any character that is neither an uppercase ASCII letter nor a
# precomposed Hangul syllable (U+AC00-U+D7A3). Compiled once at import time
# because process_ocr_result runs on every captured frame.
_NON_TEXT_RE = re.compile(r'[^A-Z\uac00-\ud7a3]')


def process_ocr_result(ocr_result):
    """Split raw OCR text into cleaned, non-empty lines.

    Args:
        ocr_result: Text returned by the OCR engine; lines are separated by
            ``'\\n'``.

    Returns:
        A list with one entry per input line, in order, where every character
        other than ``A-Z`` and Hangul syllables has been removed. Lines that
        end up empty are dropped, so the result may be ``[]``.
    """
    cleaned = (_NON_TEXT_RE.sub('', line) for line in ocr_result.split('\n'))
    return [line for line in cleaned if line]
# Main loop: capture the screen region, preprocess it, run OCR, type the last
# recognised line on the phone, and show a half-size preview of the OCR input.
# Press 'q' in the preview window to stop.
try:
    # Open the screen grabber once and reuse it for every frame instead of
    # creating a new mss instance on each iteration.
    with mss.mss() as sct:
        while True:
            screen_capture = sct.grab(capture_region)
            image_np = np.array(screen_capture)
            image_bgr = cv2.cvtColor(image_np, cv2.COLOR_BGRA2BGR)
            image_gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
            image_thresholded = custom_threshold(image_gray, brightness_threshold, darkening_factor)
            ocr_result = pytesseract.image_to_string(image_thresholded, config=custom_config)
            processed_lines = process_ocr_result(ocr_result)
            print(processed_lines)
            if processed_lines:
                # Only the last non-empty OCR line is typed.
                type_on_phone(processed_lines[-1])
            output_height, output_width = image_thresholded.shape[:2]
            resized_image_thresholded = cv2.resize(image_thresholded, (output_width // 2, output_height // 2))
            cv2.imshow('OCR Input', resized_image_thresholded)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Close the preview window even if the loop exits through an exception
    # or Ctrl-C.
    cv2.destroyAllWindows()