# Profile PreProcessing

## 1. Gray Filter
## 2. Cropping
## 3. Binary Inversion
## 4. Integration or Individuall OCR

스크린샷 위치 : C:\Users\KH_Home\Nox_share\ImageShare\Screenshots

tesseract_path : C:\Program Files\Tesseract-OCR\tesseract.exe

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import pandas as pd

In [2]:
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

In [3]:
profile_path = './Screenshots/'
profile_list = glob.glob(profile_path + '*.png')
for fn in profile_list:
    print(fn)

./Screenshots\Screenshot_2020-07-17-19-11-13.png
./Screenshots\Screenshot_2020-07-17-19-11-18.png
./Screenshots\Screenshot_2020-07-17-19-11-23.png
./Screenshots\Screenshot_2020-07-17-19-11-31.png
./Screenshots\Screenshot_2020-07-17-19-11-36.png
./Screenshots\Screenshot_2020-07-17-19-11-41.png
./Screenshots\Screenshot_2020-07-17-19-11-55.png
./Screenshots\Screenshot_2020-07-17-19-12-01.png
./Screenshots\Screenshot_2020-07-17-19-12-07.png
./Screenshots\Screenshot_2020-07-17-19-12-13.png
./Screenshots\Screenshot_2020-07-17-19-12-20.png
./Screenshots\Screenshot_2020-07-17-19-12-26.png
./Screenshots\Screenshot_2020-07-17-19-12-32.png
./Screenshots\Screenshot_2020-07-17-19-12-38.png
./Screenshots\Screenshot_2020-07-17-19-12-48.png
./Screenshots\Screenshot_2020-07-17-19-12-54.png
./Screenshots\Screenshot_2020-07-17-19-13-01.png
./Screenshots\Screenshot_2020-07-17-19-13-07.png
./Screenshots\Screenshot_2020-07-17-19-13-13.png
./Screenshots\Screenshot_2020-07-17-19-13-19.png
./Screenshots\Screen

In [4]:
crop_dict = {
    'ID' : np.array([185, 216, 595, 672], dtype=int),
    'power' : np.array([290, 320, 690, 880], dtype=int),
    'kill' : np.array([290, 320, 880, 1050], dtype=int),
}

In [5]:
def bin_inv(img, thr=140, show=False):
    ret,thresh2 = cv2.threshold(img,thr,255,cv2.THRESH_BINARY_INV)
    ocr_result = pytesseract.image_to_string(thresh2)
    if show:
        plt.imshow(thresh2, 'gray')
        plt.show()
        print(ocr_result)
    return thresh2, ocr_result

In [6]:
def str2num(strnum):
    result = strnum.replace(',', '').replace('.', '')
    if result.isdigit():
        return int(result)
    else:
        return 0

In [7]:
class Player():
    def __init__(self, ID, power, kill):
        self.ID = ID
        self.power = power
        self.kill = kill
        self.name = None
        self.path = None

# Load id2name.xlsx to dict

In [8]:
def load_id2name(file_path):
    df_id2name = pd.read_excel(file_path)
    id2name = dict([])

    for i in range(len(df_id2name)):
        row = df_id2name.loc[i]
        id2name[row['ID']] = row['name']
    
    return id2name

In [9]:
id2name = load_id2name('id2name.xlsx')
id2name

{32858679: '에이치피',
 32886907: '최Garen',
 32916567: '민준하은',
 32888148: '정훈이',
 32860442: '돌팸환돌',
 44463382: '앤버는글럿',
 32881045: '화중군자',
 34174033: '알몬비',
 33691098: '으갹으갹갹',
 32951689: '문마마',
 32852651: 'GEOEMPIRE',
 32842266: 'Royal Toi',
 34068695: 'ccocco',
 35454450: '앱솔트',
 34012835: '허세노비',
 32881241: '전차',
 32820259: '마리오1023',
 32878345: '무적금아',
 32819484: 'JSUB0313',
 32900080: '뀰뀰',
 33334611: '명세지재',
 33898594: '고오니아빠',
 32910924: '인수성',
 33984315: 'TOYFLY',
 34011747: 'Dondekman',
 32878596: 'coreanito26',
 32904134: 'llii0993',
 32947520: '알파카조앙',
 32889863: '륭입니다',
 32873041: '꿈도없다',
 32858701: 'aquagu',
 33963794: '수라비',
 32870013: '섭리',
 32921831: 'TGG밀크커피',
 32850372: '킹갓권율',
 33995797: 'ALOHABIN',
 34027799: '썁새끼',
 34344247: '운동하자',
 32818701: 'kdc0627',
 33264554: 'TOP신상25',
 32886302: '파괴본능',
 32866794: '소혼시후',
 32916643: '람겸',
 34015869: '독고구천검',
 32909658: '다혜나라',
 32850382: 'NO ABE',
 32871044: 'N2coni',
 32908249: '아나머',
 32911721: '히니',
 32937381: '큰연못',
 32865

# Let's OCR

In [11]:
Players = dict([])

L = len(profile_list)
for i, fn in enumerate(profile_list):
    print('{}/{}'.format(i,L))
    img = cv2.imread(fn,0)

    IDp = crop_dict['ID']
    powerp = crop_dict['power']
    killp = crop_dict['kill']
    
    img_ID = img[IDp[0]:IDp[1], IDp[2]:IDp[3]]
    img_power = img[powerp[0]:powerp[1], powerp[2]:powerp[3]]
    img_kill = img[killp[0]:killp[1], killp[2]:killp[3]]
    
    _, str_ID = bin_inv(img_ID)
    _, str_power = bin_inv(img_power)
    _, str_kill = bin_inv(img_kill)
    
    ID, power, kill = str2num(str_ID), str2num(str_power), str2num(str_kill)
    
    Players[ID] = Player(ID,power,kill)        
    Players[ID].path = fn.split('\\')[-1]
    
    try:
        Players[ID].name = id2name[ID]
    except KeyError:
        print('KeyError!! : checkt {}'.format(Players[ID].path))

0/123
1/123
2/123
3/123
4/123
5/123
6/123
7/123
8/123
9/123
10/123
11/123
12/123
13/123
14/123
15/123
16/123
17/123
18/123
19/123
20/123
21/123
22/123
23/123
24/123
25/123
26/123
27/123
28/123
29/123
30/123
31/123
32/123
33/123
34/123
35/123
36/123
37/123
38/123
39/123
40/123
41/123
42/123
43/123
44/123
45/123
46/123
47/123
48/123
49/123
50/123
51/123
52/123
53/123
54/123
55/123
56/123
57/123
58/123
59/123
60/123
61/123
62/123
63/123
64/123
65/123
66/123
67/123
68/123
69/123
70/123
71/123
72/123
73/123
74/123
75/123
76/123
77/123
78/123
79/123
80/123
81/123
82/123
83/123
84/123
85/123
86/123
87/123
88/123
89/123
90/123
91/123
92/123
93/123
94/123
95/123
96/123
97/123
98/123
99/123
100/123
101/123
102/123
KeyError!! : checkt Screenshot_2020-07-17-19-22-33.png
103/123
104/123
105/123
106/123
107/123
108/123
109/123
110/123
111/123
112/123
113/123
114/123
115/123
116/123
117/123
118/123
119/123
120/123
121/123
122/123


In [14]:
df = {
    'ID' : [],
    'name' : [],
    'power' : [],
    'kill' : [],
    'path' : []
}
df = pd.DataFrame(df, dtype=int)

L = len(Players)
for ID, p in Players.items():
    df = df.append({
        'ID' : ID,
        'name' : p.name,
        'power' : p.power,
        'kill' : p.kill,
        'path' : p.path
    }, ignore_index=True)

In [15]:
df

Unnamed: 0,ID,name,power,kill,path
0,32858679,에이치피,37978538,1345170,Screenshot_2020-07-17-19-11-13.png
1,32886907,최Garen,35965119,2487123,Screenshot_2020-07-17-19-11-18.png
2,32916567,민준하은,34725757,3359320,Screenshot_2020-07-17-19-11-23.png
3,32888148,정훈이,31242617,5049266,Screenshot_2020-07-17-19-11-31.png
4,32860442,돌팸환돌,24991749,3029242,Screenshot_2020-07-17-19-11-36.png
...,...,...,...,...,...
105,38003357,유니지갑,3480996,30602,Screenshot_2020-07-17-19-23-37.png
106,55222825,우헤아라 아이,849942,332,Screenshot_2020-07-17-19-24-20.png
107,52115995,Dream공산팀,491354,0,Screenshot_2020-07-17-19-24-27.png
108,55223781,하마사키 마오,819236,0,Screenshot_2020-07-17-19-24-33.png


# save as xlsx

In [16]:
df.to_excel('tmp.xlsx', index=False)

# Matching ID -name

# 여긴 이제 안씀

# id name matching

In [None]:
df_id2name = pd.read_excel('id2name_00.xlsx')
id2name = dict([])
for i in range(len(df_id2name)):
    df_row = df_id2name.loc[i]
    id2name[df_row['ID']] = df_row['name']

In [None]:
id2name

# dict to xlsx

In [None]:
df_xlsx = {
    'ID' : [],
    'name' : [],
}
df_xlsx = pd.DataFrame(df_xlsx, dtype=int)

for ID, name in id2name.items():
    df_xlsx = df_xlsx.append({
        'ID' : ID,
        'name' : name
    }, ignore_index=True)
    
df_xlsx.to_excel('id2name.xlsx', index=False)

# ALL

In [None]:
# for fn in profile_list:
#     img = cv2.imread(fn,0)
#     plt.imshow(img, 'gray')
#     plt.show()

#     IDp = crop_dict['ID']
#     powerp = crop_dict['power']
#     killp = crop_dict['kill']
    
#     img_ID = img[IDp[0]:IDp[1], IDp[2]:IDp[3]]
#     img_power = img[powerp[0]:powerp[1], powerp[2]:powerp[3]]
#     img_kill = img[killp[0]:killp[1], killp[2]:killp[3]]
    
#     i1 = bin_inv(img_ID, show=True)
#     i2 = bin_inv(img_power, show=True)
#     i3 = bin_inv(img_kill, show=True)
    

#     a = input('y or n')
#     if a == 'n':
#         break