<a href="https://colab.research.google.com/github/olivia-sp/study-pjt/blob/main/EAST_license_plate_fine_tuning_v0_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# EAST 모델을 License Plate Detection 데이터에 맞게 Fine-Tuning하기
1. license_plate_detection 데이터를 training 데이터와 test 데이터로 나눔 (강사 제공)
2. data_preprocessor 함수에서 gt_로 replace하는 부분 삭제 https://github.com/solaris33/EAST-tf2/blob/master/data_processor.py#L588 (icdar2015 데이터셋은 txt 파일 앞에 gt_ 접미어가 붙지만 license_plate_detection 데이터는 jpg파일명과 txt 파일명이 동일함)
data_preprocessor.py 588번줄 주석처리
3. data_preprocessor.py의 load_annotation() 함수에서 데이터를 띄어쓰기 기준으로 split하는 함수 추가 https://github.com/solaris33/EAST-tf2/blob/master/data_processor.py#L45
4. 코드 수정한 data_preprocessor.py 파일로 덮어쓰기
5. train.py 파일 실행시에 training_data_path 경로를 license plate detection 데이터로 변경해서 license plate detector 데이터셋에 맞게 EAST 파라미터 Fine-Tuning
6. 학습이 끝난 모델을 이용해서 eval.py 파일을 이용한 evaluation

Reference : https://github.com/solaris33/EAST-tf2

###환경설정

In [1]:
!git clone https://github.com/solaris33/EAST-tf2

fatal: destination path 'EAST-tf2' already exists and is not an empty directory.


In [65]:
%cd ..

/content


기존 EAST-TF2 C/Python 코드와 파이썬 버전 맞춰주는 작업

In [None]:
%cd lanms

In [None]:
!ls

In [None]:
!python -V

In [None]:
!make clean

In [None]:
!wget https://gist.githubusercontent.com/solaris33/331c3a8f6b909b9d10766040eb1929a5/raw/72cd4188035458440281dc2a0ed9d4e4953d7eea/Makefile -O Makefile

In [None]:
!make

In [None]:
!ls

In [None]:
%cd ..

## Training을 위한 데이터셋 다운로드

ICDAR 2015
- Reference : https://drive.google.com/file/d/1mPvJkaWq3Ka_UqYr5rUqbQ7rSIPFqibD/view?usp=sharing 

- Original Source : https://rrc.cvc.uab.es/?ch=4&com=introduction
- License : https://creativecommons.org/licenses/by/4.0/

license_plate_dataset
- Reference : https://drive.google.com/file/d/1gvD8rsMNFGtu1VxKTwz3_2tQrhE8d9SV/view?usp=sharing

### 데이터셋 압축풀기

In [67]:
import os
import zipfile
CURR_PATH = os.getcwd()

os.listdir(CURR_PATH)
  #DATA_PATH = os.path.join(CURR_PATH,'data_set')
DATA_PATH = CURR_PATH + '/drive/MyDrive/Colab Notebooks/data_set/EAST-tf2_icdar2015_example.zip'
CUS_DATA_PATH = CURR_PATH + '/drive/MyDrive/Colab Notebooks/data_set/license_plate_detection_data.zip'
print("RAW_PATH", DATA_PATH)

with zipfile.ZipFile(DATA_PATH) as zfile:
  zfile.extractall(CURR_PATH + '/EAST-tf2')
with zipfile.ZipFile(CUS_DATA_PATH) as zfile:
  zfile.extractall(CURR_PATH + '/EAST-tf2')

RAW_PATH /content/drive/MyDrive/Colab Notebooks/data_set/EAST-tf2_icdar2015_example.zip


In [69]:
%cd EAST-tf2

/content/EAST-tf2


In [70]:
!pwd

/content/EAST-tf2


In [4]:
!ls

data		     eval.py   LICENSE		      model.py	   train.py
data_processor.py    examples  locality_aware_nms.py  __pycache__
east_resnet_50_rbox  lanms     losses.py	      README.md


## load_annotation 함수 분석(data_preprocessing.py)

In [None]:
!pwd

In [None]:
import os
import glob
import sklearn.model_selection
import shutil
import csv
import numpy as np

In [None]:
'''
ICDAR2015 데이터셋
['377', '117', '463', '117', '465', '130', '378', '130', 'Genaxis Theatre']
->
(array([[[377., 117.],
         [463., 117.],
         [465., 130.],
         [378., 130.]],

license_plate_detection 데이터셋
['935 362 1034 362 1034 411 935 411 "YG9X2G"']
->
(array([[[935., 362.],
         [1034., 362.],
         [1034., 411.],
         [935., 411.]],
'''

In [None]:
def load_annotation(p):
  '''
  load annotation from the text file
  :param p:
  :return:
  '''
  text_polys = []
  text_tags = []
  if not os.path.exists(p):
    return np.array(text_polys, dtype=np.float32)
  with open(p, 'r') as f:
    reader = csv.reader(f)
    for line in reader:
      label = line[-1]
      # strip BOM. \ufeff for python3,  \xef\xbb\bf for python2
      line = [i.strip('\ufeff').strip('\xef\xbb\xbf') for i in line]
      #x1, y1, x2, y2, x3, y3, x4, y4 = list(map(float, line[:8]))
      
      #new dataset
      pos = line[0].split(' ')
      print(pos)
      x1, y1, x2, y2, x3, y3, x4, y4 = list(map(float, pos[:8]))


      text_polys.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
      if label == '*' or label == '###':
        text_tags.append(True)
      else:
        text_tags.append(False)

    return np.array(text_polys, dtype=np.float32), np.array(text_tags, dtype=np.bool)

In [None]:
# load_annotation("/content/EAST-tf2/data/ICDAR2015/train_data/gt_img_1.txt")

In [None]:
load_annotation("/content/EAST-tf2/license_plate_detection_data/annotations/0b86cecf-67d1-4fc0-87c9-b36b0ee228bb.txt")

## Train 데이터 & Test 데이터 나누기
### annotation 파일명기준으로 분리


In [None]:
#cache_dir = '.'
current_dir = '/content/EAST-tf2'
main_dir = os.path.join(current_dir, 'license_plate_detection_data')
training_images_dir = os.path.join(main_dir, 'images')
training_gt_dir = os.path.join(main_dir, 'annotations')

In [None]:
training_gt_dir

In [None]:
#전체파일리스트불러오기
gt_list = glob.glob(os.path.join(training_gt_dir, '*.txt'))
print(gt_list)

In [None]:
# train and validation data split
train, test = sklearn.model_selection.train_test_split(
    gt_list, train_size=0.8, random_state=42
)

In [None]:
train

In [None]:
test

### Training / Test 데이터 폴더 생성

In [None]:
!mkdir train_data

In [None]:
!mkdir test_data

In [None]:
training_images_dir

In [None]:
train_dir = './train_data'

# train txt file copy
for fname in train:
  src = fname
  dst = os.path.join(train_dir, src.split('/')[-1])
  shutil.copyfile(src, dst)

# train image file copy
for fname in train:
  jpg_file_name = fname.split('/')[-1].split('.')[0]+'.jpg'
  src = os.path.join(training_images_dir, jpg_file_name)
  dst = os.path.join(train_dir, jpg_file_name)
  shutil.copyfile(src, dst)

In [None]:
test_dir = './test_data'

# test txt file copy
for fname in test:
  src = fname
  dst = os.path.join(test_dir, src.split('/')[-1])
  shutil.copyfile(src, dst)

# test image file copy
for fname in test:
  jpg_file_name = fname.split('/')[-1].split('.')[0]+'.jpg'
  src = os.path.join(training_images_dir, jpg_file_name)
  dst = os.path.join(test_dir, jpg_file_name)
  shutil.copyfile(src, dst)

##모델학습

In [None]:
!python train.py \
--training_data_path="/content/train_data/" \
--checkpoint_path="/content/EAST-tf2/east_resnet_50_rbox"

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
Step 000244, dice_loss 0.0100, rbox_loss 0.0011, total_loss 0.0111
['545', '243', '612', '243', '612', '276', '545', '276', '"FNY881"']
['552', '172', '642', '172', '642', '217', '552', '217', '"DG2Z6U"']
['76', '306', '189', '306', '189', '362', '76', '362', '"980YTB"']
['35', '463', '107', '463', '107', '499', '35', '499', '"SK5K9V"']
['253', '412', '334', '412', '334', '452', '253', '452', '"FA8M7S"']
['629', '278', '719', '278', '719', '323', '629', '323', '"6WAW786"']
['756', '505', '854', '505', '854', '554', '756', '554', '"PJ2X1V"']
['748', '592', '829', '592', '829', '632', '748', '632', '"TWF220"']
['841', '666', '1068', '666', '1068', '779', '841', '779', '"NEILDMND"']
['935', '362', '1034', '362', '1034', '411', '935', '411', '"YG9X2G"']
Step 000245, dice_loss 0.0100, rbox_loss 0.0010, total_loss 0.0110
['532', '507', '612', '507', '612', '547', '532', '547', '"5DJ0529"']
['285', '124', '366', '124', '366', '164', '285', '16

In [None]:
!python eval.py \
--test_data_path="/content/train_data/" \
--model_path="/content/EAST-tf2/east_resnet_50_rbox" \
--output_dir="/content/test_data_output/"

In [None]:
# !zip -r /content/EAST-tf2/east_resnet_50_rbox.zip /content/EAST-tf2/east_resnet_50_rbox

In [None]:
# from google.colab import files
# files.download("/content/EAST-tf2/east_resnet_50_rbox.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# TODO : 

# Google drive mount

In [55]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [58]:
# %cd "/content/drive/MyDrive/Colab Notebooks/EAST-tf2"

/content/drive/MyDrive/Colab Notebooks/EAST-tf2


In [None]:
# !python train.py \
# --training_data_path="/content/train_data/" \
# --checkpoint_path="/content/drive/MyDrive/Colab Notebooks/EAST-tf2/east_resnet_50_rbox"