
Commit f8ad8e2

This commit mainly updates the project to the new neural network model, along with a series of refactorings built on it. I made a lot of changes, so they live on a new branch for now and will be merged after stabilization.
kerlomz committed Oct 16, 2018
1 parent 20968c3 commit f8ad8e2
Showing 15 changed files with 593 additions and 1,658 deletions.
48 changes: 12 additions & 36 deletions README.md
@@ -1,3 +1,7 @@
# Project Introduction
This project implements captcha (verification code) recognition based on CNN+LSTM+CTC.
This project is only for training models. If you need to deploy a trained model, please see https://github.com/kerlomz/captcha_platform

# Attention
1. Choose your version:
This project uses GPU for training by default.
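The System config below also exposes a DeviceUsage fraction (0.7 in the demo). The snippet that follows is a hedged guess at how such a fraction is typically applied in TensorFlow 1.x, namely as the per-process GPU memory fraction; this is an assumption for illustration, not this project's verified session setup.

```python
import tensorflow as tf  # TensorFlow 1.x API

# Assumption: DeviceUsage: 0.7 caps the share of GPU memory this process may use.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)
sess = tf.Session(config=config)
```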
@@ -30,8 +34,7 @@
# TrainRegex and TestRegex: By default they match filenames like apple_20181010121212.jpg (see the sketch after this block).
# TrainsPath and TestPath: The local paths of your training and test sets.
System:
NeuralNet: 'CNNNet'
Device: 'gpu:0'
NeuralNet: 'CNN+LSTM+CTC'
DeviceUsage: 0.7
TrainsPath: 'E:\Task\Trains\YourModelName'
TrainRegex: '.*?(?=_.*\.)'
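As a quick check of the default TrainRegex above, the following minimal Python sketch extracts the label from a sample filename that follows the apple_20181010121212.jpg convention:

```python
import re

# Default TrainRegex/TestRegex: everything before the first "_" is the label.
pattern = r".*?(?=_.*\.)"
filename = "apple_20181010121212.jpg"
label = re.search(pattern, filename).group()
print(label)  # -> apple
```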
@@ -68,66 +71,39 @@

1. model.yaml - Model Config
```yaml
# Convolution: At least 3 convolution layers are required.
# - Each number below is the number of output channels of that convolution layer.
# This section provides flexible neural network construction;
# adjust the structure until it best suits your data.
# [Convolution, Pool, Optimization: {Dropout}]
CNNNet:
Layer:
- Convolution: 32
- Pool: [1, 2, 2, 1]
- Optimization: Dropout
- Convolution: 64
- Pool: [1, 2, 2, 1]
- Optimization: Dropout
- Convolution: 64
- Pool: [1, 2, 2, 1]
- Optimization: Dropout
ConvCoreSize: 3
FullConnect: 1024
# ModelName: Corresponding to the model file in the model directory,
# - such as YourModelName.pb, fill in YourModelName here.
# CharSet: Provides a default optional built-in solution:
# - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
# -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET]
# - Or you can use your own customized character set like: ['a', '1', '2'].
# ImageChannel: [1 - Gray Scale, 3 - RGB].
# CharLength: Captcha Length.
# CharExclude: CharExclude should be a list, like: ['a', '1', '2'],
# - which lets users freely combine character sets.
# - If you don't want to define the character set manually,
# - you can choose a built-in character set
# - and exclude specific characters via the CharExclude parameter.
Model:
ModelName: YourModelName
ImageChannel: 1
CharLength: 4
CharSet: ALPHANUMERIC_LOWER
CharExclude: []
CharReplace: {}
ImageWidth: 150
ImageHeight: 50
# Magnification: [ x2 -> from size(50, 50) to size(100,100)].
# OriginalColor: [false - Gray Scale, true - RGB].
# Binaryzation: [-1: Off, >0 and < 255: On].
# Smoothing: [-1: Off, >0: On].
# Blur: [-1: Off, >0: On].
# Resize: [WIDTH, HEIGHT].
Pretreatment:
Magnification: 0
OriginalColor: false
Binaryzation: 240
Smoothing: 3
Invert: false
Blur: 5
# Resize: [160, 60]
Binaryzation: -1
Smoothing: -1
Blur: -1
```
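The Pretreatment switches above are essentially image-processing toggles. The sketch below shows one way they could map onto OpenCV calls; the `pretreat` function, its defaults, and the specific filters chosen are assumptions for illustration, not this project's actual preprocessing code.

```python
import cv2

def pretreat(image_path, binaryzation=-1, smoothing=-1, blur=-1,
             invert=False, resize=None):
    """Illustrative sketch of the Pretreatment switches; -1 disables a step."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)      # ImageChannel: 1
    if resize:                                               # Resize: [WIDTH, HEIGHT]
        img = cv2.resize(img, tuple(resize))
    if binaryzation > 0:                                     # fixed-threshold binarization
        _, img = cv2.threshold(img, binaryzation, 255, cv2.THRESH_BINARY)
    if smoothing > 0:                                        # median filter, odd kernel size
        img = cv2.medianBlur(img, smoothing)
    if blur > 0:                                             # Gaussian blur, odd kernel size
        img = cv2.GaussianBlur(img, (blur, blur), 0)
    if invert:                                               # Invert: true flips black/white
        img = 255 - img
    return img
```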
# Tools
1. Pretreatment Previewer
```python -m tools.preview```
2. Navigator (Currently only supports character set recommendations)
```python -m tools.navigator```
3. Quantize
3. Quantize (deleted)
```python -m tools.quantize --input=***.pb --output=***.pb```
4. PyInstaller Package
```
8 changes: 6 additions & 2 deletions character.py
@@ -2,12 +2,14 @@
# -*- coding:utf-8 -*-
# Author: kerlomz <kerlomz@gmail.com>


SPACE = [' ']
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
ALPHA_UPPER = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z']
ALPHA_LOWER = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
'v', 'w', 'x', 'y', 'z']
OPERATOR = ['(', ')', 'P', 'S', 'M', 'D']
FLOAT = ['.']

SIMPLE_CHAR_SET = dict(
NUMERIC=NUMBER,
@@ -16,5 +18,7 @@
ALPHANUMERIC_UPPER=NUMBER + ALPHA_UPPER,
ALPHABET_LOWER=ALPHA_LOWER,
ALPHABET_UPPER=ALPHA_UPPER,
ALPHABET=ALPHA_LOWER+ALPHA_UPPER
ALPHABET=ALPHA_LOWER + ALPHA_UPPER,
OPERATION=NUMBER + OPERATOR,
FLOAT=NUMBER + FLOAT
)
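For illustration, here is a minimal sketch of how one of these built-in character sets combined with a CharExclude list resolves into the final training charset; the variable names are illustrative, while the equivalent logic in this repository is the GEN_CHAR_SET expression in config.py.

```python
# Hedged sketch: resolve a built-in charset minus excluded characters.
NUMBER = [str(i) for i in range(10)]
ALPHA_LOWER = [chr(c) for c in range(ord('a'), ord('z') + 1)]
SIMPLE_CHAR_SET = dict(
    NUMERIC=NUMBER,
    ALPHANUMERIC_LOWER=NUMBER + ALPHA_LOWER,
)

char_set = 'ALPHANUMERIC_LOWER'
char_exclude = ['0', 'o', '1', 'l']  # visually ambiguous characters
gen_char_set = [c for c in SIMPLE_CHAR_SET[char_set] if c not in char_exclude]
print(len(gen_char_set))  # 36 built-in characters minus 4 excluded -> 32
```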
129 changes: 38 additions & 91 deletions config.py
@@ -4,6 +4,7 @@

import os
import re
import cv2
import yaml
import random
import platform
@@ -17,6 +18,13 @@
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
PROJECT_PATH = "."


class RunMode(object):
Test = 'test'
Trains = 'trains'
Predict = 'predict'


PLATFORM = platform.system()

SYS_CONFIG_DEMO_NAME = 'config_demo.yaml'
@@ -53,68 +61,35 @@ def char_set(_type):
)


def parse_neural_structure(_net):
layer = ""
layer_structure = []
layer_num = 1
pre_input = 1
for i in _net:
key = list(i.keys())[0]
val = list(i.values())[0]
conv = {"index": layer_num, "input": pre_input, "output": val, "extra": []}
if key == 'Convolution':
layer += "\n - {} Layer: {} Layer-[{} * {}]".format(layer_num, key, val, val)
layer_structure.append(conv)
pre_input = val
layer_num += 1
if key == 'Pool':
layer += ", {} Layer-{}".format(key, val)
layer_structure[layer_num - 2]['extra'].append({"name": "pool", "window": val})
if key == 'Optimization':
layer += ", {} Layer".format(val)
layer_structure[layer_num - 2]['extra'].append({"name": "dropout"})
return layer[1:], layer_structure


def fetch_file_list(path):
file_list = os.listdir(path)
if len(file_list) < 200:
exception("Insufficient Sample!", ConfigException.INSUFFICIENT_SAMPLE)
group = [os.path.join(path, image_file) for image_file in file_list]
random.shuffle(group)
return group


TARGET_MODEL = cf_model['Model'].get('ModelName')

CHAR_SET = cf_model['Model'].get('CharSet')
CHAR_EXCLUDE = cf_model['Model'].get('CharExclude')

GEN_CHAR_SET = [i for i in char_set(CHAR_SET) if i not in CHAR_EXCLUDE]

CHAR_REPLACE = cf_model['Model'].get('CharReplace')
CHAR_REPLACE = CHAR_REPLACE if CHAR_REPLACE else {}
CHAR_SET_LEN = len(GEN_CHAR_SET)

NEU_NAME = cf_system['System'].get('NeuralNet')

CONV_NEU_LAYER = cf_model.get('CNNNet').get('Layer')
CONV_NEU_LAYER_DESC, CONV_NEU_STRUCTURE = parse_neural_structure(CONV_NEU_LAYER)

FULL_LAYER_FEATURE_NUM = cf_model['CNNNet'].get('FullConnect')
CONV_CORE_SIZE = cf_model.get('CNNNet').get('ConvCoreSize')

NEU_LAYER_NUM = len(CONV_NEU_STRUCTURE)
MAX_POOL_NUM = len([i for i in CONV_NEU_LAYER if list(i.keys())[0] == 'Pool'])

CONV_STRIDES = [1, 1, 1, 1]
POOL_STRIDES = [1, 2, 2, 1]
PADDING = 'SAME'
NEU_NAME = NEU_NAME if NEU_NAME else 'CNN+LSTM+CTC'
OUT_CHANNEL = 64
FILTERS = [1, 64, 128, 128]
FILTERS += [OUT_CHANNEL]
CNN_LAYER_NUM = 4
LEAKINESS = 0.01
NUM_HIDDEN = 128
OUTPUT_KEEP_PROB = 0.8
DECAY_RATE = 0.98
DECAY_STEPS = 10000
NUM_CLASSES = CHAR_SET_LEN + 2
BATE1 = 0.9
BATE2 = 0.999

MODEL_TAG = '{}.model'.format(TARGET_MODEL)
CHECKPOINT_TAG = 'checkpoint'
SAVE_MODEL = os.path.join(MODEL_PATH, MODEL_TAG)
SAVE_CHECKPOINT = os.path.join(MODEL_PATH, CHECKPOINT_TAG)

DEVICE = cf_system['System'].get('Device')
GPU_USAGE = cf_system['System'].get('DeviceUsage')

TEST_PATH = cf_system['System'].get('TestPath')
@@ -125,50 +100,30 @@ def fetch_file_list(path):
TRAINS_REGEX = cf_system['System'].get('TrainRegex')
TRAINS_REGEX = TRAINS_REGEX if TRAINS_REGEX else ".*?(?=_.*\.)"

TRAINS_SAVE_STEP = cf_system['Trains'].get('SavedStep')
COMPILE_ACC = cf_system['Trains'].get('CompileAcc')
TRAINS_SAVE_STEPS = cf_system['Trains'].get('SavedSteps')
TRAINS_VALIDATION_STEPS = cf_system['Trains'].get('ValidationSteps')
TRAINS_END_ACC = cf_system['Trains'].get('EndAcc')
TRAINS_END_STEP = cf_system['Trains'].get('EndStep')
TRAINS_END_EPOCHS = cf_system['Trains'].get('EndEpochs')
TRAINS_LEARNING_RATE = cf_system['Trains'].get('LearningRate')
TRAINS_TEST_NUM = cf_system['Trains'].get('TestNum')

_TEST_GROUP = fetch_file_list(TEST_PATH)
_TRAIN_GROUP = fetch_file_list(TRAINS_PATH)
BATCH_SIZE = cf_system['Trains'].get('BatchSize')

IMAGE_CHANNEL = cf_model['Model'].get('ImageChannel')
IMAGE_HEIGHT = cf_model['Model'].get('ImageHeight')
IMAGE_WIDTH = cf_model['Model'].get('ImageWidth')

MAGNIFICATION = cf_model['Pretreatment'].get('Magnification')
MAGNIFICATION = MAGNIFICATION if MAGNIFICATION and MAGNIFICATION > 0 and isinstance(MAGNIFICATION, int) else 1
IMAGE_ORIGINAL_COLOR = cf_model['Pretreatment'].get('OriginalColor')
BINARYZATION = cf_model['Pretreatment'].get('Binaryzation')
INVERT = cf_model['Pretreatment'].get('Invert')
SMOOTH = cf_model['Pretreatment'].get('Smoothing')
BLUR = cf_model['Pretreatment'].get('Blur')
RESIZE = cf_model['Pretreatment'].get('Resize')
RESIZE = tuple(RESIZE) if RESIZE else None

_IMAGE_PATH = _TEST_GROUP[random.randint(0, len(_TEST_GROUP) - 1)]
_TEST_IMAGE_SIZE = pilImage.open(_IMAGE_PATH).size
_TRAIN_IMAGE_SIZE = pilImage.open(_TRAIN_GROUP[0]).size

TEST_SAMPLE_LABEL = re.search(TEST_REGEX, _IMAGE_PATH.split(PATH_SPLIT)[-1]).group()

MAX_CAPTCHA_LEN = cf_model['Model'].get('CharLength')
MAX_CAPTCHA_LEN = MAX_CAPTCHA_LEN if MAX_CAPTCHA_LEN else len(TEST_SAMPLE_LABEL)
IMAGE_WIDTH = RESIZE[0] if RESIZE else _TEST_IMAGE_SIZE[0] * MAGNIFICATION
IMAGE_HEIGHT = RESIZE[1] if RESIZE else _TEST_IMAGE_SIZE[1] * MAGNIFICATION


def checkpoint(_name, _path):
def _checkpoint(_name, _path):
file_list = os.listdir(_path)
_checkpoint = ['"{}"'.format(i.split(".meta")[0]) for i in file_list if i.startswith(_name) and i.endswith('.meta')]
checkpoint = ['"{}"'.format(i.split(".meta")[0]) for i in file_list if i.startswith(_name) and i.endswith('.meta')]
if not _checkpoint:
return None
_checkpoint_step = [int(re.search('(?<=model-).*?(?=")', i).group()) for i in _checkpoint]
_checkpoint_step = [int(re.search('(?<=model-).*?(?=")', i).group()) for i in checkpoint]
return _checkpoint[_checkpoint_step.index(max(_checkpoint_step))]


# COMPILE_TRAINS_PATH = os.path.join(MODEL_PATH, '{}.tfrecords'.format(TARGET_MODEL))
COMPILE_MODEL_PATH = os.path.join(MODEL_PATH, '{}.pb'.format(TARGET_MODEL))
TF_LITE_MODEL_PATH = os.path.join(MODEL_PATH, "{}.tflite".format(TARGET_MODEL))
QUANTIZED_MODEL_PATH = os.path.join(MODEL_PATH, 'quantized_{}.pb'.format(TARGET_MODEL))
@@ -207,13 +162,10 @@ def init():
ConfigException.CHAR_SET_NOT_EXIST
)

if _TEST_IMAGE_SIZE != _TRAIN_IMAGE_SIZE and not RESIZE:
exception("The image size of the test set must match the training set")

MODEL_FILE = checkpoint(TARGET_MODEL, MODEL_PATH)
CHECKPOINT = 'model_checkpoint_path: {}\nall_model_checkpoint_paths: {}'.format(MODEL_FILE, MODEL_FILE)
model_file = _checkpoint(TARGET_MODEL, MODEL_PATH)
checkpoint = 'model_checkpoint_path: {}\nall_model_checkpoint_paths: {}'.format(model_file, model_file)
with open(SAVE_CHECKPOINT, 'w') as f:
f.write(CHECKPOINT)
f.write(checkpoint)


if '../' not in SYS_CONFIG_PATH:
@@ -225,15 +177,10 @@
print('MODEL_PATH:', SAVE_MODEL)
print('COMPILE_MODEL_PATH:', COMPILE_MODEL_PATH)
print('CHAR_SET_LEN:', CHAR_SET_LEN)
print('IMAGE_WIDTH: {}, IMAGE_HEIGHT: {}{}'.format(
IMAGE_WIDTH, IMAGE_HEIGHT, ", MAGNIFICATION: {}".format(
MAGNIFICATION) if MAGNIFICATION and not RESIZE else "")
print('CHAR_REPLACE: {}'.format(CHAR_REPLACE))
print('IMAGE_WIDTH: {}, IMAGE_HEIGHT: {}'.format(
IMAGE_WIDTH, IMAGE_HEIGHT)
)
print('IMAGE_ORIGINAL_COLOR: {}'.format(IMAGE_ORIGINAL_COLOR))
print("MAX_CAPTCHA_LEN", MAX_CAPTCHA_LEN)
print('NEURAL NETWORK: {}'.format(NEU_NAME))
print('{} LAYER CONV: \n{}\n - Full Connect Layer: {}'.format(
NEU_LAYER_NUM, CONV_NEU_LAYER_DESC, FULL_LAYER_FEATURE_NUM
))

print('---------------------------------------------------------------------------------')
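The hyperparameter block above (TRAINS_LEARNING_RATE, DECAY_STEPS, DECAY_RATE, and BATE1/BATE2, which presumably stand for Adam's beta1/beta2) is the kind of configuration that usually feeds an exponentially decaying learning rate into an Adam optimizer. The TensorFlow 1.x snippet below is only a hedged sketch of that wiring, not this project's actual training code:

```python
import tensorflow as tf  # TensorFlow 1.x API, as used around 2018

global_step = tf.train.get_or_create_global_step()

# Learning rate decayed by DECAY_RATE every DECAY_STEPS steps.
learning_rate = tf.train.exponential_decay(
    learning_rate=0.001,   # TRAINS_LEARNING_RATE
    global_step=global_step,
    decay_steps=10000,     # DECAY_STEPS
    decay_rate=0.98,       # DECAY_RATE
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999)
```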
22 changes: 11 additions & 11 deletions config_demo.yaml
@@ -4,28 +4,28 @@
# TrainRegex and TestRegex: By default they match filenames like apple_20181010121212.jpg.
# TrainsPath and TestPath: The local absolute paths of your training and test sets.
System:
NeuralNet: 'CNNNet'
Device: 'gpu:0'
NeuralNet: 'CNN+LSTM+CTC'
DeviceUsage: 0.7
TrainsPath: 'E:\Task\Trains\YourModelName\'
TrainRegex: '.*?(?=_.*\.)'
TestPath: 'E:\Task\TestGroup\YourModelName\'
TestRegex: '.*?(?=_.*\.)'

# SavedStep: A Session.run() execution is called a Step,
# SavedSteps: A Session.run() execution is called a step,
# - Used to save training progress, Default value is 100.
# ValidationSteps: Used to calculate accuracy, Default value is 100.
# TestNum: The number of samples for each test batch.
# - A test for every saved steps.
# CompileAcc: When the accuracy reaches the set threshold,
# - the model will be compiled together each time it is archived.
# - Available for specific usage scenarios.
# EndAcc: Finish the training when the accuracy reaches [EndAcc*100]%.
# EndStep: Finish the training when the step count exceeds EndStep [-1: Off, >0: On].
# EndEpochs: Finish the training when the epoch reaches [epoch].
# BatchSize: Number of inputs per batch.
# LearningRate: Tune this to find the learning rate at which the loss declines fastest.
Trains:
SavedStep: 100
TestNum: 500
CompileAcc: 0.9
SavedSteps: 100
ValidationSteps: 100
EndAcc: 0.95
EndStep: -1
EndEpochs: 1
BatchSize: 40
LearningRate: 0.001
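To show how these Trains values reach the code, here is a minimal hedged sketch of loading them with PyYAML; the file name and local variable names are illustrative, while config.py performs the equivalent lookups via cf_system['Trains'].get(...):

```python
import yaml

# Illustrative loader; the real project reads its own config paths.
with open('config.yaml', encoding='utf-8') as f:
    cf_system = yaml.safe_load(f)

trains = cf_system['Trains']
batch_size = trains.get('BatchSize')              # 40 inputs per step
validation_steps = trains.get('ValidationSteps')  # evaluate every 100 steps
end_acc = trains.get('EndAcc')                    # stop once accuracy >= 0.95
end_epochs = trains.get('EndEpochs')              # or after 1 epoch
```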

