Merge remote-tracking branch 'origin/dev'

linyuan0213 · Apr 20, 2024 · 5c4a966 · 5c4a966
2 parents ae3fcaa + 194a9a3
commit 5c4a966
Show file tree

Hide file tree

Showing 7 changed files with 249 additions and 75 deletions.
diff --git a/README.md b/README.md
@@ -31,6 +31,58 @@
 
      馒头模拟登录需要添加 cookie，通过cookie访问接口，不排除禁用账户的可能
 
+### 插件
+
+- 自定义识别词
+
+  	tmdb id 获取：在 [tmdb](https://www.themoviedb.org/?language=zh-CN) 网站搜索关键词，打开对应电影页面，复制 url 中的数字 id。例如 https://www.themoviedb.org/movie/693134-dune-part-two?language=zh-CN 的 tmdb id 为 693134
+
+
+  - 通用识别词维护：
+
+    	编辑 [通用识别词](https://pad.xcreal.cc/p/通用识别词) 添加关键词
+
+    	格式如下：
+
+    		屏蔽：被替换词
+
+    		替换：被替换词@@替换词
+
+    		替换+集偏移：被替换词@@替换词@@前定位词@@后定位词@@集偏移
+
+    		集偏移：前定位词@@后定位词@@集偏移
+
+  - 电影识别词维护：
+
+    	编辑 [电影识别词](https://pad.xcreal.cc/p/电影识别词) 添加关键词
+
+    	格式如下：
+
+    		屏蔽：tmdb id@@被替换词
+
+    		替换：tmdb id@@被替换词@@替换词
+
+    		替换+集偏移：tmdb id@@被替换词@@替换词@@前定位词@@后定位词@@集偏移
+
+    		集偏移：tmdb id@@前定位词@@后定位词@@集偏移
+
+  - 电视识别词维护：
+
+    	编辑 [电视识别词](https://pad.xcreal.cc/p/电视识别词) 添加关键词
+
+       格式同电影识别词
+
+  - 动漫识别词维护：
+
+    	编辑 [动漫识别词](https://pad.xcreal.cc/p/动漫识别词) 添加关键词
+
+       格式同电影识别词
+
+
+
+    **如果有好用的识别词，请共同维护**
+
+
 ### 开启公开站点
 
 在 config.yaml 的 laboratory 添加 ```show_more_sites: true```

diff --git a/app/helper/ocr_helper.py b/app/helper/ocr_helper.py
@@ -1,11 +1,83 @@
-import base64
+import ddddocr
+import cv2
+import numpy as np
+from PIL import Image
+import log
 
-from app.utils import RequestUtils
+from app.utils import RequestUtils, StringUtils
 
 
 class OcrHelper:
 
-    _ocr_b64_url = "https://nastool.cn/captcha/base64"
+    @staticmethod
+    def around_white(img):
+        """
+        Paint the border of the image white (255), in place.
+
+        A pixel is whitened when its row or column index is <= 5, or when
+        it lies within the last 5 rows or the last 5 columns.
+
+        :param img: 2-D image array (its shape is unpacked into two values)
+        :return: the same array, modified in place
+        """
+        rows, cols = img.shape
+        # Slice assignment replaces the per-pixel ndarray.itemset() loop;
+        # itemset() was removed in NumPy 2.0.
+        img[:6, :] = 255
+        img[:, :6] = 255
+        # max(..., 0) keeps the slice start non-negative for tiny images,
+        # which the leading slices above already cover completely.
+        img[max(rows - 5, 0):, :] = 255
+        img[:, max(cols - 5, 0):] = 255
+        return img
+
+    @staticmethod
+    def noise_unsome_piexl(img):
+        """
+        Neighbourhood denoising: whiten isolated pixels, in place.
+
+        Each interior pixel is compared with its four directly adjacent
+        pixels. If none of them matches it, the pixel is set to white (255).
+        Pixels are compared by zero / non-zero state, not by exact value.
+        The scan runs in index order on the live array, so pixels whitened
+        earlier take part in later comparisons.
+
+        :param img: 2-D image array (its shape is unpacked into two values)
+        :return: the same array, modified in place
+        """
+        rows, cols = img.shape
+        # The outermost rows and columns are skipped: they have no full
+        # set of four neighbours.
+        for r in range(1, rows - 1):
+            for c in range(1, cols - 1):
+                center_set = bool(img[r, c])
+                neighbours = (img[r, c + 1], img[r, c - 1], img[r - 1, c], img[r + 1, c])
+                if not any(bool(n) == center_set for n in neighbours):
+                    # ndarray.itemset() was removed in NumPy 2.0; plain
+                    # index assignment is the supported equivalent.
+                    img[r, c] = 255
+        return img
+
+    def image_pre_process(self, image):
+        """
+        Pre-process a captcha image before OCR.
+
+        Steps, in order: BGR-to-gray conversion, binary threshold at 127
+        (max value 255), non-local-means denoising, isolated-pixel removal
+        via noise_unsome_piexl, and border whitening via around_white.
+
+        :param image: image array passed to cv2.cvtColor with COLOR_BGR2GRAY
+        :return: the processed image
+        """
+        grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        # cv2.threshold returns (retval, image); only the image is needed.
+        binary = cv2.threshold(grey, 127, 255, cv2.THRESH_BINARY)[1]
+        smoothed = cv2.fastNlMeansDenoising(binary, h=30, templateWindowSize=11, searchWindowSize=21)
+        return OcrHelper.around_white(OcrHelper.noise_unsome_piexl(smoothed))
+
+    def recognize_captcha(self, image_content):
+        """
+        Recognize the text of a captcha image.
+
+        :param image_content: encoded image bytes; decoded with cv2.imdecode
+        :return: the recognized text, upper-cased. On any exception the
+                 error is logged and the current value of the result is
+                 returned (an empty string if OCR never produced output).
+        """
+        res = ""
+        try:
+            ocr = ddddocr.DdddOcr(show_ad=False)
+            image = np.asarray(bytearray(image_content), dtype="uint8")
+            image = cv2.imdecode(image, cv2.IMREAD_COLOR)
+            op_image = self.image_pre_process(image)
+            # image_pre_process returns a single-channel image. BGR2RGB
+            # rejects 1-channel input, which previously raised on every
+            # call and made this method return ""; GRAY2RGB expands it.
+            pil_image = Image.fromarray(cv2.cvtColor(op_image, cv2.COLOR_GRAY2RGB))
+            res = ocr.classification(pil_image)
+            # Presumably corrects characters the OCR tends to misread
+            # (verify against StringUtils.replace_strings).
+            res = StringUtils.replace_strings(res, {'之': '2', '>': '7'})
+            return res.upper()
+        except Exception as e:
+            # Deliberate best-effort: a failed recognition must not crash the caller.
+            log.error(f"{str(e)}：{res}")
+            return res
 
     def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None):
         """
@@ -22,12 +94,5 @@ def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None)
                 image_bin = ret.content
                 if not image_bin:
                     return ""
-                image_b64 = base64.b64encode(image_bin).decode()
-        if not image_b64:
-            return ""
-        ret = RequestUtils(content_type="application/json").post_res(
-            url=self._ocr_b64_url,
-            json={"base64_img": image_b64})
-        if ret:
-            return ret.json().get("result")
+                return self.recognize_captcha(image_bin)
         return ""