Skip to content

Commit

Permalink
- Fix import bugs in the data_manipulate modules
Browse files Browse the repository at this point in the history
  • Loading branch information
maycuatroi committed Jun 15, 2023
1 parent 872f6de commit d17343b
Show file tree
Hide file tree
Showing 11 changed files with 35 additions and 31 deletions.
7 changes: 0 additions & 7 deletions evoflow/controller/data_manipulate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +0,0 @@
# Copyright (c) 2021. Copyright belongs to evoflow team

from evoflow.Controller.DataManipulate.ExcelFileOperator import ExcelFileOperator
from evoflow.Controller.DataManipulate.PdfFileOperator import PdfFileOperator
from evoflow.Controller.DataManipulate.ImageFileOperator import ImageFileOperator
from evoflow.Controller.DataManipulate.PptxFileOperator import PptxFileOperator
from evoflow.Controller.DataManipulate.CatiaFileOperator import CatiaFileOperator
6 changes: 3 additions & 3 deletions evoflow/controller/data_manipulate/excel_file_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import pandas as pd

from evoflow.controller.data_manipulate.FileOperator import FileOperator
from evoflow.Entities.DataManipulate.FileOperator.DataFrameFile import DataFrameFile
from evoflow.Entities.DataManipulate.FileOperator.File import File
from evoflow.controller.data_manipulate.file_operator import FileOperator
from evoflow.entities.data_manipulate.file_operator.dataframe_file import DataFrameFile
from evoflow.entities.data_manipulate.file_operator.file import File


class ExcelFileOperator(FileOperator):
Expand Down
6 changes: 2 additions & 4 deletions evoflow/controller/data_manipulate/file_operator.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# Copyright (c) 2021. Copyright belongs to evoflow team

import abc

from tqdm import tqdm

from evoflow.Controller.DataManipulate.DataManipulate import DataManipulate
from evoflow.Entities.DataManipulate.FileOperator.File import File
from evoflow.controller.data_manipulate.data_manipulate import DataManipulate
from evoflow.entities.data_manipulate.file_operator.file import File


def get_reader(file_type):
Expand Down
2 changes: 1 addition & 1 deletion evoflow/controller/data_manipulate/image_file_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from evoflow import logger
from evoflow.controller.data_manipulate.file_operator import FileOperator
from evoflow.entities.data_manipulate.file_operator.file import File
from evoflow.entities.data_manipulate.file_operator.ImageFile import ImageFile
from evoflow.entities.data_manipulate.file_operator.image_file import ImageFile


def imread(path, flags: int = -1):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pdfplumber

from evoflow.Controller.DataManipulate.FileOperator import FileOperator
from evoflow.Entities.DataManipulate.FileOperator.File import File
from evoflow.Entities.DataManipulate.FileOperator.PdfFile import PdfFile
from evoflow.controller.data_manipulate.file_operator import FileOperator
from evoflow.entities.data_manipulate.file_operator.file import File
from evoflow.entities.data_manipulate.file_operator.pdf_file import PdfFile


class PdfFileOperator(FileOperator):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from pptx import Presentation

from evoflow.Controller.DataManipulate.FileOperator import FileOperator
from evoflow.Entities.DataManipulate.FileOperator.File import File
from evoflow.Entities.DataManipulate.FileOperator.PPTXFile import PPTXFile
from evoflow.controller.data_manipulate.file_operator import FileOperator
from evoflow.entities.data_manipulate.file_operator.file import File
from evoflow.entities.data_manipulate.file_operator.pptx_file import PPTXFile


class PptxFileOperator(FileOperator):
Expand Down
1 change: 1 addition & 0 deletions evoflow/entities/data_manipulate/file_operator/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
class File(AbstractData):
def __init__(self, file_path: str = None, **args):
self.file_path = file_path
self.file_path = os.path.abspath(self.file_path)

self.file_name = os.path.split(file_path)[1].replace(
"." + self.get_file_type(), ""
Expand Down
11 changes: 8 additions & 3 deletions evoflow/entities/data_manipulate/file_operator/image_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from PIL import Image, ImageDraw, ImageFont

from evoflow.entities.data_manipulate.file_operator.file import File
from evoflow.entities.Global import Global
from evoflow.Services.OCR.EasyOCREngine import EasyOCREngine
from evoflow.Services.OCR.Result import OCRResult
from evoflow.entities.global_vars import Global
from evoflow.services.ocr.easy_ocr_engine import EasyOCREngine
from evoflow.services.ocr.result import OCRResult


class ImageFile(File):
Expand Down Expand Up @@ -41,6 +41,11 @@ def __init__(self, **kwargs):
def to_pil(self):
return PIL.Image.fromarray(self.data)

def __array__(self):
"""
Convert to numpy array: np.array(image_file)
"""
return self.data
def draw(self, ocr_results):
user_path = f'{os.getenv("userprofile")}/.evoflow/fonts/Noto_Sans_JP/NotoSansJP-Regular.otf'
data_path = "./data/.evoflow/fonts/Noto_Sans_JP/NotoSansJP-Regular.otf"
Expand Down
17 changes: 12 additions & 5 deletions evoflow/entities/data_manipulate/file_operator/pdf_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
import pathlib
import urllib
from typing import Iterator
from urllib.request import URLopener

import cv2
import numpy as np
import pdfplumber
from tqdm import tqdm

import evoflow.Params
import evoflow.params
from evoflow import logger
from evoflow.controller.data_manipulate.image_file_operator import ImageFileOperator
from evoflow.entities.data_manipulate.file_operator.file import File
Expand All @@ -26,11 +28,11 @@ def download_poppler():

poppler_path = f"{os.getenv('userprofile')}/.evoflow/poppler"
pathlib.Path(poppler_path).mkdir(parents=True, exist_ok=True)
poppler_file_name = evoflow.Params.POPPLER_URL.rsplit("/", maxsplit=-1)
opener = urllib.request.URLopener()
poppler_file_name = evoflow.params.POPPLER_URL.rsplit("/", maxsplit=-1)[-1]
opener = URLopener()
opener.addheader("User-Agent", "evoflow")
filename, _ = opener.retrieve(
evoflow.Params.POPPLER_URL, f"{poppler_path}/{poppler_file_name}"
evoflow.params.POPPLER_URL, f"{poppler_path}/{poppler_file_name}"
)
Archive(filename).extractall(poppler_path)
os.remove(filename)
Expand All @@ -41,6 +43,11 @@ def download_poppler():
class PdfFile(File):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.data = pdfplumber.open(self.file_path)

@property
def pages(self):
return self.data.pages

def get_texts(self):
page_count = len(self.data.pages)
Expand Down Expand Up @@ -76,7 +83,7 @@ def to_images(self, dpi=500) -> Iterator[ImageFile]:
) from value_error
else:
poppler_path = poppler_paths[0]

poppler_path = os.path.abspath(poppler_path)
pages = convert_from_path(self.file_path, dpi, poppler_path=poppler_path)
images = []
for i, page in tqdm(
Expand Down
2 changes: 1 addition & 1 deletion evoflow/params.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
POPPLER_URL = (
"http://blog.alivate.com.au/wp-content/uploads/2018/10/poppler-0.68.0_x86.7z"
"https://blog.alivate.com.au/wp-content/uploads/2018/10/poppler-0.68.0_x86.7z"
)
UPX_URL = "https://github.com/upx/upx/releases/download/v3.96/upx-3.96-win64.zip"
2 changes: 1 addition & 1 deletion evoflow/services/ocr/easy_ocr_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from evoflow import logger
from evoflow.Services.ocr.result import OCRResult
from evoflow.services.ocr.result import OCRResult

try:
import easyocr
Expand Down

0 comments on commit d17343b

Please sign in to comment.