In [1]:
import os

In [2]:
%pwd

'e:\\GCET\\Machine Learning\\Deep Learning Projects\\text_recognition\\research'

In [3]:
# Step up to the project root only while the kernel is still inside the
# `research` folder, so re-running this cell does not keep climbing the tree.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
%pwd

'e:\\GCET\\Machine Learning\\Deep Learning Projects\\text_recognition'

# Demo Image Import

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DemoImageConfig:
    """Immutable settings consumed by the demo-image download step."""

    # Address the demo image is requested from.
    image_url: str
    # Directory the downloaded file is written into.
    image_dir: Path

In [7]:
from mlOCR.constants import *
from mlOCR.utils.common import read_yaml, create_directories
#from mlOCR.entity.config_entity import ImageProcessingConfig,DemoImageConfig

class ConfigurationManager:
    """Read the project's YAML files and build config objects from them."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Load the three YAML files and set up the artifacts root.

        Args:
            config_filepath: location of the main config YAML.
            params_filepath: location of the params YAML.
            schema_filepath: location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_demo_image_config(self)->DemoImageConfig:
        """Build the DemoImageConfig from the `demo_image` config section.

        Passes the image directory to `create_directories` before returning.

        Returns:
            DemoImageConfig holding the image URL and the image directory.
        """
        config=self.config.demo_image

        create_directories([config.image_dir])

        return DemoImageConfig(
            image_url=config.image_url,
            # The dataclass declares image_dir as a Path, so convert the raw
            # config value instead of passing it through untouched.
            image_dir=Path(config.image_dir)
        )

In [8]:
import requests
import os
from mlOCR import logger

class DemoImage:
    """Downloads the demo image described by a DemoImageConfig."""

    def __init__(self,DemoImageConfig):
        """Store the download URL and target directory.

        Args:
            DemoImageConfig: object exposing `image_url` and `image_dir`
                (the parameter name shadows the class of the same name
                inside this method).
        """
        self.url=DemoImageConfig.image_url
        self.image_dir=DemoImageConfig.image_dir

    def download_image(self, timeout=30):
        """Fetch the demo image into `image_dir` as demo.jpg unless it exists.

        Args:
            timeout: seconds passed to `requests.get` so a stalled server
                cannot block the call indefinitely.

        Raises:
            requests.exceptions.RequestException: propagated from
                `requests.get` on connection problems or timeout.
        """
        filename='demo.jpg'
        target_path=os.path.join(self.image_dir, filename)

        if os.path.exists(target_path):
            logger.info(f'{filename} already exists!')
            return

        response=requests.get(self.url, timeout=timeout)

        if response.status_code == 200:
            with open(target_path, "wb") as file:
                file.write(response.content)
            logger.info(f'{filename} downloaded successfully!')
        else:
            # A failed download is an error, not an informational event; include
            # the status code so the log is actionable.
            logger.error(f'Error downloading {filename}! (HTTP status {response.status_code})')
            


In [8]:
# Demo-image stage: build the configuration, then download the image
# (download_image skips the request when the file is already present).
try:
    config=ConfigurationManager()
    demo_image_config=config.get_demo_image_config()
    demo_image=DemoImage(demo_image_config)
    demo_image.download_image()
except Exception as e:
    logger.exception(e)
    # Bare `raise` re-raises the active exception without adding this line
    # to its traceback.
    raise

[2025-03-03 15:58:21,327: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-03 15:58:21,329: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-03 15:58:21,330: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-03 15:58:21,331: INFO: common: created directory at: artifacts]
[2025-03-03 15:58:21,332: INFO: common: created directory at: artifacts/image/input]
[2025-03-03 15:58:21,332: INFO: 3457974689: demo.jpg already exists!]


# Image Processing

In [9]:
#entity
@dataclass(frozen=True)
class ImageProcessingConfig:
    """Immutable settings consumed by the image-processing step."""

    # Image that ImageProcessing.loadImage reads.
    image_path: Path
    # Directory that receives the saved images and the pickled resize data.
    result_path: Path
    # Per-channel normalisation values; the first three entries are used.
    norm_mean: tuple
    norm_variance: tuple
    # Upper bound for the longer image side after magnification.
    canvas_size: int
    # Magnification factor applied to the longer image side.
    mag_ratio: float

In [10]:
#config
# NOTE: this cell redefines ConfigurationManager; once it has run, the earlier
# definition (the one providing get_demo_image_config) is replaced in the kernel.
class ConfigurationManager:
    """Read the project's YAML files and build the image-processing config."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Load the three YAML files and set up the artifacts root.

        Args:
            config_filepath: location of the main config YAML.
            params_filepath: location of the params YAML.
            schema_filepath: location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    @staticmethod
    def _parse_float_tuple(raw):
        """Convert a params entry into a tuple of floats.

        A string is stripped of parentheses and split on commas (the form the
        params file is expected to use); any other iterable is converted
        element by element.
        """
        if isinstance(raw, str):
            pieces = raw.translate(str.maketrans('','','()')).split(',')
            # Ignore blank fragments such as the one left by a trailing comma.
            raw = [piece for piece in pieces if piece.strip()]
        return tuple(float(value) for value in raw)

    def get_image_processing_config(self)->ImageProcessingConfig:
        """Build the ImageProcessingConfig from config and params sections.

        Reads `image_processing` from the config file and
        `craft_image_processing` from the params file, and passes the result
        directory to `create_directories`.

        Returns:
            ImageProcessingConfig with Path-typed locations and numeric
            normalisation / resize settings.
        """
        config=self.config.image_processing
        params=self.params.craft_image_processing

        create_directories([config.result_path])

        image_processing_config=ImageProcessingConfig(
            image_path=Path(config.image_path),
            result_path=Path(config.result_path),
            # Store real floats rather than whitespace-padded string fragments.
            norm_mean=self._parse_float_tuple(params.norm_mean),
            norm_variance=self._parse_float_tuple(params.norm_variance),
            canvas_size=int(params.canvas_size),
            mag_ratio=float(params.mag_ratio)
        )

        return image_processing_config

In [11]:
#component

import numpy as np
from skimage import io
import cv2

class ImageProcessing:
    """Image loading, resizing and mean/variance normalisation helpers."""

    def __init__(self,ImageProcessingConfig):
        """Copy the settings from the config object.

        Args:
            ImageProcessingConfig: object exposing `image_path`,
                `result_path`, `canvas_size`, `mag_ratio`, `norm_mean` and
                `norm_variance` (the parameter name shadows the class of the
                same name inside this method).
        """
        self.image_path=ImageProcessingConfig.image_path
        self.result_path=ImageProcessingConfig.result_path
        self.canvas_size=ImageProcessingConfig.canvas_size
        self.mag_ratio=ImageProcessingConfig.mag_ratio
        self.norm_mean=ImageProcessingConfig.norm_mean
        self.norm_variance=ImageProcessingConfig.norm_variance

        # Keep the first three entries as float32 vectors so they broadcast
        # over the last axis of an image array.
        self.norm_mean=np.array([self.norm_mean[0],self.norm_mean[1],self.norm_mean[2]],dtype=np.float32)
        self.norm_variance=np.array([self.norm_variance[0],self.norm_variance[1],self.norm_variance[2]],dtype=np.float32)

    def loadImage(self):
        """Read `self.image_path` and return it as a 3-channel array.

        Returns:
            numpy array with three channels in the last axis.
        """
        img = io.imread(self.image_path)           # RGB order
        # A leading axis of length 2 is reduced to its first entry
        # (presumably a stacked/multi-frame file — TODO confirm).
        if img.shape[0] == 2: img = img[0]
        # Single-channel image: expand to three channels.
        if len(img.shape) == 2 : img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        # Four channels: keep only the first three.
        if img.shape[2] == 4:   img = img[:,:,:3]
        img = np.array(img)

        return img

    def normalizeMeanVariance(self,in_img):
        """Return a float32 copy of `in_img` normalised per channel.

        Computes (in_img - norm_mean * 255) / (norm_variance * 255); the input
        array is not modified.
        """
        # should be RGB order
        img = in_img.copy().astype(np.float32)

        norm_mean_array = self.norm_mean * 255.0
        norm_variance_array = self.norm_variance * 255.0

        img -= norm_mean_array
        img /= norm_variance_array
        return img

    def denormalizeMeanVariance(self,in_img):
        """Invert normalizeMeanVariance and return a uint8 image.

        Computes (in_img * norm_variance + norm_mean) * 255, clipped to
        [0, 255]; the input array is not modified.
        """
        # should be RGB order
        # Out-of-place multiply: yields a new float array even for integer
        # input, where an in-place float multiply would raise a casting error.
        img = in_img * self.norm_variance
        img += self.norm_mean
        img *= 255.0
        img = np.clip(img, 0, 255).astype(np.uint8)
        return img

    def resize_aspect_ratio(self,img, interpolation=cv2.INTER_LINEAR):
        """Scale `img` keeping its aspect ratio and pad it to multiples of 32.

        Args:
            img: array of shape (height, width, channel).
            interpolation: OpenCV interpolation flag passed to `cv2.resize`.

        Returns:
            Tuple `(resized, ratio, size_heatmap)`: the zero-padded float32
            canvas, the scale factor applied, and `(width // 2, height // 2)`
            of the padded canvas.
        """
        mag_ratio=self.mag_ratio
        square_size=self.canvas_size

        height, width, channel = img.shape
        longer_side = max(height, width)

        # Magnify the longer side, but never beyond the configured canvas size.
        target_size = mag_ratio * longer_side
        if target_size > square_size:
            target_size = square_size

        ratio = target_size / longer_side

        target_h, target_w = int(height * ratio), int(width * ratio)
        proc = cv2.resize(img, (target_w, target_h), interpolation = interpolation)

        # Round each dimension up to the next multiple of 32 and paste the
        # resized image into the top-left corner of a zero canvas.
        target_h32, target_w32 = target_h, target_w
        if target_h % 32 != 0:
            target_h32 = target_h + (32 - target_h % 32)
        if target_w % 32 != 0:
            target_w32 = target_w + (32 - target_w % 32)
        resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
        resized[0:target_h, 0:target_w, :] = proc
        target_h, target_w = target_h32, target_w32

        size_heatmap = (int(target_w/2), int(target_h/2))

        return resized, ratio, size_heatmap
    

In [12]:
# pipeline
from mlOCR.utils.common import save_image,save_bin

# Image-processing stage: load the image, save a copy, resize and normalise
# it, then persist the normalised image and the resize metadata.
try:
    config=ConfigurationManager()
    image_processing_config=config.get_image_processing_config()
    image_processing=ImageProcessing(image_processing_config)
    image=image_processing.loadImage()
    save_image(image,'demo_transformed.jpg',image_processing.result_path)
    resized_image,ratio,size_heatmap=image_processing.resize_aspect_ratio(image)
    normalized_image=image_processing.normalizeMeanVariance(resized_image)
    save_image(normalized_image,'demo_normalized.jpg',image_processing.result_path)
    # Keep the scale factor and heat-map size alongside the images.
    resized_data={'ratio':ratio,'size_heatmap':size_heatmap}
    save_bin(resized_data,'resized_data.pkl',image_processing.result_path)
except Exception as e:
    logger.exception(e)
    # Bare `raise` re-raises the active exception without adding this line
    # to its traceback.
    raise

[2025-03-04 12:40:38,680: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-04 12:40:38,709: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-04 12:40:38,713: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-04 12:40:38,717: INFO: common: created directory at: artifacts]
[2025-03-04 12:40:38,721: INFO: common: created directory at: artifacts/image/result]
[2025-03-04 12:40:38,782: INFO: common: Image saved at: artifacts\image\result]
[2025-03-04 12:40:38,815: INFO: common: Image saved at: artifacts\image\result]
[2025-03-04 12:40:38,816: INFO: common: binary file resized_data.pkl saved at: artifacts\image\result]


In [6]:
from mlOCR.pipeline.stage_00_demo_image import DemoImagePipeline
from mlOCR.pipeline.stage_01_image_processing import ImageProcessingPipeline

# Run the packaged pipeline stages in sequence: the demo-image stage first,
# then the image-processing stage.
obj1=DemoImagePipeline()
obj1.main()
obj2=ImageProcessingPipeline()
obj2.main()

[2025-03-03 16:22:12,910: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-03 16:22:12,913: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-03 16:22:12,916: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-03 16:22:12,917: INFO: common: created directory at: artifacts]
[2025-03-03 16:22:12,919: INFO: common: created directory at: artifacts/image/input]
[2025-03-03 16:22:13,168: INFO: Demo_Image: demo.jpg downloaded successfully!]
[2025-03-03 16:22:13,171: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-03 16:22:13,173: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-03 16:22:13,174: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-03 16:22:13,175: INFO: common: created directory at: artifacts]
[2025-03-03 16:22:13,176: INFO: common: created directory at: artifacts/image/result]
[2025-03-03 16:22:13,219: INFO: common: Image saved at: artifacts\image\result]
[2025-