In [None]:
# Mount Google Drive at /content/drive/; the MAIN cell saves the captured photo
# under this mount point.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


# **Bibliotecas**

In [None]:
if 1:
    !pip install numpy
    !pip install opencv-python
    !pip install matplotlib
    !pip install scikit-image
    !pip install pillow
    !pip install pytesseract
    !pip install language-tool-python
    !sudo apt install tesseract-ocr
    !sudo apt install libtesseract-dev
    !sudo apt install tesseract-ocr-por


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libtesseract-dev is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr-por is already the newest version (1:4.00~git30-7274cfa-1.1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.


In [None]:
# download morph.py from drive
!pip install -U --no-cache-dir gdown --pre
!wget https://raw.githubusercontent.com/fzampirolli/morph/main/morph.py

--2025-04-09 22:26:26--  https://raw.githubusercontent.com/fzampirolli/morph/main/morph.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42991 (42K) [text/plain]
Saving to: ‘morph.py’


2025-04-09 22:26:26 (3.51 MB/s) - ‘morph.py’ saved [42991/42991]



In [None]:
import numpy as np
import cv2
from google.colab.patches import cv2_imshow #  display de imagem
from skimage import io
from PIL import Image
import matplotlib.pylab as plt
from morph import *
import pytesseract

# Path of the Tesseract executable that pytesseract invokes.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
# Default Tesseract command-line options: OCR engine mode 3, page segmentation
# mode 6.
# NOTE(review): the MAIN cell reassigns custom_config before running OCR, so
# this value is only a fallback for code that runs before that cell.
custom_config = r'--oem 3 --psm 6'

# **Captura de Imagem da Câmera usando uma webcam**

In [None]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

In [None]:
def take_photo(filename, quality):
  """Capture one webcam frame in the browser and save it as a JPEG file.

  Injects a JavaScript helper into the notebook output that shows a live video
  preview with a "Capture" button, waits for the click, and returns the frame
  as a JPEG data URL. The base64 payload of that URL is decoded and written to
  ``filename``.

  Args:
    filename: Destination path of the JPEG file.
    quality: JPEG quality forwarded to ``canvas.toDataURL``.

  Returns:
    The ``filename`` that was written.
  """
  capture_script = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();


      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(capture_script)

  # The helper resolves to a data URL ("<header>,<base64 payload>"); only the
  # part after the first comma is image data.
  data_url = eval_js(f'takePhoto({quality})')
  jpeg_bytes = b64decode(data_url.split(',')[1])

  with open(filename, 'wb') as photo_file:
    photo_file.write(jpeg_bytes)
  return filename

# **Processamento de Imagem**

In [None]:
# 1. Aquisição de imagem (Webcam)
def capture_image(image_path):
  !gdown --id 1SQ0ueCgL_XuEj_0YnbW0epsqPA4j0S0C
  image = cv2.imread(image_path)
  # Converter a imagem para escala cinza
  image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  return image_gray

In [None]:
# 2. Preprocessing
def process_image(image_gray):
  """Prepare a grayscale image for OCR.

  Applies, in order: CLAHE contrast enhancement, a 3x3 Gaussian blur, adaptive
  Gaussian thresholding, and one dilation pass.

  Args:
    image_gray: Grayscale image to preprocess.

  Returns:
    The thresholded image after the dilation pass.
  """
  # Contrast enhancement (can help with unevenly lit images).
  equalized = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(image_gray)

  # Noise removal before OCR.
  smoothed = cv2.GaussianBlur(equalized, (3, 3), 0)

  # Adaptive thresholding with a Gaussian-weighted mean.
  binary = cv2.adaptiveThreshold(
      smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

  # Dilation meant to thicken thin characters.
  # Author's TODO: increase the dilation / enlarge or change the kernel.
  structuring_element = np.ones((1, 1), np.uint8)
  return cv2.dilate(binary, structuring_element, iterations=1)


In [None]:
def segment_text(image_processed, min_area=50, max_area=5000,
                 min_aspect=0.2, max_aspect=5, line_height=20):
    """Detect candidate text blocks in a preprocessed single-channel image.

    The image is dilated with a 3x3 rectangular kernel, contours are extracted,
    and each contour is kept when both its area and the width/height ratio of
    its bounding box fall strictly inside the configured ranges.

    Args:
        image_processed: Single-channel image to segment.
        min_area: Exclusive lower bound for the contour area.
        max_area: Exclusive upper bound for the contour area.
        min_aspect: Exclusive lower bound for bounding-box width / height.
        max_aspect: Exclusive upper bound for bounding-box width / height.
        line_height: Height, in pixels, of the horizontal bands used to group
            blocks into rows when sorting.

    Returns:
        A tuple ``(segmented_img, text_blocks)``: a BGR copy of the input with
        a green rectangle drawn around each accepted block, and the list of
        ``(x, y, w, h)`` boxes sorted by row band, then by x.

    Raises:
        ValueError: If ``line_height`` is not positive.
    """
    if line_height <= 0:
        raise ValueError("line_height must be a positive number of pixels")

    # 1. Extra preprocessing for segmentation.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    dilated = cv2.dilate(image_processed, kernel, iterations=1)

    # 2. Contour detection (tree retrieval mode, for nested blocks).
    # findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contour list in both.
    contours = cv2.findContours(dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # 3. Filter contours by area and aspect ratio to discard noise.
    text_blocks = []
    segmented_img = cv2.cvtColor(image_processed, cv2.COLOR_GRAY2BGR)

    for cnt in contours:
        area = cv2.contourArea(cnt)
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = w / h

        # Criteria for a contour to be treated as text.
        if (min_area < area < max_area) and (min_aspect < aspect_ratio < max_aspect):
            cv2.rectangle(segmented_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            text_blocks.append((x, y, w, h))  # keep the block coordinates

    # 4. Order blocks top to bottom by row band, then left to right.
    text_blocks.sort(key=lambda b: (b[1] // line_height, b[0]))

    return segmented_img, text_blocks

In [None]:
# 6. Pós-processamento (Correção de texto)
# Entrada: Texto reconhecido com possíveis erros.
# Saída: Texto refinado e corrigido em formato digital.

import language_tool_python
import re

def corrigir_texto(texto_ocr):
    """Normalize whitespace in OCR output and apply pt-BR spelling/grammar fixes.

    Newlines are replaced by spaces and every run of whitespace is collapsed to
    a single space before the text is handed to LanguageTool.

    Args:
        texto_ocr: Raw text returned by the OCR step.

    Returns:
        The corrected text. When the input contains no non-whitespace
        characters, the normalized text is returned without starting
        LanguageTool.
    """
    # Simple formatting cleanup.
    texto_formatado = re.sub(r'\s+', ' ', texto_ocr.replace('\n', ' '))

    # Nothing to correct: skip starting the LanguageTool backend altogether.
    if not texto_formatado.strip():
        return texto_formatado

    tool = language_tool_python.LanguageTool('pt-BR')
    try:
        # Spelling and grammar correction.
        return tool.correct(texto_formatado)
    finally:
        # Shut the LanguageTool backend down so repeated calls do not leave
        # server processes running.
        tool.close()

# **MAIN**

In [None]:
# 7. Salvar no formato escolhido (txt, pdf, docx...)

if __name__ == "__main__":
  # End-to-end run: capture a photo, preprocess it, segment text blocks, run
  # OCR on the full image and on each block, then correct and save the text.
  #
  # NOTE(review): this import rebinds the notebook-level name `Image`, which an
  # earlier cell imported from PIL, to IPython's display class.
  from IPython.display import Image
  import os

  image_path = "/content/drive/MyDrive/Colab Notebooks/"
  filename = os.path.join(image_path, 'photo1.jpg')
  quality = 0.8

  # Only the capture step is guarded: its failures are expected (no webcam,
  # permission denied). Errors in the processing pipeline below are allowed to
  # propagate so they show a full traceback instead of a one-line message.
  try:
    take_photo(filename, quality)
  except Exception as err:
    # Errors will be thrown if the user does not have a webcam or if they do not
    # grant the page permission to access it.
    print(str(err))
  else:
    print('Imagem gravada em {}'.format(filename))

    # Show the image which was just taken.
    display(Image(filename))

    image_gray = capture_image(filename)
    image_processed = process_image(image_gray)

    # Show the preprocessed image.
    plt.figure(figsize=(8, 6))
    plt.imshow(image_processed, cmap='gray')
    plt.title('Imagem Processada para OCR')
    plt.show()

    # Segmentation.
    segmented_img, text_blocks = segment_text(image_processed)

    # Show the segmentation result.
    plt.figure(figsize=(8, 6))
    plt.imshow(segmented_img)
    plt.title('Blocos de Texto Detectados')
    plt.show()

    # Tesseract options for paragraph text, restricted to a character whitelist.
    # NOTE(review): the whitelist has no lowercase letters or digits — confirm
    # this is intended.
    custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZÇÃÂÊÁÉÍÓÚÀÜÖËÄ,.:;!?() "'

    # OCR over the whole image.
    full_text = pytesseract.image_to_string(image_processed, lang='por', config=custom_config)
    print("TEXTO RECONHECIDO:\n", full_text)

    # Per-block OCR (only when blocks were detected).
    if text_blocks:
      print("\nRECONHECIMENTO POR BLOCOS:")
      print("-"*50)
      for block_number, (x, y, w, h) in enumerate(text_blocks, start=1):
        roi = image_processed[y:y+h, x:x+w]
        text = pytesseract.image_to_string(roi, lang='por', config='--psm 7')
        print(f"Bloco {block_number}: {text.strip()}")
      print("-"*50)
    else:
      print("\nNENHUM BLOCO DE TEXTO DETECTADO!")

    # Final check.
    print("\nVERIFICAÇÃO:")
    print(f"Total de blocos detectados: {len(text_blocks)}")
    print(f"Tamanho da imagem processada: {image_processed.shape}")

    # Post-processing: correct the full-image text and save it.
    texto_corrigido = corrigir_texto(full_text)
    print("TEXTO CORRIGIDO:\n", texto_corrigido)
    with open("texto_corrigido.txt", "w", encoding="utf-8") as f:
      f.write(texto_corrigido)
      print("Texto salvo em 'texto_corrigido.txt'")
