# Importing libs

In [2]:
import os, sys, random
import google.generativeai as genai
from dotenv import load_dotenv


sys.path.insert(0, './sample')
from sample.create_sample import create_sample

load_dotenv()
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

  from .autonotebook import tqdm as notebook_tqdm


# Defining sample test

In [3]:
sample_path = "./data/samples"
random_sample = create_sample(size=10)
print (random_sample)

# for file_name in os.listdir(sample_path):
#     print(file_name[:-4])

['6ng6w.png', 'pxdwp.png', '74eyg.png', '8d8ep.png', 'e84n2.png', '2p2y8.png', 'b685n.png', '85pew.jpg', '8np22.png', '3den6.png']


# Testing some Images pre-processing
1. Converting to greysacale; Simplifica a imagem, reduzindo a dimensionalidade e facilitando a segmentação.
2. Binarização: Converte a imagem em uma imagem binária (preto e branco), facilitando a identificação de regiões de interesse. 0.5 devido ao fundo degradê ser bem definido.
3. Remoção de Ruído

4. Filtragem: 
4. 1. Média: Suaviza a imagem, reduzindo ruído gaussiano.
4. 2. Mediana: Preserva bordas, eficaz contra ruído impulsivo (sal e pimenta).
4. 3. Gaussiano: Suaviza a imagem, com maior preservação de detalhes.

Morfologia Matemática:
Dilatação: Engrossa linhas e objetos.
Erosão: Afina linhas e objetos.
Abertura: Remove pequenos objetos e preenche pequenos buracos.
Fechamento: Preenche pequenos buracos e remove pequenos objetos.

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image

def binarization(img):
	threshold = 0.5
	img_binary = tf.where(img > threshold, 1.0, 0.0)
	return img_binary

def gaussian_transformation(img, kernel_size, sigma):
	x = tf.range(-kernel_size // 2 + 1, kernel_size // 2 + 1, dtype=tf.float32)
	y = x[:, tf.newaxis]
	kernel = tf.exp(-(x * x + y * y) / (2.0 * sigma * sigma))
	kernel = kernel / tf.reduce_sum(kernel)

	# Aplicar a convolução (filtragem Gaussiana)
	img_filtered = tf.nn.conv2d(img[tf.newaxis, :, :, :], kernel[tf.newaxis, :, :, tf.newaxis], strides=1, padding='SAME')
	img_filtered = tf.squeeze(img_filtered)
	return img_filtered

def median_transformation(img,median_kernel_size):
	# Extrai patches da imagem
	patches = tf.image.extract_patches(
		images=img[tf.newaxis, :, :, tf.newaxis],
		sizes=[1, median_kernel_size, median_kernel_size, 1],
		strides=[1, 1, 1, 1],
		rates=[1, 1, 1, 1],
		padding='SAME')
	patches = tf.reshape(patches, [-1, median_kernel_size * median_kernel_size])
	 # Ordena os valores em cada patch
	patches_sorted = tf.sort(patches, axis=1)

	# Seleciona o valor mediano
	median_values = patches_sorted[:, median_kernel_size * median_kernel_size // 2]

	# Reconstrói a imagem
	output = tf.reshape(median_values, tf.shape(img))
	return output

def erosion(image, element):
    eroded = tf.nn.conv2d(image[tf.newaxis, :, :, tf.newaxis],
                        element[tf.newaxis, :, :, tf.newaxis],
                        strides=1, padding='SAME')
    return tf.squeeze(eroded)

def dilation(image, element):
    dilated = tf.nn.max_pool(
        tf.concat([image[tf.newaxis, :, :, tf.newaxis],
                  -tf.ones_like(image[tf.newaxis, :, :, tf.newaxis])], axis=3),
        ksize=[1, element.shape[0], element.shape[1], 1],
        strides=1, padding='SAME')
    return tf.squeeze(dilated)

def image_processing(image_path, channels, gaussian_kernel, sigma, median_kernel):
	element = tf.ones((1, 1), dtype=tf.float32)

	img = tf.io.read_file(image_path)
	img = tf.image.decode_png(img, channels=channels)
	### Binarização
	# Converter para float32 e normalizar para o intervalo [0, 1]
	img = tf.cast(img, tf.float32) / 255.0

	# Binarização (ajustar o limiar conforme necessário)
	img = binarization(img)

	# Criar um kernel Gaussiano
	img = gaussian_transformation(img, gaussian_kernel, sigma)

	img = median_transformation(img, median_kernel)

	img = dilation(img,element)

	# Visualizar a imagem binária
	plt.imshow(img, cmap='gray')
	plt.show()


In [None]:
import cv2
import numpy as np
from PIL import Image
import io
import base64

def cv_image_processing(image_path, gaussian_kernel, sigma, median_kernel, closing_k, dilation_k, method):
	kernel_d = np.ones(dilation_k, np.uint8)
	kernel_c = np.ones(closing_k, np.uint8)

	img = cv2.imread(image_path, 0)
	(h, w) = img.shape[:2]
	img = cv2.resize(img, (int(w*1.8), int(h*1.8)))
	ret, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

	if median_kernel != None:
		thresh = cv2.medianBlur(thresh, median_kernel)

	if gaussian_kernel != None:
		thresh = cv2.GaussianBlur(thresh, (gaussian_kernel, gaussian_kernel), sigma)

	tmp_path = "./data/tmp/" + image_path[-9:]
	if method == 'dilation' and dilation_k != None:
		dilation = cv2.dilate(thresh, kernel_d, iterations=1)
		dilation_image = Image.fromarray(dilation, mode="L")
		dilation_image.save(tmp_path,format='PNG')
		# dilation_buffer = io.BytesIO()
		# dilation_image.save(dilation_buffer,format='PNG')
		return dilation_image
	elif method == 'closing' and closing_k != None:
		closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel_c)
		closing_image = Image.fromarray(closing, mode="L")
		closing_image.save(tmp_path,format='PNG')
		# closing_buffer = io.BytesIO()
		# closing_image.save(closing_buffer,format='PNG')
		return closing_image
	else:
		return Image.fromarray(thresh, mode="L")

	# cv2.imshow('Original', img)
	# # cv2.imshow('Blur', blur)
	# cv2.imshow('Median', median)
	# cv2.imshow('Dilation', dilation)
	# cv2.imshow('Closing', closing)

	# cv2.waitKey(0)
	# cv2.destroyAllWindows()

# Upload to GEMINI

In [6]:
def upload_to_gemini(path, mime_type=None):
  file = genai.upload_file(path, mime_type=mime_type)
  return file

# Gemini Prompting

In [7]:
def captcha_decoder(captcha):
    # Create the model
    generation_config = {
        "temperature": 0,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 256,
        "response_mime_type": "text/plain",
    }

    files = [
        upload_to_gemini(captcha,mime_type="image/png"),
    ]

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=generation_config,
    )

    chat_session = model.start_chat(
        history=[{
                "role": "user",
                "parts": [
                    files[0],
                ],
            }
        ]
    )

    response = chat_session.send_message("Act as a 5 characters captcha breaker and tell me whats the captcha on the image? Response only with the text in lowercase. If you cannnot detect any text, tell me only: 'NONE'. Remember all captha has 5 characters that can be alphanumeric")
    return response.text

# Pytessract Testing

In [44]:
import pytesseract
from PIL import Image
def pytesseract_captcha(captcha):
	print(captcha)
	img = Image.open(captcha)
	response = pytesseract.image_to_string(img)
	# print(response)
	return response

# Decaptcha with Gemini

In [None]:
import re
captcha_response = []
# dilation or closing
method = 'closing'
for sample in random_sample:
	image_path = sample_path + "/" + sample
	tmp_path = "./data/tmp/" + sample
	images = cv_image_processing(image_path,
					 gaussian_kernel=None, sigma=0.5,
					 median_kernel=None,
					 closing_k=(5,5),
					 dilation_k=(3,5),
					 method = method)
	response = captcha_decoder(tmp_path)
	captcha_response.append(re.sub(r"[\n\t\s]*", "", response))
	# os.remove(tmp_path)

# Decaptcha With pytesseract

In [None]:
import re
tesseract_response = []
# dilation or closing
method = 'dilation'
for sample in random_sample:
	image_path = sample_path + "/" + sample
	tmp_path = "./data/tmp/" + sample
	images = cv_image_processing(image_path,
					 gaussian_kernel=None, sigma=0.5,
					 median_kernel=1,
					 closing_k=(1,1),
					 dilation_k=(3,3),
					 method=method)
	response = pytesseract.image_to_string(images, config='--psm 10')
	tesseract_response.append(re.sub(r"[\n\t\s]*", "", response))
	# os.remove(tmp_path)

# print(random_sample[:-4], response.text)
# print("OK") if response.text == random_sample[:-3] else print("no")

In [85]:
from sample.test_sample import test_sample
metrics = test_sample(random_sample, tesseract_response)
# metrics_llm = test_sample(random_sample, captcha_response)

In [86]:
metrics

{'data': [{'tested': 'GreGw', 'ground': '6ng6w', 'recall': 0},
  {'tested': 'wxdwip—', 'ground': 'pxdwp', 'recall': 0},
  {'tested': 'deyq', 'ground': '74eyg', 'recall': 0},
  {'tested': '(BAseD_', 'ground': '8d8ep', 'recall': 0},
  {'tested': 'a', 'ground': 'e84n2', 'recall': 0},
  {'tested': 'a', 'ground': '2p2y8', 'recall': 0},
  {'tested': 'H685a', 'ground': 'b685n', 'recall': 0},
  {'tested': 'S5paw', 'ground': '85pew', 'recall': 0},
  {'tested': 'Byp22', 'ground': '8np22', 'recall': 0},
  {'tested': '3den6—', 'ground': '3den6', 'recall': 0}],
 'metrics': 0.0}

In [16]:
import os
for file_name in os.listdir('./data/tmp'):
    os.remove('./data/tmp/' + file_name)