# Projeto facerecog

In [1]:
#pacotes = !pip freeze

In [2]:
#%%writefile requirements.txt

# full package list from the environment; to be filtered later down to the packages actually used
#ipython==5.1.0
#jupyter==1.0.0
#matplotlib==1.5.3
#notebook==4.2.3
#numpy==1.11.2
#pandas==0.18.1
#Pillow==3.4.1
#pytesseract==0.1.6
#scikit-image==0.12.3
#scikit-learn==0.18
#scipy==0.18.1
#seaborn==0.7.1

# the following must be installed separately
# opencv==3.1.0
# tesseract

In [3]:
# uncomment line below to install requirements (recommended to use a virtualenv)
#!pip install -r requirements.txt

-----------------------

# Importação de pacotes

In [4]:
%matplotlib inline
""" handling files support packages """
from glob import glob

""" logic support packages """
import numpy as np
import pytesseract
import itertools
import csv
import pandas as pd

""" plot support packages """
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

""" image trasformation packages """
from PIL import Image
import skimage.io as skio
from skimage.util import dtype_limits
from skimage.morphology import label
from skimage.measure import regionprops
from skimage.morphology import label, skeletonize
from skimage.filters import rank
from skimage import color
from skimage import restoration
from skimage import feature
from skimage.measure import compare_ssim, compare_mse
from sklearn.preprocessing import binarize

""" statistical data visualization packages"""
import seaborn as sns

""" seaborn configurations """
# Global plot appearance: seaborn 'white' style at 'talk' context, and a
# default matplotlib figure size of 20 x 10.
sns.set_style(style='white')
sns.set_context(context='talk')
plt.rcParams['figure.figsize'] = (20, 10)

In [5]:
from funcoes import apply_filter, mse, compare_images, load_image, save_image, plot_captchas, save_images, crop_char, feed_char_dict, ler_letras, checar_combinacoes, remove_small_blobs, run_tesseract

----------------------------

# Leitura das imagens iniciais e funções de suporte

In [6]:
# Collect the initial captcha files in file-name order and load them as an
# image collection.
# NOTE(review): the label tuples in the cells below are presumably matched to
# the images by position, so this ordering must stay stable.
img_files = glob('../imagens/captcha*.png')
img_files.sort()
imgs = skio.imread_collection(img_files)

In [7]:
# Notebook-wide switches.
#   v    -- filter version passed to apply_filter (original note: 1, 2 or 3).
#   plot -- when True, the plot_captchas preview cells draw their grids.
#   save -- when True, the collected characters are passed to save_images.
v = 2
plot, save = False, True

In [8]:
# Run every loaded captcha through apply_filter with the selected version.
unblobbed = [apply_filter(captcha, v) for captcha in imgs]
if plot:
    plot_captchas(unblobbed)

------------------------

# Extração e arquivo das letras dos primeiros captchas

In [9]:
# Start from an empty dictionary; re-running this cell discards anything
# collected so far because the name is rebound to a new dict.
captcha_letters = {}
if plot:
    print(captcha_letters == {})

In [10]:
# 6th character (position index 5), taken from every filtered captcha.
cropped = [crop_char(captcha, 5) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (4, 5))

In [11]:
# Character for each image in `cropped`, in order; one string per row of the
# (4, 5) preview grid.
letters = tuple(
    '9gbqo'
    'xblrm'
    'll3ez'
    '11eau'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [12]:
# 5th character (position index 4), taken from every filtered captcha.
cropped = [crop_char(captcha, 4) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (4, 5))

In [13]:
# Character for each image in `cropped`, in order; one string per row of the
# (4, 5) preview grid.
letters = tuple(
    'kbgcl'
    'sfdoa'
    'y7kyj'
    'qkz3x'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [14]:
# 4th character (position index 3), taken from every filtered captcha.
cropped = [crop_char(captcha, 3) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (4, 5))

In [15]:
# Character for each image in `cropped`, in order; one string per row of the
# (4, 5) preview grid.
letters = tuple(
    'j66sl'
    'fe5wa'
    'isfbm'
    'sk3vh'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [16]:
# 3rd character (position index 2), taken from every filtered captcha.
cropped = [crop_char(captcha, 2) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (4, 5))

In [17]:
# Character for each image in `cropped`, in order; one string per row of the
# (4, 5) preview grid.
letters = tuple(
    'pvygs'
    '2nbsi'
    'kapvz'
    'tecga'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [18]:
# 2nd character (position index 1), taken from every filtered captcha.
cropped = [crop_char(captcha, 1) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (4, 5))

In [19]:
# Character for each image in `cropped`, in order; one string per row of the
# (4, 5) preview grid.
letters = tuple(
    'vwzqr'
    '3hqss'
    'muamd'
    'y9pon'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [20]:
# 1st character (position index 0), taken from every filtered captcha.
cropped = [crop_char(captcha, 0) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (4, 5))

In [21]:
# Character for each image in `cropped`, in order; one string per row of the
# (4, 5) preview grid.
letters = tuple(
    'vtg8s'
    'knqwg'
    'eji46'
    'av99w'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

----------

# Inclusão dos novos captchas

A partir daqui farei a inclusão dos novos captchas no dicionário já criado **captcha_letters**:

In [22]:
# Second batch: gerarCaptcha.asp<suffix>.png files with a two- or
# one-character suffix, merged and sorted by file name.
# NOTE(review): '[0-9]?' matches a digit followed by ANY single character, not
# only a second digit -- confirm no such files exist. The sort is
# lexicographic (e.g. 'asp10' comes before 'asp2'); the labels below were
# presumably typed against that order, so it is left unchanged.
two_char_suffix = glob(r'../imagens/gerarCaptcha.asp[0-9]?.png')
one_char_suffix = glob(r'../imagens/gerarCaptcha.asp[0-9].png')
img_files = sorted(two_char_suffix + one_char_suffix)
imgs = skio.imread_collection(img_files)

In [23]:
#plot_captchas(imgs, (10, 10))

In [24]:
# Run every captcha of the current batch through apply_filter.
unblobbed = [apply_filter(captcha, v) for captcha in imgs]
if plot:
    plot_captchas(unblobbed, (10, 10))

In [25]:
# 6th character (position index 5), taken from every filtered captcha.
cropped = [crop_char(captcha, 5) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (10, 10))

In [26]:
# Character for each image in `cropped`, in order; one string per row of the
# (10, 10) preview grid.
letters = tuple(
    'dmn4kj8u36'
    '3zcxuebwy1'
    'ux2s5avg7h'
    'rpf1s6v4ha'
    '4rtxgjn14z'
    'pus9cr3gkg'
    'sta8qsoth5'
    'mzgs6ykwbp'
    'casg23nazy'
    'pqbudahb2s'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [27]:
# 5th character (position index 4), taken from every filtered captcha.
cropped = [crop_char(captcha, 4) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (10, 10))

In [28]:
# Character for each image in `cropped`, in order; one string per row of the
# (10, 10) preview grid.
letters = tuple(
    '8uhqpxjykj'
    'wjpxxpq1ug'
    'awpmwwgegx'
    'r1c9ynndjg'
    'hjb5al68xe'
    'ruvqt5wsft'
    'i76jz1bv7d'
    'nveqvrmreb'
    'omykahcujd'
    'qpr2bchdqh'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [29]:
# 4th character (position index 3), taken from every filtered captcha.
cropped = [crop_char(captcha, 3) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (10, 10))

In [30]:
# Character for each image in `cropped`, in order; one string per row of the
# (10, 10) preview grid.
letters = tuple(
    'nngqxmauxn'
    'thyvxk5ep9'
    's1jlzhm7th'
    '6sr8h9tfqe'
    'sb3vl8axpu'
    'vsqdeumkbe'
    'atfsneyt45'
    '1a6pvj7imj'
    'nia7b37p3v'
    '1yrktvppcu'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [31]:
# 3rd character (position index 2), taken from every filtered captcha.
cropped = [crop_char(captcha, 2) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (10, 10))

In [32]:
# Character for each image in `cropped`, in order; one string per row of the
# (10, 10) preview grid.
letters = tuple(
    'jn1rrtownc'
    'romademyuf'
    'ony8ptinvq'
    'rsbr18qguo'
    '4q5tbefv1i'
    'ahwsvhhdh1'
    '6y68znfnfb'
    'em9ar4atoq'
    'juaydpeph1'
    '5nb64d5hgc'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [33]:
# 2nd character (position index 1), taken from every filtered captcha.
cropped = [crop_char(captcha, 1) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (10, 10))

In [34]:
# Character for each image in `cropped`, in order; one string per row of the
# (10, 10) preview grid.
letters = tuple(
    'mskvclrwxg'
    'vhkt7r7ysz'
    'o4oeazj8iy'
    '8vpmaowukn'
    'eq3bbcc7zt'
    't5mzk3rnck'
    '9nuy3uv1an'
    'jvnxy3uhrc'
    'wgxf2voev1'
    'd3x7lgzhrs'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [35]:
# 1st character (position index 0), taken from every filtered captcha.
cropped = [crop_char(captcha, 0) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (10, 10))

In [36]:
# Character for each image in `cropped`, in order; one string per row of the
# (10, 10) preview grid.
letters = tuple(
    'rjcwjpudpp'
    'ipp4butly5'
    'f97ealfcta'
    'eka8whkssr'
    'wpkndepezy'
    '7gnumx7kyz'
    'zntgnvhero'
    'szr18vqpnn'
    '3en1dgxhua'
    'fw4r9hapcb'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [37]:
#cropped = [crop_char(img, 1, x1 = 10, x2 = 40) for img in unblobbed ]
#plot_captchas( cropped, (10, 10) )

# Adição de mais 70 imagens

In [38]:
# Third batch: gerarCaptcha.asp<NNN>.png files with a three-digit suffix,
# loaded in file-name order.
img_files = glob(r'../imagens/gerarCaptcha.asp[0-9][0-9][0-9].png')
img_files.sort()
imgs = skio.imread_collection(img_files)

In [39]:
# Optional preview of the unfiltered images on a (7, 10) grid.
if plot:
    plot_captchas(imgs, (7, 10))

In [40]:
# Run every captcha of the current batch through apply_filter.
unblobbed = [apply_filter(captcha, v) for captcha in imgs]
if plot:
    plot_captchas(unblobbed, (7, 10))

In [41]:
# 6th character (position index 5), taken from every filtered captcha.
cropped = [crop_char(captcha, 5) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (7, 10))

In [42]:
# Character for each image in `cropped`, in order; one string per row of the
# (7, 10) preview grid.
letters = tuple(
    '4tstokzmxe'
    'm8brfprhtr'
    'wf3eqs2rgb'
    'vpxr24ewe2'
    'ofgaf8jwq8'
    'tccwg9fvow'
    '5yd1koakmd'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [43]:
# 5th character (position index 4), taken from every filtered captcha.
cropped = [crop_char(captcha, 4) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (7, 10))

In [44]:
# Character for each image in `cropped`, in order; one string per row of the
# (7, 10) preview grid.
letters = tuple(
    'gaocpbmnqw'
    'snud6yr8qq'
    'gadkqmxefu'
    'zoylokocmv'
    'nrym9vzudv'
    'k37gjt71oh'
    'zofiwtoemh'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [45]:
# 4th character (position index 3), taken from every filtered captcha.
cropped = [crop_char(captcha, 3) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (7, 10))

In [46]:
# Character for each image in `cropped`, in order; one string per row of the
# (7, 10) preview grid.
letters = tuple(
    'jkez9hwpgo'
    'muetj4vkxm'
    'slegpzet6a'
    'nzkgfu9rxe'
    'ttercuhbmf'
    'lthvwz5rrh'
    'ona4pkggg8'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [47]:
# 3rd character (position index 2), taken from every filtered captcha.
cropped = [crop_char(captcha, 2) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (7, 10))

In [48]:
# Character for each image in `cropped`, in order; one string per row of the
# (7, 10) preview grid.
letters = tuple(
    '3nssa9gchh'
    '1j35hqrhso'
    'vkjccnbdzv'
    'ej8yvm3qew'
    'dqkcp2fna2'
    'ya1fnvb2eq'
    '5nvz2red3t'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [49]:
# 2nd character (position index 1), taken from every filtered captcha.
cropped = [crop_char(captcha, 1) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (7, 10))

In [50]:
# Character for each image in `cropped`, in order; one string per row of the
# (7, 10) preview grid.
letters = tuple(
    'sgkcpruyo4'
    '3atrnurfer'
    'zbxhcstjdz'
    'ymr7q1v3gs'
    'r32yo1nntg'
    'h21wvjx8wu'
    'caowuhnttc'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

In [51]:
# 1st character (position index 0), taken from every filtered captcha.
cropped = [crop_char(captcha, 0) for captcha in unblobbed]
if plot:
    plot_captchas(cropped, (7, 10))

In [52]:
# Character for each image in `cropped`, in order; one string per row of the
# (7, 10) preview grid.
letters = tuple(
    '3mxpp2mk5u'
    'qrwskfxt7y'
    'dr8mxenwtl'
    'e1ud5jtnkk'
    'ydrzcs3to5'
    'oayvgerswz'
    'ookw6wj5dk'
)

# A different number of image files would otherwise mislabel crops silently.
assert len(letters) == len(cropped), 'label count does not match crop count'
feed_char_dict(captcha_letters, letters, cropped)

# Salvar base de dados de Letras

In [53]:
# Hand the collected characters to save_images, unless saving is switched off.
if save:
    save_images(captcha_letters)

In [54]:
# Distinct characters collected so far (the dictionary keys), in sorted order.
classes = sorted(captcha_letters)
print(classes)

['1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
