<a href="https://colab.research.google.com/github/sadrakhamoshi/DeepLearning-OCR/blob/main/OpticalCharacterRecognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports


In [None]:
from skimage import io
import numpy as np
import random
import cv2
import csv
import os

import keras

from google.colab import drive
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm

# Data Collection

In [None]:
# Mount Google Drive at /content/drive; the dataset folders used below are
# addressed through paths that start with this mount point.

drive.mount("/content/drive")

Mounted at /content/drive


## Generate Numbers

In [None]:
# How many samples each of the three generators produces.
credit_card_n = new_national_n = old_national_n = 10000

# Folder that every label file, font, background and image path is built from.
base_dir = '/content/drive/MyDrive/Deep/OpticalCharacterRecognition/'

In [None]:
# Generate 16-Digits For Credit Card

# Six leading digits of a card number, already written in the "dddd dd"
# grouping so that the remaining ten digits can simply be appended.
credit_card_prefixes = [
    '6274 12', '6273 81', '5057 85', '6221 06', '6391 94', '6278 84',
    '6393 47', '5022 29', '6362 14', '6273 53', '5029 08', '6276 48',
    '2071 77', '6369 49', '5029 38', '5894 63', '6219 86', '5892 10',
    '6396 07', '6393 46', '5028 06', '6037 69', '6279 61', '6063 73',
    '6395 99', '6274 88', '5029 10', '6037 70', '6392 17', '5054 16',
    '6367 95', '6280 23', '6104 33', '9919 75', '6037 99', '6393 70',
    '6277 60', '6281 57', '5058 01',
]

def GenerateCreaditCardNumber(count=None, prefixes=None):
    """Generate unique, randomly completed 16-digit card numbers.

    Each number is one of the six-digit prefixes followed by ten random
    digits, laid out as four space-separated groups of four digits
    ("dddd dddd dddd dddd").

    Args:
        count: How many numbers to produce. Defaults to the module-level
            ``credit_card_n``.
        prefixes: Sequence of prefixes in "dddd dd" form. Defaults to the
            module-level ``credit_card_prefixes``.

    Returns:
        A list of ``count`` distinct strings, in the order they were drawn.

    Raises:
        ValueError: If ``count`` exceeds the number of distinct values the
            prefixes can produce.
    """
    if count is None:
        count = credit_card_n
    if prefixes is None:
        prefixes = credit_card_prefixes

    # Every prefix can be completed in 10**10 ways; asking for more than
    # that would keep the rejection loop below running forever.
    if count > len(set(prefixes)) * 10 ** 10:
        raise ValueError('cannot generate %d unique card numbers from %d prefixes'
                         % (count, len(prefixes)))

    numbers = []
    # Set mirror of ``numbers`` so the duplicate check is O(1) instead of a
    # scan over everything generated so far.
    seen = set()

    while len(numbers) < count:
        # Select A Prefix (works for any list length, not just 39 entries)
        prefix = random.choice(prefixes)

        # Random Numbers
        tail = ''.join(str(random.randint(0, 9)) for _ in range(10))
        new_item = prefix + tail[:2] + ' ' + tail[2:6] + ' ' + tail[6:]

        # Add only numbers that have not been produced before
        if new_item not in seen:
            seen.add(new_item)
            numbers.append(new_item)

    return numbers

# Generate the card numbers once; they double as the dataset labels.
credit_card_numbers = GenerateCreaditCardNumber()

# One label per line, no trailing newline.
credit_card_labels_text = '\n'.join(credit_card_numbers)
with open(base_dir + "CreditCardDataset/labels.txt", "w") as output:
    output.write(credit_card_labels_text)

# Quick sanity check: total count and the first few labels.
print('len:', len(credit_card_numbers), '\nhead:', credit_card_numbers[:5])

len: 10000 
head: ['6393 4712 6666 8736', '6273 8154 8173 4408', '6104 3362 5938 6588', '5029 3816 5906 7789', '6392 1738 1274 8771']


In [None]:
# Generate 10-Digits For National Card

DIGITS_COUNT_NATIONAL_ID = 10

# Persian digit glyphs, indexed by their numeric value (0-9).
persian_numbers = ['۰', '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹']
national_id_prefixes = []


def _RandomNationalIdDigits():
    """Return the ten digits of one random national ID as a list of ints.

    The first nine digits are random. The tenth is the control digit: the
    digit at 0-based position ``j`` is weighted by
    ``DIGITS_COUNT_NATIONAL_ID - j``, the weighted sum is reduced modulo 11,
    and a remainder below 2 is used as-is while any other remainder ``r``
    yields ``11 - r``.
    """
    body = [random.randint(0, 9) for _ in range(DIGITS_COUNT_NATIONAL_ID - 1)]
    weighted_sum = sum(digit * (DIGITS_COUNT_NATIONAL_ID - position)
                       for position, digit in enumerate(body))
    remainder = weighted_sum % 11
    control_digit = remainder if remainder < 2 else 11 - remainder
    return body + [control_digit]


def _GenerateUniqueNationalIDs(count, formatter):
    """Collect ``count`` distinct IDs, each rendered through ``formatter``.

    ``formatter`` receives the ten Persian digits as one string and returns
    the final label text.
    """
    # The control digit is fully determined by the other nine digits, so
    # only 10**9 different IDs exist; more would never terminate.
    if count > 10 ** (DIGITS_COUNT_NATIONAL_ID - 1):
        raise ValueError('cannot generate %d unique national IDs' % count)

    numbers = []
    # Set mirror of ``numbers`` keeps the duplicate check O(1).
    seen = set()
    while len(numbers) < count:
        glyphs = ''.join(persian_numbers[digit] for digit in _RandomNationalIdDigits())
        new_item = formatter(glyphs)
        if new_item not in seen:
            seen.add(new_item)
            numbers.append(new_item)
    return numbers


def GenerateNewNationalID(count=None):
    """Generate unique national IDs in the new-card layout.

    The new layout is the ten Persian digits written without separators.

    Args:
        count: How many IDs to produce. Defaults to the module-level
            ``new_national_n``.

    Returns:
        A list of ``count`` distinct 10-character strings.

    Raises:
        ValueError: If ``count`` exceeds the number of IDs that exist.
    """
    if count is None:
        count = new_national_n
    return _GenerateUniqueNationalIDs(count, lambda glyphs: glyphs)


def GenerateOldNationalID(count=None):
    """Generate unique national IDs in the old-card layout.

    The old layout groups the digits as ``ddd-dddddd-d``: three digits, six
    digits, then the control digit, separated by hyphens.

    Args:
        count: How many IDs to produce. Defaults to the module-level
            ``old_national_n``.

    Returns:
        A list of ``count`` distinct 12-character strings.

    Raises:
        ValueError: If ``count`` exceeds the number of IDs that exist.
    """
    if count is None:
        count = old_national_n
    return _GenerateUniqueNationalIDs(
        count,
        lambda glyphs: glyphs[:3] + '-' + glyphs[3:9] + '-' + glyphs[9:])

# Generate both national-ID label sets; they are reused by the image
# generators and stored as the label files of their datasets.
new_national_numbers = GenerateNewNationalID()
old_national_numbers = GenerateOldNationalID()

# The labels consist of Persian digits, so the encoding is pinned to UTF-8
# instead of relying on the platform's default text encoding.
with open(base_dir + "NewNationalCardDataset/labels.txt", "w", encoding="utf-8") as output:
    output.write('\n'.join(new_national_numbers))

with open(base_dir + "OldNationalCardDataset/labels.txt", "w", encoding="utf-8") as output:
    output.write('\n'.join(old_national_numbers))

# Quick sanity check: count and first few labels of each set.
print('new len:', len(new_national_numbers), '\nhead:', new_national_numbers[0:5], '\n')
print('old len:', len(old_national_numbers), '\nhead:', old_national_numbers[0:5])

new len: 10000 
head: ['۱۹۷۰۲۶۳۰۲۴', '۲۲۷۸۱۲۴۰۹۹', '۳۶۵۴۶۳۸۵۵۴', '۷۵۳۷۹۸۹۹۰۷', '۱۲۵۱۹۴۶۸۲۸'] 

old len: 10000 
head: ['۹۸۳-۴۸۷۸۰۷-۹', '۸۵۰-۱۵۳۰۳۶-۰', '۹۱۰-۴۳۴۷۲۵-۰', '۶۸۲-۸۳۳۷۹۴-۸', '۸۱۹-۸۲۰۱۷۵-۰']


## Generate Images

In [None]:
# Card background images; the dataset generators copy them and draw the
# generated IDs on top.
national_backgrounds_dir = base_dir + 'NationalCardBackgrounds/'
new_national_img = Image.open(national_backgrounds_dir + 'NationalCardNew.jpg')
old_national_img = Image.open(national_backgrounds_dir + 'NationalCardOld.jpg')

In [None]:
font_size = 50
font = ImageFont.truetype(base_dir + 'Fonts/BYekan+ Bold.ttf', font_size)

def GenerateNewNationalDataset(font=font):
    """Render every new-format national ID onto the card and save a crop.

    For each entry of ``new_national_numbers`` the background is copied, the
    number is drawn in black, the region around the text is cropped with a
    fixed margin, rotated by a random angle between -3 and 3 degrees, and
    cropped again with random margins. The result is written to
    ``NewNationalCardDataset/<number>.jpg`` under ``base_dir``.

    Args:
        font: Font used to draw and measure the text. It defaults to the
            font loaded just above and is bound at definition time, so
            rebinding the global ``font`` later does not affect this
            function.
    """
    # Offsets of the text's right/bottom edge from the right/bottom edge of
    # a card of the given size.
    width_pos, height_pos = (250, 520)
    width, height = (1181, 750)
    # Margin kept around the text by the first crop so the rotation has
    # surrounding card pixels to work with.
    pad_x, pad_y = (150, 50)

    for number in tqdm(new_national_numbers):
        img = new_national_img.copy()

        drawn = ImageDraw.Draw(img)

        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        # NOTE(review): height uses `bottom` (not bottom - top) on the
        # assumption that textsize included the offset above the glyphs;
        # confirm the layout against the Pillow version in use.
        left, top, right, bottom = drawn.textbbox((0, 0), number, font=font)
        text_width, text_height = right - left, bottom

        text_x = width - text_width - width_pos
        text_y = height - text_height - height_pos
        drawn.text((text_x, text_y), number, font=font, fill=(0, 0, 0))

        # First crop: the text plus the fixed margin on every side.
        img = img.crop((text_x - pad_x,
                        text_y - pad_y,
                        width - width_pos + pad_x,
                        height - height_pos + pad_y))

        angle = random.randint(-3, 3)
        img = img.rotate(angle)

        # Second crop: trim the margin back to a small random border so the
        # framing differs between samples.
        img = img.crop((pad_x + random.randint(-40, -10),
                        pad_y + random.randint(-10, 0),
                        text_width + pad_x + random.randint(10, 40),
                        text_height + pad_y + random.randint(10, 20)))

        img.save(base_dir + 'NewNationalCardDataset/' + number + ".jpg", "JPEG")

GenerateNewNationalDataset()

100%|██████████| 10000/10000 [02:13<00:00, 74.73it/s]


In [None]:
font_size = 37
font = ImageFont.truetype(base_dir + 'Fonts/YasBd.ttf', font_size)

def GenerateOldNationalDataset(font=font):
    """Render every old-format national ID onto the card and save a crop.

    For each entry of ``old_national_numbers`` the background is copied, the
    number is drawn in black, the region around the text is cropped with a
    fixed margin, rotated by a random angle between -3 and 3 degrees, and
    cropped again with random margins. The result is written to
    ``OldNationalCardDataset/<number>.jpg`` under ``base_dir``.

    Args:
        font: Font used to draw and measure the text. It defaults to the
            font loaded just above and is bound at definition time, so
            rebinding the global ``font`` later does not affect this
            function.
    """
    # Offsets of the text's right/bottom edge from the right/bottom edge of
    # a card of the given size.
    width_pos, height_pos = (170, 380)
    width, height = (800, 497)
    # Margin kept around the text by the first crop so the rotation has
    # surrounding card pixels to work with.
    pad_x, pad_y = (140, 50)

    for number in tqdm(old_national_numbers):
        img = old_national_img.copy()

        drawn = ImageDraw.Draw(img)

        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        # NOTE(review): height uses `bottom` (not bottom - top) on the
        # assumption that textsize included the offset above the glyphs;
        # confirm the layout against the Pillow version in use.
        left, top, right, bottom = drawn.textbbox((0, 0), number, font=font)
        text_width, text_height = right - left, bottom

        text_x = width - text_width - width_pos
        text_y = height - text_height - height_pos
        drawn.text((text_x, text_y), number, font=font, fill=(0, 0, 0))

        # First crop: the text plus the fixed margin on every side.
        img = img.crop((text_x - pad_x,
                        text_y - pad_y,
                        width - width_pos + pad_x,
                        height - height_pos + pad_y))

        angle = random.randint(-3, 3)
        img = img.rotate(angle)

        # Second crop: trim the margin back to a small random border so the
        # framing differs between samples.
        img = img.crop((pad_x + random.randint(-30, -10),
                        pad_y + random.randint(-10, 0),
                        text_width + pad_x + random.randint(10, 30),
                        text_height + pad_y + random.randint(10, 20)))

        img.save(base_dir + 'OldNationalCardDataset/' + number + ".jpg", "JPEG")

GenerateOldNationalDataset()

100%|██████████| 10000/10000 [01:41<00:00, 98.15it/s]


In [None]:
# Credit Cards

# Folder holding the candidate fonts, and the file names found in it.
fonts_path = base_dir + '/Fonts/'
fonts = os.listdir(fonts_path)

# Folder holding the card backgrounds, and the file names found in it.
credit_card_path = base_dir + '/CreditCardBackgrounds/ResizedBackground/'
credit_card_background = os.listdir(credit_card_path)

# Base text size; the generator varies it by one point in either direction.
font_size = 29

# Gets A Random Path
def RandomSelect(path, folder):
    """Return ``path`` joined with one randomly chosen entry of ``folder``.

    ``path`` is used as a plain string prefix, so it must already end with a
    separator; ``folder`` is a non-empty sequence of file names.
    """
    picked_name = random.choice(folder)
    return path + picked_name

def GenerateCreditCardDataset():
    """Render every generated card number onto a random background.

    For each entry of ``credit_card_numbers`` a random font (at
    ``font_size`` plus or minus one) and a random background are picked, the
    number is drawn centred in a colour that contrasts with the background,
    the image is rotated by a random angle between -3 and 3 degrees and
    cropped to the text plus random margins. The result is written to
    ``CreditCardDataset/<number>.jpg`` under ``base_dir``.
    """
    # Size the text is centred in.
    # NOTE(review): presumably the size of the resized backgrounds - confirm.
    width, height = (600, 200)

    for number in tqdm(credit_card_numbers):
        # Select Random Font
        font_type = RandomSelect(fonts_path, fonts)
        font = ImageFont.truetype(font_type, font_size + random.randint(-1, 1))

        # Open the background inside a context manager so its file handle is
        # released every iteration. convert() returns a new image, and
        # forcing RGB keeps the three-channel brightness check and the JPEG
        # save below valid for non-RGB sources.
        with Image.open(RandomSelect(credit_card_path, credit_card_background)) as background:
            img = background.convert('RGB')

        # Check The Color: weighted brightness of the average pixel decides
        # between white text (dark background) and black text.
        average = np.asarray(img).mean(axis=(0, 1))
        brightness = average[0] * 0.299 + average[1] * 0.587 + average[2] * 0.114
        font_color = (255, 255, 255) if brightness < 130 else (0, 0, 0)

        # Put The Text On Image
        drawn = ImageDraw.Draw(img)

        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        # NOTE(review): height uses `bottom` (not bottom - top) on the
        # assumption that textsize included the offset above the glyphs;
        # confirm the layout against the Pillow version in use.
        left, top, right, bottom = drawn.textbbox((0, 0), number, font=font)
        text_width, text_height = right - left, bottom

        text_x = (width - text_width) / 2
        text_y = (height - text_height) / 2
        drawn.text((text_x, text_y), number, font=font, fill=font_color)

        angle = random.randint(-3, 3)
        img = img.rotate(angle)

        # Crop to the text plus a random border so framing varies per sample.
        img = img.crop((text_x + random.randint(-40, -10),
                        text_y + random.randint(-30, -15),
                        (width + text_width) / 2 + random.randint(10, 40),
                        (height + text_height) / 2 + random.randint(15, 30)))

        img.save(base_dir + 'CreditCardDataset/' + number + ".jpg", "JPEG")

GenerateCreditCardDataset()

100%|██████████| 10000/10000 [03:37<00:00, 46.01it/s]


# Network

In [None]:
# Data Augmentation