In [1]:
import tensorflow
import os
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pickle


In [2]:
# --- Model / tensor dimensions ------------------------------------------
batch_size = 16            # not referenced by the cells visible in this notebook
max_sequence_length = 384  # token rows per receipt; model_run zero-pads up to this
max_word_length = 20       # model_run re-declares its own local value of 20
embedding_size = 128       # not referenced by the cells visible in this notebook
lstm_units = 256           # not referenced by the cells visible in this notebook
num_classes = 14           # same count as the 14 entries of label_list in model_run

# --- Filesystem locations -----------------------------------------------
directory_train = './training-data'
directory_test = './testing-data'
directory_all = './all-data'
model_path = 'model.keras'  # location of the saved model
model_test_path = './deneme-veriler'  # location of the data used to test the model

In [3]:
# Sample input for model_run (called on this string in a later cell).
# NOTE(review): looks like raw OCR text of a single receipt - confirm the source.
txt_data = """VAYRO
ÖMER ALBAYRAM
KURUKÖPRÜ MH. ÇAKMAK CD.
ÇUKUROVA İŞH.NO:105/1
SEYHAN / ADANA
SUBURCU V.D. 39937304660
: 27/08/2021
: 16:56:55
: 0049
TARIH
SAAT
FİŞ NO
TEKSTİL
*32,99
*2,44
*32,99
TOPKDV
TOPLAM
KREDİ
İŞ BANKASI
**1069
*32,99
T.SİCİL NO: .
WEB
EKÜ NO:0001
Z NO:0691
İŞYERİ NO:667118459
BÖLÜM: 0001/VAYRO GiV
27/08/2021
TERH NO: SOMKG601
16:57
SATIŞ
AID: A0000000041010
APP LABEL: KREDI KA
**** **** **** 1069
32, 99 TL
# YURTICI MC KREDI KARTI #
ONAY KODU: 338472
VERSİYON: IIWE280 EINGE2M68IS214C77 RC-00
SIRA NO: IA000041
REF: 123916503972
BATCH NO: 000694
UUKH: 00682900
BU BELGEVİ SAKLAY INIZ
KARI HAMİLİ NÜSHASI
ÖMER ALBAYRAM
TiC.SICIL NO:-1
TÜRKİYE $
BANKASI
MASTERCARD PAYPASS
EKÜ NO:0001
Z NO:0691
NF 2A 20144856
"""

In [4]:
from collections import Counter
import re

def most_common(lst):
    """Return the element that occurs most often in ``lst``.

    Ties are resolved the way ``Counter.most_common`` resolves them.
    Returns ``None`` when ``lst`` contains no elements.
    """
    tally = Counter(lst)
    if not tally:
        return None
    # most_common(1) yields a single (element, count) pair; keep the element.
    (winner, _count), = tally.most_common(1)
    return winner
#RUN THE MODEL
def _group_entities(tokens, labels):
    """Merge B_/I_ tagged tokens into text spans, bucketed by their opening B_ tag.

    A "B_*" label opens a new span, an "I_*" label extends whichever span is
    currently open (it is ignored when none is open), and any other label
    closes the open span. A span that is still open after the last token is
    closed as well, so an entity at the very end of the text is not lost.

    Returns a dict mapping each "B_*" tag to the list of span strings found.
    """
    buckets = {
        "B_Comp": [], "B_Date": [], "B_Time": [],
        "B_Receipt": [], "B_Tax": [], "B_Amount": [],
    }
    open_label = None
    open_text = ""

    for token, label in zip(tokens, labels):
        if label.startswith("I_"):
            if open_label:
                open_text += " " + token
            continue

        # Either a new entity starts or a non-entity token was hit:
        # in both cases the span collected so far is complete.
        if open_label in buckets:
            buckets[open_label].append(open_text)

        if label.startswith("B_"):
            open_label, open_text = label, token
        else:
            open_label, open_text = None, ""

    # Flush the span that reaches the end of the token list.
    if open_label in buckets:
        buckets[open_label].append(open_text)

    return buckets


def _keep_numeric(values):
    """Return ``values`` with everything except digits, '.' and ',' removed."""
    return [re.sub(r'[^\d.,]', '', value) for value in values]


def model_run(txt_data, model=None, tokenizer=None):
    """Tag the whitespace-separated tokens of ``txt_data`` and extract receipt fields.

    Parameters
    ----------
    txt_data : str
        Receipt text. It is split on whitespace; only the first
        ``max_sequence_length`` tokens are tagged because the model input has
        exactly that many rows.
    model : optional
        An already loaded Keras model. When omitted, the model is loaded from
        ``model_path`` on every call (the original behaviour).
    tokenizer : optional
        An already loaded tokenizer exposing ``word_index`` and
        ``texts_to_sequences``. When omitted, it is unpickled from
        ``tokenizer.pickle`` on every call (the original behaviour).

    Returns
    -------
    tuple
        ``(company, date, time, receipt_no, tax, amount)``: for each field the
        most frequent extracted span, or ``None`` when nothing was tagged for
        it. Receipt number, tax and amount are reduced to digits, '.' and ','.

    Progress and intermediate results are printed to stdout.
    """
    label_list = ["Pad", "Others", "B_Comp", "I_Comp", "B_Date", "I_Date", "B_Time", "I_Time", "B_Receipt", "I_Receipt", "B_Tax", "I_Tax", "B_Amount", "I_Amount"]
    word_length, trunc_type, padding_type = 20, 'post', 'post'

    # np.pad rejects a negative pad width, so a text with more tokens than the
    # model accepts must be cropped before padding instead of crashing.
    tokens = txt_data.split()[:max_sequence_length]

    if tokenizer is None:
        # NOTE(security): pickle.load can execute arbitrary code; only load a
        # tokenizer file that comes from a trusted source.
        with open('tokenizer.pickle', 'rb') as handle:
            tokenizer = pickle.load(handle)
    print(tokenizer.word_index)

    # One row of (at most) word_length ids per token, padded/truncated at the end.
    char_ids = pad_sequences(
        tokenizer.texts_to_sequences(tokens),
        maxlen=word_length, padding=padding_type, truncating=trunc_type,
    )
    # Zero-pad the token axis up to the fixed number of rows the model expects.
    token_rows = np.pad(
        char_ids,
        ((0, max_sequence_length - len(char_ids)), (0, 0)),
        mode='constant', constant_values=0,
    )
    model_input = np.array([token_rows])  # batch containing this one receipt

    if model is None:
        model = tensorflow.keras.models.load_model(model_path)

    prediction = model.predict(model_input)
    print(prediction.shape)
    #------------------------------------------------------------

    predicted_labels = np.argmax(prediction, axis=-1)

    # Keep only the predictions that belong to real tokens (drop the padding rows).
    truncated_values = predicted_labels[0][:len(tokens)]
    print(truncated_values)

    token_labels = [label_list[label_id] for label_id in truncated_values]
    for token, label in zip(tokens, token_labels):
        print(token)
        print(label)

    entities = _group_entities(tokens, token_labels)
    company_array = entities["B_Comp"]
    date_array = entities["B_Date"]
    time_array = entities["B_Time"]
    receipt_array = entities["B_Receipt"]
    tax_array = entities["B_Tax"]
    amount_array = entities["B_Amount"]

    # Print the arrays
    print("Companies:", company_array)
    print("Dates:", date_array)
    print("Times:", time_array)
    print("Receipts:", receipt_array)
    print("Taxes:", tax_array)
    print("Amounts:", amount_array)

    # Keep only digits, dots and commas in the numeric fields.
    cleaned_amount_array = _keep_numeric(amount_array)
    print("Cleaned Amounts:", cleaned_amount_array)

    cleaned_tax_array = _keep_numeric(tax_array)
    print("Cleaned Taxes:", cleaned_tax_array)

    cleaned_receipt_array = _keep_numeric(receipt_array)
    print("Cleaned Receipts:", cleaned_receipt_array)

    most_common_company = most_common(company_array)
    most_common_date = most_common(date_array)
    most_common_time = most_common(time_array)
    most_common_receipt = most_common(cleaned_receipt_array)
    most_common_tax = most_common(cleaned_tax_array)
    most_common_amount = most_common(cleaned_amount_array)

    print("Most Common Company:", most_common_company)
    print("Most Common Date:", most_common_date)
    print("Most Common Time:", most_common_time)
    print("Most Common Receipt:", most_common_receipt)
    print("Most Common Tax:", most_common_tax)
    print("Most Common Amount:", most_common_amount)

    return most_common_company,most_common_date,most_common_time,most_common_receipt,most_common_tax,most_common_amount

In [5]:

# Original note: the data to be tested must be available as .txt files inside model_test_path.
# NOTE(review): this call passes the inline txt_data string instead; model_test_path is not read here.
model_run(txt_data)

{'0': 1, 'A': 2, '1': 3, 'E': 4, 'T': 5, 'I': 6, '2': 7, 'R': 8, 'N': 9, 'L': 10, 'K': 11, '*': 12, 'S': 13, 'O': 14, 'M': 15, '5': 16, ':': 17, '.': 18, '3': 19, '8': 20, '4': 21, '9': 22, 'D': 23, '6': 24, 'U': 25, 'İ': 26, '7': 27, ',': 28, 'B': 29, 'C': 30, 'Y': 31, 'P': 32, 'V': 33, 'e': 34, 'a': 35, 'i': 36, 'H': 37, 'G': 38, 'Z': 39, 'r': 40, 'F': 41, '/': 42, 'Ş': 43, 'n': 44, 'o': 45, 'l': 46, '%': 47, 't': 48, 's': 49, 'Ü': 50, '-': 51, 'k': 52, 'd': 53, 'm': 54, 'u': 55, 'X': 56, 'y': 57, 'c': 58, 'g': 59, 'h': 60, 'z': 61, '#': 62, 'ı': 63, 'b': 64, 'w': 65, 'Ç': 66, 'Ğ': 67, 'Ö': 68, 'p': 69, 'v': 70, 'ş': 71, 'W': 72, 'ü': 73, ')': 74, '(': 75, 'J': 76, 'f': 77, 'x': 78, "'": 79, '+': 80, '$': 81, 'ğ': 82, '»': 83, 'ç': 84, '!': 85, 'Q': 86, '=': 87, 'ö': 88, 'Í': 89, 'ж': 90, '&': 91, 'j': 92, '>': 93, '"': 94, '\\': 95, 'Т': 96, 'О': 97, 'Р': 98, 'q': 99, '×': 100, '@': 101, ';': 102, 'Ø': 103, 'К': 104, '|': 105, '<': 106, 'Á': 107, 'Ú': 108, 'Ș': 109, '[': 110, '_': 1

('VAYRO ÖMER ALBAYRAM KURUKÖPRÜ',
 '27/08/2021',
 '16:56:55',
 '0049',
 '2,44',
 '32,99')

In [6]:
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QLabel, QPushButton, QFileDialog, QComboBox
from google.cloud import vision
from google.oauth2 import service_account
import io
import tensorflow
from collections import Counter
import re

# Build Google service-account credentials from the local file 'key.json'.
# NOTE(review): the key file is a secret - keep it out of version control.
credentials = service_account.Credentials.from_service_account_file('key.json')

# Vision API client, authenticated with the credentials above; used by ImageReader.process_image.
client = vision.ImageAnnotatorClient(credentials=credentials)

# Load the saved Keras model from model_path (assigned in the configuration cell).
# NOTE(review): process_image calls model_run(text) without handing this object over,
# so nothing visible in this notebook reads `model` - confirm whether it is still needed.
model = tensorflow.keras.models.load_model(model_path)

class ImageReader(QWidget):
    """Window that OCRs a chosen image and displays the fields found by model_run.

    The widget offers a load button, an OCR-engine selector (only
    "Google Vision API" is available) and one label per extracted field:
    company, date, time, receipt number, tax and amount.
    """

    def __init__(self):
        """Build the widgets and stack them in a vertical layout."""
        super().__init__()

        self.setWindowTitle("Image Text Reader")
        self.setGeometry(100, 100, 400, 250)

        self.layout = QVBoxLayout()

        # Button that opens the file dialog.
        self.load_button = QPushButton("Görsel Yükle")
        self.load_button.clicked.connect(self.load_image)
        self.layout.addWidget(self.load_button)

        # OCR engine selector.
        self.ocr_selector = QComboBox()
        self.ocr_selector.addItems(["Google Vision API"])
        self.layout.addWidget(self.ocr_selector)

        # Status line; also used to report an OCR error.
        self.text_label = QLabel("Metin burada görünecek")
        self.layout.addWidget(self.text_label)

        # One label per extracted receipt field.
        self.company_label = QLabel("Şirket: ")
        self.layout.addWidget(self.company_label)

        self.date_label = QLabel("Tarih: ")
        self.layout.addWidget(self.date_label)

        self.time_label = QLabel("Zaman: ")
        self.layout.addWidget(self.time_label)

        self.receipt_label = QLabel("Fiş: ")
        self.layout.addWidget(self.receipt_label)

        self.tax_label = QLabel("Vergi: ")
        self.layout.addWidget(self.tax_label)

        self.amount_label = QLabel("Tutar: ")
        self.layout.addWidget(self.amount_label)

        self.setLayout(self.layout)

    def load_image(self):
        """Ask the user for an image file and process it if one was chosen."""
        options = QFileDialog.Options()
        file_path, _ = QFileDialog.getOpenFileName(self, "Görsel Yükle", "", "Image files (*.jpg *.png *.jpeg *.bmp *.gif)", options=options)
        if file_path:
            self.process_image(file_path)

    def process_image(self, file_path):
        """Run OCR on ``file_path``, tag the text with model_run and fill the labels.

        A field for which model_run returns ``None`` is shown as an empty
        value instead of raising ``TypeError`` on string concatenation.
        An error reported by the Vision API is printed and shown in the
        status label; the field labels are then left untouched.
        """
        ocr_method = self.ocr_selector.currentText()

        if ocr_method == "Google Vision API":
            with io.open(file_path, 'rb') as image_file:
                content = image_file.read()

            image = vision.Image(content=content)

            response = client.text_detection(image=image)

            # The API reports request-level failures in response.error
            # rather than by raising, so check it before using the result.
            if response.error.message:
                print(response.error.message)
                self.text_label.setText(response.error.message)
                return

            texts = response.text_annotations

            if texts:
                # The first annotation's description is passed on as the text to tag.
                text = texts[0].description
                fields = model_run(text)

                targets = (
                    (self.company_label, "Şirket: "),
                    (self.date_label, "Tarih: "),
                    (self.time_label, "Zaman: "),
                    (self.receipt_label, "Fiş: "),
                    (self.tax_label, "Vergi: "),
                    (self.amount_label, "Tutar: "),
                )
                for (label, prefix), value in zip(targets, fields):
                    # model_run yields None for a field it could not find.
                    label.setText(prefix + ("" if value is None else value))
            else:
                print("Metin tespit edilemedi.")

if __name__ == "__main__":
    # Qt permits a single QApplication per process. When this cell is run again
    # in the same kernel the previous instance still exists, so reuse it instead
    # of constructing a second one.
    app = QApplication.instance() or QApplication(sys.argv)
    window = ImageReader()
    window.show()
    # Blocks until the window is closed, then exits with Qt's return code
    # (inside a notebook this surfaces as a SystemExit message).
    sys.exit(app.exec_())


{'0': 1, 'A': 2, '1': 3, 'E': 4, 'T': 5, 'I': 6, '2': 7, 'R': 8, 'N': 9, 'L': 10, 'K': 11, '*': 12, 'S': 13, 'O': 14, 'M': 15, '5': 16, ':': 17, '.': 18, '3': 19, '8': 20, '4': 21, '9': 22, 'D': 23, '6': 24, 'U': 25, 'İ': 26, '7': 27, ',': 28, 'B': 29, 'C': 30, 'Y': 31, 'P': 32, 'V': 33, 'e': 34, 'a': 35, 'i': 36, 'H': 37, 'G': 38, 'Z': 39, 'r': 40, 'F': 41, '/': 42, 'Ş': 43, 'n': 44, 'o': 45, 'l': 46, '%': 47, 't': 48, 's': 49, 'Ü': 50, '-': 51, 'k': 52, 'd': 53, 'm': 54, 'u': 55, 'X': 56, 'y': 57, 'c': 58, 'g': 59, 'h': 60, 'z': 61, '#': 62, 'ı': 63, 'b': 64, 'w': 65, 'Ç': 66, 'Ğ': 67, 'Ö': 68, 'p': 69, 'v': 70, 'ş': 71, 'W': 72, 'ü': 73, ')': 74, '(': 75, 'J': 76, 'f': 77, 'x': 78, "'": 79, '+': 80, '$': 81, 'ğ': 82, '»': 83, 'ç': 84, '!': 85, 'Q': 86, '=': 87, 'ö': 88, 'Í': 89, 'ж': 90, '&': 91, 'j': 92, '>': 93, '"': 94, '\\': 95, 'Т': 96, 'О': 97, 'Р': 98, 'q': 99, '×': 100, '@': 101, ';': 102, 'Ø': 103, 'К': 104, '|': 105, '<': 106, 'Á': 107, 'Ú': 108, 'Ș': 109, '[': 110, '_': 1

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
import tensorflow
# Show which TensorFlow version this kernel is running.
print(tensorflow.__version__)

2.6.0
