In [1]:
import tensorflow
from tensorflow.keras.layers import Reshape
from keras.layers import Input, Embedding, Reshape, LSTM, Dense, Flatten
from keras.models import Model
import os
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
import pickle


In [2]:
batch_size = 16
max_sequence_length = 384
max_word_length = 20
embedding_size = 128
lstm_units = 256
num_classes = 14

directory_train='./training-data'
directory_test='./testing-data'
directory_all='./all-data'
model_path='kotu_model.keras' #modelin konumu
model_test_path='./deneme-veriler'#modelin testi için kullanılacak verinin konumu

In [3]:
#MODELİ ÇALIŞTIR
def model_run(directory):
    all_texts = []

    for filename in os.listdir(directory):
        if filename.endswith('.txt'):
            file_path = os.path.join(directory, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                txt_data = content.split()
                all_texts.append(txt_data)
                
    #TOKENIZER YÜKLE
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer_model = pickle.load(handle)     
    print(tokenizer_model.word_index)        

    all_sequences=[]
    for i in range(len(all_texts)):
        all_sequences.append(tokenizer_model.texts_to_sequences(all_texts[i]))
        
    max_word_length,trunc_type,padding_type = 20,'post','post'

    all_padded_sequences=[]
    for i in range(len(all_sequences)):
        padded_sequence = pad_sequences(all_sequences[i], maxlen=max_word_length, padding=padding_type, truncating=trunc_type)
        all_padded_sequences.append(padded_sequence)

    padded_arrays = []
    for arr in all_padded_sequences:
        pad_width = ((0, max_sequence_length - len(arr)), (0, 0))
        padded_arr = np.pad(arr, pad_width, mode='constant', constant_values=0)
        padded_arrays.append(padded_arr)

    xtrain=np.array(padded_arrays)
    model = tensorflow.keras.models.load_model(model_path)

    prediction=model.predict(xtrain)
    print(prediction.shape)
    #------------------------------------------------------------
    label_list = ["Pad", "Others", "B_Comp", "I_Comp", "B_Date", "I_Date", "B_Time", "I_Time", "B_Receipt", "I_Receipt", "B_Tax", "I_Tax", "B_Amount", "I_Amount"]

    #deneme yapılacak fişin konumu
    with open("./deneme-veriler/deneme-veri.txt", "r", encoding="utf-8") as file:
        words = file.read().split()
    
    predicted_labels = np.argmax(prediction, axis=-1)
    print(predicted_labels)
    
    #klasördeki ilk fişi temsil eder
    fis_1=predicted_labels[0]

    truncated_values = fis_1[:len(words)]#uzun olan diziyi kısalt ve iki dizi boyutunu eşle
    my_map = dict(zip(words, truncated_values))#kelimelere ve karşılarına predictionları 0 1 2 formatında maple

    result_map = {}
    for key, value in my_map.items():#anlamlı gözükmesi için 0 1 2 yerine etiket isimleri olacak şekilde maple
        index = min(value, len(label_list) - 1)
        label = label_list[index]
        result_map[key] = label

    print(result_map)
    for key, value in result_map.items():
        print(f'{key}: {value}')
        

In [4]:

#model_test_path içinde test edilecek veriler txt halde bulunmalıdır
model_run(model_test_path)

{'0': 1, 'A': 2, '1': 3, 'E': 4, 'T': 5, 'I': 6, '2': 7, 'R': 8, 'N': 9, 'L': 10, 'K': 11, '*': 12, 'S': 13, 'O': 14, 'M': 15, '5': 16, ':': 17, '.': 18, '3': 19, '8': 20, '4': 21, '9': 22, 'D': 23, '6': 24, 'U': 25, 'İ': 26, '7': 27, ',': 28, 'B': 29, 'C': 30, 'Y': 31, 'P': 32, 'V': 33, 'e': 34, 'a': 35, 'i': 36, 'H': 37, 'G': 38, 'Z': 39, 'r': 40, 'F': 41, '/': 42, 'Ş': 43, 'n': 44, 'o': 45, 'l': 46, '%': 47, 't': 48, 's': 49, 'Ü': 50, '-': 51, 'k': 52, 'd': 53, 'm': 54, 'u': 55, 'X': 56, 'y': 57, 'c': 58, 'g': 59, 'h': 60, 'z': 61, '#': 62, 'ı': 63, 'b': 64, 'w': 65, 'Ç': 66, 'Ğ': 67, 'Ö': 68, 'p': 69, 'v': 70, 'ş': 71, 'W': 72, 'ü': 73, ')': 74, '(': 75, 'J': 76, 'f': 77, 'x': 78, "'": 79, '+': 80, '$': 81, 'ğ': 82, '»': 83, 'ç': 84, '!': 85, 'Q': 86, '=': 87, 'ö': 88, 'Í': 89, 'ж': 90, '&': 91, 'j': 92, '>': 93, '"': 94, '\\': 95, 'Т': 96, 'О': 97, 'Р': 98, 'q': 99, '×': 100, '@': 101, ';': 102, 'Ø': 103, 'К': 104, '|': 105, '<': 106, 'Á': 107, 'Ú': 108, 'Ș': 109, '[': 110, '_': 1

In [5]:
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QLabel, QPushButton, QFileDialog, QComboBox
from google.cloud import vision
from google.oauth2 import service_account
import io
import tensorflow

credentials = service_account.Credentials.from_service_account_file('key.json')

client = vision.ImageAnnotatorClient(credentials=credentials)

model = tensorflow.keras.models.load_model(model_path)


class ImageReader(QWidget):
    def __init__(self):
        super().__init__()

        self.setWindowTitle("Image Text Reader")
        self.setGeometry(100, 100, 400, 250)

        self.layout = QVBoxLayout()

        self.image_label = QLabel("Görsel burada görünecek")
        self.layout.addWidget(self.image_label)

        self.load_button = QPushButton("Görsel Yükle")
        self.load_button.clicked.connect(self.load_image)
        self.layout.addWidget(self.load_button)

        self.ocr_selector = QComboBox()
        self.ocr_selector.addItems(["Google Vision API"])
        self.layout.addWidget(self.ocr_selector)

        self.text_label = QLabel("Metin burada görünecek")
        self.layout.addWidget(self.text_label)

        self.setLayout(self.layout)

    def load_image(self):
        options = QFileDialog.Options()
        file_path, _ = QFileDialog.getOpenFileName(self, "Görsel Yükle", "", "Image files (*.jpg *.png *.jpeg *.bmp *.gif)", options=options)
        if file_path:
            self.process_image(file_path)

    def process_image(self, file_path):
        ocr_method = self.ocr_selector.currentText()

        if ocr_method == "Google Vision API":
            with io.open(file_path, 'rb') as image_file:
                content = image_file.read()

            image = vision.Image(content=content)

            response = client.text_detection(image=image)
            texts = response.text_annotations

            if texts:
                text = texts[0].description
                print(text)
                self.text_label.setText(text)
            else:
                print("Metin tespit edilemedi.")

if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = ImageReader()
    window.show()
    sys.exit(app.exec_())


EXARAASLAY PETROL INS. HAF. NAK
TAAH SAN TIC. LTD. STI.
CORFEZ MAH. ANKARA YOL CD. N. 135
REAL HIP. MARKET YANI KOCAELI
ALEMDAR VD:6980051870
TL:262 3224035 T.S:8470/15223
MERSIS NO:0698005187000015
22-02-2022
FIS NO :96
SAAT: 14:27
041 LG 636
42.630 LT X 15.500
MOTORIN MED *18
KDV
TOP
KREDI
*660.76
*100.79
#660.76
*660.76
CEK


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


: 