In [58]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, Dense, InputLayer,GRU, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [59]:
def file_to_sentence_list(file_path): 
    with open(file_path, 'r', encoding='utf-8') as file: 
        text = file.read() 
  
    # Splitting the text into sentences using 
    # delimiters like '.', '?', and '!' 
    sentences = [sentence.strip() for sentence in re.split( 
        r'(?<=[.!?])\s+', text) if sentence.strip()] 
  
    return sentences 
  
file_path = 'kayip_sehir.txt'
text_data = file_to_sentence_list(file_path) 

In [60]:
tokenizer = Tokenizer()

In [61]:
tokenizer.fit_on_texts(text_data) 
total_words = len(tokenizer.word_index) + 1

In [62]:
input_sequences = []
for line in text_data: 
    token_list = tokenizer.texts_to_sequences([line])[0] 
    for i in range(1, len(token_list)): 
        n_gram_sequence = token_list[:i+1] 
        input_sequences.append(n_gram_sequence) 
  

In [63]:
max_len = max([len(x) for x in input_sequences])
max_len

25

In [64]:
padded_input_sequences =np.array(pad_sequences( 
    input_sequences, maxlen=max_len, padding='pre'))

In [65]:
x = padded_input_sequences[:,:-1]
x.shape

(901, 24)

In [66]:
y = padded_input_sequences[:,-1]
y.shape

(901,)

In [67]:
y =to_categorical(y, num_classes=total_words) 
y.shape

(901, 603)

In [68]:
model = Sequential()
model.add(InputLayer(input_shape=(max_len-1,))) 
model.add(Embedding(total_words, 10))
model.add(GRU(800, return_sequences=True)) 
model.add(GRU(800)) 
model.add(Dense(800, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(total_words, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


In [69]:
model.fit(x, y, epochs=50, verbose=1) 

Epoch 1/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 706ms/step - accuracy: 0.0222 - loss: 6.3823
Epoch 2/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 903ms/step - accuracy: 0.0601 - loss: 6.1061
Epoch 3/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 808ms/step - accuracy: 0.0482 - loss: 6.0783
Epoch 4/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 731ms/step - accuracy: 0.0536 - loss: 5.9879
Epoch 5/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 769ms/step - accuracy: 0.0543 - loss: 5.8239
Epoch 6/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 1s/step - accuracy: 0.0688 - loss: 5.6215
Epoch 7/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 887ms/step - accuracy: 0.0641 - loss: 5.3018
Epoch 8/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 875ms/step - accuracy: 0.0567 - loss: 5.0005
Epoch 9/50
[1m29/29[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x1d592217880>

In [70]:
seed_text = "bu şehir büyüsünden"
next_words = 5
  
for _ in range(next_words): 
    token_list = tokenizer.texts_to_sequences([seed_text])[0] 
    token_list = pad_sequences( 
        [token_list], maxlen=max_len-1, padding='pre') 
    predicted_probs = model.predict(token_list) 
    predicted_word = tokenizer.index_word[np.argmax(predicted_probs)] 
    seed_text += " " + predicted_word 
  
print("Next predicted words:", seed_text) 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 306ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
Next predicted words: bu şehir büyüsünden zenginliğe ve bilgelik arayışına kapılanların


In [71]:
import time
text = "Aras"

for i in range(5):
  # tokenize
  token_text = tokenizer.texts_to_sequences([text])[0]
  # padding
  padded_token_text = pad_sequences([token_text], maxlen=10, padding='pre')
  # predict
  pos = np.argmax(model.predict(padded_token_text))

  for word,index in tokenizer.word_index.items():
    if index == pos:
      text = text + " " + word
      print(text)
      time.sleep(1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Aras büyükbabasının
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
Aras büyükbabasının verdiği
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
Aras büyükbabasının verdiği anahtarı
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
Aras büyükbabasının verdiği anahtarı çıkardı
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
Aras büyükbabasının verdiği anahtarı çıkardı ve


In [72]:
import time
text = "Peşinde olan"

for i in range(5):
  # tokenize
  token_text = tokenizer.texts_to_sequences([text])[0]
  # padding
  padded_token_text = pad_sequences([token_text], maxlen=10, padding='pre')
  # predict
  pos = np.argmax(model.predict(padded_token_text))

  for word,index in tokenizer.word_index.items():
    if index == pos:
      text = text + " " + word
      print(text)
      time.sleep(1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step
Peşinde olan aras
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
Peşinde olan aras evinde
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
Peşinde olan aras evinde haritayı
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
Peşinde olan aras evinde haritayı yeniden
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
Peşinde olan aras evinde haritayı yeniden incelemekle


In [73]:
import time
text = "Altın işlemeli"

for i in range(5):
  # tokenize
  token_text = tokenizer.texts_to_sequences([text])[0]
  # padding
  padded_token_text = pad_sequences([token_text], maxlen=10, padding='pre')
  # predict
  pos = np.argmax(model.predict(padded_token_text))

  for word,index in tokenizer.word_index.items():
    if index == pos:
      text = text + " " + word
      print(text)
      time.sleep(1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step
Altın işlemeli gün
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Altın işlemeli gün hiçbir
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
Altın işlemeli gün hiçbir iz
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
Altın işlemeli gün hiçbir iz bırakmadan
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step
Altın işlemeli gün hiçbir iz bırakmadan ortadan
