In [3]:
import tensorflow as tf

from keras.layers import TextVectorization

In [4]:
# Create the layer with no arguments, i.e. with the library's default settings.
text_vectorization = TextVectorization()

In [5]:
# Three short Turkish sentences used as the toy corpus.
data = ["bugün hava çok güzel", "Ali , Efe ve Ece", "Selam Söyle"]

# Learn the vocabulary from the corpus.
text_vectorization.adapt(data)

# Last expression of the cell: the learned vocabulary is displayed.
text_vectorization.get_vocabulary()

['',
 '[UNK]',
 'çok',
 've',
 'söyle',
 'selam',
 'hava',
 'güzel',
 'efe',
 'ece',
 'bugün',
 'ali']

In [6]:
# Apply the adapted layer to the corpus and display the resulting integer tensor.
vectorized_text = text_vectorization(data)
vectorized_text


<tf.Tensor: shape=(3, 4), dtype=int64, numpy=
array([[10,  6,  2,  7],
       [11,  8,  3,  9],
       [ 5,  4,  0,  0]])>

In [7]:
import re
import string

In [9]:
def standardize_text(string_tensor):
    """Normalize raw text before it is split into tokens.

    Lowercases the text, deletes every character in ``string.punctuation``,
    collapses each run of whitespace to a single space, and strips leading
    and trailing whitespace.

    Args:
        string_tensor: String tensor holding the text to normalize; it is
            treated as UTF-8 when lowercasing.

    Returns:
        The normalized string tensor.
    """
    # encoding="utf-8" is required here: with the default encoding the input is
    # treated as ASCII, so non-ASCII capitals such as "Ç", "Ö" or "Ş" would be
    # left unchanged.
    # NOTE(review): the lowercasing is presumably not Turkish-locale aware
    # ("I" -> "i" rather than "ı") — confirm whether that matters for this data.
    lowered = tf.strings.lower(string_tensor, encoding="utf-8")

    # Character class covering the ASCII punctuation listed in string.punctuation.
    punctuation_pattern = '[%s]' % re.escape(string.punctuation)
    without_punctuation = tf.strings.regex_replace(lowered, punctuation_pattern, '')

    # Removing punctuation can leave doubled spaces ("Ali , Efe" -> "Ali  Efe"),
    # so whitespace is collapsed after that step and the ends are trimmed last.
    collapsed = tf.strings.regex_replace(without_punctuation, '\\s+', ' ')
    return tf.strings.strip(collapsed)

In [10]:
def split_fn(string_tensor):
    """Split text into tokens by calling ``tf.strings.split`` with no separator argument."""
    tokens = tf.strings.split(string_tensor)
    return tokens



In [11]:
# Rebuild the layer with the custom standardization and split callables.
text_vectorization = TextVectorization(
    standardize=standardize_text,
    split=split_fn,
    output_mode="int",
)
text_vectorization.adapt(data)

# Vectorize the corpus. Only the last expression of a cell is rendered, so the
# result is the single value left at the end.
vectorized_text = text_vectorization(data)
vectorized_text

<tf.Tensor: shape=(3, 4), dtype=int64, numpy=
array([[10,  6,  2,  7],
       [11,  8,  3,  9],
       [ 5,  4,  0,  0]])>

In [12]:
# Vectorize a single string (not wrapped in a list) with the adapted layer.
text = "bugün hava çok güzel"
text_vectorization(text)

<tf.Tensor: shape=(4,), dtype=int64, numpy=array([10,  6,  2,  7])>

In [13]:
# Small tf.data dataset of three single-word samples.
text_dataset = tf.data.Dataset.from_tensor_slices(["kedi", "aslan", "yunus"])

# New layer with an explicit vocabulary cap and a fixed output length.
vectorize_layer = TextVectorization(
    output_sequence_length=4,
    max_tokens=5000,
)

# Adapt on the dataset, batched in groups of up to 64 samples.
vectorize_layer.adapt(text_dataset.batch(64))

2025-01-08 23:23:21.797591: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [15]:
# Display the vocabulary learned from the dataset.
vectorize_layer.get_vocabulary()


['', '[UNK]', 'yunus', 'kedi', 'aslan']

In [26]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, GlobalAveragePooling1D, Input

# Define a simple model: token ids -> embeddings -> average over the sequence
# -> a single sigmoid output.
model = Sequential([
    # The sequence length is declared with an explicit Input layer because the
    # Embedding `input_length` argument is deprecated in Keras 3.
    Input(shape=(10,), dtype="int64"),
    Embedding(input_dim=1000, output_dim=64),
    GlobalAveragePooling1D(),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train or load the model. Both options are commented out, so the model keeps
# its initial (untrained) weights.
# model.fit(...)  # If you want to train the model
# model = tf.keras.models.load_model('model_path')  # If you want to load a previously trained model



In [27]:

import numpy
# Sample input texts
text_data = ["kedi kartal aslan", "fok yunus"]

# Build a TextVectorization layer and adapt it to the sample texts
vectorize_layer = TextVectorization(
    output_mode='int',
    output_sequence_length=10,
    max_tokens=1000,
)
vectorize_layer.adapt(text_data)

# Convert the texts into integer token-id sequences
input_data = vectorize_layer(text_data)

# Run the model defined above on the vectorized input
predictions = model.predict(input_data)

# Print the predictions
print(predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[[0.49681294]
 [0.49765554]]
