In [16]:
import os
import shutil

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization  # to create AdamW optimizer

import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

tf.get_logger().setLevel('ERROR')

In [3]:
import matplotlib.pyplot as plt


def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

In [4]:
dataframe = pd.read_csv('./yorumlar.csv')
dataframe

Unnamed: 0,Puan,Yorum
0,1.0,Çak iyi fiyata çok iyi ürün Tam zamanında kusu...
1,1.0,Mükemmel Ürün siparişi verdim ve 24 saatten kı...
2,0.0,beğenmedim ses seviyesi beklediğimden düşük bu...
3,0.0,Klavye Bildiğiniz klavye klavye olduğu için öv...
4,0.0,Ucuz çok iyi değil ama idare eder TV nin plast...
...,...,...
79547,0.0,Kaliteli Urun Hizli Kargo Pazar gunu siparis v...
79548,1.0,Harika bir ürün Umduğumdan çok daha iyi görünt...
79549,1.0,sağlam ve güzel paketleme sağlam ve güzel pake...
79550,0.0,Kargo rezil Mağaza ilgili değil


In [5]:
AUTOTUNE = tf.data.AUTOTUNE
BUFFER_SIZE = 10000
BATCH_SIZE = 64

In [40]:
raw_train_ds = tf.data.experimental.CsvDataset(
    './yorumlar.csv',
    [tf.string, tf.string],
    compression_type=None,
    buffer_size=None,
    header=True,
    field_delim=',',
    use_quote_delim=True,
    na_value='',
    select_cols=None,
    exclude_cols=None
)
train_ds = raw_train_ds.cache().prefetch(buffer_size=AUTOTUNE)
train_dataset = raw_train_ds.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = raw_train_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [7]:
for example, label in train_dataset.take(1):
  print('texts: ', example.numpy()[:3])
  print()
  print('labels: ', label.numpy()[:3])

texts:  [1. 0. 1.]

labels:  [b'Webdenal Say\xc4\xb1n M\xc3\xbc\xc5\x9fterimiz Taksit bilgileri \xc3\xbcr\xc3\xbcn sayfas\xc4\xb1n\xc4\xb1n alt\xc4\xb1nda yer almaktad\xc4\xb1r Sayg\xc4\xb1lar\xc4\xb1m\xc4\xb1zla'
 b'\xc3\x9cr\xc3\xbcn gelmedi \xc3\x9cr\xc3\xbcn gelmedi \xc3\xbcr\xc3\xbcn elime ula\xc5\x9fmad\xc4\xb1 birde yorum isteniyor'
 b'hitachi Hitachi ht 49 1700 ud modelini ald\xc4\xb1m g\xc3\xb6r\xc3\xbcnt\xc3\xbc kalitesi g\xc3\xbczel ses klasik Vestel sesi yani idare eder ekran\xc4\xb1n tam kar\xc5\x9f\xc4\xb1s\xc4\xb1nda durursan\xc4\xb1z g\xc3\xb6r\xc3\xbcnt\xc3\xbc daha net canl\xc4\xb1 ancak hafif yandan bak\xc4\xb1nca g\xc3\xb6r\xc3\xbcnt\xc3\xbc bulan\xc4\xb1k oluyor yani 178 derece yandan felan net g\xc3\xb6r\xc3\xbcnt\xc3\xbc almak biraz hayal']


In [9]:
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)

data = dataframe["Yorum"].values.tolist()
encoder.adapt(data)

In [27]:
val_ds = tf.data.experimental.CsvDataset(
    './yorumlar.csv',
    [tf.string, tf.string],
    compression_type=None,
    buffer_size=None,
    header=True,
    field_delim=',',
    use_quote_delim=True,
    na_value='',
    select_cols=None,
    exclude_cols=None
)

val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [28]:
test_ds = tf.data.experimental.CsvDataset(
    './yorumlar.csv',
    [tf.string, tf.string],
    compression_type=None,
    buffer_size=None,
    header=True,
    field_delim=',',
    use_quote_delim=True,
    na_value='',
    select_cols=None,
    exclude_cols=None
)

test_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [13]:
for text_batch, label_batch in train_ds.take(1):
    print(text_batch.numpy())
    print(label_batch.numpy())


1.0
b'\xc3\x87ak iyi fiyata \xc3\xa7ok iyi \xc3\xbcr\xc3\xbcn Tam zaman\xc4\xb1nda kusursuz geldi servis randevusu online al\xc4\xb1nabiliyor ve ertesi g\xc3\xbcn kurulumu yap\xc4\xb1ld\xc4\xb1 te\xc5\x9fekk\xc3\xbcrler'


2023-01-08 21:56:21.688038: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [14]:
for text_batch, label_batch in train_ds.take(1):
  for i in range(3):
    print(f'Review: {text_batch.numpy()}')
    label = label_batch.numpy()
    print(f'Label : {label} ({label})')

Review: 1.0
Label : b'\xc3\x87ak iyi fiyata \xc3\xa7ok iyi \xc3\xbcr\xc3\xbcn Tam zaman\xc4\xb1nda kusursuz geldi servis randevusu online al\xc4\xb1nabiliyor ve ertesi g\xc3\xbcn kurulumu yap\xc4\xb1ld\xc4\xb1 te\xc5\x9fekk\xc3\xbcrler' (b'\xc3\x87ak iyi fiyata \xc3\xa7ok iyi \xc3\xbcr\xc3\xbcn Tam zaman\xc4\xb1nda kusursuz geldi servis randevusu online al\xc4\xb1nabiliyor ve ertesi g\xc3\xbcn kurulumu yap\xc4\xb1ld\xc4\xb1 te\xc5\x9fekk\xc3\xbcrler')
Review: 1.0
Label : b'\xc3\x87ak iyi fiyata \xc3\xa7ok iyi \xc3\xbcr\xc3\xbcn Tam zaman\xc4\xb1nda kusursuz geldi servis randevusu online al\xc4\xb1nabiliyor ve ertesi g\xc3\xbcn kurulumu yap\xc4\xb1ld\xc4\xb1 te\xc5\x9fekk\xc3\xbcrler' (b'\xc3\x87ak iyi fiyata \xc3\xa7ok iyi \xc3\xbcr\xc3\xbcn Tam zaman\xc4\xb1nda kusursuz geldi servis randevusu online al\xc4\xb1nabiliyor ve ertesi g\xc3\xbcn kurulumu yap\xc4\xb1ld\xc4\xb1 te\xc5\x9fekk\xc3\xbcrler')
Review: 1.0
Label : b'\xc3\x87ak iyi fiyata \xc3\xa7ok iyi \xc3\xbcr\xc3\xbcn Tam zaman\

2023-01-08 21:56:29.571690: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [17]:
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'bir', 've', 'çok', 'ürün', 'bu', 'iyi', 'güzel',
       'ama', 'için', 'daha', 'tavsiye', 'yıldız', 'yok', 'Ürün',
       'ederim', 'da', 'gayet', 'gibi'], dtype='<U13')

In [36]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [37]:
print([layer.supports_masking for layer in model.layers])


[False, True, True, True, True]


In [38]:
sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')
predictions = model.predict(np.array([sample_text]))
print(predictions[0])

[0.04080091]


In [39]:
padding = "the " * 2000
predictions = model.predict(np.array([sample_text, padding]))
print(predictions[0])

[0.04080091]


In [45]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

In [46]:
history = model.fit(raw_train_ds, epochs=10,
                    validation_data=test_dataset,
                    validation_steps=30)

Epoch 1/10


ValueError: in user code:

    File "/Users/ma/Library/Python/3.9/lib/python/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/Users/ma/Library/Python/3.9/lib/python/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/ma/Library/Python/3.9/lib/python/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/Users/ma/Library/Python/3.9/lib/python/site-packages/keras/engine/training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "/Users/ma/Library/Python/3.9/lib/python/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/ma/Library/Python/3.9/lib/python/site-packages/keras/engine/input_spec.py", line 232, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_2' (type Sequential).
    
    Input 0 of layer "bidirectional_2" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 64)
    
    Call arguments received by layer 'sequential_2' (type Sequential):
      • inputs=tf.Tensor(shape=(), dtype=string)
      • training=True
      • mask=None
