In [1]:
import numpy as np
import pandas as pd
import json

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, GRU, Embedding, CuDNNGRU
from tensorflow.python.keras.optimizers import adam_v2
import tensorflow as tf
import matplotlib.pyplot as plt

2023-01-08 22:36:40.405041: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def plot_graph_loss(history, metric, transparent=False):
    plt.plot(history.history[metric])
    plt.plot(history.history["val_" + metric], "")
    plt.xlabel("Devirler")
    plt.ylabel("Kayıp")
    plt.legend(["Kayıp", "Devir Test Kaybı"])
    plt.savefig(
        "eticaret" + metric + ".png",
        dpi=(250),
        bbox_inches="tight",
        transparent=transparent,
    )

    plt.close()

In [3]:
def plot_graph_accuracy(history, metric, transparent=False):
    plt.plot(history.history[metric])
    plt.plot(history.history["val_" + metric], "")
    plt.xlabel("Devirler")
    plt.ylabel("Doğruluk")
    plt.legend(["Doğruluk", "Devir Test Doğruluğu"])
    plt.savefig(
        "eticaret" + metric + ".png",
        dpi=(250),
        bbox_inches="tight",
        transparent=transparent,
    )

    plt.close()

In [4]:
yorumlar = pd.read_csv("yorumlar.csv")
target = yorumlar["Puan"].values.tolist()
data = yorumlar["Yorum"].values.tolist()

cutoff = int(len(data) * 0.90)
x_train, x_test = data[:cutoff], data[cutoff:]
y_train, y_test = target[:cutoff], target[cutoff:]

In [5]:
with open('tf_tokenizer.json') as f:
    json_string = json.load(f)

tf_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json_string)

In [29]:
def tokenlestir(yorumListesi):
    y_yorumlar = []
    for yorum in yorumListesi:
        y_yorum = tf_tokenizer.texts_to_sequences(str(yorum).lower())[0][:50]

        if len(y_yorum) < 50:
            sifirlar = list(np.zeros(50 - len(y_yorum), dtype=int))
            y_yorum = sifirlar + y_yorum

        y_yorumlar.append(y_yorum)
    return np.array(y_yorumlar, dtype=np.dtype(np.int64))

In [30]:
egitim_kume = tokenlestir(x_train)
test_kume = tokenlestir(x_test)

In [44]:
len(tf_tokenizer.word_counts)

196544

In [45]:
model = Sequential()

model.add(
    Embedding(input_dim=len(tf_tokenizer.word_counts), output_dim=50, input_length=50, name="embedding_layer")
)

model.add(GRU(units=16, return_sequences=True, reset_after=False))
model.add(GRU(units=8, return_sequences=True, reset_after=False))
model.add(GRU(units=4, return_sequences=False, reset_after=False))
model.add(Dense(1, activation="sigmoid"))

opt = adam_v2.Adam(learning_rate=1e-3, clipnorm=1.0, clipvalue=0.5)

model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_layer (Embedding)  (None, 50, 50)            9827200   
_________________________________________________________________
gru_3 (GRU)                  (None, 50, 16)            3216      
_________________________________________________________________
gru_4 (GRU)                  (None, 50, 8)             600       
_________________________________________________________________
gru_5 (GRU)                  (None, 4)                 156       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5         
Total params: 9,831,177
Trainable params: 9,831,177
Non-trainable params: 0
_________________________________________________________________


In [46]:
n_y_train = np.array(y_train)
n_y_test = np.array(y_test)

In [47]:
history = model.fit(
    egitim_kume,
    n_y_train,
    epochs=25,
    batch_size=512,
    validation_data=(test_kume, n_y_test),
    validation_steps=30,
)

Epoch 1/25




Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [30]:
test_loss, test_acc = model.evaluate(test_kume, n_y_test)



In [31]:
print("Test Loss: {}".format(test_loss))
print("Test Accuracy: {}".format(test_acc))

text1 = "bu ürün çok iyi herkese tavsiye ederim"
text2 = "kargo çok hızlı aynı gün elime geçti"
text3 = "büyük bir hayal kırıklığı yaşadım bu ürün bu markaya yakışmamış"
text4 = "mükemmel"
text5 = "tasarımı harika ancak kargo çok geç geldi ve ürün açılmıştı tavsiye etmem"
text6 = "hiç resimde gösterildiği gibi değil"
text7 = "kötü yorumlar gözümü korkutmuştu ancak hiçbir sorun yaşamadım teşekkürler"
text8 = "hiç bu kadar kötü bir satıcıya denk gelmemiştim ürünü geri iade ediyorum"
text9 = "tam bir fiyat performans ürünü"
text10 = "beklediğim gibi çıkmadı"
yazilar = [text1, text2, text3, text4, text5, text6, text7, text8, text9, text10]

deneme_kume = tokenlestir(yazilar)
sonuc = model.predict(deneme_kume)
print(sonuc)

Test Loss: 0.47108957171440125
Test Accuracy: 0.8696581125259399
[[0.9590216 ]
 [0.95821506]
 [0.01047284]
 [0.95859253]
 [0.01047935]
 [0.220978  ]
 [0.95817643]
 [0.01048765]
 [0.9576007 ]
 [0.18795137]]


In [32]:
plot_graph_accuracy(history, "accuracy")

In [33]:
plot_graph_loss(history, "loss")

In [34]:
# Save the entire model as a SavedModel.
!mkdir -p saved_model
model.save('tf_saved_model/my_model')

INFO:tensorflow:Assets written to: saved_model/my_model/assets


INFO:tensorflow:Assets written to: saved_model/my_model/assets


In [32]:
new_model = tf.keras.models.load_model('tf_saved_model/my_model')

# Check its architecture
new_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_layer (Embedding)  (None, 50, 50)           1000050   
                                                                 
 gru_12 (GRU)                (None, 50, 16)            3216      
                                                                 
 gru_13 (GRU)                (None, 50, 8)             600       
                                                                 
 gru_14 (GRU)                (None, 4)                 156       
                                                                 
 dense_4 (Dense)             (None, 1)                 5         
                                                                 
Total params: 1,004,027
Trainable params: 1,004,027
Non-trainable params: 0
_________________________________________________________________


In [36]:
# confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns

y_pred = new_model.predict(test_kume)
y_pred = (y_pred > 0.5)

cm = confusion_matrix(n_y_test, y_pred)
print(cm)

[[1609 2402]
 [1941 2004]]
