In [40]:
import pandas as pd
from keras import layers, models
from keras.models import save_model
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
# stop words
with open("../support/ua-stop-words.txt") as stop_words_file:
    ua_stop_words = stop_words_file.read()

ua_stop_words = ua_stop_words.split("\n")

In [6]:
df = pd.read_csv("../data/preprocessed/ua_sentiment_dataset__amazon__preprocessed.csv")

In [23]:
df = df.dropna()

In [24]:
label_encoder = LabelEncoder()
df["encoded-labels"] = label_encoder.fit_transform(df["label"])

In [25]:
max_features = 10000
max_text_len = 100

In [29]:
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(df["preprocessed-text"].tolist())
sequences = tokenizer.texts_to_sequences(df["preprocessed-text"])

data = pad_sequences(sequences, maxlen=max_text_len)

In [30]:
X_train, X_test, y_train, y_test = train_test_split(data, df["encoded-labels"], test_size=0.2, random_state=42)

In [31]:
model = models.Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_text_len))
model.add(layers.LSTM(64, return_sequences=True))
model.add(layers.LSTM(64))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation="sigmoid"))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

2023-08-20 14:50:26.675739: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 14:50:26.676977: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 14:50:26.677627: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [32]:
model.fit(data, df["encoded-labels"], epochs=10, batch_size=32)

Epoch 1/10


2023-08-20 14:50:51.129841: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 14:50:51.130788: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 14:50:51.131431: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-08-20 14:51:39.389597: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 14:51:39.390487: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 14:51:39.391080: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff0af6791b0>

In [34]:
accuracy = model.evaluate(X_test, y_test)[1]

print("Model accuracy: ", accuracy)

Model accuracy:  0.8959282040596008


# It works awful...

In [39]:
test_texts = [
    "Дуже гарний телефон",
    "Дуже поганий телефон",
    "Дуже гарний телефон, але батарея швидко розряджається",
    "Росія атакувала міст",
    "вибух у чернігові",
]

test_sequences = tokenizer.texts_to_sequences(test_texts)
test_data = pad_sequences(test_sequences, maxlen=max_text_len)

predictions = model.predict(test_data)

for i, prediction in enumerate(predictions):
    print(test_texts[i], " - ", label_encoder.classes_[int(round(prediction[0]))])

Дуже гарний телефон  -  pos
Дуже поганий телефон  -  neg
Дуже гарний телефон, але батарея швидко розряджається  -  pos
Росія атакувала міст  -  pos
вибух у чернігові  -  pos


In [41]:
save_model(model, "../compiled-models/ua-sentiment__amazon__Tf/model.h5 ")

import pickle
with open("../compiled-models/ua-sentiment__amazon__Tf/tokenizer.pickle", "wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

2023-08-20 19:43:43.930549: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 19:43:43.931681: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 19:43:43.932298: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

INFO:tensorflow:Assets written to: ../compiled-models/ua-sentiment__amazon__Tf/model.h5 /assets


INFO:tensorflow:Assets written to: ../compiled-models/ua-sentiment__amazon__Tf/model.h5 /assets
