In [None]:
# Colab-only step: attach Google Drive under /content/drive.
# force_remount=True re-mounts even when the drive is already attached.
from google.colab import drive

drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [1]:
import tensorflow as tf
import sklearn.model_selection as sk
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

In [7]:
# --- Paths -----------------------------------------------------------------
# Colab alternative for the dataset location:
# ds_path = "/content/drive/MyDrive/Datasets/products.csv"
ds_path = "./dataset/products.csv"  # CSV read with pd.read_csv
model_save_path = "./model"  # target of model.save / load_model

# --- Text preprocessing ----------------------------------------------------
VOCAB_SIZE = 2000  # Tokenizer num_words and Embedding input_dim
MAX_SEQUENCE_LENGTH = 50  # maxlen passed to pad_sequences

# --- Model / training ------------------------------------------------------
EMBEDDING_DIM = 64  # Embedding output_dim, also reused as the LSTM unit count
BATCH_SIZE = 64
EPOCHS = 10

In [3]:
# Load the product catalogue and derive one category label per row.
products_raw = pd.read_csv(ds_path)

# A row with no title cannot be tokenised and a row with no category cannot be
# labelled, so drop incomplete rows up front instead of failing (or silently
# producing an all-zero label vector) further down.
df = products_raw.dropna(subset=["title", "category"]).reset_index(drop=True)

# Keep only the first "|"-separated segment of the category and replace dashes
# with spaces. Vectorised string ops replace the per-row Python lambda.
df["category"] = (
    df["category"].str.split("|", n=1).str[0].str.replace("-", " ", regex=False)
)

# Class names in sorted-category order. pd.get_dummies derives its columns from
# the same categorical encoding, so index i here lines up with column i of a
# one-hot matrix built from df.category -- without materialising that matrix
# just to read its header.
labels = pd.Categorical(df["category"]).categories.tolist()
label_count = len(labels)
assert label_count == df["category"].nunique(), "label list out of sync with data"
print("Label count:", label_count)

titles = df["title"].tolist()

Label count: 1189


In [4]:
# Fit the vocabulary on every title; words outside the VOCAB_SIZE budget are
# mapped to the "[OOV]" token.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="[OOV]")
tokenizer.fit_on_texts(titles)

# Titles -> integer id sequences -> fixed-width matrix.
title_sequences = tokenizer.texts_to_sequences(titles)
X = pad_sequences(title_sequences, maxlen=MAX_SEQUENCE_LENGTH)

# One-hot encode the category column (one column per distinct category).
Y = pd.get_dummies(df.category).values

print("Shape of data tensor:", X.shape)
print("Shape of label tensor:", Y.shape)

# Hold out 20% of the rows for the final evaluation; fixed seed for a
# repeatable split.
X_train, X_test, Y_train, Y_test = sk.train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

Shape of data tensor: (99972, 50)
Shape of label tensor: (99972, 1189)


In [None]:
# Title classifier: embedding -> dropout -> bidirectional LSTM -> dense -> softmax.
model = tf.keras.Sequential()

# Token ids -> EMBEDDING_DIM-sized vectors; input width follows the padded data.
model.add(
    tf.keras.layers.Embedding(
        input_dim=VOCAB_SIZE,
        output_dim=EMBEDDING_DIM,
        input_length=X.shape[1],
    )
)
model.add(tf.keras.layers.Dropout(0.2))

# Read each title in both directions; dropout on inputs and recurrent state.
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(EMBEDDING_DIM, dropout=0.2, recurrent_dropout=0.2)
    )
)

# NOTE(review): no activation is given here, so this layer is a plain linear
# projection in front of the softmax layer -- confirm that is intended.
model.add(tf.keras.layers.Dense(32))

# One probability per category.
model.add(tf.keras.layers.Dense(label_count, activation="softmax"))

In [None]:
print("Compiling the model...")

# Labels are one-hot vectors, hence categorical (not sparse) cross-entropy.
crossentropy = tf.keras.losses.CategoricalCrossentropy()
adam_optimizer = tf.keras.optimizers.Adam()
model.compile(loss=crossentropy, optimizer=adam_optimizer, metrics=["accuracy"])

print("Compiled")

Compiling the model...
Compiled


In [10]:
# Train on the training split; the last 20% of it is set aside by Keras for
# per-epoch validation. `history` keeps the per-epoch metrics.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# Persist the trained network to model_save_path.
# NOTE(review): only the Keras model is written here; the fitted tokenizer and
# the label list used by predict() are not saved by this cell.
print("Saving model...")
model.save(filepath=model_save_path)
print("Saved")

Saving model...
INFO:tensorflow:Assets written to: /model/assets




Saved


In [12]:
# Score the model on the held-out test split (loss first, then accuracy, in
# the order of the compiled metrics).
loss, accuracy = model.evaluate(x=X_test, y=Y_test)
print(f"Test set:\n Loss: {loss:0.3f}\n Accuracy: {accuracy:0.3f}")

Test set:
 Loss: 0.780
 Accuracy: 0.862


In [None]:
# Reload the model written to model_save_path so it can be compared with the
# in-memory `model`.
saved_model = tf.keras.models.load_model(filepath=model_save_path)

In [42]:
def predict(text, model):
    """Predict the category label for a single product title.

    The title is converted with the notebook-level ``tokenizer``, padded to
    ``MAX_SEQUENCE_LENGTH`` and passed to ``model.predict``; the index of the
    highest score is looked up in the notebook-level ``labels`` list.

    Args:
        text: Product title to classify.
        model: Object exposing ``predict(batch)`` that returns one row of
            per-label scores for each input row (e.g. ``model`` or
            ``saved_model`` from this notebook).

    Returns:
        The predicted label (an entry of ``labels``). The label is also
        printed, as before, so existing interactive use keeps working.
    """
    sequences = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

    # The batch holds exactly one title, so score only its row rather than
    # taking argmax over the flattened batch output.
    scores = model.predict(padded)
    prediction = labels[int(np.argmax(scores[0]))]

    print(prediction)
    return prediction


# Ask for a product title and classify it with the freshly trained model.
text = input("Your product name: ")

# To check the reloaded copy instead, use:
# predict(text, saved_model)
# The trailing semicolon keeps the notebook from echoing any return value.
predict(text, model);

Your product name: ماوس بی سیم شیائومی مدل WXSMSBMW02
Mouse
