In [18]:
import tensorflow as tf 
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np 
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [3]:
# Toy training set: each text is paired, by position, with its category label.
texts = [
    "This is a great product",
    "I love this item",
    "Not satisfied with the quality",
]
categories = [
    "Electronics",
    "Clothing",
    "Electronics",
]

In [33]:
# Learn a word -> integer-id vocabulary from the texts, then encode each
# sentence as a list of those ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
text_sequences = tokenizer.texts_to_sequences(texts)

# Pad every sequence at the end ('post') up to the length of the longest one
# so they can be stacked into a single rectangular array.
max_sequence_length = max(len(seq) for seq in text_sequences)
padded_sequences = pad_sequences(text_sequences, maxlen=max_sequence_length, padding='post')

# Encode the category names as integer class ids. The result is bound to
# `encoded_categories` because that is the name the later cells read; binding
# it to any other name makes those cells fail with NameError on a fresh kernel.
label_encoder = LabelEncoder()
encoded_categories = label_encoder.fit_transform(categories)

In [37]:
# Re-wrap features and labels as NumPy arrays before they are split.
padded_sequences, encoded_categories = (
    np.array(padded_sequences),
    np.array(encoded_categories),
)

In [39]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    encoded_categories,
    test_size=0.2,
    random_state=42,
)

In [40]:
# +1 because the padded sequences also contain the padding id 0 in addition
# to the ids listed in tokenizer.word_index.
input_dim = len(tokenizer.word_index) + 1  # Vocabulary size
output_dim = len(label_encoder.classes_)  # Number of unique categories
model = tf.keras.Sequential([
    # Declare the fixed sequence length with an explicit Input rather than the
    # Embedding layer's `input_length` argument, which is deprecated in Keras 3.
    tf.keras.Input(shape=(max_sequence_length,), dtype='int32'),
    # Each word id is mapped to a 16-dimensional vector.
    tf.keras.layers.Embedding(input_dim, 16),
    # Average the word vectors over the sequence axis into one vector per text.
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    # One output per category.
    tf.keras.layers.Dense(output_dim, activation='softmax')
])
# The sparse variant of the loss is used because the targets are integer
# class ids (from LabelEncoder), not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [41]:
# Train for 10 epochs; the held-out split is evaluated after every epoch.
model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5000 - loss: 0.6924 - val_accuracy: 1.0000 - val_loss: 0.6823
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 0.5000 - loss: 0.6905 - val_accuracy: 1.0000 - val_loss: 0.6836
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.5000 - loss: 0.6881 - val_accuracy: 1.0000 - val_loss: 0.6849
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 0.5000 - loss: 0.6857 - val_accuracy: 1.0000 - val_loss: 0.6863
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - accuracy: 0.5000 - loss: 0.6833 - val_accuracy: 1.0000 - val_loss: 0.6869
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 1.0000 - loss: 0.6810 - val_accuracy: 1.0000 - val_loss: 0.6874
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x14da6f482d0>

In [42]:
def predict_category(text):
    """Return the category label the trained model assigns to ``text``.

    The text is encoded with the fitted ``tokenizer``, padded at the end to
    ``max_sequence_length``, and passed to ``model.predict``. The index of the
    largest output is mapped back to a label through ``label_encoder``.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(token_ids, maxlen=max_sequence_length, padding='post')
    class_scores = model.predict(model_input)
    best_index = np.argmax(class_scores)
    # inverse_transform works on sequences: one index in, one label out.
    (label,) = label_encoder.inverse_transform([best_index])
    return label

In [43]:
# Classify one unseen sentence with predict_category and print the label it returns.
user_input = "This product works great"
predicted_category = predict_category(user_input)
print("Predicted category:", predicted_category)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Predicted category: Clothing
