In [19]:
!pip install tensorflow pandas scikit-learn numpy



In [20]:
import numpy as np
import pandas as pd
import pickle as pk
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [21]:
### This is my toy data set
data = {
    'text': [
        "My app is broken!",
        "I can't log in",
        "How do I reset my password?",
        "Where is my order?",
        "What is the status of my shipment?",
        "I need a refund for my order",
        "This is not working, I want my money back",
        "The app is so slow and buggy",
        "Forgot my login info",
        "When will my package arrive?"
    ],
    'intent': [
        "LOGIN_ISSUE",
        "LOGIN_ISSUE",
        "PASSWORD_RESET",
        "ORDER_STATUS",
        "ORDER_STATUS",
        "REFUND_REQUEST",
        "REFUND_REQUEST",
        "LOGIN_ISSUE",
        "PASSWORD_RESET",
        "ORDER_STATUS"
    ]
}

df = pd.DataFrame(data)
print(df)

                                        text          intent
0                          My app is broken!     LOGIN_ISSUE
1                             I can't log in     LOGIN_ISSUE
2                How do I reset my password?  PASSWORD_RESET
3                         Where is my order?    ORDER_STATUS
4         What is the status of my shipment?    ORDER_STATUS
5               I need a refund for my order  REFUND_REQUEST
6  This is not working, I want my money back  REFUND_REQUEST
7               The app is so slow and buggy     LOGIN_ISSUE
8                       Forgot my login info  PASSWORD_RESET
9               When will my package arrive?    ORDER_STATUS


In [25]:
MAX_WORDS = 1000
MAX_LEN = 20

tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(df['text'])

X_sequences = tokenizer.texts_to_sequences(df['text'])
X_padded = pad_sequences(X_sequences, maxlen=MAX_LEN, padding='post', truncating='post')

encoder = LabelEncoder()
y_encoded = encoder.fit_transform(df[['intent']])

print("Text data shape:", X_padded.shape)
print("Label data shape:", y_encoded.shape)


nums = len(encoder.classes_)

Text data shape: (10, 20)
Label data shape: (10,)


  y = column_or_1d(y, warn=True)


In [28]:
model = Sequential()
model.add(Embedding(input_dim=MAX_WORDS, output_dim=16, input_length=MAX_LEN))
model.add(GRU(units=32, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=nums, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [29]:
model.fit(X_padded, y_encoded, epochs=100, batch_size=2, verbose=2)

Epoch 1/100
5/5 - 1s - 256ms/step - accuracy: 0.1000 - loss: 1.3912
Epoch 2/100
5/5 - 0s - 11ms/step - accuracy: 0.2000 - loss: 1.3863
Epoch 3/100
5/5 - 0s - 10ms/step - accuracy: 0.6000 - loss: 1.3813
Epoch 4/100
5/5 - 0s - 10ms/step - accuracy: 0.4000 - loss: 1.3772
Epoch 5/100
5/5 - 0s - 10ms/step - accuracy: 0.1000 - loss: 1.3797
Epoch 6/100
5/5 - 0s - 12ms/step - accuracy: 0.4000 - loss: 1.3754
Epoch 7/100
5/5 - 0s - 12ms/step - accuracy: 0.4000 - loss: 1.3720
Epoch 8/100
5/5 - 0s - 10ms/step - accuracy: 0.3000 - loss: 1.3773
Epoch 9/100
5/5 - 0s - 10ms/step - accuracy: 0.2000 - loss: 1.3708
Epoch 10/100
5/5 - 0s - 11ms/step - accuracy: 0.2000 - loss: 1.3674
Epoch 11/100
5/5 - 0s - 10ms/step - accuracy: 0.3000 - loss: 1.3710
Epoch 12/100
5/5 - 0s - 11ms/step - accuracy: 0.3000 - loss: 1.3800
Epoch 13/100
5/5 - 0s - 10ms/step - accuracy: 0.3000 - loss: 1.3735
Epoch 14/100
5/5 - 0s - 10ms/step - accuracy: 0.2000 - loss: 1.3617
Epoch 15/100
5/5 - 0s - 11ms/step - accuracy: 0.1000 - l

<keras.src.callbacks.history.History at 0x7f109fff0830>

In [30]:
model.save("support_model.keras")

In [31]:
with open('tokenizer.pickle', 'wb') as handle:
    pk.dump(tokenizer, handle, protocol=pk.HIGHEST_PROTOCOL)
with open('encoder.pickle', 'wb') as handle:
    pk.dump(encoder, handle, protocol=pk.HIGHEST_PROTOCOL)


In [32]:
model.summary()