In [41]:
import json
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [9]:
# Step 1: Load and preprocess the training data
with open('traindata (2).json') as f:
    train_data = json.load(f)

# Remove unwanted classes from the training dataset
unwanted_classes = ['000', '200', 'B','A']
train_data = {key: value for key, value in train_data.items() if key not in unwanted_classes}

In [11]:
# Prepare the training data
train_texts = []
train_labels = []
for key, values in train_data.items():
    train_texts.extend(values)
    train_labels.extend([key] * len(values))

In [13]:
# Step 2: Load and preprocess the testing data
with open('testdata (2).json') as f:
    test_data = json.load(f)


In [14]:
# Remove unwanted classes from the testing dataset
test_data = {key: value for key, value in test_data.items() if key not in unwanted_classes}

In [15]:
# Prepare the testing data
test_texts = []
test_labels = []
for key, values in test_data.items():
    test_texts.extend(values)
    test_labels.extend([key] * len(values))

# Step 3: Preprocess the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_texts)

train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

max_sequence_length = max(len(sequence) for sequence in train_sequences)
train_padded = pad_sequences(train_sequences, maxlen=max_sequence_length)
test_padded = pad_sequences(test_sequences, maxlen=max_sequence_length)

label_to_id = {label: idx for idx, label in enumerate(set(train_labels))}
train_encoded = np.array([label_to_id[label] for label in train_labels])
test_encoded = np.array([label_to_id[label] for label in test_labels])


In [16]:
# Step 4: Build the model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_sequence_length))
model.add(GRU(units=128))
model.add(Dense(units=len(label_to_id), activation='softmax'))

In [17]:
# Step 5: Compile and train the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_padded, train_encoded, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24dac1ccd90>

In [39]:
# Step 7: Save the trained model
model.save('gru_model.h5')
print('Model saved successfully.')


Model saved successfully.


In [42]:
# Step 8: Load the saved model
loaded_model = load_model('gru_model.h5')


In [43]:
# Step 9: Generate classification report
test_predictions = loaded_model.predict(test_padded)
test_predictions = np.argmax(test_predictions, axis=1)
target_names = list(label_to_id.keys())
classification_report_output = classification_report(test_encoded, test_predictions, target_names=target_names)
print('Classification Report:')
print(classification_report_output)


Classification Report:
              precision    recall  f1-score   support

         011       0.78      0.77      0.77       172
         013       0.84      0.66      0.74        41
         021       0.83      0.72      0.77       211
         028       0.86      0.87      0.86       211
         010       0.88      0.86      0.87       185
         005       0.90      0.81      0.85       211
         043       0.92      0.82      0.87        55
         033       0.89      0.78      0.83        32
         025       0.88      0.75      0.81       142
         037       0.93      0.92      0.93       249
         020       0.76      0.76      0.76       157
         022       0.65      0.62      0.63        42
         039       0.81      0.86      0.84        77
         024       0.84      0.82      0.83        62
         007       0.82      0.89      0.85       233
         034       0.91      0.97      0.94        31
         027       0.92      0.96      0.94        23
    

In [None]:
# Step 10: Predict the class from user input using the loaded model
while True:
    user_input = input("Enter a sentence (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break

    # Preprocess the user input
    user_sequence = tokenizer.texts_to_sequences([user_input])
    user_padded = pad_sequences(user_sequence, maxlen=max_sequence_length)

    # Make predictions using the loaded model
    user_prediction = loaded_model.predict(user_padded)
    user_prediction = np.argmax(user_prediction, axis=1)[0]

    # Convert the predicted class index back to the label
    id_to_label = {v: k for k, v in label_to_id.items()}
    predicted_label = id_to_label[user_prediction]

    print('Predicted class:', predicted_label)

Enter a sentence (or 'exit' to quit): chicken fry brand
Predicted class: 029
