In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping



Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 286ms/step - loss: 2.5420 - sparse_categorical_accuracy: 0.2258 - val_loss: 2.2513 - val_sparse_categorical_accuracy: 0.2449
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 132ms/step - loss: 2.0794 - sparse_categorical_accuracy: 0.2633 - val_loss: 1.9026 - val_sparse_categorical_accuracy: 0.5673
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 156ms/step - loss: 1.5854 - sparse_categorical_accuracy: 0.5563 - val_loss: 1.3576 - val_sparse_categorical_accuracy: 0.6816
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 149ms/step - loss: 1.0703 - sparse_categorical_accuracy: 0.6591 - val_loss: 1.0664 - val_sparse_categorical_accuracy: 0.7551
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 143ms/step - loss: 0.8465 - sparse_categorical_accuracy: 0.7924 - val_loss: 0.8932 - val_sparse_categorical_accuracy: 0.7388

In [None]:
data = pd.read_json('C:/Users/Ratan/Desktop/Assessment/ecf9c1e7ab7374f18e4400b7a3d2a161-f94652f217eeca83e36dab9d08727caf79ebdecf/dataset.json')

external_statuses = data['externalStatus']
internal_statuses = data['internalStatus']

In [None]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(external_statuses)
encoded_sequences = tokenizer.texts_to_sequences(external_statuses)
max_length = max([len(seq) for seq in encoded_sequences])

padded_sequences = pad_sequences(encoded_sequences, maxlen=max_length, padding='post')


In [None]:
label_encoder = LabelEncoder()
encoded_internal_statuses = label_encoder.fit_transform(internal_statuses)

X_train, X_test, y_train, y_test = train_test_split(padded_sequences, encoded_internal_statuses, test_size=0.2, random_state=42)


In [None]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=100, input_length=max_length))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))  

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test), callbacks=[early_stopping])