In [31]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Bidirectional, GlobalMaxPooling1D, Dropout
from keras.models import Sequential
import json

In [32]:
def load_data_from_csv(file_path, text_col='text', label_col='label'):
    """Read a labelled-text CSV and return its texts and labels as parallel lists.

    Args:
        file_path: Location of the CSV file; handed straight to ``pd.read_csv``.
        text_col: Name of the column holding the raw text. Defaults to ``'text'``.
        label_col: Name of the column holding the label. Defaults to ``'label'``.

    Returns:
        A ``(texts, labels)`` tuple of equal-length lists. ``texts`` contains
        ``str`` values only; ``labels`` keeps whatever type pandas parsed for
        the label column. Rows where either field is missing are skipped.

    Raises:
        KeyError: If ``text_col`` or ``label_col`` is not a column of the file.
    """
    data = pd.read_csv(file_path)

    # Fail early with a message that names the file and the absent column(s),
    # instead of the bare column-name KeyError pandas would raise on lookup.
    missing = [col for col in (text_col, label_col) if col not in data.columns]
    if missing:
        raise KeyError(f"{file_path!r} is missing required column(s): {missing}")

    # Empty CSV fields are parsed as NaN (a float). Dropping the whole row
    # keeps texts and labels aligned and keeps non-string values away from
    # downstream text processing.
    data = data.dropna(subset=[text_col, label_col])

    # A column made up solely of numeric-looking text is parsed as numbers;
    # coerce back to str so callers always receive strings.
    texts = data[text_col].astype(str).tolist()
    labels = data[label_col].tolist()
    return texts, labels

# Load the three dataset splits in train / validation / test order; each
# call returns a (texts, labels) pair that is unpacked into its own names.
(
    (train_texts, train_labels),
    (val_texts, val_labels),
    (test_texts, test_labels),
) = (
    load_data_from_csv(csv_path)
    for csv_path in (
        'sentiment_train.csv',
        'sentiment_validation.csv',
        'sentiment_test.csv',
    )
)

In [33]:
# Convert every split's labels from Python lists to NumPy arrays so they
# have the same container type as the padded feature arrays.
train_labels = np.array(train_labels)
val_labels = np.array(val_labels)
# The test labels were previously left as a plain list, unlike the other two
# splits; Keras does not reliably accept a Python list of labels next to
# ndarray features, so convert them too before any evaluation step.
test_labels = np.array(test_labels)

In [34]:
# --- Text preprocessing ---
# Passed to the tokenizer as num_words and to the embedding as input_dim.
max_words = 30_000
# Every sequence is padded/truncated to this many tokens.
max_sequence_length = 100

# --- Network shape ---
# Size of each learned word vector.
embedding_dim = 100
# Units per LSTM direction (the bidirectional wrapper doubles the output width).
lstm_units = 64
# Rate used by both Dropout layers.
dropout_rate = 0.5
# Width of the final softmax layer.
num_classes = 3

In [35]:
# Build the vocabulary from the training texts only; the validation and test
# texts are encoded with that same fitted tokenizer.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_texts)

# Encode each split as lists of integer word ids (train, validation, test).
train_sequences, val_sequences, test_sequences = (
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

In [36]:
# Bring every encoded split to a fixed width of max_sequence_length, using
# pad_sequences' default padding/truncation settings.
train_sequences, val_sequences, test_sequences = (
    pad_sequences(encoded_split, maxlen=max_sequence_length)
    for encoded_split in (train_sequences, val_sequences, test_sequences)
)

In [37]:
# Architecture: embedding -> bidirectional LSTM (full sequence output) ->
# max over time -> dropout -> dense ReLU -> dropout -> softmax over classes.
model = Sequential([
    Embedding(
        input_dim=max_words,
        output_dim=embedding_dim,
        input_length=max_sequence_length,
    ),
    # return_sequences=True keeps one output per timestep so the pooling
    # layer below can take the maximum across the whole sequence.
    Bidirectional(LSTM(lstm_units, return_sequences=True)),
    GlobalMaxPooling1D(),
    Dropout(dropout_rate),
    Dense(128, activation='relu'),
    Dropout(dropout_rate),
    Dense(num_classes, activation='softmax'),
])

# The sparse variant of categorical cross-entropy takes integer class ids
# rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 100, 100)          3000000   
                                                                 
 bidirectional_3 (Bidirecti  (None, 100, 128)          84480     
 onal)                                                           
                                                                 
 global_max_pooling1d_3 (Gl  (None, 128)               0         
 obalMaxPooling1D)                                               
                                                                 
 dropout_6 (Dropout)         (None, 128)               0         
                                                                 
 dense_6 (Dense)             (None, 128)               16512     
                                                                 
 dropout_7 (Dropout)         (None, 128)              

In [38]:
# Train for a fixed number of epochs, evaluating on the validation split
# after each one. The returned History object is kept so the per-epoch
# metrics (history.history) remain available for inspection or plotting,
# rather than being discarded and shown only as an object repr.
history = model.fit(
    train_sequences,
    train_labels,
    epochs=10,
    batch_size=64,
    validation_data=(val_sequences, val_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x19c626464d0>

In [39]:
# Persist the trained network (HDF5 file, per the .h5 extension).
model.save("emotion_classification_model_v2.h5")

# Persist what is needed to rebuild the tokenizer: its word -> index mapping
# and its full configuration, each written to its own JSON file.
tokenizer_word_index = tokenizer.word_index
tokenizer_config = tokenizer.get_config()

for json_path, payload in (
    ("tokenizer_word_index.json", tokenizer_word_index),
    ("tokenizer_config.json", tokenizer_config),
):
    with open(json_path, "w") as json_file:
        json.dump(payload, json_file)

  saving_api.save_model(
