In [46]:
import numpy as np
import pandas as pd

In [47]:
data = np.array(pd.read_csv("Dataset/Text_Emotion_Data.csv"))
stopwords = np.array(pd.read_csv("Dataset/stopwords.txt", sep=" ", header=None)).flatten()

In [48]:
X = data[:, 0]
y = data[:, 1]

In [49]:
import re

def remove_none_alpha(x):
    regex = re.compile('[^a-zA-Z ]')
    return regex.sub('', x)

In [50]:
stopwords = [remove_none_alpha(w) for w in stopwords]

In [51]:
X_seq = []
for sentence in X:
    words_seq = remove_none_alpha(sentence).lower().split(" ")
    words_seq = [w for w in words_seq if len(w) > 2]
    words_seq = [w for w in words_seq if w not in stopwords]
    X_seq.append(words_seq)

In [52]:
max_len = len(max(X_seq, key=len))

# create a new list of lists with the desired size, padding with empty strings
sequences = [['' for _ in range(max_len)] for _ in range(len(X_seq))]

# copy the elements from the original lists into the new list, padding with empty strings as needed
for i, sequence in enumerate(X_seq):
    sequences[i][:len(sequence)] = sequence

In [53]:
my_list = [1, 2, 3]
threshold = 6

my_list = [0] * (threshold - len(my_list)) + my_list

print(my_list)

[0, 0, 0, 1, 2, 3]


In [54]:
import numpy as np

def one_hot_encode(sequence, word_to_idx):
    num_words = len(word_to_idx)
    encoding = np.zeros((len(sequence), num_words))
    for i, word in enumerate(sequence):
        if word in word_to_idx:
            encoding[i, word_to_idx[word]] = 1
    return encoding

def one_hot_encode2(sequence, word_to_idx, max_bit_num):
    encoding = np.zeros((len(sequence), max_bit_num))
    for i, word in enumerate(sequence):
        if word in word_to_idx:            
            binary_string = bin(word_to_idx[word])[2:]  # exclude the "0b" prefix
            binary_array = np.array([int(digit) for digit in binary_string])
            binary_array = np.pad(binary_array, (0, max_bit_num - len(binary_array)), 'constant')

            encoding[i, :] = binary_array
    return encoding

unique_words = list(set([word for sequence in sequences for word in sequence]))
word_to_idx = {word: idx for idx, word in enumerate(unique_words)}

max_bit_num = 1
while(True):
    if(2**max_bit_num > len(unique_words)): break
    max_bit_num += 1

encoded_sequences = []
for sequence in sequences:
    encoded_sequences.append(one_hot_encode2(sequence, word_to_idx, max_bit_num))

encoded_sequences = np.array(encoded_sequences)

In [55]:
X_train = encoded_sequences[:-150]
X_test = encoded_sequences[-150:]
y_train_str = y[:-150]
y_test_str = y[-150:]

In [56]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
y_train = le.fit_transform(y_train_str)
y_test = le.fit_transform(y_test_str)

In [57]:
y_train

array([2, 4, 2, ..., 1, 2, 3])

In [70]:
import tensorflow as tf

# Define the Elman network architecture
input_dim = 13
hidden_dim = 20
output_dim = 1
num_steps = 19

inputs = tf.keras.layers.Input(shape=(num_steps, input_dim))
hidden_layer = tf.keras.layers.SimpleRNN(hidden_dim, activation='sigmoid')(inputs)
outputs = tf.keras.layers.Dense(output_dim, activation='linear')(hidden_layer)

# Define the model
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='mse', metrics=[tf.metrics.Accuracy()])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a6d87b8910>

In [71]:
import numpy as np

# Assuming you have test data and labels, x_test and y_test respectively

# Evaluate the model on the test data
loss = model.evaluate(X_test, y_test, verbose=0)

# Get the model's predictions on the test data
predictions = model.predict(X_test, verbose=0)

# Convert predictions and labels to class indices
predicted_labels = np.argmax(predictions, axis=1)

# Compute accuracy
accuracy = np.mean(predicted_labels == y_test)

print("Accuracy:", accuracy)


Accuracy: 0.19333333333333333
