In [2]:
import nltk
import numpy as np
import pandas as pd
from gensim.models import Word2Vec
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import set_random_seed
from nltk.corpus import stopwords
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Read the emotions CSV into a DataFrame
data = pd.read_csv(r"/content/emotions.csv")

# Pull out the raw text column (inputs) and the label column (targets)
X, y = data['text'], data['label']

# Fetch the NLTK stopword corpus, then keep the English entries in a set
# so membership checks during tokenization are cheap
nltk.download('stopwords')
stop_words = {*stopwords.words('english')}

def clean_and_tokenize(text, stopword_set=None):
    """Split ``text`` on whitespace and drop stopwords.

    Stopword matching is case-insensitive (each token is lower-cased for the
    lookup), but the tokens that survive keep their original casing.

    Args:
        text: The raw text to tokenize. Any value that is not a ``str``
            (for example the float ``NaN`` pandas uses for a missing cell)
            is treated as empty text.
        stopword_set: Optional collection of lower-case stopwords to remove.
            When omitted, the module-level ``stop_words`` set is used.

    Returns:
        list[str]: The remaining tokens in their original order. The list is
        empty when ``text`` is missing or consists only of stopwords.
    """
    # Missing cells are not strings and have no ``.split``; treat them as
    # empty documents instead of aborting the whole ``apply``.
    if not isinstance(text, str):
        return []
    if stopword_set is None:
        stopword_set = stop_words
    return [word for word in text.split() if word.lower() not in stopword_set]

# Tokenize every document and strip its stopwords
X_tokenized = X.apply(clean_and_tokenize)

# Fit Word2Vec embeddings on the tokenized corpus
word2vec_params = dict(vector_size=100, window=5, min_count=1, workers=4)
model_Word2Vec = Word2Vec(sentences=X_tokenized, **word2vec_params)

# Function to create sentence vectors by averaging word vectors
def sentence_vector(sentence, w2v_model=None):
    """Embed a tokenized sentence as the mean of its word vectors.

    Tokens that are not in the embedding vocabulary are skipped.

    Args:
        sentence: Iterable of tokens.
        w2v_model: Optional embedding model exposing ``wv`` (a token lookup
            supporting ``in`` and ``[]``) and ``vector_size``. When omitted,
            the module-level ``model_Word2Vec`` is used.

    Returns:
        numpy.ndarray: 1-D array of length ``vector_size``. It is the
        element-wise mean of the known tokens' vectors, or an all-zero
        float32 vector when no token is in the vocabulary.
    """
    if w2v_model is None:
        w2v_model = model_Word2Vec
    keyed_vectors = w2v_model.wv
    vectors = [keyed_vectors[word] for word in sentence if word in keyed_vectors]
    if vectors:
        return np.mean(vectors, axis=0)
    # gensim stores word vectors as float32; a default float64 zero vector
    # would upcast the whole feature matrix as soon as one sentence has no
    # in-vocabulary token, so keep the fallback in the same dtype.
    return np.zeros(w2v_model.vector_size, dtype=np.float32)

# Convert text data to fixed-length vectors (one averaged embedding per row)
X_features = np.array([sentence_vector(sentence) for sentence in X_tokenized])

# Convert labels to one-hot encoding. The dtype is pinned explicitly because
# the default dummy dtype depends on the pandas version (boolean columns on
# pandas >= 2.0), while the cross-entropy targets should be numeric.
y_encoded = pd.get_dummies(y, dtype=np.float32)

# Size the output layer from the encoded columns themselves: get_dummies
# skips missing labels, whereas counting unique() values would include NaN
# and make the softmax width disagree with the target width.
num_classes = y_encoded.shape[1]

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X_features, y_encoded, test_size=0.2, random_state=42)

# Seed Python, NumPy and the Keras backend before any weights are created so
# that initialisation and batch shuffling, and therefore the reported
# metrics, are repeatable across runs. 42 matches the train/test split seed.
set_random_seed(42)

# Build the model: one hidden ReLU layer feeding a softmax over the classes
model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model, holding out 20% of the training rows for validation
model.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=32)

# Score the trained network on the held-out test split and report both values
loss, accuracy = model.evaluate(X_test, y_test)
for caption, value in (("Test Loss:", loss), ("Test Accuracy:", accuracy)):
    print(caption, value)

# Predict labels: take the most probable class for every test row
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)
# Recover integer class indices from the one-hot encoded targets
y_true = np.argmax(np.array(y_test), axis=1)

# Calculate precision, recall, and F1-score (support-weighted class average)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)
# `loss` and `accuracy` still hold the result of the evaluation step just
# above; running model.evaluate again would only repeat a full pass over the
# test set to produce the same numbers.
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Persist the trained network in the native Keras format
model.save("emotions_ffn.keras")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.6024633049964905
Test Accuracy: 0.765888512134552
Precision: 0.7637832722900731
Recall: 0.7658885343441856
F1-score: 0.7634821096843183
Test Loss: 0.6024633049964905
Test Accuracy: 0.765888512134552


In [3]:
# Write the trained network to disk (same target file as the save that ends
# the training cell)
model.save(filepath="emotions_ffn.keras")

In [6]:
from keras.models import load_model

# Reload from the same relative path the model was saved to. The previous
# absolute '/content/...' path only pointed at that file when the working
# directory happened to be /content.
model = load_model("emotions_ffn.keras")

In [7]:
# Score the model currently bound to `model` on the test split
loss, accuracy = model.evaluate(X_test, y_test)
for heading, score in (("Test Loss:", loss), ("Test Accuracy:", accuracy)):
    print(heading, score)

Test Loss: 0.6024633049964905
Test Accuracy: 0.765888512134552


In [8]:
# Macro-averaged precision, recall and F1 (every class weighted equally),
# computed from the `y_true` / `y_pred` arrays already in the namespace
precision, recall, f1 = (
    scorer(y_true, y_pred, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)

print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Precision: 0.7233099882150197
Recall: 0.6942084726868395
F1-score: 0.7068180534400023


In [9]:
# Predict labels: class index with the highest predicted probability per row
y_pred = np.argmax(model.predict(X_test), axis=1)
# Integer class indices recovered from the one-hot test targets
y_true = np.argmax(np.array(y_test), axis=1)

# Support-weighted precision, recall and F1 over all classes
weighted_avg = {'average': 'weighted'}
precision = precision_score(y_true, y_pred, **weighted_avg)
recall = recall_score(y_true, y_pred, **weighted_avg)
f1 = f1_score(y_true, y_pred, **weighted_avg)

for metric_name, metric_value in (('Precision:', precision), ('Recall:', recall), ('F1-score:', f1)):
    print(metric_name, metric_value)

Precision: 0.7637832722900731
Recall: 0.7658885343441856
F1-score: 0.7634821096843183
