In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score


In [6]:
# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/Compozent /Emotion_classify_Data.csv')


In [7]:
# Display the first few rows of the dataset
print(df.head())

# Display basic information about the dataset
print(df.info())


                                             Comment Emotion
0  i seriously hate one subject to death but now ...    fear
1                 im so full of life i feel appalled   anger
2  i sit here to write i start to dig out my feel...    fear
3  ive been really angry with r and i feel like a...     joy
4  i feel suspicious if there is no one outside l...    fear
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5937 entries, 0 to 5936
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Comment  5937 non-null   object
 1   Emotion  5937 non-null   object
dtypes: object(2)
memory usage: 92.9+ KB
None


In [8]:
#Text Vectorization
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['Comment'], df['Emotion'], test_size=0.2, random_state=42)

# Create a pipeline with a TF-IDF vectorizer and a support vector machine (SVM) classifier
model = make_pipeline(TfidfVectorizer(), SVC())

# Train the model
model.fit(X_train, y_train)

# Evaluate the model
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)

print(f'Accuracy: {accuracy:.2f}')
print('\nClassification Report:\n', classification_report(y_test, predictions))


Accuracy: 0.91

Classification Report:
               precision    recall  f1-score   support

       anger       0.90      0.92      0.91       392
        fear       0.96      0.87      0.91       416
         joy       0.89      0.95      0.92       380

    accuracy                           0.91      1188
   macro avg       0.92      0.92      0.91      1188
weighted avg       0.92      0.91      0.91      1188



In [9]:
#Sentimental analysis
# Assuming you want to perform sentiment analysis
sentiment_model = make_pipeline(CountVectorizer(), MultinomialNB())
sentiment_model.fit(X_train, y_train)
sentiment_predictions = sentiment_model.predict(X_test)

print('Sentiment Accuracy:', accuracy_score(y_test, sentiment_predictions))


Sentiment Accuracy: 0.8947811447811448


In [10]:
# Cross-validation for model evaluation
cv_scores = cross_val_score(model, df['Comment'], df['Emotion'], cv=5)
print('Cross-validation Mean Accuracy:', cv_scores.mean())


Cross-validation Mean Accuracy: 0.9125810194049453


In [12]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

# Tokenize the comments
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Comment'])
X_sequence = tokenizer.texts_to_sequences(df['Comment'])
X_padded = pad_sequences(X_sequence, maxlen=50)  # Adjust maxlen based on the maximum sequence length in your data

# Define an LSTM model
lstm_model = Sequential()
lstm_model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=50))
lstm_model.add(LSTM(100))
lstm_model.add(Dense(3, activation='softmax'))  # Adjust the number of units based on the number of classes
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the LSTM model
lstm_model.fit(X_padded, pd.get_dummies(df['Emotion']), epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the LSTM model
X_test_sequence = tokenizer.texts_to_sequences(X_test)
X_test_padded = pad_sequences(X_test_sequence, maxlen=50)
accuracy_lstm = lstm_model.evaluate(X_test_padded, pd.get_dummies(y_test))[1]

print(f'LSTM Model Accuracy: {accuracy_lstm:.2f}')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
LSTM Model Accuracy: 0.98
