In [1]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pickle

# Read the labelled dataset from disk and keep only rows without missing values
input_path = "isear.csv"
data = pd.read_csv(input_path).dropna()

# Text Cleaning Function
def clean_text(text):
    """Normalise a string before it is vectorised.

    The input is lower-cased, then every character that is neither a word
    character (letter, digit or underscore) nor whitespace is deleted.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased string with punctuation removed.
    """
    lowered = text.lower()
    # Strip anything that is not a word character or whitespace
    return re.sub(r'[^\w\s]', '', lowered)

# Add a normalised copy of the text column
data['Clean_Text'] = data['Text'].apply(clean_text)

# Hold out 20% of the rows for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    data['Clean_Text'],
    data['Emotion'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF over unigrams and bigrams, capped at 5000 features.
# The vocabulary is fitted on the training split only and reused for the test split.
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit the classifier (fit() returns the estimator itself)
model = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)

# Report per-class precision / recall / F1 on the held-out split
y_pred = model.predict(X_test_vec)
print(classification_report(y_test, y_pred))

# Persist the classifier together with its vectorizer in a single pickle file
model_data = {'model': model, 'vectorizer': vectorizer}
with open('emotion_model.pkl', 'wb') as f:
    pickle.dump(model_data, f)

print("Model and Vectorizer Saved as 'emotion_model.pkl'!")

# Function to Predict Emotion with Confidence Level
def predict_emotion(text, model_path="emotion_model.pkl"):
    """Predict the most probable emotion label for a piece of text.

    Args:
        text: Raw input string. It is normalised with ``clean_text`` before
            being passed to the vectorizer.
        model_path: Path of a pickle file containing a dict with the keys
            ``'model'`` and ``'vectorizer'``. Defaults to
            ``"emotion_model.pkl"``, so ``predict_emotion(text)`` behaves
            exactly as before.

    Returns:
        A ``(emotion, confidence)`` tuple: the entry of ``model.classes_``
        with the highest predicted probability, and that probability.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code. Only point model_path
    # at a file produced by this notebook or by another trusted source.
    with open(model_path, "rb") as f:
        bundle = pickle.load(f)

    model = bundle['model']
    vectorizer = bundle['vectorizer']

    # The vectorizer expects an iterable of documents, hence the one-item list
    text_vec = vectorizer.transform([clean_text(text)])
    probabilities = model.predict_proba(text_vec)[0]  # probabilities for the single row

    # Pick the class with the highest probability and report that probability
    emotion_index = probabilities.argmax()
    emotion = model.classes_[emotion_index]
    confidence = probabilities[emotion_index]

    return emotion, confidence

# Smoke test: classify one hard-coded sentence with the saved model
new_text = "I am feeling very happy today"
emotion, confidence = predict_emotion(text=new_text)
print(f"Predicted Emotion: {emotion} (Confidence: {confidence:.2f})")




              precision    recall  f1-score   support

       anger       0.46      0.53      0.49       208
     disgust       0.57      0.65      0.61       194
        fear       0.72      0.72      0.72       237
       guilt       0.55      0.52      0.53       229
         joy       0.69      0.74      0.72       211
     sadness       0.66      0.65      0.65       201
       shame       0.54      0.40      0.46       215

    accuracy                           0.60      1495
   macro avg       0.60      0.60      0.60      1495
weighted avg       0.60      0.60      0.60      1495

Model and Vectorizer Saved as 'emotion_model.pkl'!
Predicted Emotion: joy (Confidence: 0.85)


In [2]:
# Interactive check: classify a sentence typed in by the user
mytext = input("Enter the text: ")
emotion, confidence = predict_emotion(text=mytext)
print(f"2nd Predicted Emotion: {emotion} (Confidence: {confidence:.2f})")

2nd Predicted Emotion: joy (Confidence: 0.85)
