In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the dataset
data = pd.read_csv('text.csv')  

# Preprocess the data (assuming 'text' is the column with text data and 'emotion' is the label)
data['text'] = data['text'].str.lower()  # Lowercase
data['text'] = data['text'].str.replace('[^\w\s]', '')  # Remove punctuation

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data['text'], data['label'], test_size=0.2, random_state=42)

#('Sadness',0)
#('Joy',1)
#('Love',2)
#('Anger',3)
#('Fear',4)
#('Surprise',5)

# Vectorize the text data
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Make predictions
y_pred = model.predict(X_test_tfidf)

# Evaluate the model
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94     24201
           1       0.91      0.93      0.92     28164
           2       0.81      0.76      0.78      6929
           3       0.90      0.90      0.90     11441
           4       0.86      0.84      0.85      9594
           5       0.78      0.71      0.74      3033

    accuracy                           0.90     83362
   macro avg       0.87      0.85      0.86     83362
weighted avg       0.90      0.90      0.90     83362



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
# Assuming you have already trained your model and have the vectorizer
import pandas as pd

# Example new text for prediction
new_texts = [
    "I am so happy with my new job!",
    "This is the worst experience I've ever had.",
    "I feel confused about the situation.",
    "I am satisfied with the service."
]

# Preprocess the new text
new_texts_processed = [text.lower().replace('[^\w\s]', '') for text in new_texts]

# Vectorize the new text using the same vectorizer
new_texts_tfidf = vectorizer.transform(new_texts_processed)

# Make predictions
predictions = model.predict(new_texts_tfidf)

# Display the predictions
for text, sentiment in zip(new_texts, predictions):
    print(f"Text: {text} | Predicted Sentiment: {sentiment}")

Text: I am so happy with my new job! | Predicted Sentiment: 1
Text: This is the worst experience I've ever had. | Predicted Sentiment: 1
Text: I feel confused about the situation. | Predicted Sentiment: 4
Text: I am satisfied with the service. | Predicted Sentiment: 1
