In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.corpus import stopwords
import string

# Make sure the NLTK stopword corpus is available before it is used below
nltk.download('stopwords')

# Read the conversation dataset from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="conversational_ai_sentiment_analysis.csv")

# Sanity check: show the leading five rows of the loaded data
print(df.head(n=5))

# Text preprocessing helpers

# Translation table that deletes every ASCII punctuation character. Built once
# at definition time instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily-filled cache of the normalized default (English) stop-word set, so the
# corpus is read once rather than once per processed message.
_DEFAULT_STOP_WORDS = None


def _strip_punctuation(text):
    """Return ``text`` with all ``string.punctuation`` characters removed."""
    return text.translate(_PUNCTUATION_TABLE)


def _normalize_stop_words(words):
    """Return the stop words in lowercase, both raw and punctuation-stripped.

    Tokens are compared against the stop words *after* punctuation has been
    removed from the message, so a stop word that contains punctuation (for
    example a contraction like "don't") must also be present in its stripped
    form ("dont") to ever match.
    """
    normalized = set()
    for word in words:
        lowered = word.lower()
        normalized.add(lowered)
        normalized.add(_strip_punctuation(lowered))
    return frozenset(normalized)


def _default_stop_words():
    """Return the cached, normalized NLTK English stop-word set."""
    global _DEFAULT_STOP_WORDS
    if _DEFAULT_STOP_WORDS is None:
        _DEFAULT_STOP_WORDS = _normalize_stop_words(stopwords.words('english'))
    return _DEFAULT_STOP_WORDS


def preprocess_text(text, stop_words=None):
    """Lowercase a message, strip punctuation and drop stop words.

    Parameters
    ----------
    text : str or None
        The raw message. ``None`` and float NaN (how pandas represents an
        empty cell) are treated as an empty message; any other non-string
        value is converted with ``str()``.
    stop_words : iterable of str, optional
        Words to remove. When omitted, the NLTK English stop-word list is
        used (loaded once and cached).

    Returns
    -------
    str
        The remaining tokens joined by single spaces; ``""`` when nothing
        is left.
    """
    # Missing values would otherwise crash on ``.lower()``
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    # ``is None`` (not truthiness) so an explicitly empty collection means
    # "remove nothing" rather than "use the default list"
    if stop_words is None:
        stop_words = _default_stop_words()
    else:
        stop_words = _normalize_stop_words(stop_words)

    # Convert to lowercase and remove punctuation
    text = _strip_punctuation(text.lower())
    # Remove stopwords
    return " ".join(word for word in text.split() if word not in stop_words)

# Apply the preprocessing to user messages.
# Missing messages are replaced by empty strings and every value is coerced to
# str first, so a NaN or numeric cell cannot break the string-based cleaning.
df['cleaned_message'] = df['User_Message'].fillna("").astype(str).apply(preprocess_text)

# Features (cleaned text) and target (sentiment label)
X = df['cleaned_message']
y = df['Sentiment_Label']

# Split the data into training and test sets.
# Stratify on the label so both splits keep the same class proportions.
# Stratification requires fully-labelled data with at least two rows per class,
# so fall back to a plain random split when that does not hold.
can_stratify = y.notna().all() and y.value_counts().min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)

# Convert text to numerical data using TF-IDF Vectorizer.
# The vocabulary is fitted on the training split only and then reused for the
# test split, so no test-set information reaches the model.
vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test_tfidf)

# Print classification report and accuracy score.
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting an undefined-metric warning.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


  Conversation_ID                   Timestamp  \
0          conv_1  2024-09-18 03:14:11.237019   
1          conv_2  2024-09-14 19:52:11.237040   
2          conv_3  2024-09-13 14:11:11.237044   
3          conv_4  2024-09-16 20:00:11.237047   
4          conv_5  2024-09-14 16:33:11.237050   

                           User_Message  \
0        Can you help me with my order?   
1                    I'm so frustrated.   
2  I need to speak to a representative.   
3        Can you help me with my order?   
4                    This app is great!   

                             AI_Response Sentiment_Label  Confidence_Score  
0  Sure, let me check your order status.        Positive              0.84  
1               We’re glad to hear that!        Negative              0.77  
2  Sure, let me check your order status.        Negative              0.86  
3            We apologize for the delay.        Positive              0.90  
4                        You're welcome!         Neutral     

In [8]:
# Clean a single message and classify it with an already-fitted model
def preprocess_and_predict(message, vectorizer, model):
    """Return the predicted label for one raw message.

    The message is cleaned with ``preprocess_text``, converted to a
    one-row feature matrix via ``vectorizer.transform`` and passed to
    ``model.predict``; the first (only) element of the result is returned.
    """
    # A one-element list is used because the vectorizer expects an iterable
    # of documents, not a bare string.
    features = vectorizer.transform([preprocess_text(message)])
    return model.predict(features)[0]

# Demo: classify a hand-written message with the trained model
custom_message = "This app is great."

# Run the message through cleaning, vectorization and prediction
predicted_sentiment = preprocess_and_predict(
    custom_message,
    vectorizer=vectorizer,
    model=model,
)

# Report the predicted label
print(
    f"The predicted sentiment for the message '{custom_message}' is: {predicted_sentiment}"
)


The predicted sentiment for the message 'This app is great.' is: Positive


In [9]:
# Demo: classify a second hand-written message with the trained model
custom_message = "I'm having a terrible experience."

# Run the message through cleaning, vectorization and prediction
predicted_sentiment = preprocess_and_predict(
    custom_message,
    vectorizer=vectorizer,
    model=model,
)

# Report the predicted label
print(
    f"The predicted sentiment for the message '{custom_message}' is: {predicted_sentiment}"
)

The predicted sentiment for the message 'I'm having a terrible experience.' is: Neutral
