In [None]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
# Fetch the NLTK resources this notebook relies on (stop words, tokenizer
# models and the WordNet data used by the lemmatizer).
for _resource in ('stopwords', 'punkt', 'wordnet', 'punkt_tab'):
    nltk.download(_resource)

# Read the raw reviews; the file is decoded as latin-1.
df = pd.read_csv(
    "/content/customer_reviews_without_sentiment.csv",
    encoding='latin-1',
)
def assign(Rating):
    """Translate a numeric rating into a sentiment label.

    Args:
        Rating: The rating value of one review.

    Returns:
        "Negative" for a rating of 1 or 2, "Neutral" for 3, and "Positive"
        for every other value.
    """
    if Rating in (1, 2):
        return "Negative"
    if Rating == 3:
        return "Neutral"
    # Anything that is not 1, 2 or 3 falls through to the positive label.
    return "Positive"

# Derive a sentiment label from each rating, then keep only the review text
# and that label.
df['Sentimentlabel'] = df['Rating'].apply(assign)
df = df.loc[:, ["Review_Text", "Sentimentlabel"]]

# Shared preprocessing resources used by clean(): a lemmatizer and the
# English stop-word list stored as a set.
lemma = WordNetLemmatizer()
stop_words = {*stopwords.words('english')}
def clean(text):
    """Normalize one review into a space-separated string of lemmatized tokens.

    Processing steps, in order: remove http(s) URLs, replace every character
    that is not an ASCII letter with a space, lowercase, tokenize, drop tokens
    that are in ``stop_words`` or have two characters or fewer, and lemmatize
    the remaining tokens as verbs.

    Args:
        text: Raw review text. Any value that is not a ``str`` (for example
            ``None`` or the float NaN pandas uses for an empty CSV cell) is
            treated as an empty review.

    Returns:
        The cleaned text, or ``''`` when there is no usable input.
    """
    # A missing cell arrives as float NaN rather than None, and re.sub would
    # raise TypeError on it, so every non-string value is treated as empty.
    if not isinstance(text, str):
        return ''
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.lower()
    tokens = word_tokenize(text)
    # Stop words and very short tokens are filtered out before lemmatizing.
    kept = [
        lemma.lemmatize(word, pos='v')
        for word in tokens
        if word not in stop_words and len(word) > 2
    ]
    return ' '.join(kept)

# Clean every review, then drop the raw text so the frame holds only the
# label and the cleaned feedback.
df['customerfeedback'] = df['Review_Text'].apply(clean)
df = df.drop('Review_Text', axis=1)

feedback = df['customerfeedback']
y = df['Sentimentlabel']

# Split the text BEFORE vectorizing so the vocabulary is learned from the
# training rows only; fitting the vectorizer on the full corpus would leak
# information about the test rows into the features.
feedback_train, feedback_test, y_train, y_test = train_test_split(
    feedback, y, test_size=0.2, random_state=42
)

vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(feedback_train)
X_test = vectorizer.transform(feedback_test)
# Full-corpus matrix, kept under its original name for any later cell that
# still refers to X.
X = vectorizer.transform(feedback)

model = MultinomialNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# NOTE(review): the displayed frame contains repeated reviews, so identical
# texts can end up in both the training and the test split; the reported
# accuracy is therefore likely optimistic. Consider de-duplicating before
# splitting.
print("Model Accuracy:", accuracy_score(y_test, y_pred))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Model Accuracy: 1.0


In [None]:
# Show the processed frame (sentiment label and cleaned feedback) for inspection.
df

Unnamed: 0,Sentimentlabel,customerfeedback
0,Neutral,mix feelings service quality
1,Neutral,mix feelings service quality
2,Neutral,claim process could faster
3,Neutral,service okay great bad
4,Neutral,claim process could faster
5,Positive,easy policy purchase great benefit
6,Negative,premium increase without notice
7,Positive,easy policy purchase great benefit
8,Neutral,service okay great bad
9,Neutral,average experience policy purchase


In [None]:
def predict_sentiment(user_input):
    """Predict the sentiment label for one piece of free-text feedback.

    The text goes through ``clean``, is encoded with the already fitted
    ``vectorizer``, and is classified by the trained ``model``.

    Args:
        user_input: Raw feedback text.

    Returns:
        The first (and only) prediction returned by the model for this text.
    """
    features = vectorizer.transform([clean(user_input)])
    return model.predict(features)[0]
# Interactive demo: classify typed feedback until the user enters 'exit'.
while True:
    # Strip surrounding whitespace so an entry such as " exit " still ends the loop.
    user_input = input("Enter customer feedback (or type 'exit' to stop): ").strip()
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing was typed; ask again rather than classifying an empty string.
        continue
    print("Predicted Sentiment:", predict_sentiment(user_input))


Enter customer feedback (or type 'exit' to stop): easy policy purchase great benefit
Predicted Sentiment: Positive
Enter customer feedback (or type 'exit' to stop): exit
