In [5]:
import pandas as pd
# Read the reviews CSV; the path is relative to the notebook's working directory.
df=pd.read_csv("insurance_reviews.csv")
# Bare last expression so Jupyter renders the frame as this cell's output.
df

Unnamed: 0,Rating,Review_Text
0,4,Transparent policies and fair terms
1,5,Hassle-free renewal process and great guidance
2,2,Hard to reach support team and poor response
3,3,Expected a bit more from the customer service
4,5,Got extra coverage at a discounted price
...,...,...
95,4,Very satisfied with the overall experience
96,4,"Easy to file a claim, very smooth process"
97,3,Expected a bit more from the customer service
98,3,Average experience nothing special


In [3]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
# Fetch the NLTK data the text-cleaning code relies on:
#   - 'stopwords' backs stopwords.words('english')
#   - 'wordnet' backs WordNetLemmatizer
#   - word_tokenize needs the Punkt tokenizer data; both 'punkt' and
#     'punkt_tab' are fetched (presumably to cover different NLTK
#     versions — confirm which one the installed release actually loads).
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
# Last expression in the cell, so its return value is what the cell displays.
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\SukilPriya\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\SukilPriya\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\SukilPriya\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\SukilPriya\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [7]:
# Load the dataset from disk again so this cell starts from the raw file,
# independent of whatever `df` currently holds in the kernel.
df=pd.read_csv("insurance_reviews.csv")

# Assign sentiment labels based on ratings
def assign(Rating):
    """Map a 1-5 star rating to a sentiment label.

    Parameters
    ----------
    Rating : int or float
        Star rating; must lie in the closed range [1, 5].

    Returns
    -------
    str
        "Negative" for ratings below 3, "Neutral" for exactly 3 and
        "Positive" for ratings above 3.

    Raises
    ------
    ValueError
        If the rating is missing (None/NaN), not comparable to a number, or
        outside 1-5. A catch-all ``else`` branch would silently label such
        values "Positive", which corrupts the training labels.
    """
    try:
        in_range = 1 <= Rating <= 5
    except TypeError:
        # None, strings and other non-comparable values are not ratings.
        in_range = False
    # NaN compares False against every bound, so it is rejected here as well.
    if not in_range:
        raise ValueError(f"Rating must be a number between 1 and 5, got {Rating!r}")

    if Rating < 3:
        return "Negative"
    if Rating == 3:
        return "Neutral"
    return "Positive"

# Derive the sentiment label from the star rating, then keep only the review
# text and its label.
df = (
    df
    .assign(Sentimentlabel=df['Rating'].apply(assign))
    [["Review_Text", "Sentimentlabel"]]
)

In [9]:
# Display the frame after labelling (Review_Text and Sentimentlabel columns).
df

Unnamed: 0,Review_Text,Sentimentlabel
0,Transparent policies and fair terms,Positive
1,Hassle-free renewal process and great guidance,Positive
2,Hard to reach support team and poor response,Negative
3,Expected a bit more from the customer service,Neutral
4,Got extra coverage at a discounted price,Positive
...,...,...
95,Very satisfied with the overall experience,Positive
96,"Easy to file a claim, very smooth process",Positive
97,Expected a bit more from the customer service,Neutral
98,Average experience nothing special,Neutral


In [11]:
# Initialize stopwords and lemmatizer
# Both are module-level so clean() can reuse them for every review.
# The stop words are held in a set because clean() tests each token for
# membership in it.
stop_words = set(stopwords.words('english'))
lemma = WordNetLemmatizer()

# Define text cleaning function
def clean(text):
    """Normalise one review into a space-separated string of lemmatised tokens.

    Steps: strip http(s) URLs, replace every non-letter with a space,
    lower-case, tokenize, drop English stop words and tokens of one or two
    characters, then lemmatise each remaining token as a verb.

    Parameters
    ----------
    text : str or None
        Raw review text. Anything that is not a ``str`` (``None``, or the
        float NaN pandas uses for an empty CSV cell) is treated as missing.

    Returns
    -------
    str
        Cleaned tokens joined by single spaces; ``''`` for missing input.
    """
    # pandas represents an empty CSV cell as float NaN, which is not None and
    # would make re.sub raise TypeError, so guard on the type instead.
    if not isinstance(text, str):
        return ''
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    tokens = word_tokenize(text.lower())
    # The stop-word and length filters run on the surface token, before
    # lemmatisation; pos='v' lemmatises every kept token as a verb.
    kept = [
        lemma.lemmatize(word, pos='v')
        for word in tokens
        if word not in stop_words and len(word) > 2
    ]
    return ' '.join(kept)

# Swap the raw review text for its cleaned form: add the cleaned column, then
# drop the original one.
df = (
    df
    .assign(customerfeedback=df['Review_Text'].apply(clean))
    .drop(columns='Review_Text')
)


In [13]:
# Display the frame after cleaning (Sentimentlabel and customerfeedback columns).
df

Unnamed: 0,Sentimentlabel,customerfeedback
0,Positive,transparent policies fair term
1,Positive,hassle free renewal process great guidance
2,Negative,hard reach support team poor response
3,Neutral,expect bite customer service
4,Positive,get extra coverage discount price
...,...,...
95,Positive,satisfy overall experience
96,Positive,easy file claim smooth process
97,Neutral,expect bite customer service
98,Neutral,average experience nothing special


In [15]:
# Split the cleaned text *before* vectorizing so the vocabulary is learned
# from the training reviews only; fitting CountVectorizer on the full corpus
# lets test-set words shape the feature space the model is evaluated on.
y = df['Sentimentlabel']
text_train, text_test, y_train, y_test = train_test_split(
    df['customerfeedback'], y, test_size=0.2, random_state=42
)

# Convert text to count vectors (vocabulary fitted on the training split only)
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus matrix encoded with the training vocabulary, kept under its
# original name for any code that still expects `X`.
X = vectorizer.transform(df['customerfeedback'])

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Accuracy
# NOTE(review): the displayed data contains repeated review texts (e.g. rows 3
# and 97), so identical reviews can fall on both sides of the split and
# inflate this score — consider de-duplicating before splitting.
print("✅ Model Accuracy:", accuracy_score(y_test, y_pred))


✅ Model Accuracy: 1.0


In [19]:
# Function to predict sentiment from user input
def predict_sentiment(user_input):
    """Return the predicted sentiment label for one piece of feedback.

    The text is passed through ``clean``, encoded with the already-fitted
    ``vectorizer`` and classified by ``model``.
    """
    features = vectorizer.transform([clean(user_input)])
    # A single-element list was encoded, so take the first predicted label.
    return model.predict(features)[0]

# Loop for user input
# input() waits for the user, so this cell needs an interactive session.
while True:
    # Strip surrounding whitespace so "exit " or " exit" still ends the loop.
    user_input = input("💬 Enter customer feedback (or type 'exit' to stop): ").strip()
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing to classify; prompt again instead of scoring an empty string.
        continue
    print("🔍 Predicted Sentiment:", predict_sentiment(user_input))


💬 Enter customer feedback (or type 'exit' to stop):  Received reimbursement quickly after hospitalization


🔍 Predicted Sentiment: Positive


💬 Enter customer feedback (or type 'exit' to stop):  Renewal reminders were sent on time


🔍 Predicted Sentiment: Neutral


💬 Enter customer feedback (or type 'exit' to stop):  Difficult to cancel the policy


🔍 Predicted Sentiment: Negative


💬 Enter customer feedback (or type 'exit' to stop):  exit


In [3]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load summarization model and tokenizer
# Both are loaded from the same checkpoint name, so changing
# summarizer_model_name switches the tokenizer and the model together.
summarizer_model_name = "t5-small"
summarizer_tokenizer = AutoTokenizer.from_pretrained(summarizer_model_name)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model_name)

# Long text to summarize
# The triple-quoted literal keeps its leading and trailing newline; those are
# part of the string that is later prefixed and handed to the tokenizer.
long_text = """
Insurance provides financial protection against unforeseen events by transferring risk from an individual or business to an insurance company. 
Whether it's health, auto, home, or life insurance, the primary goal is to reduce the financial burden in case of accidents, illness, natural disasters, or death. 
Policyholders pay regular premiums in exchange for coverage, and when a covered event occurs, the insurer compensates for the loss as per the terms of the policy. 
By offering peace of mind and stability, insurance plays a vital role in personal financial planning and risk management for businesses.
"""

# Tokenize and summarize text
# The input is prefixed with "summarize: " and truncated to at most 512 tokens.
inputs = summarizer_tokenizer("summarize: " + long_text, return_tensors="pt", max_length=512, truncation=True)
# Pass the tokenizer's attention mask explicitly instead of leaving generate()
# to infer it from the ids; this stays correct if the input is ever padded.
summary_ids = summarizer_model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=50,
    min_length=10,
    length_penalty=2.0,
    num_beams=4,
)
# generate() returns a batch; decode its first (only) sequence.
summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print("\nText Summarization:")
print(summary)


Text Summarization:
insurance provides financial protection against unforeseen events. policyholders pay regular premiums in exchange for coverage. insurance plays a vital role in personal financial planning and risk management.
