In [1]:
#!pip install pandas 
#!pip install nltk
#!pip install tensorflow

In [2]:
import pandas as pd
import re

# Load the dataset (Replace with actual file path)
df = pd.read_csv("real.csv")  # Change to your file path

# Drop rows where 'Review' is missing
df = df.dropna(subset=['Review'])

# Convert text to lowercase and remove special characters
def clean_text(text):
    if isinstance(text, str):  # Ensure it's a string
        text = text.lower()  # Lowercasing
        text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
        return text.strip()
    return ""

# Apply cleaning
df['cleaned_review'] = df['Review'].apply(clean_text)

# Show first few rows to verify
print(df[['Review', 'cleaned_review']].head())


                                              Review  \
0        there is no way back, enjoy what you have .   
1   1st 95 went over 300k before being totalled b...   
2   Sold 86 Toyota Van 285K miles to be replaced ...   
3   I have owned lots of vans, and the Previa is ...   
4   My 1997 AWD Previa is the third one that I ha...   

                                      cleaned_review  
0           there is no way back enjoy what you have  
1  st  went over k before being totalled by a tru...  
2  sold  toyota van k miles to be replaced with  ...  
3  i have owned lots of vans and the previa is fa...  
4  my  awd previa is the third one that i have ow...  


In [3]:
# we have downloaded the ntlk lib from that now we are installing stopwords and lemmatizier
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [4]:
# we are intializing the lemmatizer and stopwords now 
stop_words=set(stopwords.words('english'))
stop_words
# initializing the lemmatizer 
lemmatizer=WordNetLemmatizer()

def preprocess_text(text):
    words=text.split()  # this splits the sentences into words
    words=[lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)

df['processed_review'] = df['cleaned_review'].apply(preprocess_text)
print(df[['cleaned_review', 'processed_review']].head())



                                      cleaned_review  \
0           there is no way back enjoy what you have   
1  st  went over k before being totalled by a tru...   
2  sold  toyota van k miles to be replaced with  ...   
3  i have owned lots of vans and the previa is fa...   
4  my  awd previa is the third one that i have ow...   

                                    processed_review  
0           there is no way back enjoy what you have  
1  st went over k before being totalled by a truc...  
2  sold toyota van k mile to be replaced with pre...  
3  i have owned lot of van and the previa is far ...  
4  my awd previa is the third one that i have own...  


In [5]:
from textblob import TextBlob

# Function to get sentiment category
def get_sentiment(text):
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity  # Ranges from -1 (negative) to +1 (positive)
    if polarity < -0.1:
        return "Frustrated"
    elif -0.1 <= polarity < 0.1:
        return "Neutral"
    else:
        return "Positive"

# Apply sentiment analysis
df['sentiment'] = df['processed_review'].apply(get_sentiment)

# Show the first few results
print(df[['processed_review', 'sentiment']].head())


                                    processed_review sentiment
0           there is no way back enjoy what you have  Positive
1  st went over k before being totalled by a truc...  Positive
2  sold toyota van k mile to be replaced with pre...  Positive
3  i have owned lot of van and the previa is far ...   Neutral
4  my awd previa is the third one that i have own...   Neutral


In [6]:
# Generate 200 additional frustrated reviews
extra_frustrated_reviews = [
    "The service was absolutely terrible! They kept delaying my request.",
    "I waited for hours, and still, no one attended to my issue. Completely useless!",
    "They promised a replacement but kept giving excuses. Never trusting them again!",
    "Why is everything so slow? I have been calling customer support for days!",
    "My issue was ignored! They don’t even bother to respond to emails.",
    "They wasted my time, kept transferring my call from one department to another!",
    "I was charged extra for a service I never received. Total scam!",
    "No proper communication! I was left clueless about my order status.",
    "Their app crashes all the time, and support is of no help!",
    "Their customer support is a joke. They just copy-paste responses.",
    "I had high expectations, but they completely let me down.",
    "They lost my documents and didn’t even apologize. Unacceptable!",
    "I feel cheated! They misled me about their policies.",
    "No updates, no follow-ups. They simply don’t care about customers.",
    "I had to call multiple times just to get a simple answer. Frustrating!",
    "This is the worst experience I’ve had with any company.",
    "Their attitude was rude and unprofessional. Never again!",
    "False promises! They said delivery would take 2 days, but it's been weeks!",
    "Terrible quality! Broke down within days, and now they refuse to help.",
    "They just ignore complaints. Absolutely worthless service.",
    "I regret ever choosing this brand. It’s a complete waste of money!",
    "Their manager spoke to me like I was an idiot. Disrespectful!",
    "The system overcharged me, and no one is willing to refund my money!",
    "The technician was clueless. Made things worse instead of fixing them!",
    "Took my money but didn't deliver what was promised. Fraudulent company!",
    "They claim 24/7 support, but I was on hold for 2 hours!",
    "Their chatbot is useless! It keeps giving the same responses.",
    "I've had enough of their constant excuses. Totally unreliable!",
    "They say they care about customers, but their actions prove otherwise!",
    "The website is full of bugs. I can't even place an order properly!",
    "I called multiple times, and each time they transferred me to someone else!",
    "They sent me the wrong item, and now they refuse to exchange it!",
    "I can't believe they treat customers like this. Absolutely disrespectful!",
    "They ruined my entire plan with their inefficiency!",
    "I had to fight to get a refund. They do everything to avoid paying!",
    "Delivery was delayed multiple times, and they kept lying about it!",
    "They never stick to their promises. Very unreliable company!",
    "Their staff is untrained. They don’t know what they are doing!",
    "I asked for help, but they kept pushing me to buy more stuff!",
    "The driver was rude and refused to deliver to my house!",
    "I paid extra for express delivery, and it still arrived late!",
    "They changed the price after I placed the order. Total scam!",
    "The return process is a nightmare. They make it impossible to get a refund!",
    "I was treated horribly. I felt completely disrespected!",
    "I don’t understand how they are still in business!",
    "I submitted multiple tickets, but no one even responds!",
    "I wasted so much time dealing with their incompetence!",
    "They keep making promises but never follow through!",
    "They canceled my order without telling me. Now I have to wait weeks for a refund!",
    "Their online system is a mess. It doesn't even work properly!",
    "They refuse to acknowledge their own mistakes!",
    "I had to explain my issue to 5 different people before getting a useless response!",
    "They overbooked and left me without a service I paid for!",
    "They don’t respect deadlines at all. Completely unprofessional!",
    "Every time I try to talk to someone, I get a different answer!",
    "I can’t believe I have to chase them just to get what I paid for!",
    "They keep saying 'sorry,' but nothing actually improves!",
    "They ignore customer feedback and keep making the same mistakes!",
    "I’ve been dealing with this issue for weeks, and still no solution!",
    "They keep lying to customers to cover up their incompetence!",
    "Their agents are rude and act like they don’t care at all!",
    "They wasted my entire day with their delays!",
    "Their warranty policy is just a scam to avoid responsibility!",
    "Their refund policy is designed to trap customers into not getting their money back!",
    "They closed my complaint without even resolving it!",
    "No one takes ownership of customer issues here!",
    "The product description was misleading. I got something completely different!",
    "They deleted my negative review instead of addressing my concerns!",
    "The live chat agent just disconnected when I asked for a refund!",
    "This company has no respect for its customers at all!",
    "They refuse to compensate me for their mistake!",
    "They keep blaming external factors instead of fixing their poor service!",
    "They trick people into subscribing and then make it impossible to cancel!",
    "Worst experience ever! I wish I had read the reviews before ordering!",
    "They overpromised and underdelivered. I feel completely misled!",
    "Their system charged me twice, and they won’t acknowledge it!",
    "They are experts at giving excuses, not solving problems!",
    "No transparency in pricing! They add hidden fees at checkout!",
    "Their shipping policy is a joke. It takes forever to receive anything!",
    "They act as if they are doing customers a favor instead of providing a service!",
    "I will never recommend this company to anyone!",
    "They have no regard for customer satisfaction whatsoever!",
    "They sent me a defective product and now refuse to replace it!",
    "They force you to go through ridiculous procedures just to get basic support!",
    "Every time I call, I have to start the process from scratch!",
    "Their CEO should be ashamed of how this company operates!",
    "They take your money and then stop responding to complaints!",
    "Their driver refused to deliver because it was 'too far'! What a joke!",
    "I had to escalate the issue to a legal team to get my refund!",
    "The worst customer experience I have ever had in my life!",
    "They keep saying 'we’ll look into it' but never actually do anything!",
]

# Assign "Frustrated" sentiment to these reviews
extra_sentiments = ["Frustrated"] * len(extra_frustrated_reviews)

# Convert to DataFrame and append
extra_data = pd.DataFrame({"processed_review": extra_frustrated_reviews, "sentiment": extra_sentiments})
df = pd.concat([df, extra_data], ignore_index=True)

print(f"✅ Added {len(extra_frustrated_reviews)} additional frustrated reviews.")


✅ Added 91 additional frustrated reviews.


In [7]:
from textblob import TextBlob

review = "The dealer was abusive, the car broke after delivery, it was very bad, didn't expect this from Toyota!"
sentiment = TextBlob(review).sentiment.polarity
print("Sentiment Score:", sentiment)


Sentiment Score: -1.0


In [8]:
df.groupby("sentiment").count()

Unnamed: 0_level_0,Column1,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,cleaned_review,processed_review
sentiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Frustrated,832,832,832,832,831,832,789,832,923
Neutral,4601,4601,4601,4601,4600,4601,4326,4601,4601
Positive,13314,13314,13314,13314,13314,13314,12842,13314,13314


In [9]:
df["sentiment"].value_counts()


sentiment
Positive      13314
Neutral        4601
Frustrated      923
Name: count, dtype: int64

In [10]:
df.groupby("sentiment")['processed_review'].apply(list)


sentiment
Frustrated    [cant go wrong with this car they thought of e...
Neutral       [i have owned lot of van and the previa is far...
Positive      [there is no way back enjoy what you have, st ...
Name: processed_review, dtype: object

# categorizing

### **Step 3: Categorizing Reviews into Service, Parts, and Others**  
Now, we will classify reviews into three categories:  
1️⃣ **Service-related** (e.g., repair, maintenance)  
2️⃣ **Parts-related** (e.g., engine, battery)  
3️⃣ **Others** (everything else)  

---

### **📝 Steps to Implement**  
✅ Define **keyword lists** for Service & Parts.  
✅ Check if a review contains **any keyword** from these lists.  
✅ Assign a category based on **matched keywords**.  
✅ Store the category in a new column called `"category"`.  

---



### **📌 What This Does?**
- Checks if **service-related** words exist → Assigns **"Service"**  
- Checks if **parts-related** words exist → Assigns **"Parts"**  
- If neither → Assigns **"Others"**  

---

### **🔍 Next Step: Find New Frequent Words in "Others"**
Once we categorize the existing reviews, we can check **what words are commonly appearing in "Others"** (in case there are new complaints that need a new category).



In [11]:
# Define Keywords for Classification
service_keywords = ["repair", "maintenance", "delay", "service", "issue", "problem", "technician"]
parts_keywords = ["engine", "battery", "brake", "wheels", "tyre", "oil", "transmission"]

# Function to Assign Categories
def categorize_review(text):
    text = text.lower()  # Convert to lowercase
    if any(word in text for word in service_keywords):
        return "Service"
    elif any(word in text for word in parts_keywords):
        return "Parts"
    else:
        return "Others"

# Apply the Function to Categorize Reviews
df['category'] = df['processed_review'].apply(categorize_review)

# ✅ Check Category Distribution
df['category'].value_counts()

category
Others     9646
Service    6512
Parts      2680
Name: count, dtype: int64

### **Step 4: Identifying New Frequent Words in "Others" Category**  
Since some reviews are classified as **"Others"**, we should check for **frequent words** in them. This will help us identify:  
✅ **New complaint trends** (e.g., a recurring issue with a new car part).  
✅ **Missing keywords** that should be added to the "Service" or "Parts" category.  
✅ **Potential new categories** if a large number of reviews mention the same issue.  

---

### **📌 What This Does?**
1️⃣ Filters out only reviews in the `"Others"` category.  
2️⃣ Splits reviews into individual words.  
3️⃣ Counts the most **frequent** words.  
4️⃣ Shows the **top 20 words** appearing in `"Others"` reviews.  

---

### **🔍 Next Step: Analyze Results**
- If certain words appear **frequently**, we can **add them** to the `service_keywords` or `parts_keywords` list.  
- If a **new issue** emerges, we might need a **new category**.  

Run this and **share the top words** so we can refine the classification! 🚀

In [12]:
from collections import Counter

# Filter "Others" category reviews
others_reviews = df[df['category'] == "Others"]['processed_review']

# Tokenize words
all_words = " ".join(others_reviews).split()

# Get the most common words
word_counts = Counter(all_words)
common_words = word_counts.most_common(70)  # Get top 20 words

# Convert to DataFrame for better visualization
common_words_df = pd.DataFrame(common_words, columns=["Word", "Count"])

# Display the top words
common_words_df


Unnamed: 0,Word,Count
0,the,33728
1,a,21012
2,and,20770
3,i,19649
4,it,16053
...,...,...
65,no,1440
66,or,1436
67,am,1413
68,well,1397


In [13]:
'''from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Encode Category Labels (Service = 0, Parts = 1, Others = 2)
label_encoder = LabelEncoder()
df["category_encoded"] = label_encoder.fit_transform(df["category"])

# Train-Test Split (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(
    df["processed_review"], df["category_encoded"], test_size=0.2, random_state=42, stratify=df["category_encoded"]
)

# TF-IDF Vectorization
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Initialize Models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
svm_model = SVC(kernel='linear', probability=True, random_state=42)
nb_model = MultinomialNB()

# Train & Evaluate Models
models = {"Random Forest": rf_model, "SVM": svm_model, "Naïve Bayes": nb_model}
for name, model in models.items():
    print(f"\n🔹 Training {name}...")
    model.fit(X_train_tfidf, y_train)
    y_pred = model.predict(X_test_tfidf)
    
    print(f"\n✅ Results for {name}:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

'''

'from sklearn.model_selection import train_test_split\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn.metrics import classification_report, accuracy_score\nfrom sklearn.preprocessing import LabelEncoder\n\n# Encode Category Labels (Service = 0, Parts = 1, Others = 2)\nlabel_encoder = LabelEncoder()\ndf["category_encoded"] = label_encoder.fit_transform(df["category"])\n\n# Train-Test Split (80% Train, 20% Test)\nX_train, X_test, y_train, y_test = train_test_split(\n    df["processed_review"], df["category_encoded"], test_size=0.2, random_state=42, stratify=df["category_encoded"]\n)\n\n# TF-IDF Vectorization\ntfidf = TfidfVectorizer(max_features=5000, stop_words=\'english\')\nX_train_tfidf = tfidf.fit_transform(X_train)\nX_test_tfidf = tfidf.transform(X_test)\n\n# Initialize Models\nrf_model = RandomForestClassifier(n_estimators=100, 

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Encode Sentiment Labels (Negative = 0, Neutral = 1, Positive = 2)
label_encoder = LabelEncoder()
df["sentiment_encoded"] = label_encoder.fit_transform(df["sentiment"])

# Train-Test Split (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(
    df["processed_review"], df["sentiment_encoded"], test_size=0.2, random_state=42, stratify=df["sentiment_encoded"]
)

# TF-IDF Vectorization
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Train SVM for Sentiment Classification
svm_model = SVC(kernel="linear", probability=True, random_state=42)
svm_model.fit(X_train_tfidf, y_train)
y_pred = svm_model.predict(X_test_tfidf)

# Model Evaluation
print(f"\n✅ SVM Sentiment Classification Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


In [None]:
def predict_sentiment(review_text):
    review_text_processed = preprocess_text(review_text)  # Apply preprocessing
    review_tfidf = tfidf.transform([review_text_processed])  # Convert to TF-IDF
    predicted_label = svm_model.predict(review_tfidf)  # Predict sentiment
    sentiment = label_encoder.inverse_transform(predicted_label)  # Decode label
    return sentiment[0]

# Example
new_review = " i had the worst exprience with the dealer and he used abusive language"
print("Predicted Sentiment:", predict_sentiment(new_review))


Predicted Sentiment: Neutral


# to see the negative words it trained on 

In [None]:
import numpy as np

# Get feature names from TF-IDF
feature_names = tfidf.get_feature_names_out()

# Convert sparse matrix to dense array
svm_coefficients = svm_model.coef_.toarray()

# Extract top negative words
neg_class_idx = 0  # Assuming 0 = Negative
top_negative_words = [feature_names[i] for i in svm_coefficients[neg_class_idx].argsort()[:10]]  # Incorrect
top_corrected_negative_words = [feature_names[i] for i in svm_coefficients[neg_class_idx].argsort()[-10:]]  # Corrected

print("❌ Incorrect Negative Words:", top_negative_words)
print("✅ Correct Negative Words:", top_corrected_negative_words)


❌ Incorrect Negative Words: ['great', 'love', 'nice', 'fine', 'best', 'new', 'better', 'good', 'far', 'light']
✅ Correct Negative Words: ['wrong', 'awful', 'boring', 'annoying', 'poor', 'terrible', 'horrible', 'bad', 'worst', 'disappointed']


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Set parameters
MAX_VOCAB_SIZE = 10000  # Limit vocabulary size
MAX_SEQUENCE_LENGTH = 100  # Max words per review
EMBEDDING_DIM = 100  # Embedding vector size

# Tokenization
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE, oov_token="<OOV>")
tokenizer.fit_on_texts(df["cleaned_review"])

# Convert text to sequences
X = tokenizer.texts_to_sequences(df["cleaned_review"])

# Pad sequences to ensure uniform length
X_padded = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')

# Encode sentiment labels
label_encoder = LabelEncoder()
df["sentiment_encoded"] = label_encoder.fit_transform(df["sentiment"])
y = np.array(df["sentiment_encoded"])

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.1, random_state=42, stratify=y)

print("Data Preparation Complete! Ready for Next Step.")


AttributeError: 'float' object has no attribute 'lower'

**Step 2: Building the BiLSTM Model.**  

### **Step 2: Define the BiLSTM Model**
Here’s what we’ll do:
- Use an **Embedding Layer** to convert words into dense vectors.
- Add a **Bidirectional LSTM Layer** to capture dependencies from both past and future words.
- Use a **Dense Layer** with `softmax` activation for classification.

Run the following code:  


### **What’s Next?**
✅ If this runs fine, we’ll move to **Step 3: Training the Model.**  
Let me know if there are any issues! 🚀

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout

# Define the BiLSTM model
model = Sequential([
    Embedding(input_dim=MAX_VOCAB_SIZE, output_dim=EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH),
    Bidirectional(LSTM(64, return_sequences=True)),  # BiLSTM Layer
    Dropout(0.3),  # Dropout for regularization
    Bidirectional(LSTM(32)),  # Another BiLSTM Layer
    Dense(32, activation='relu'),  # Fully connected layer
    Dropout(0.2),
    Dense(len(label_encoder.classes_), activation='softmax')  # Output layer
])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary
model.summary()


In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer

# Define the tokenizer with a vocabulary size
vocab_size = 5000
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")

# ✅ Convert all values to strings and handle NaNs
X_train = X_train.astype(str).tolist() if isinstance(X_train, pd.Series) else [str(x) for x in X_train]
X_test = X_test.astype(str).tolist() if isinstance(X_test, pd.Series) else [str(x) for x in X_test]

# ✅ Fit tokenizer on training data
tokenizer.fit_on_texts(X_train)

# ✅ Convert text into sequences
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

print("✅ Tokenization complete.")


✅ Tokenization complete.


In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define maximum sequence length
max_length = 100  

# Pad the sequences
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_length, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_length, padding='post', truncating='post')

print("✅ Padding complete. Shapes:", X_train_padded.shape, X_test_padded.shape)


✅ Padding complete. Shapes: (16872, 100) (1875, 100)


# Training the Model 



In [None]:
# Train the model
history = model.fit(X_train_padded, y_train, 
                    validation_data=(X_test_padded, y_test), 
                    epochs=5, 
                    batch_size=32)


Epoch 1/5
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 96ms/step - accuracy: 0.7072 - loss: 0.7376 - val_accuracy: 0.7221 - val_loss: 0.6121
Epoch 2/5
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 70ms/step - accuracy: 0.7730 - loss: 0.5232 - val_accuracy: 0.8123 - val_loss: 0.4720
Epoch 3/5
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 69ms/step - accuracy: 0.8310 - loss: 0.3930 - val_accuracy: 0.8352 - val_loss: 0.4157
Epoch 4/5
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 67ms/step - accuracy: 0.8837 - loss: 0.2888 - val_accuracy: 0.8373 - val_loss: 0.4055
Epoch 5/5
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 67ms/step - accuracy: 0.9092 - loss: 0.2278 - val_accuracy: 0.8128 - val_loss: 0.4678


In [None]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test_padded, y_test)

print(f"✅ Test Accuracy: {test_acc:.4f}")
print(f"✅ Test Loss: {test_loss:.4f}")


[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - accuracy: 0.8060 - loss: 0.4778
✅ Test Accuracy: 0.8128
✅ Test Loss: 0.4678


In [None]:
# Predict on test data
y_pred_probs = model.predict(X_test_padded)  # Probabilities
y_pred_classes = np.argmax(y_pred_probs, axis=1)  # Get class labels

# Convert encoded labels back to original sentiment categories
y_pred_labels = label_encoder.inverse_transform(y_pred_classes)
y_test_labels = label_encoder.inverse_transform(y_test)


# Compare actual vs. predicted sentiments
sample_df = pd.DataFrame({'Actual': y_test_labels, 'Predicted': y_pred_labels})
print(sample_df.sample(10))  # Show random 10 samples


[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
        Actual   Predicted
691    Neutral    Positive
812   Positive    Positive
125   Positive    Positive
1078   Neutral     Neutral
333    Neutral     Neutral
1758   Neutral  Frustrated
1682  Positive    Positive
1053   Neutral     Neutral
493   Positive    Positive
381   Positive     Neutral


In [None]:
df['Review'][381]

' Being in the tire business and an auto enthusiast, I was skeptical at first as I had looked at a Lexus product also. This car has a smooth ride, amazing back seat room, a powerful engine and just delivered an amazing 32.1 MPG on a recent interstate drive.  This auto is a "well kept" secret that needs to be unleashed on the automobile public who enjoy a smooth ride, volumes of interior room, etc. and is competitive with Lexus in value and features!'