In [12]:
!pip install pandas scikit-learn





[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
import pandas as pd
def load_dataset(file_path):
    """Read a CSV file into a DataFrame and print a preview of its first rows.

    Args:
        file_path: Path (or file-like object) handed straight to ``pd.read_csv``.

    Returns:
        The DataFrame parsed from the CSV.
    """
    dataset = pd.read_csv(file_path)
    preview = dataset.head()
    print("Sample Data:\n", preview)
    return dataset
# The path is relative, so reviews.csv must sit in the kernel's working directory.
df = load_dataset(file_path="reviews.csv")


Sample Data:
                                          review_text          category
0  The delivery was late and I received the wrong...    Delivery Issue
1  The product quality is very poor and broke aft...   Product Quality
2            Customer support was unhelpful and rude  Customer Service
3               Great product, exactly as described!   Product Quality
4       Delivery was quick but packaging was damaged    Delivery Issue


In [14]:
import string
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

def preprocess_text(text):
    """Normalise a review for vectorisation.

    The text is lower-cased, stripped of the ASCII punctuation characters in
    ``string.punctuation``, split on whitespace, and filtered against
    scikit-learn's ``ENGLISH_STOP_WORDS``.

    Args:
        text: The raw review. ``None`` and NaN (what pandas yields for an
            empty CSV cell) are treated as an empty review; any other
            non-string value is converted with ``str()`` first.

    Returns:
        The remaining words joined by single spaces ('' if nothing is left).
    """
    # Missing values would otherwise crash on .lower(); NaN is the only float
    # that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    # Remove punctuation in a single pass
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Remove stopwords
    words = text.split()
    filtered_words = [word for word in words if word not in ENGLISH_STOP_WORDS]
    return ' '.join(filtered_words)


In [15]:

# Store the normalised text in its own column so the raw review stays available.
df['cleaned_text'] = df['review_text'].apply(preprocess_text)

# Show raw and cleaned text side by side to sanity-check the preprocessing.
print(df.loc[:, ['review_text', 'cleaned_text']])

                                         review_text  \
0  The delivery was late and I received the wrong...   
1  The product quality is very poor and broke aft...   
2            Customer support was unhelpful and rude   
3               Great product, exactly as described!   
4       Delivery was quick but packaging was damaged   
5   I had to wait a long time for support to respond   
6  This is my second time buying, and it's excellent   
7                    Package was lost during transit   
8         The support team solved my problem quickly   
9  Product works fine but color is slightly diffe...   

                                  cleaned_text  
0            delivery late received wrong item  
1               product quality poor broke use  
2              customer support unhelpful rude  
3              great product exactly described  
4             delivery quick packaging damaged  
5               wait long time support respond  
6                 second time buying exc

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

def train_model(df, test_size=0.2, random_state=42):
    """Train a TF-IDF + Multinomial Naive Bayes classifier and print its accuracy.

    The rows are split into train and test sets *before* the vectorizer is
    fitted, so the vocabulary and IDF weights are learned from the training
    rows only and the held-out rows do not leak into the features.

    Args:
        df: DataFrame with a 'cleaned_text' column (model input) and a
            'category' column (target label).
        test_size: Passed to ``train_test_split``; share of rows held out
            for evaluation.
        random_state: Passed to ``train_test_split`` to make the split
            reproducible.

    Returns:
        A ``(model, vectorizer)`` tuple: the fitted ``MultinomialNB`` and the
        ``TfidfVectorizer`` fitted on the training texts.
    """
    texts = df['cleaned_text']
    labels = df['category']
    # Train-test split on the raw text, before any statistics are learned
    text_train, text_test, y_train, y_test = train_test_split(
        texts, labels, test_size=test_size, random_state=random_state
    )
    # Vectorization: fit on the training texts only, then reuse for the test texts
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(text_train)
    X_test = vectorizer.transform(text_test)
    # Model training
    model = MultinomialNB()
    model.fit(X_train, y_train)
    # Prediction & Evaluation
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {acc:.2f}")
    return model, vectorizer


In [17]:

# Keep both fitted objects: new text must go through the same vectorizer as the training data.
model, vectorizer = train_model(df=df)


Model Accuracy: 0.50


In [18]:
def predict_category(model, vectorizer, new_review):
    """Predict the category label for a single raw review.

    Args:
        model: Fitted classifier exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.
        new_review: Raw review text; it is passed through ``preprocess_text``
            before being vectorised.

    Returns:
        The first (and only) element of the model's prediction.
    """
    # The vectorizer takes an iterable of documents, so wrap the single review in a list.
    features = vectorizer.transform([preprocess_text(new_review)])
    return model.predict(features)[0]


In [19]:
# Smoke-test the fitted model on a review that is not in the dataset.
new_review = "I waited two weeks but my order never arrived."
predicted_category = predict_category(
    model=model,
    vectorizer=vectorizer,
    new_review=new_review,
)
print("Predicted Category:", predicted_category)


Predicted Category: Delivery Issue
