In [None]:
import pandas as pd                                                      #handles csv files
import numpy as np                                                       #Provides high-performance numerical functions.
import string                                                            #Gives access to punctuation and character sets.
from textblob import TextBlob                                            #Analyze sentiment polarity of review text
from sklearn.feature_extraction.text import TfidfVectorizer              #Converts text into numerical vectors 
from sklearn.linear_model import LogisticRegression                      #Classifies reviews as positive or negative
from sklearn.model_selection import train_test_split                     # Splits data into training and test sets. 
from sklearn.metrics import classification_report                        #Shows precision, recall, f1-score, and accuracy 

# Interactive sentiment analysis of product reviews.
#
# Flow: load the review CSV, let the user pick a Device_Type and a Brand,
# label each review with TextBlob polarity, fit a TF-IDF + logistic
# regression model on those labels, then print a classification report,
# the most influential keywords and an overall verdict.

# Translation table that deletes every ASCII punctuation character;
# built once instead of re-scanning string.punctuation per character.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Return *text* lower-cased with ASCII punctuation removed.

    Non-string values (for example the float NaN pandas stores for an
    empty CSV cell) are treated as an empty review instead of raising.
    """
    if not isinstance(text, str):
        return ""
    return text.lower().translate(_PUNCT_TABLE)


def get_sentiment(text):
    """Return 1 when TextBlob polarity of *text* is positive, else 0.

    Neutral reviews (polarity exactly 0) are labelled 0.
    """
    polarity = TextBlob(text).sentiment.polarity
    return 1 if polarity > 0 else 0


def sentiment_verdict(positive_ratio):
    """Map a positive-review percentage (0-100) to a verdict label.

    NOTE(review): every ratio of 50% or less is still reported as "Good";
    confirm that this wording is intended for mostly-negative products.
    """
    if positive_ratio > 80:
        return "Best"
    if positive_ratio > 50:
        return "Better"
    return "Good"


# The interactive part stays at module level (not inside a function) so the
# intermediate objects remain available to later notebook cells, but it is
# guarded so that importing the helpers does not prompt for input.
if __name__ == "__main__":
    # Load dataset
    df = pd.read_csv("mgn386.csv")

    # Ask user for Device_Type
    device_type = input("Enter the Device_Type to analyze (e.g., Laptop, Smartphone, Mobile): ").strip()
    df_device = df[df['Device_Type'].str.lower() == device_type.lower()]

    if df_device.empty:
        print(f"No reviews found for device type '{device_type}'. Please check the input.")
    else:
        # List available brands for the selected device type.
        # Missing brands are dropped so the join never sees a float NaN.
        available_brands = df_device['Brand'].dropna().unique()
        print(f"\nAvailable brands for {device_type}: {', '.join(map(str, available_brands))}")
        brand = input(f"Enter the Brand to analyze under {device_type}: ").strip()
        # .copy() gives an independent frame, so adding columns below does
        # not trigger pandas' SettingWithCopyWarning.
        df_filtered = df_device[df_device['Brand'].str.lower() == brand.lower()].copy()

        if df_filtered.empty:
            print(f"No reviews found for brand '{brand}' under device type '{device_type}'.")
        else:
            print(f"\nAnalyzing {len(df_filtered)} reviews for {brand} {device_type}...")

            # Step 1: Clean text
            df_filtered['Clean_Review'] = df_filtered['Review_Text'].apply(clean_text)

            # Step 2: Sentiment labeling
            df_filtered['Sentiment'] = df_filtered['Clean_Review'].apply(get_sentiment)
            y = df_filtered['Sentiment']

            # LogisticRegression cannot be fitted on a single class, so the
            # model steps only run when both labels reach the training set.
            trainable = y.nunique() >= 2
            if trainable:
                # Step 3: TF-IDF
                vectorizer = TfidfVectorizer(stop_words='english', max_features=500)
                X = vectorizer.fit_transform(df_filtered['Clean_Review'])

                # Step 4: Train/test split
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.2, random_state=42
                )
                trainable = y_train.nunique() >= 2

            if not trainable:
                print("\nNot enough positive and negative reviews to train a model; "
                      "skipping classification report and keyword analysis.")
            else:
                model = LogisticRegression()
                model.fit(X_train, y_train)

                # Step 5: Evaluation
                y_pred = model.predict(X_test)
                print("\n--- Classification Report ---")
                print(classification_report(y_test, y_pred))

                # Step 6: Top keywords
                feature_names = vectorizer.get_feature_names_out()
                coef = model.coef_[0]

                top_n = 10
                # Ascending order of weight: the tail holds the strongest
                # positive terms, the head the strongest negative ones.
                order = np.argsort(coef)
                top_pos_indices = order[-top_n:][::-1]
                top_neg_indices = order[:top_n]

                top_positive_keywords = [(feature_names[i], coef[i]) for i in top_pos_indices]
                top_negative_keywords = [(feature_names[i], coef[i]) for i in top_neg_indices]

                print("\n--- Top Positive Keywords ---")
                for word, weight in top_positive_keywords:
                    print(f"{word}: {weight:.2f}")

                print("\n--- Top Negative Keywords ---")
                for word, weight in top_negative_keywords:
                    print(f"{word}: {weight:.2f}")

            # Step 7: Overall Sentiment Verdict
            # Kept inside this branch: df_filtered and brand only exist (and
            # are non-empty) when both user selections matched some reviews.
            positive_ratio = y.mean() * 100
            verdict = sentiment_verdict(positive_ratio)

            print("\n--- Overall Sentiment Analysis ---")
            print(f"{brand} {device_type} has a positive sentiment score of {positive_ratio:.2f}%")
            print(f"→ Verdict: {verdict}")

Enter the Device_Type to analyze (e.g., Laptop, Smartphone, Mobile):  MOBILE



Available brands for MOBILE: Vivo, Xiaomi, Realme, Samsung, Oppo, Apple, Acer, HP, Lenovo, Dell, Asus, Huawei


Enter the Brand to analyze under MOBILE:  samsung



Analyzing 2643 reviews for samsung MOBILE...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Clean_Review'] = df_filtered['Review_Text'].apply(clean_text)
