Exp 10: Develop social media text analytics models for improving existing product/service by analyzing customer’s reviews/comments

In [1]:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

nltk.download('vader_lexicon')

sid = SentimentIntensityAnalyzer()

file_path = '/content/twitter_validation.csv'
df = pd.read_csv(file_path)

print("Dataset Overview:")
print(df.head())

def preprocess_text(text):
    text = re.sub(r"http\S+|www\S+|@\w+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    text = text.lower()
    return text

df['cleaned_text'] = df['Text'].apply(preprocess_text)

def analyze_sentiment(text):
    sentiment_scores = sid.polarity_scores(text)
    if sentiment_scores['compound'] >= 0.05:
        return "Positive"
    elif sentiment_scores['compound'] <= -0.05:
        return "Negative"
    else:
        return "Neutral"

df['predicted_sentiment'] = df['cleaned_text'].apply(analyze_sentiment)

if 'label' in df.columns:
    df['label'] = df['label'].map({0: "Negative", 1: "Positive"})
    print("Classification Report:")
    print(classification_report(df['label'], df['predicted_sentiment']))

X_train, X_test, y_train, y_test = train_test_split(
    df['cleaned_text'], df['predicted_sentiment'], test_size=0.2, random_state=42
)

print("Sample Results:")
print(df[['Text', 'cleaned_text', 'predicted_sentiment']].head()) # Changed 'review' to 'text' here as well

Mounted at /content/drive


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Dataset Overview:
   Sr. No    Website    Analysis  \
0    3364   Facebook  Irrelevant   
1     352     Amazon     Neutral   
2    8312  Microsoft    Negative   
3    4371      CS-GO    Negative   
4    4433     Google     Neutral   

                                                Text  
0  I mentioned on Facebook that I was struggling ...  
1  BBC News - Amazon boss Jeff Bezos rejects clai...  
2  @Microsoft Why do I pay for WORD when it funct...  
3  CSGO matchmaking is so full of closet hacking,...  
4  Now the President is slapping Americans in the...  
Sample Results:
                                                Text  \
0  I mentioned on Facebook that I was struggling ...   
1  BBC News - Amazon boss Jeff Bezos rejects clai...   
2  @Microsoft Why do I pay for WORD when it funct...   
3  CSGO matchmaking is so full of closet hacking,...   
4  Now the President is slapping Americans in the...   

                                        cleaned_text predicted_sentiment  
0  i me