## Importing Libraries

In [1]:
import pandas as pd
import emoji
import pandas as pd
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob

## Reading the CSV

In [2]:
df =  pd.read_csv('reviews_data.csv')

In [3]:
df

Unnamed: 0,Review,Comments,Ratings,Locations,Cuctomers,Time
0,Does the job,Good,3,Jajapur District,Prakash Sethy,
1,Must buy!,Awesome,5,Murshidabad District,Sabyasachee Mukherjee,8 months ago
2,Super!,Superb Phone ❤️,5,Rampurhat,Souvik Dey,8 months ago
3,Excellent,Best phone.. Picture quality ossam...,5,Konnagar,Flipkart Customer,8 months ago
4,Value-for-money,Camera quality gud,4,Udham Singh Nagar District,Subrat Vishwas,8 months ago
...,...,...,...,...,...,...
1602,Good quality product,"best in fast charging, battery backup , displa...",5,Pune,Flipkart Customer,2 months ago
1603,Pretty good,Good one,4,Bengaluru,Anu Rose George,2 months ago
1604,Wonderful,Worthy of the price.,5,Udgir,Flipkart Customer,2 months ago
1605,Just wow!,Superb quality phone,5,East Godavari District,Narayana R,2 months ago


In [4]:
df.head(20)

Unnamed: 0,Review,Comments,Ratings,Locations,Cuctomers,Time
0,Does the job,Good,3,Jajapur District,Prakash Sethy,
1,Must buy!,Awesome,5,Murshidabad District,Sabyasachee Mukherjee,8 months ago
2,Super!,Superb Phone ❤️,5,Rampurhat,Souvik Dey,8 months ago
3,Excellent,Best phone.. Picture quality ossam...,5,Konnagar,Flipkart Customer,8 months ago
4,Value-for-money,Camera quality gud,4,Udham Singh Nagar District,Subrat Vishwas,8 months ago
5,Fabulous!,Just go for it🤩,5,Aurangabad,Flipkart Customer,7 months ago
6,Fabulous!,Nice product...🔥,5,Pudukkottai District,Dinesh Thangappa,7 months ago
7,Good quality product,Camera quality too good,4,Varkala,Pravitha Sunny,8 months ago
8,Mind-blowing purchase,"Excellent phone, smooth ui, great sound, aweso...",5,Surat,Lav,8 months ago
9,Just wow!,I am so happy happy this mobile phone 🔥🔥,5,Alandi,Flipkart Customer,8 months ago


## Createing a function for Removing Emojis

In [5]:
def remove_emojis(text):
    return emoji.get_emoji_regexp().sub(r'', text)


## Downloading NLTK Resources ,Creating Output DataFrame,Analyzing Sentiment,Iterating Over Rows and Building the Output DataFrame

In [6]:

# Download stopwords
nltk.download('stopwords')
nltk.download('vader_lexicon')
nltk.download('punkt')

# Load stopwords
stop_words = set(stopwords.words('english'))

# Initialize sentiment analyzer
vader = SentimentIntensityAnalyzer()

# Define regex pattern to match unwanted characters
pattern = r'[^A-Za-z\s]+'
output_df = pd.DataFrame(columns=["Review", "Comments", "Ratings","Times","Locations","Positive_Score", "Negative_Score", "Neutral_Score", "Sentiment", "Subjectivity_Score"])

def analyze_sentiment(text):
    # Apply regex to remove unwanted characters
    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Tokenize text into words
    words = word_tokenize(cleaned_text)

    # Remove stopwords and lowercase
    words = [word.lower() for word in words if word.lower() not in stop_words]

    # Join words back into cleaned text
    cleaned_text = ' '.join(words)

    # Get polarity scores for cleaned text using VADER
    vader_scores = vader.polarity_scores(cleaned_text)
    compound_score = vader_scores["compound"]

    # To get the subjectivity scores using TextBlob
    blob = TextBlob(cleaned_text)
    polarity_score = blob.sentiment.polarity
    subjectivity_score = blob.sentiment.subjectivity

    return compound_score, polarity_score, subjectivity_score

# Analyze sentiment for each row (combining Review and Comments)
for index, row in df.iterrows():
    review_text = str(row["Review"])  
    comments_text = str(row["Comments"])
    ratings = row["Ratings"]
    Locations = row["Locations"]
    Cuctomers = row["Cuctomers"]
    Time = str(row["Time"])
    

    # Analyze sentiment for the combined text of review and comments
    compound_score, polarity_score, subjectivity_score = analyze_sentiment(review_text + ' ' + comments_text)

    # Calculate Negative_Score and Neutral_Score
    if compound_score < 0:
        negative_score = -compound_score
        neutral_score = 0
    elif compound_score == 0:
        negative_score = 0
        neutral_score = 0
    else:
        negative_score = 0
        neutral_score = compound_score

    # Append results to output dataframe
    new_row = {
        "Review": review_text,
        "Comments": comments_text,
        "Ratings": ratings,
        "Locations" : Locations,
        "Cuctomers" : Cuctomers,
        "Time" : Time,
        "Positive_Score": compound_score,
        "Negative_Score": negative_score,
        "Neutral_Score": neutral_score,
        "Sentiment": "Positive" if compound_score > 0 else ("Neutral" if compound_score == 0 else "Negative"),
        "Subjectivity_Score": subjectivity_score
    }

    output_df = pd.concat([output_df, pd.DataFrame([new_row])], ignore_index=True)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Removing Emojis from Review and Comments

In [7]:
# Apply the function to 'Review' and 'Comments' columns
output_df['Review'] = output_df['Review'].apply(remove_emojis)
output_df['Comments'] = output_df['Comments'].apply(remove_emojis)

  return emoji.get_emoji_regexp().sub(r'', text)
  return emoji.get_emoji_regexp().sub(r'', text)


In [8]:
output_df.head(5)

Unnamed: 0,Review,Comments,Ratings,Times,Locations,Positive_Score,Negative_Score,Neutral_Score,Sentiment,Subjectivity_Score,Cuctomers,Time
0,Does the job,Good,3,,Jajapur District,0.4404,0,0.4404,Positive,0.6,Prakash Sethy,
1,Must buy!,Awesome,5,,Murshidabad District,0.6249,0,0.6249,Positive,1.0,Sabyasachee Mukherjee,8 months ago
2,Super!,Superb Phone,5,,Rampurhat,0.8402,0,0.8402,Positive,0.833333,Souvik Dey,8 months ago
3,Excellent,Best phone.. Picture quality ossam...,5,,Konnagar,0.836,0,0.836,Positive,0.65,Flipkart Customer,8 months ago
4,Value-for-money,Camera quality gud,4,,Udham Singh Nagar District,0.0,0,0.0,Neutral,0.0,Subrat Vishwas,8 months ago


## Storing output_df into into another csv file

In [9]:

column_names = ['Review', 'Comments', 'Ratings', 'Locations', 'Time', 'Positive_Score', 'Negative_Score', 'Neutral_Score', 'Sentiment', 'Subjectivity_Score']

selected_columns = output_df[column_names]


In [12]:
df.dropna(inplace=True)

In [13]:
df.head()

Unnamed: 0,Review,Comments,Ratings,Locations,Cuctomers,Time
1,Must buy!,Awesome,5,Murshidabad District,Sabyasachee Mukherjee,8 months ago
2,Super!,Superb Phone ❤️,5,Rampurhat,Souvik Dey,8 months ago
3,Excellent,Best phone.. Picture quality ossam...,5,Konnagar,Flipkart Customer,8 months ago
4,Value-for-money,Camera quality gud,4,Udham Singh Nagar District,Subrat Vishwas,8 months ago
5,Fabulous!,Just go for it🤩,5,Aurangabad,Flipkart Customer,7 months ago


In [15]:
selected_columns.to_csv("sentiment_analysis_results.csv", index=False)