# Importing pandas library as pd for data manipulation

In [1]:
import pandas as pd

# Reading the JSON file and displaying a random sample of 10 rows

In [2]:
# Location of the review dump on disk
reviews_path = '/Users/srijanagella/Downloads/Arts_Crafts_and_Sewing_5.json'
# lines=True: the file is parsed as one JSON object per line
df = pd.read_json(reviews_path, lines=True)
# Quick look at 10 randomly chosen rows of the loaded frame
df.sample(10)

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,style,reviewerName,reviewText,summary,unixReviewTime,vote,image
3654,5,True,"06 16, 2017",AEKXQV036EE34,B0001DUD9O,,Amazon Customer,Perfect. Discontinued color my mother in law n...,Five Stars,1497571200,,
18168,5,False,"10 15, 2014",A2IBF31N8P9FPJ,B000BVN18Q,,LAgrammie,Polishing cloths could not get any better. Lo...,Perfect,1413331200,,
451370,4,True,"06 30, 2017",A3DMIDOA8CMMQ5,B007H8JUHM,"{'Size:': ' 18 Inch x 10 Yard', 'Color:': ' Pi...",Connie Thomas,This tulle is pretty but I swear they basicall...,"Very pretty, but MESSY GLITTER EVERYWHERE!",1498780800,,[https://images-na.ssl-images-amazon.com/image...
485247,4,True,"02 21, 2016",AH09PC111TUSE,B00YJB42KK,,K. Sanders,There is nothing wrong with this fabric except...,a pretty print!,1456012800,,
185810,5,True,"02 12, 2016",A2ODHPNLZ3E0W3,B002PZR1MS,{'Size:': ' No. 9'},Lou Pearson Jr.,"Great Deal. good seller, much appreciated.",Five Stars,1455235200,,
68888,5,True,"10 2, 2014",AOF50DGMK791Y,B00134C4HO,,Mary k.,this was something that I didn't know I needed...,notcher your everyday notch,1412208000,,
493661,5,True,"06 29, 2017",A1WP02LSP6FS2E,B01FLQ0KEI,,LoveCraftShopping,Excellent and beautiful!!! This is the perfect...,Beautiful,1498694400,,
397603,2,True,"10 4, 2016",A2M2P9SDVIS2BV,B0197FGETY,{'Color:': ' Tropicals'},Nancy S. Cunningham,I am a beginner watercolorist. The colors see...,beware the sticker and be gentle with the fold...,1475539200,3.0,
257468,4,True,"12 3, 2017",AKCNHJDTOFA4N,B007PBHZK0,{'Size:': ' Medium'},LILI,This fits my hip 44 and chest 39. The waist is...,Fits hip size 44/46,1512259200,2.0,[https://images-na.ssl-images-amazon.com/image...
35305,1,True,"08 8, 2015",A24U332RM0HXEQ,B000S15IVM,"{'Size:': ' 2 Bottle Set Metallics', 'Color:':...",nwsunsets,shook for an hour and still clumps in botton.....,shook for an hour and still clumps in botton. ...,1438992000,2.0,


# Importing Libraries and Downloading Data for Sentiment Analysis

In [3]:
from sklearn import preprocessing  # Importing preprocessing module from scikit-learn library

In [4]:
import nltk  # Importing nltk library for natural language processing tasks

In [5]:
# The lexicon corpus is downloaded first so that the opinion_lexicon reader
# imported below has data to read when it is used in later cells.
nltk.download('opinion_lexicon')  # Downloading the opinion lexicon dataset
from nltk.corpus import opinion_lexicon  # Corpus reader; later cells call its words(), positive() and negative()
from nltk.tokenize import word_tokenize  # Tokenizer used by bing_liu_score to split review text into words

[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     /Users/srijanagella/nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


# Exploring Opinion Lexicon and Creating a Dictionary for Sentiment Analysis

In [6]:
# Report how many entries the opinion lexicon contains overall
lexicon_size = len(opinion_lexicon.words())
print('Total number of words in opinion lexicon', lexicon_size)

Total number of words in opinion lexicon 6789


In [7]:
# Peek at the first ten entries of the positive word list
positive_examples = opinion_lexicon.positive()[:10]
print('Examples of positive words in opinion lexicon', positive_examples)

Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']


In [8]:
# Peek at the first ten entries of the negative word list
negative_examples = opinion_lexicon.negative()[:10]
print('Examples of negative words in opinion lexicon', negative_examples)

Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


In [9]:
# Download the 'punkt' tokenizer data; presumably this is what word_tokenize
# (used later when scoring the review text) needs at runtime.
# NOTE(review): recent NLTK releases may look for the 'punkt_tab' resource
# instead - confirm which one the installed NLTK version requires.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/srijanagella/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Renaming Columns, Defining Scores, and Initializing Dictionary

In [10]:
# Expose the review body under the shorter column name "text"
df.rename(columns={"reviewText": "text"}, inplace=True)
# Per-word contribution: +1 for a positive word, -1 for a negative word
pos_score, neg_score = 1, -1
# Word -> score lookup table; starts empty and is filled from the lexicon
word_dict = dict()

# Adding Positive and Negative Words to the Dictionary

In [11]:
# Map every word of the positive list to pos_score in word_dict
word_dict.update((positive_word, pos_score) for positive_word in opinion_lexicon.positive())

In [12]:
# Map every word of the negative list to neg_score in word_dict
# (a word that is already present in word_dict is overwritten with neg_score)
word_dict.update((negative_word, neg_score) for negative_word in opinion_lexicon.negative())

# Function to Calculate Sentiment Score using Bing Liu Lexicon

In [13]:
def bing_liu_score(text):
    """Score a piece of text with the Bing Liu opinion lexicon.

    The text is lower-cased and tokenized with ``word_tokenize``; every token
    found in the module-level ``word_dict`` contributes its stored score and
    tokens that are not in ``word_dict`` contribute nothing.

    Args:
        text: The review text to score. Values that are not strings
            (for example ``None`` or a float ``NaN`` standing in for a
            missing review) are treated as containing no scored words.

    Returns:
        The sum of the ``word_dict`` scores of the tokens in ``text``;
        ``0`` when no token matches or when ``text`` is not a string.
    """
    # Missing reviews show up as None/NaN in a DataFrame column; calling
    # .lower() on them would raise, so they get a neutral score instead.
    if not isinstance(text, str):
        return 0
    # Lower-case before tokenizing so lookups match the lower-cased text
    bag_of_words = word_tokenize(text.lower())
    # Unknown tokens count as 0, so only lexicon words move the total
    return sum(word_dict.get(word, 0) for word in bag_of_words)

# Sentiment Analysis Using Bing Liu Lexicon

In [14]:
# Filling Missing Values in 'text' Column with 'no review'
# The filled column is assigned back to df rather than calling
# fillna(..., inplace=True) on the df['text'] selection: that chained in-place
# form triggers a FutureWarning in pandas 2.x and does not modify df when
# Copy-on-Write is enabled.
df['text'] = df['text'].fillna('no review')
# Calculating Bing Liu Score for Each Text and Adding a New Column 'Bing_Liu_Score'
df['Bing_Liu_Score'] = df['text'].apply(bing_liu_score)

In [15]:
# Show the star rating, the review text and its lexicon score side by side for the first 10 rows
preview_columns = ['overall', 'text', 'Bing_Liu_Score']
df[preview_columns].head(10)

Unnamed: 0,overall,text,Bing_Liu_Score
0,4,Contains some interesting stitches.,1
1,5,I'm a fairly experienced knitter of the one-co...,22
2,4,Great book but the index is terrible. Had to w...,0
3,5,I purchased the Kindle edition which is incred...,4
4,5,Very well laid out and very easy to read.\n\nT...,5
5,5,"Beginning her career as a freelance knitter, M...",15
6,5,This is a terrific stitch handbook (and I have...,9
7,4,The book needs to be coil bound. The content i...,1
8,5,I really am enjoying this book! I like the siz...,12
9,5,Just received this book and looked over it cov...,6


# Grouping DataFrame by 'overall' Rating and Calculating Mean Bing Liu Score

In [16]:
# Average Bing Liu score for each value of the 'overall' star rating
df.groupby('overall')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
overall,Unnamed: 1_level_1
1,-0.255049
2,0.566098
3,1.158796
4,2.028146
5,2.130005
