# 😄 Sentiment Analysis on Popular Products

## I. 📑 Importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import re

plt.style.use('ggplot')

Importing dataset

In [None]:
df = pd.read_csv('../data/reviews/cleaned/kit-halteres-reviews-cleaned.csv')
print(df.shape)

In [None]:
df.head()

## II. 🧹 Data Cleaning

Adding an id column so we can concat the dataframe with the polarity scores dataframe afterwards

In [None]:
#df.insert(1, 'id', range(0, len(df)))

In [None]:
df.head()

Saving the cleaned version

In [None]:
#data_path = '../data/reviews/cleaned/'

#df.to_csv(data_path + 'kit-halteres-reviews-cleaned.csv', index=False)

Transform "review_date" to datetime

In [None]:
df['review_date'] = pd.to_datetime(df['review_date'])
df.head()

## III. 🕵️ Data Exploration

In [None]:
ax = df['review_score'].value_counts().sort_index() \
    .plot(kind='bar',
          title='Count of Reviews by Stars',
          figsize=(5, 5),
          color="orange")

ax.set_xlabel('Review Stars')
ax.set_ylabel('Count')
plt.show()

In [None]:
ax = df['range_used'].value_counts().sort_index() \
    .plot(kind='bar',
          figsize=(10, 5))

ax.set_xlabel('Product Range Used In')
ax.set_ylabel('Count')
plt.show()

Most reviews have 4 and 5 stars.

In [None]:
avg_rating = round(df['review_score'].mean(), 1)
print(f"Average rating stars : {avg_rating} ⭐")

## IV. 😍😐😡 Sentiment Analysis Using Vader 

Let's take an example review and use NTLK on it

In [28]:
example_desc = df['review_description'][25]
example_score = df['review_score'][25]

print(example_desc + " --- Rating : " + example_score * "⭐")

Bought for getting back in shape. I would like to get advice on the best use of this product --- Rating : ⭐⭐⭐⭐


In [30]:
tokens = nltk.word_tokenize(example_desc)
#tokens[:15]

LookupError: 
**********************************************************************
  Resource [93mpunkt[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt/PY3/english.pickle[0m

  Searched in:
    - '/home/vscode/nltk_data'
    - '/usr/local/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/local/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - ''
**********************************************************************


In [None]:
tagged = nltk.pos_tag(tokens)
tagged[:10]

In [None]:
entities = nltk.chunk.ne_chunk(tagged)
entities.pprint()

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

sia = SentimentIntensityAnalyzer()

In [None]:
sia.polarity_scores(example_desc)

In [None]:
print(f"Our example review : '{example_desc[:20]}...' has a compound score of 0.92 which means it's a positive review.")

In [None]:
# Run the polarity score on the entire dataset
res = {}

for i, row in tqdm(df.iterrows(), total=len(df)):
    text = row['review_description']
    myid = row['id']
    res[myid] = sia.polarity_scores(text)

In [None]:
vaders = pd.DataFrame(res).T
vaders = vaders.reset_index().rename(columns={'index': 'id'})
vaders = vaders.merge(df, how='left')

In [None]:
vaders.head()

In [None]:
ax = sns.barplot(data=vaders, x='review_score', y='compound')
ax.set_title('Compound Score by Star Review')
plt.show()

We can see that positive reviews descriptions have 4+ stars and negative ones have 1 stars, however 3 stars reviews are neutral.

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(12, 3))
sns.barplot(data=vaders, x='review_score', y='pos', ax=axs[0])
sns.barplot(data=vaders, x='review_score', y='neu', ax=axs[1])
sns.barplot(data=vaders, x='review_score', y='neg', ax=axs[2])
axs[0].set_title('Positive')
axs[1].set_title('Neutral')
axs[2].set_title('Negative')
plt.tight_layout()
plt.show()

Let's see some negative reviews but classified as positive.

In [None]:
vaders.query('review_score == 1') \
    .sort_values('pos', ascending=False)['review_description'].values[0]

Now let's see a positive review but classified as negative.

In [None]:
vaders.query('review_score == 5') \
    .sort_values('neg', ascending=False)['review_description'].values[0]