<a href="https://colab.research.google.com/github/lebe1/text-oriented-data-science-project/blob/main/Data_Exploration_Sentemint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sentiment

## Contect to google drive

In [193]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [194]:
folder_path = '/content/drive/MyDrive/DOPP_Ex2_data/'
csv_path = '/content/drive/MyDrive/DOPP_Ex2_data/combined_reviews.csv'

## Imports

In [195]:
import pandas as pd
import json
import os
import nltk
from nltk.corpus import stopwords
import re
from tqdm import tqdm
import string
import plotly.express as px
from transformers import pipeline
import ast

## Load data

In [196]:
df = pd.read_csv(csv_path)

In [197]:
df['reviewToken'] = df['reviewToken'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

In [198]:
df.head(5)

Unnamed: 0,rating,reviewTime,reviewerID,reviewText,summary,unixReviewTime,category,reviewToken
0,5.0,2017-01-16,ASWLL1VJA7WOG,Great product... just what I wanted. Works gr...,Five Stars,1484524800,All_Beauty,"[great, product, want, works, great, stylish]"
1,5.0,2008-12-08,A265K3A7V83112,"After seeing the popularity of this shoe, I de...",What can i say? chucks rock,1228694400,Clothing_Shoes_and_Jewelry,"[see, popularity, shoe, decide, test, impresse..."
2,5.0,2013-02-08,A1D18EJF6LHYDV,I was nervousness about the scent because IVe ...,Smells great,1360281600,All_Beauty,"[nervousness, scent, ive, never, try, love, pa..."
3,5.0,2018-02-15,A25EOTX5I354I2,"I LOVE the smell. A bit expensive, so I cant b...",Five Stars,1518652800,Luxury_Beauty,"[love, smell, bit, expensive, buy, often, woul..."
4,5.0,2013-11-11,A1DFZPQPCHBYTY,Found this stuff in Japan and wondered if I co...,Super lathery nice soap!,1384128000,All_Beauty,"[found, stuff, japan, wonder, could, find, 3, ..."


## Sentiment Analysis

In [199]:
from textblob import TextBlob

def analyze_with_textblob(tokens):
  return TextBlob(str(tokens)).sentiment.polarity

In [200]:
df['sentiment_score_text_blob'] = df['reviewText'].progress_apply(analyze_with_textblob)

100%|██████████| 12000/12000 [00:07<00:00, 1550.60it/s]


In [201]:
df[['reviewToken','rating', 'sentiment_score_text_blob']].head(5)

Unnamed: 0,reviewToken,rating,sentiment_score_text_blob
0,"[great, product, want, works, great, stylish]",5.0,0.75
1,"[see, popularity, shoe, decide, test, impresse...",5.0,0.393269
2,"[nervousness, scent, ive, never, try, love, pa...",5.0,0.511111
3,"[love, smell, bit, expensive, buy, often, woul...",5.0,0.0
4,"[found, stuff, japan, wonder, could, find, 3, ...",5.0,0.333333


In [202]:
fig = px.box(df, x="category", y="sentiment_score_text_blob", color="category")
fig.show()

## Transformer

In [203]:
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

In [212]:
def analyze_with_transformer(text):
    if not isinstance(text, str):
        return {'label': None, 'score': None}

    max_length = 512
    truncated_text = text[:max_length]

    try:
        result = sentiment_analyzer(truncated_text)[0]
    except Exception as e:
        print(f"Error processing text: {e}")
        return {'label': None, 'score': None}

    return result

In [213]:
df['sentiment_transformer'] = df['reviewText'].progress_apply(analyze_with_transformer)

100%|██████████| 12000/12000 [22:34<00:00,  8.86it/s]


In [220]:
labels = []
scores = []

for sentiment in df['sentiment_transformer']:

    if isinstance(sentiment, list) and len(sentiment) > 0 and isinstance(sentiment[0], dict):
        sentiment_dict = sentiment[0]
        labels.append(sentiment_dict.get('label', None))
        scores.append(sentiment_dict.get('score', None))

    else:
        labels.append(None)
        scores.append(None)

df['sentiment_label_transformer'] = labels
df['sentiment_score_transformer'] = scores

In [219]:
df[['reviewToken','reviewText','rating', 'sentiment_label_transformer', 'sentiment_score_transformer','sentiment_transformer']].head(15)

Unnamed: 0,reviewToken,reviewText,rating,sentiment_label_transformer,sentiment_score_transformer,sentiment_transformer
0,"[great, product, want, works, great, stylish]",Great product... just what I wanted. Works gr...,5.0,,,"{'label': 'POSITIVE', 'score': 0.999874472618103}"
1,"[see, popularity, shoe, decide, test, impresse...","After seeing the popularity of this shoe, I de...",5.0,,,"{'label': 'POSITIVE', 'score': 0.9997813105583..."
2,"[nervousness, scent, ive, never, try, love, pa...",I was nervousness about the scent because IVe ...,5.0,,,"{'label': 'POSITIVE', 'score': 0.9988742470741..."
3,"[love, smell, bit, expensive, buy, often, woul...","I LOVE the smell. A bit expensive, so I cant b...",5.0,,,"{'label': 'POSITIVE', 'score': 0.999458372592926}"
4,"[found, stuff, japan, wonder, could, find, 3, ...",Found this stuff in Japan and wondered if I co...,5.0,,,"{'label': 'POSITIVE', 'score': 0.9982933402061..."
5,"[never, use, tinted, sunscreen, million, sure,...",I had never used a tinted sunscreen before and...,5.0,,,"{'label': 'POSITIVE', 'score': 0.9801861047744..."
6,"[quick, tranasaction, love, shoe, would, purch...",Quick tranasaction!!! Loved the shoes!!! Wou...,5.0,,,"{'label': 'POSITIVE', 'score': 0.9996722936630..."
7,"[husband, wear, first, pair, death, lawn, mowi...",My husband wore his first pair to death. They ...,5.0,,,"{'label': 'POSITIVE', 'score': 0.9984630346298..."
8,"[great, product, great, price]","Great Product, Great Price!",5.0,,,"{'label': 'POSITIVE', 'score': 0.9998722076416..."
9,[],just do it :),5.0,,,"{'label': 'POSITIVE', 'score': 0.9526675343513..."


In [216]:
fig = px.box(df, x="category", y="rating", color="sentiment_label_transformer", title="Rating Boxplot by Category and Sentiment")
fig.show()

In [222]:
output_path = '/content/drive/MyDrive/DOPP_Ex2_data/reviews_sentiment.csv'
df.to_csv(output_path, index=False)