In [5]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Load the cleaned dataset
df = pd.read_csv('cleaned_reviews.csv')

In [7]:
df.head()

Unnamed: 0,userName,content,score,thumbsUpCount,at
0,sateesh gunda,CheatingI cancel subscription 2days even thoug...,1,0,2024-09-21 16:07:04
1,Alexis Harber,Im echo everyone elses sentiment new update la...,1,0,2024-09-21 16:03:47
2,Carly-jade Howard,really like great bdays,3,0,2024-09-21 15:39:27
3,Bryan Shackelford,App keep crash open crash cant even use guess ...,1,1,2024-09-21 15:28:41
4,Tim Tucker,messed accurate go website get accurate depict...,2,1,2024-09-21 15:19:33


In [8]:
# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("LiYuan/amazon-review-sentiment-analysis")
model = AutoModelForSequenceClassification.from_pretrained("LiYuan/amazon-review-sentiment-analysis")

In [9]:
def get_sentiment(text):
    # Truncate the text to a maximum length of 512 tokens
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model(**inputs)
    sentiment = outputs.logits.argmax(dim=1).item()
    return 'POSITIVE' if sentiment == 1 else 'NEGATIVE'

In [10]:
# Create a new column for the sentiment of each review
df['sentiment'] = df['content'].apply(get_sentiment)

In [11]:
# Split the data into training and testing sets
train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)

In [13]:
# Analyze positive aspects in negative reviews
negative_reviews = train_df[train_df['sentiment'] == 'NEGATIVE']
positive_aspects = negative_reviews[negative_reviews['score'] > 3]['content']

In [14]:
# Analyze negative aspects in positive reviews
positive_reviews = train_df[train_df['sentiment'] == 'POSITIVE']
negative_aspects = positive_reviews[positive_reviews['score'] < 3]['content']

In [15]:
# View the results
print('Negative Aspects in Positive Reviews:')
print(positive_aspects)

Negative Aspects in Positive Reviews:
4493                 love Amazon great price return easily
19774    Great offer free delivery trial dont enough ex...
41971    love Amazon app Ive good experience Amazon eve...
46115    Solid app Works every time Simple easy access ...
56053    Love app whatever look find time cheap anywher...
                               ...                        
45064    use amazon pretty much purchase convenient cus...
52457    Good app thumb Would give 5 rating extremely d...
55870    bad app forget password pain change wont let c...
45743        Good price great deal right door thank amazon
52100    amazon app really easy useful use different va...
Name: content, Length: 15599, dtype: object


In [16]:
print('Postivie Aspects in Negative Reviews:')
print(negative_aspects)

Postivie Aspects in Negative Reviews:
31316    app way well doesnt support Samsung s8 2 year ...
42060    clunky really difficult sort filter feel prett...
24998    Deals page Lightning Deals etc frequently fail...
54987              recent update app keep lag really annoy
11744    Ill still use Amazon Ill never go pay Amazon p...
                               ...                        
21578    Ok app great package come house great stuff gi...
21187    Updating review 2 star recently Amazon shoppin...
32162    Im put two star whenever open app look somethi...
2363     cant apply filter search app quite slow lately...
44823    Much slow previous version Menu bottom take fo...
Name: content, Length: 125, dtype: object


In [17]:
# Save the positive and negative aspects to CSV files
# positive_aspects.to_csv('positive_aspects.csv', index=False)
# negative_aspects.to_csv('negative_aspects.csv', index=False)