In [109]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from IPython.display import clear_output
import time
import torch
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification

## Sentiment analysis

In [None]:
# Load the tokenizer and the sequence-classification model directly (no pipeline wrapper).
# Both are looked up by the same checkpoint id.

tokenizer = AutoTokenizer.from_pretrained("lxyuan/distilbert-base-multilingual-cased-sentiments-student")
model = AutoModelForSequenceClassification.from_pretrained("lxyuan/distilbert-base-multilingual-cased-sentiments-student")

# Smoke test on the first review only.
# Slice with [:1] so the tokenizer receives a one-element list of strings. The previous
# `list(values[0])` turned the single review string into a list of its characters, so every
# character was scored as if it were a separate review.
# NOTE(review): `df2` is not defined in any cell visible in this notebook - confirm it exists
# before this cell runs (presumably a frame with a string `review_combined` column).
inputs = tokenizer(list(df2.review_combined.values[:1]), return_tensors="pt", padding=True, truncation=True, max_length=512)

with torch.no_grad():  # Disable gradient calculation for inference
    outputs = model(**inputs)
    logits = outputs.logits

# Softmax over dim=1 turns the logits into per-class probabilities; argmax picks the class index.
probabilities = torch.softmax(logits, dim=1)
predicted_class = torch.argmax(probabilities, dim=1)

In [None]:
# Same checkpoint through the high-level pipeline API.
# `top_k=None` asks for the score of every label; it replaces the deprecated
# `return_all_scores=True` flag.
# NOTE(review): with `top_k` the per-text label list may come back sorted by score instead of
# in label-id order - confirm against the installed transformers version if order matters.
distilled_student_sentiment_classifier = pipeline(
    model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    top_k=None
)

# Truncate to 512 tokens, matching the direct tokenizer calls elsewhere in this notebook,
# so an over-long review does not overflow the model's maximum input length.
# NOTE(review): `df2` is not defined in any cell visible in this notebook - confirm it exists.
distilled_student_sentiment_classifier(
    list(df2.review_combined.values[:1000]),
    truncation=True,
    max_length=512,
)

In [None]:
# Class index -> sentiment name. Integer keys match the integer index produced by argmax;
# float keys such as 0.0 compare and hash equal to them, so existing float lookups still work.
# NOTE(review): expected to mirror the checkpoint's `model.config.id2label` - confirm.
label = {0: 'positive', 1: 'neutral', 2: 'negative'}


def extract_sentiment(row, *, text_tokenizer=None, classifier=None):
    """Classify the sentiment of a single review row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the review text under the key ``'review_combined'``.
    text_tokenizer : callable, optional
        Tokenizer to use. Defaults to the notebook-level ``tokenizer``.
    classifier : callable, optional
        Model to use; it is called with the tokenizer output as keyword arguments and
        must return an object with a ``logits`` attribute. Defaults to the notebook-level
        ``model``.

    Returns
    -------
    tuple
        ``(sentiment_name, probability)`` where ``sentiment_name`` is looked up in ``label``
        for the highest-probability class and ``probability`` is that class's softmax
        probability as a Python float.
    """
    # Fall back to the notebook globals so `df.apply(extract_sentiment, axis=1)` keeps working.
    text_tokenizer = tokenizer if text_tokenizer is None else text_tokenizer
    classifier = model if classifier is None else classifier

    inputs = text_tokenizer(row['review_combined'], return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():  # inference only, no gradients needed
        logits = classifier(**inputs).logits

    # One text in, so only the first row of the softmax output is used.
    probabilities = torch.softmax(logits, dim=1)[0]
    predicted_class = int(torch.argmax(probabilities))

    return label[predicted_class], probabilities[predicted_class].item()

### Beauty products

In [161]:
# Read the beauty reviews TSV (malformed lines are skipped) and keep only the rows
# that have text in `review_combined`.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\91962\Downloads\Course Material\CS9873-BrainInspiredAI\processed_expanded_stratified_reviews_beauty_v1.tsv",
    sep='\t',
    on_bad_lines='skip',
).dropna(subset=['review_combined'])

In [116]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,customer_id,review_id,product_id,product_title,product_category,star_rating,helpful_votes,vine,verified_purchase,review_combined
0,16802621,r2ri5t1qdurkk1,b00dsc9yd6,cerave facial moisturizing lotion pm 3 oz (pac...,beauty,5,3,n,y,great product for oily and sensitive skin
1,16802621,r2ri5t1qdurkk1,b00dsc9yd6,cerave facial moisturizing lotion pm 3 oz (pac...,beauty,5,3,n,y,"i love this lotion, gentle and does not cause ..."
2,16802621,r2ri5t1qdurkk1,b00dsc9yd6,cerave facial moisturizing lotion pm 3 oz (pac...,beauty,5,3,n,y,i have very sensitive oily skin and it does no...
3,15727572,r2acmmb1u2wd04,b00koj8zpy,"instyler wet to dry rotating iron, pink, 1-1/4...",beauty,5,0,n,y,five stars
4,15727572,r2acmmb1u2wd04,b00koj8zpy,"instyler wet to dry rotating iron, pink, 1-1/4...",beauty,5,0,n,y,ok


In [152]:
# Score every row; 'expand' spreads the returned (label, score) pair into columns 0 and 1.
sentiments = df.apply(extract_sentiment, axis=1, result_type='expand')
sentiments.head(n=5)

Unnamed: 0,0,1
0,positive,0.943549
1,positive,0.895411
2,negative,0.46459
3,positive,0.840872
4,positive,0.673849


In [162]:
# Attach the sentiment columns to the reviews.
# concat aligns on the index, so a `sentiments` frame computed from a different load of `df`
# would silently produce NaN-padded or extra rows - fail loudly instead.
if not sentiments.index.equals(df.index):
    raise ValueError("`sentiments` and `df` have different indexes; re-run the sentiment cell for this dataset")

# Drop sentiment columns left over from a previous run of this cell so that re-executing it
# does not append duplicate `sentiment_label` / `sentiment_score` columns.
df = pd.concat(
    [
        df.drop(columns=['sentiment_label', 'sentiment_score'], errors='ignore'),
        sentiments.rename(columns={0: 'sentiment_label', 1: 'sentiment_score'}),
    ],
    axis=1,
)
df.head()

Unnamed: 0,customer_id,review_id,product_id,product_title,product_category,star_rating,helpful_votes,vine,verified_purchase,review_combined,sentiment_label,sentiment_score
0,16802621,r2ri5t1qdurkk1,b00dsc9yd6,cerave facial moisturizing lotion pm 3 oz (pac...,beauty,5,3,n,y,great product for oily and sensitive skin,positive,0.943549
1,16802621,r2ri5t1qdurkk1,b00dsc9yd6,cerave facial moisturizing lotion pm 3 oz (pac...,beauty,5,3,n,y,"i love this lotion, gentle and does not cause ...",positive,0.895411
2,16802621,r2ri5t1qdurkk1,b00dsc9yd6,cerave facial moisturizing lotion pm 3 oz (pac...,beauty,5,3,n,y,i have very sensitive oily skin and it does no...,negative,0.46459
3,15727572,r2acmmb1u2wd04,b00koj8zpy,"instyler wet to dry rotating iron, pink, 1-1/4...",beauty,5,0,n,y,five stars,positive,0.840872
4,15727572,r2acmmb1u2wd04,b00koj8zpy,"instyler wet to dry rotating iron, pink, 1-1/4...",beauty,5,0,n,y,ok,positive,0.673849


In [164]:
# Write the annotated beauty reviews back out as TSV, without the row index.
df.to_csv(
    path_or_buf=r"C:\Users\91962\Downloads\Course Material\CS9873-BrainInspiredAI\processed_expanded_stratified_reviews_beauty_v1_with_sentiments.tsv",
    sep='\t',
    index=False,
)

### Health and personal care products

In [176]:
# Read the health reviews TSV (malformed lines are skipped) and keep only the rows
# that have text in `review_combined`.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\91962\Downloads\Course Material\CS9873-BrainInspiredAI\processed_expanded_stratified_reviews_health_v1.tsv",
    sep='\t',
    on_bad_lines='skip',
).dropna(subset=['review_combined'])

In [178]:
# Score every row; 'expand' spreads the returned (label, score) pair into columns 0 and 1.
sentiments = df.apply(extract_sentiment, axis=1, result_type='expand')
sentiments.head(n=5)

Unnamed: 0,0,1
0,positive,0.487475
1,positive,0.844414
2,positive,0.917963
3,positive,0.542714
4,positive,0.917963


In [179]:
# Attach the sentiment columns to the reviews.
# concat aligns on the index, so a `sentiments` frame computed from a different load of `df`
# would silently produce NaN-padded or extra rows - fail loudly instead.
if not sentiments.index.equals(df.index):
    raise ValueError("`sentiments` and `df` have different indexes; re-run the sentiment cell for this dataset")

# Drop sentiment columns left over from a previous run of this cell so that re-executing it
# does not append duplicate `sentiment_label` / `sentiment_score` columns.
df = pd.concat(
    [
        df.drop(columns=['sentiment_label', 'sentiment_score'], errors='ignore'),
        sentiments.rename(columns={0: 'sentiment_label', 1: 'sentiment_score'}),
    ],
    axis=1,
)
df.head()

Unnamed: 0,customer_id,review_id,product_id,product_title,product_category,star_rating,helpful_votes,vine,verified_purchase,review_combined,sentiment_label,sentiment_score
0,15722143,r2i2pv61y4w6wr,b001bknwfi,bacitraycin plus size 1z,health & personal care,5,0,n,y,first aid kit must have,positive,0.487475
1,15722143,r2i2pv61y4w6wr,b001bknwfi,bacitraycin plus size 1z,health & personal care,5,0,n,y,provides excellent protection from infection w...,positive,0.844414
2,13990039,r3f7uocqloo9v4,b002vwjz6u,drive medical economy removable top i. v. pole...,health & personal care,4,2,n,y,well made and works well,positive,0.917963
3,13990039,r3f7uocqloo9v4,b002vwjz6u,drive medical economy removable top i. v. pole...,health & personal care,4,2,n,y,i needed this to,positive,0.542714
4,13990039,r3f7uocqloo9v4,b002vwjz6u,drive medical economy removable top i. v. pole...,health & personal care,4,2,n,y,well made and works well,positive,0.917963


In [180]:
# Write the annotated health reviews back out as TSV, without the row index.
df.to_csv(
    path_or_buf=r"C:\Users\91962\Downloads\Course Material\CS9873-BrainInspiredAI\processed_expanded_stratified_reviews_health_v1_with_sentiments.tsv",
    sep='\t',
    index=False,
)

### Pet products

In [181]:
# Read the pet reviews TSV (malformed lines are skipped) and keep only the rows
# that have text in `review_combined`.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\91962\Downloads\Course Material\CS9873-BrainInspiredAI\processed_expanded_stratified_reviews_pet_v1.tsv",
    sep='\t',
    on_bad_lines='skip',
).dropna(subset=['review_combined'])

In [182]:
# Score every row; 'expand' spreads the returned (label, score) pair into columns 0 and 1.
sentiments = df.apply(extract_sentiment, axis=1, result_type='expand')
sentiments.head(n=5)

Unnamed: 0,0,1
0,positive,0.427189
1,positive,0.371213
2,positive,0.935705
3,positive,0.472556
4,positive,0.74644


In [183]:
# Attach the sentiment columns to the reviews.
# concat aligns on the index, so a `sentiments` frame computed from a different load of `df`
# would silently produce NaN-padded or extra rows - fail loudly instead.
if not sentiments.index.equals(df.index):
    raise ValueError("`sentiments` and `df` have different indexes; re-run the sentiment cell for this dataset")

# Drop sentiment columns left over from a previous run of this cell so that re-executing it
# does not append duplicate `sentiment_label` / `sentiment_score` columns.
df = pd.concat(
    [
        df.drop(columns=['sentiment_label', 'sentiment_score'], errors='ignore'),
        sentiments.rename(columns={0: 'sentiment_label', 1: 'sentiment_score'}),
    ],
    axis=1,
)
df.head()

Unnamed: 0,customer_id,review_id,product_id,product_title,product_category,star_rating,helpful_votes,vine,verified_purchase,review_combined,sentiment_label,sentiment_score
0,1713538,rxfiy0irg9f79,b00cb8dy1g,p.l soft sponge strawberry small cotton soft d...,pet products,3,0,n,y,to small,positive,0.427189
1,1713538,rxfiy0irg9f79,b00cb8dy1g,p.l soft sponge strawberry small cotton soft d...,pet products,3,0,n,y,it was to small for my pet,positive,0.371213
2,52422157,r2qcw7x0dcb826,b00cd535lq,dog glucosamine for dogs with chondroitin & ms...,pet products,4,1,n,n,our dogs love these chews,positive,0.935705
3,52422157,r2qcw7x0dcb826,b00cd535lq,dog glucosamine for dogs with chondroitin & ms...,pet products,4,1,n,n,i have to say i was concerned at first when i ...,positive,0.472556
4,27426860,r1mam2vv5z49jw,b0018mm8rc,petnation port-a-crate e2 indoor/outdoor pet home,pet products,3,2,n,y,quality and design flaws found,positive,0.74644


In [184]:
# Write the annotated pet reviews back out as TSV, without the row index.
df.to_csv(
    path_or_buf=r"C:\Users\91962\Downloads\Course Material\CS9873-BrainInspiredAI\processed_expanded_stratified_reviews_pet_v1_with_sentiments.tsv",
    sep='\t',
    index=False,
)