<a href="https://colab.research.google.com/github/kavyaaaa16/Sentiment_analysis/blob/main/Labelling_reviews.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
print(torch.__version__)


2.6.0+cu124


In [2]:
import transformers
print(transformers.__version__)


4.52.4


In [9]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from tqdm import tqdm

# Load dataset
df = pd.read_csv("/content/all_kindle_review .csv")
df = df.dropna(subset=['summary'])

# Load model + tokenizer
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Sentiment label map
labels = ['Negative', 'Neutral', 'Positive']

# Sentiment function
def get_sentiment(text):
    try:
        encoded_input = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
        with torch.no_grad():
            output = model(**encoded_input)
        scores = softmax(output.logits.numpy()[0])
        return labels[scores.argmax()]
    except:
        return "Unknown"

# Apply with progress bar
tqdm.pandas()
df['sentiment'] = df['reviewText'].progress_apply(get_sentiment)

# Save to CSV
df.to_csv("labeled_book_summaries_roberta.csv", index=False)

# Preview
print(df[['summary', 'sentiment']].head())


100%|██████████| 11998/11998 [1:32:52<00:00,  2.15it/s]


                    summary sentiment
0  Entertaining But Average   Neutral
1   Terrific menage scenes!  Positive
2          Snapdragon Alley  Positive
3    very light murder cozy  Positive
4                      Book  Positive


In [15]:
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,asin,helpful,rating,reviewText,reviewTime,reviewerID,reviewerName,summary,unixReviewTime,sentiment
0,0,11539,B0033UV8HI,"[8, 10]",3,"Jace Rankin may be short, but he's nothing to ...","09 2, 2010",A3HHXRELK8BHQG,Ridley,Entertaining But Average,1283385600,LABEL_1
1,1,5957,B002HJV4DE,"[1, 1]",5,Great short read. I didn't want to put it dow...,"10 8, 2013",A2RGNZ0TRF578I,Holly Butler,Terrific menage scenes!,1381190400,LABEL_2
2,2,9146,B002ZG96I4,"[0, 0]",3,I'll start by saying this is the first of four...,"04 11, 2014",A3S0H2HV6U1I7F,Merissa,Snapdragon Alley,1397174400,LABEL_1
3,3,7038,B002QHWOEU,"[1, 3]",3,Aggie is Angela Lansbury who carries pocketboo...,"07 5, 2014",AC4OQW3GZ919J,Cleargrace,very light murder cozy,1404518400,LABEL_1
4,4,1776,B001A06VJ8,"[0, 1]",4,I did not expect this type of book to be in li...,"12 31, 2012",A3C9V987IQHOQD,Rjostler,Book,1356912000,LABEL_1
...,...,...,...,...,...,...,...,...,...,...,...,...
11995,11995,2183,B001DUGORO,"[0, 0]",4,Valentine cupid is a vampire- Jena and Ian ano...,"02 28, 2014",A1OKS5Q1HD8WQC,lisa jon jung,jena,1393545600,LABEL_1
11996,11996,6272,B002JCSFSQ,"[2, 2]",5,I have read all seven books in this series. Ap...,"05 16, 2011",AQRSPXLNEQAMA,TerryLP,Peacekeepers Series,1305504000,LABEL_1
11997,11997,12483,B0035N1V7K,"[0, 1]",3,This book really just wasn't my cuppa. The si...,"07 26, 2013",A2T5QLT5VXOJAK,hwilson,a little creepy,1374796800,LABEL_0
11998,11998,3640,B001W1XT40,"[1, 2]",1,"tried to use it to charge my kindle, it didn't...","09 17, 2013",A28MHD2DDY6DXB,"Allison A. Slater ""Gryphon50""",didn't work,1379376000,LABEL_0


In [11]:
df[['reviewText','summary','sentiment']]

Unnamed: 0,reviewText,summary,sentiment
0,"Jace Rankin may be short, but he's nothing to ...",Entertaining But Average,Neutral
1,Great short read. I didn't want to put it dow...,Terrific menage scenes!,Positive
2,I'll start by saying this is the first of four...,Snapdragon Alley,Positive
3,Aggie is Angela Lansbury who carries pocketboo...,very light murder cozy,Positive
4,I did not expect this type of book to be in li...,Book,Positive
...,...,...,...
11995,Valentine cupid is a vampire- Jena and Ian ano...,jena,Positive
11996,I have read all seven books in this series. Ap...,Peacekeepers Series,Positive
11997,This book really just wasn't my cuppa. The si...,a little creepy,Negative
11998,"tried to use it to charge my kindle, it didn't...",didn't work,Negative


In [18]:
label_map = {
    'LABEL_0': 'Negative',
    'LABEL_1': 'Neutral',
    'LABEL_2': 'Positive'
}

df['sentiment'] = df['sentiment'].map(label_map)


In [20]:
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,asin,helpful,rating,reviewText,reviewTime,reviewerID,reviewerName,summary,unixReviewTime,sentiment
0,0,11539,B0033UV8HI,"[8, 10]",3,"Jace Rankin may be short, but he's nothing to ...","09 2, 2010",A3HHXRELK8BHQG,Ridley,Entertaining But Average,1283385600,
1,1,5957,B002HJV4DE,"[1, 1]",5,Great short read. I didn't want to put it dow...,"10 8, 2013",A2RGNZ0TRF578I,Holly Butler,Terrific menage scenes!,1381190400,
2,2,9146,B002ZG96I4,"[0, 0]",3,I'll start by saying this is the first of four...,"04 11, 2014",A3S0H2HV6U1I7F,Merissa,Snapdragon Alley,1397174400,
3,3,7038,B002QHWOEU,"[1, 3]",3,Aggie is Angela Lansbury who carries pocketboo...,"07 5, 2014",AC4OQW3GZ919J,Cleargrace,very light murder cozy,1404518400,
4,4,1776,B001A06VJ8,"[0, 1]",4,I did not expect this type of book to be in li...,"12 31, 2012",A3C9V987IQHOQD,Rjostler,Book,1356912000,
...,...,...,...,...,...,...,...,...,...,...,...,...
11995,11995,2183,B001DUGORO,"[0, 0]",4,Valentine cupid is a vampire- Jena and Ian ano...,"02 28, 2014",A1OKS5Q1HD8WQC,lisa jon jung,jena,1393545600,
11996,11996,6272,B002JCSFSQ,"[2, 2]",5,I have read all seven books in this series. Ap...,"05 16, 2011",AQRSPXLNEQAMA,TerryLP,Peacekeepers Series,1305504000,
11997,11997,12483,B0035N1V7K,"[0, 1]",3,This book really just wasn't my cuppa. The si...,"07 26, 2013",A2T5QLT5VXOJAK,hwilson,a little creepy,1374796800,
11998,11998,3640,B001W1XT40,"[1, 2]",1,"tried to use it to charge my kindle, it didn't...","09 17, 2013",A28MHD2DDY6DXB,"Allison A. Slater ""Gryphon50""",didn't work,1379376000,


In [22]:
df.to_csv("labeled_book_summaries.csv", index=False)
