# Importing Dependencies and Reading data

In [25]:
import pandas as pd

In [26]:
# Load the raw reviews; the relative path is resolved against the notebook's working directory.
df = pd.read_csv("amazon_review.csv")

# Exploring the data

In [27]:
# Preview the first rows (head() shows 5 by default) to sanity-check the load.
df.head()

Unnamed: 0,Index,Name,review
0,0,Jamir,I�m not a phone fanatic but here is my simple ...
1,1,Drieza,Coming from android to iphone its just phenome...
2,2,RamLagan Yadav,iPhone 13 is very bad no response of company i...
3,3,vineet vs,"I took this phone in Feb , after using it for ..."
4,4,Vipul,Man undoubtedly most satisfying phone on the p...


In [28]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(97, 3)

In [29]:
# Column labels of the loaded frame.
df.columns

Index(['Index', 'Name', 'review'], dtype='object')

# Data cleaning

Removing duplicates

In [30]:
def remove_duplicates(df, column_name):
    """Return a copy of ``df`` without rows that repeat a ``column_name`` value.

    The first occurrence of each value is kept and the result is given a fresh
    0..n-1 index. The input frame is not modified, so re-running the calling
    cell always starts from the same data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to de-duplicate.
    column_name : str
        Column whose values decide whether two rows are duplicates.

    Returns
    -------
    pandas.DataFrame
        A new, de-duplicated frame.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    """
    return df.drop_duplicates(subset=[column_name]).reset_index(drop=True)
# Drop rows whose review text repeats an earlier row.
df = remove_duplicates(df, 'review')

In [31]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# NOTE(review): this takes the first entry of NLTK's data search-path list and
# appends that same entry back onto the list, so no new location is added.
# If the intent was to register a custom data directory, an explicit path is
# needed here — TODO confirm.
nltk_data_path = nltk.data.path[0]
nltk.data.path.append(nltk_data_path)


from functools import lru_cache


@lru_cache(maxsize=None)
def _english_stop_words():
    """Load the NLTK English stopword list once and reuse it on later calls."""
    return frozenset(stopwords.words('english'))


@lru_cache(maxsize=None)
def _shared_lemmatizer():
    """Create the WordNet lemmatizer once and reuse it on later calls."""
    return WordNetLemmatizer()


def preprocess_text(text):
    """Normalise one review into a space-joined string of lemmatised tokens.

    Steps, in order: lowercase, strip every character that is not an ASCII
    letter, digit or whitespace, tokenize, drop English stopwords, lemmatize.

    Punctuation is removed *before* the stopword lookup, so a contraction is
    looked up in its stripped form (e.g. "i'm" is checked as "im").

    Parameters
    ----------
    text : str or scalar
        Raw review text. Missing values (None / NaN / pd.NA) yield an empty
        string; any other non-string value is converted with ``str()``.

    Returns
    -------
    str
        The cleaned text; ``''`` when nothing is left after cleaning.
    """
    # A missing review would otherwise crash on `.lower()`.
    if not isinstance(text, str):
        if text is None or (pd.api.types.is_scalar(text) and pd.isna(text)):
            return ''
        text = str(text)

    # Convert text to lowercase
    text = text.lower()

    # Remove non-alphanumeric characters
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Nothing but whitespace left: tokenizing would yield no tokens anyway.
    if not text.strip():
        return ''

    # Tokenize the text into individual words
    tokens = nltk.word_tokenize(text)

    # Remove stopwords (the set is loaded once, not once per review)
    stop_words = _english_stop_words()
    tokens = [word for word in tokens if word not in stop_words]

    # Lemmatize words to their base form
    lemmatizer = _shared_lemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    # Join tokens back into a single string
    return ' '.join(tokens)

In [32]:
# Run preprocess_text on every value of the 'review' column and store the result as a new column.
df['preprocessed_review'] = df['review'].apply(preprocess_text)

In [33]:
# Compare raw and preprocessed text side by side for the first rows.
df.head()

Unnamed: 0,Index,Name,review,preprocessed_review
0,0,Jamir,I�m not a phone fanatic but here is my simple ...,im phone fanatic simple honest review
1,1,Drieza,Coming from android to iphone its just phenome...,coming android iphone phenomenal buttery smoot...
2,2,RamLagan Yadav,iPhone 13 is very bad no response of company i...,iphone 13 bad response company iam sad
3,3,vineet vs,"I took this phone in Feb , after using it for ...",took phone feb using month almost good come ca...
4,4,Vipul,Man undoubtedly most satisfying phone on the p...,man undoubtedly satisfying phone planet


In [34]:
# Persist the frame, including the new 'preprocessed_review' column.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra unnamed first column — confirm downstream readers expect it.
df.to_csv("preprocessed_review_data.csv")

# Labelling Data

Sentiment scorer used: VADER

In [35]:
pip install vaderSentiment

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\USER\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


In [36]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [37]:
# Create one SentimentIntensityAnalyzer instance; the scoring cells call its polarity_scores method.
analyzer = SentimentIntensityAnalyzer()

In [38]:
# Empty accumulators for the four VADER score components.
compound_score, positive_score, negative_score, neutral_score = [], [], [], []

# Text to be scored: the cleaned review column.
texts = df['preprocessed_review']

In [39]:
# Score every preprocessed review with VADER.
# The four lists are rebuilt from scratch on each run, so re-executing this
# cell never appends a second copy of the scores.
all_scores = [analyzer.polarity_scores(text) for text in texts]

compound_score = [scores['compound'] for scores in all_scores]
positive_score = [scores['pos'] for scores in all_scores]
negative_score = [scores['neg'] for scores in all_scores]
neutral_score = [scores['neu'] for scores in all_scores]

Labelling: 1 (compound score >= 0), 0 (compound score < 0)

In [45]:
# Binary sentiment label per review: 1 when VADER's compound score is >= 0, else 0.
# Scoring here uses the raw 'review' text, not 'preprocessed_review'.
label = []

# Iterate through the reviews
for review in df['review']:
    # Analyze sentiment using VADER
    sentiment_scores = analyzer.polarity_scores(review)

    # Keep the score in its own name: assigning it to `compound_score` would
    # replace the list of per-review compound scores with a single float.
    compound = sentiment_scores['compound']
    label.append(1 if compound >= 0 else 0)

In [46]:
# Attach the labels as a new column; `label` holds one entry per row of df, in row order.
df['label']=label

In [47]:
# Spot-check the assigned labels against the review text.
df.head()

Unnamed: 0,Index,Name,review,preprocessed_review,label
0,0,Jamir,I�m not a phone fanatic but here is my simple ...,im phone fanatic simple honest review,1
1,1,Drieza,Coming from android to iphone its just phenome...,coming android iphone phenomenal buttery smoot...,1
2,2,RamLagan Yadav,iPhone 13 is very bad no response of company i...,iphone 13 bad response company iam sad,0
3,3,vineet vs,"I took this phone in Feb , after using it for ...",took phone feb using month almost good come ca...,0
4,4,Vipul,Man undoubtedly most satisfying phone on the p...,man undoubtedly satisfying phone planet,1


In [48]:
# Persist the labelled frame.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra unnamed first column — confirm downstream readers expect it.
df.to_csv('labelled.csv')