In [120]:
# Imports: data handling (pandas, numpy), text cleaning (re, nltk) and plotting (wordcloud, matplotlib)
import pandas as pd
import re
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

In [121]:
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer

In [122]:
# Fetch the NLTK data packages; nltk.download() reports packages that are already up to date.
# 'stopwords' backs nltk.corpus.stopwords.words('english'), used for stop-word filtering.
# NOTE(review): no code visible in this notebook reads 'punkt' or 'wordnet' — confirm they are still needed.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [123]:
# Load the phone-review spreadsheet (path is relative to the working directory) and preview three rows.
df=pd.read_excel('PhoneReviews.xlsx')
df.head(3)

Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes
0,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,I feel so LUCKY to have found this used (phone...,1
1,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,"nice phone, nice up grade from my pantach revu...",0
2,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,Very pleased,0


In [124]:
def categories(row):
  """Map a row's star rating to a satisfaction label.

  Args:
    row: Mapping-like record (e.g. a DataFrame row) exposing a 'Rating' entry.

  Returns:
    str: "very happy" (5), "happy" (4), "neutral" (3), "unhappy" (2);
    any other rating value yields "very unhappy".
  """
  rating=row['Rating']
  labels_by_rating=((5,"very happy"),(4,"happy"),(3,"neutral"),(2,"unhappy"))
  # First matching rating wins; everything else falls through to the lowest label.
  return next((label for stars,label in labels_by_rating if rating==stars),"very unhappy")

In [125]:
# Label every row with its satisfaction category (categories is called once per row).
df['categories']=df.apply(categories,axis=1)

In [126]:
# Drop the product metadata, raw rating and vote-count columns; preview the remaining frame.
PhoneReviews_df=df.drop(
    columns=['Product Name','Brand Name','Price','Rating','Review Votes'],
)
PhoneReviews_df.head(3)

Unnamed: 0,Reviews,categories
0,I feel so LUCKY to have found this used (phone...,very happy
1,"nice phone, nice up grade from my pantach revu...",happy
2,Very pleased,very happy


In [127]:
# Pull the review column out as a NumPy array so normalize_document can be applied to it via np.vectorize.
corpus=np.array(PhoneReviews_df['Reviews'])
corpus

array(["I feel so LUCKY to have found this used (phone to us & not used hard at all), phone on line from someone who upgraded and sold this one. My Son liked his old one that finally fell apart after 2.5+ years and didn't want an upgrade!! Thank you Seller, we really appreciate it & your honesty re: said used phone.I recommend this seller very highly & would but from them again!!",
       'nice phone, nice up grade from my pantach revue. Very clean set up and easy set up. never had an android phone but they are fantastic to say the least. perfect size for surfing and social media. great phone samsung',
       'Very pleased',
       'It works good but it goes slow sometimes but its a very good phone I love it',
       'Great phone to replace my lost phone. The only thing is the volume up button does not work, but I can still go into settings to adjust. Other than that, it does the job until I am eligible to upgrade my phone again.Thaanks!',
       'I already had a phone with problems...

In [128]:
# Tokenizer and English stop-word list shared by normalize_document.
wpt=nltk.WordPunctTokenizer()
stop_words=nltk.corpus.stopwords.words('english')

In [129]:
def normalize_document(doc):
  """Clean one review and return it as a lowercase, space-joined token string.

  Steps, in order: strip URLs, blank out every non-word character, lowercase,
  drop isolated single ASCII letters, collapse whitespace, tokenize with
  ``wpt`` and discard tokens found in ``stop_words``.

  Args:
    doc: The raw review. It is coerced with ``str()``, so non-string values
      are processed as their string form.

  Returns:
    str: The surviving tokens joined by single spaces ('' if none survive).
  """
  doc=str(doc)
  # URLs must be removed first: once punctuation has been blanked out, the
  # URL pattern can only match the bare scheme, and the rest of the address
  # leaks through as ordinary words.
  doc=re.sub(r'http\S+',' ',doc)
  doc=re.sub(r'\W',' ',doc)
  doc=doc.lower()
  # Remove single letters standing between whitespace, then one at the very start.
  doc=re.sub(r'\s+[a-z]\s+',' ',doc)
  doc=re.sub(r'^[a-z]\s+',' ',doc)
  doc=re.sub(r'\s+',' ',doc)
  tokens=wpt.tokenize(doc)
  filtered_tokens=[token for token in tokens if token not in stop_words]
  return ' '.join(filtered_tokens)

In [130]:
# Start from the frame without the label column and replace each review with its normalized text.
cleaned_corpus=(
    PhoneReviews_df
    .drop(columns=['categories'])
    .assign(Reviews=lambda frame:frame['Reviews'].apply(normalize_document))
)
cleaned_corpus

Unnamed: 0,Reviews
0,feel lucky found used phone us used hard phone...
1,nice phone nice grade pantach revue clean set ...
2,pleased
3,works good goes slow sometimes good phone love
4,great phone replace lost phone thing volume bu...
5,already phone problems know stated used dang s...
6,charging port loose got soldered needed new ba...
7,phone looks good stay charged buy new battery ...
8,originally using samsung s2 galaxy sprint want...
9,battery life great responsive touch issue some...


In [131]:
# Wrap normalize_document so it can be called on a whole NumPy array of reviews at once.
vectorize_corpus=np.vectorize(normalize_document)
vectorize_corpus

<numpy.vectorize at 0x7f0a1b523ee0>

In [132]:
# Normalize every review in the corpus array; the result is an array of cleaned strings.
normalized_corpus=vectorize_corpus(corpus)
normalized_corpus

array(['feel lucky found used phone us used hard phone line someone upgraded sold one son liked old one finally fell apart 2 5 years want upgrade thank seller really appreciate honesty said used phone recommend seller highly would',
       'nice phone nice grade pantach revue clean set easy set never android phone fantastic say least perfect size surfing social media great phone samsung',
       'pleased', 'works good goes slow sometimes good phone love',
       'great phone replace lost phone thing volume button work still go settings adjust job eligible upgrade phone thaanks',
       'already phone problems know stated used dang state charge wish would read comments would purchased item cracked side damaged goods trying charge another way work requesting money back get money back signed unhappy customer',
       'charging port loose got soldered needed new battery well 100 later including cost purchase usable phone phone sold state',
       'phone looks good stay charged buy new batt

In [133]:

# Build TF-IDF features from the cleaned reviews. Terms must appear in at least
# 20% and at most 80% of the documents (min_df / max_df); rows are L2-normalised.
tv=TfidfVectorizer(min_df=0.2,max_df=0.8,norm='l2',
                   use_idf=True,smooth_idf=True)
# Fit on cleaned_corpus['Reviews'] (the same normalize_document output) rather
# than on normalized_corpus: this cell rebinds normalized_corpus to the TF-IDF
# DataFrame below, so reading it here would make a re-run iterate the
# DataFrame's column names instead of the reviews.
tv_matrix=tv.fit_transform(cleaned_corpus['Reviews']).toarray()
vocab=tv.get_feature_names_out()
# Weights are rounded to two decimals for display and export.
normalized_corpus=pd.DataFrame(np.round(tv_matrix,2),columns=vocab)
normalized_corpus

Unnamed: 0,battery,get,great,issue,one,problems,still,well
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.66,0.0,0.0,0.0,0.75,0.0
5,0.0,0.71,0.0,0.0,0.0,0.71,0.0,0.0
6,0.71,0.0,0.0,0.0,0.0,0.0,0.0,0.71
7,0.73,0.0,0.0,0.0,0.0,0.0,0.68,0.0
8,0.43,0.0,0.69,0.0,0.0,0.0,0.4,0.43
9,0.52,0.52,0.42,0.52,0.0,0.0,0.0,0.0


In [134]:
# Persist the rounded TF-IDF table to CSV (to_csv writes the row index as the first column by default).
normalized_corpus.to_csv('PhoneReviews_normal.csv')

In [135]:
! pip install textblob



In [136]:
from textblob import TextBlob

In [137]:
# Cast the cleaned reviews to pandas' dedicated string dtype before sentiment scoring.
cleaned_corpus['Reviews']=cleaned_corpus['Reviews'].astype(pd.StringDtype())

In [138]:
# Label each cleaned review from its TextBlob polarity: 1 when polarity > 0,
# otherwise 0 (neutral and negative reviews share the 0 label, exactly as the
# previous np.select with its implicit default of 0 did).
# Only the polarity score is read, so no per-row pd.Series and no temporary
# polarity/subjectivity columns are created.
cleaned_corpus=cleaned_corpus.assign(
    sentiment=lambda frame:(
        frame['Reviews']
        .apply(lambda review:TextBlob(review).sentiment.polarity)
        .gt(0)
        .astype(int)
    )
)
cleaned_corpus

Unnamed: 0,Reviews,sentiment
0,feel lucky found used phone us used hard phone...,1
1,nice phone nice grade pantach revue clean set ...,1
2,pleased,1
3,works good goes slow sometimes good phone love,1
4,great phone replace lost phone thing volume bu...,1
5,already phone problems know stated used dang s...,0
6,charging port loose got soldered needed new ba...,1
7,phone looks good stay charged buy new battery ...,1
8,originally using samsung s2 galaxy sprint want...,1
9,battery life great responsive touch issue some...,1


In [139]:
# Save the cleaned reviews together with their sentiment labels to CSV.
cleaned_corpus.to_csv('Sentiments.csv')