In [1]:
def processRow(row):
    import re
    import nltk
    from textblob import TextBlob
    from nltk.corpus import stopwords
    from nltk.stem import PorterStemmer
    from textblob import Word
    from nltk.util import ngrams
    from nltk.tokenize import word_tokenize


    #Lower case
    row.lower()

    #Removes unicode strings like "\u002c"  -> ,(comma)
    row= re.sub(r'(\\u[0-9A-Fa-f]+)',r'', row)

    # Removes non-ascii characters. note : \x00 to \x7f is 00 to 255
    # non-ascii characters like copyrigth symbol, trademark symbol
    row = re.sub(r'[^\x00-\x7f]',r'',row)

    #convert any url to URL
    row = re.sub('((www\.[^\s]+)|(https?://[^\s]+))','URL',row)

    #Convert any @Username to "AT_USER"
    row = re.sub('@[^\s]+','AT_USER',row)

    #Remove additional white spaces
    row = re.sub('[\s]+', ' ', row)
    row = re.sub('[\n]+', ' ', row)

    #Remove not alphanumeric symbols white spaces
    row = re.sub(r'[^\w]', ' ', row)

    #Removes hastag in front of a word """
    row = re.sub(r'#([^\s]+)', r'\1', row)

    #Replace #word with word
    row = re.sub(r'#([^\s]+)', r'\1', row)

    #Removes all possible emoticons
    row = re.sub(':\)|:\(|:\)|;\)|:-\)|\(-:|:-D|=D|:P|xD|X-p|\^\^|:-|\^\.\^|\^\-\^|\^\\^|\,-\)|\)-:|:\'\(|:\(|:-\(|:\S|T\.T|\.\\.|:<|:-\S|:-<|\\-\*|:O|=O|=\-O|O\.o|XO|O\_O|:-\@|=/|:/|X\-\(|>\.<|>=\(|D:', '', row)

    #remove numbers -> this is optional
    row = ''.join([i for i in row if not i.isdigit()])

    #remove multiple exclamation -> this is optional
    row = re.sub(r"(\!)\1+", ' ', row)

    #remove multiple question marks -> this is optional
    row = re.sub(r"(\?)\1+", ' ', row)

    #remove multistop -> this is optional
    row = re.sub(r"(\.)\1+", ' ', row)

    #trim
    row = row.strip('\'"')

    #lemma
    from textblob import Word
    row =" ".join([Word(word).lemmatize() for word in row.split()])

    #stemmer
    #st = PorterStemmer()
    #row=" ".join([st.stem(word) for word in row.split()])


    return row

In [2]:
import pandas 
df_review = pandas.read_csv('amazon_alexa.csv',sep='\t')
df_review.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [3]:
df_review['feedback'].value_counts()

1    2893
0     257
Name: feedback, dtype: int64

In [5]:
pip install textblob

Collecting textblob
  Downloading textblob-0.18.0.post0-py3-none-any.whl (626 kB)
     -------------------------------------- 626.3/626.3 kB 4.4 MB/s eta 0:00:00
Collecting nltk>=3.8
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 10.6 MB/s eta 0:00:00
Installing collected packages: nltk, textblob
  Attempting uninstall: nltk
    Found existing installation: nltk 3.7
    Uninstalling nltk-3.7:
      Successfully uninstalled nltk-3.7
Successfully installed nltk-3.8.1 textblob-0.18.0.post0
Note: you may need to restart the kernel to use updated packages.


In [6]:
#clean your verified_reviews
cleaned_verified_reviews = []

for line in df_review['verified_reviews']:
    cleanLine = processRow(line)
    cleaned_verified_reviews.append(cleanLine)
    
import numpy as np
df_review['cleaned_verified_reviews'] = np.asarray(cleaned_verified_reviews)

df_review.head(5)

Unnamed: 0,rating,date,variation,verified_reviews,feedback,cleaned_verified_reviews
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,Love my Echo
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,Loved it
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,Sometimes while playing a game you can answer ...
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,I have had a lot of fun with this thing My yr ...
4,5,31-Jul-18,Charcoal Fabric,Music,1,Music


In [7]:
#Lets define our senti9ment analyzer function:
from textblob import TextBlob

def analyze_sentiment(cleaned_verified_reviews):
    analysis = TextBlob(cleaned_verified_reviews)
    if analysis.sentiment.polarity > 0:
        return 'Positive'
    elif analysis.sentiment.polarity == 0:
        return 'Neutral'
    else:
        return 'Negative'

In [8]:
#Lets find the sentiment by calling the above defnn fn
#create a new colunn called 'Sentiment'
df_review['Sentiment'] = df_review['cleaned_verified_reviews'].apply(lambda x: analyze_sentiment(x))

df_review[['cleaned_verified_reviews','Sentiment']].head(3)

Unnamed: 0,cleaned_verified_reviews,Sentiment
0,Love my Echo,Positive
1,Loved it,Positive
2,Sometimes while playing a game you can answer ...,Neutral


In [None]:
#no.