In [5]:
import pandas as pd
import numpy as np
from textblob import TextBlob

In [2]:
df = pd.read_csv('amazon_alexa.tsv', delimiter = '\t')
df.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [3]:
df.shape

(3150, 5)

In [4]:
# Part of speech dictionary

pos = {'noun' : ['NN','NNS','NNP','NNPS'],
    'pron' : ['PRP','PRP$','WP','WP$'],
    'verb' : ['VB','VBD','VBG','VBN','VBP','VBZ'],
    'adj' :  ['JJ','JJR','JJS'],
    'adv' : ['RB','RBR','RBS','WRB']}

In [6]:
# Function to check part of speech tag count of a word in a sentence

def pos_check(x, flag):
    count = 0
    try:
        w = Textblob(x)
        for i in w.tags:
            ppo = list(i)[1]
            if ppo in pos[flag]:
                count = count + 1
    except:
        pass
    return count

In [8]:
# Counting number of nouns in the text

df['noun_count'] = df['verified_reviews'].apply(lambda x: pos_check(x, 'noun'))
df.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback,noun_count
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,0
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,0
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,0
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,0
4,5,31-Jul-18,Charcoal Fabric,Music,1,0


In [9]:
# Counting number of verbs in the text

df['verb_count'] = df['verified_reviews'].apply(lambda x: pos_check(x, 'verb'))
df.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback,noun_count,verb_count
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,0,0
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,0,0
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,0,0
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,0,0
4,5,31-Jul-18,Charcoal Fabric,Music,1,0,0


In [13]:
# Counting adjectives in the text

df['adj_count'] = df['verified_reviews'].apply(lambda x: pos_check(x, 'adj'))
df.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback,noun_count,verb_count,adj_count
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,0,0,0
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,0,0,0
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,0,0,0
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,0,0,0
4,5,31-Jul-18,Charcoal Fabric,Music,1,0,0,0


In [14]:
# Counting adverbs in the text

df['adv_count'] = df['verified_reviews'].apply(lambda x: pos_check(x, 'adv'))
df.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback,noun_count,verb_count,adj_count,adv_count
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,0,0,0,0
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,0,0,0,0
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,0,0,0,0
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,0,0,0,0
4,5,31-Jul-18,Charcoal Fabric,Music,1,0,0,0,0


In [17]:
# Counting pronouns in the text

df['pron_count'] = df['verified_reviews'].apply(lambda x: pos_check(x, 'pron'))
df.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback,noun_count,verb_count,adj_count,adv_count,pron_count
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1,0,0,0,0,0
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1,0,0,0,0,0
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1,0,0,0,0,0
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1,0,0,0,0,0
4,5,31-Jul-18,Charcoal Fabric,Music,1,0,0,0,0,0


In [18]:
# Summarizing the dataframe

df[['noun_count', 'verb_count', 'adj_count', 'adv_count', 'pron_count']].describe()

Unnamed: 0,noun_count,verb_count,adj_count,adv_count,pron_count
count,3150.0,3150.0,3150.0,3150.0,3150.0
mean,0.0,0.0,0.0,0.0,0.0
std,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0,0.0
