# Sentiment Analyzer for Products

In [1]:
import graphlab
import os

# Read data

In [2]:
# Remote location of the Amazon baby-product reviews CSV (default source for get_data)
URL = 'https://d396qusza40orc.cloudfront.net/phoenixassets/amazon_baby.csv'

In [3]:
def get_data(filename='amazon_baby.csv', url=URL, force_download=False):
    """Download (if needed) and load the Amazon baby-product reviews.

    Parameters
    ----------
    filename: string (optional)
        local path where the CSV is cached and from which it is read

    url: string (optional)
        address the CSV is downloaded from when it is not cached

    force_download: bool (optional)
        if True, download the data again even if `filename` already exists

    Returns
    -------
    data: graphlab SFrame. Similar to a pandas DataFrame,
        but with capacity for faster analysis of larger data sets

    """
    # urlretrieve is not provided by the notebook's import cell, so bring it
    # into scope here; the fallback covers the Python 3 module layout.
    try:
        from urllib import urlretrieve  # Python 2
    except ImportError:
        from urllib.request import urlretrieve  # Python 3

    if force_download or not os.path.exists(filename):
        urlretrieve(url, filename)

    # Read from `filename` rather than a hard-coded path so that a
    # caller-supplied location is actually honoured.
    return graphlab.SFrame(filename)

# Load the reviews (get_data downloads the CSV first if it is not cached) and preview them
products = get_data()
products.head()

This non-commercial license of GraphLab Create for academic use is assigned to william_gray@alumni.brown.edu and will expire on March 20, 2018.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1491876739.log


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4


# Build word count vector for each review

In [4]:
# Add a 'word_count' column holding count_words' result for each review's text,
# then preview the frame with the new column
products['word_count'] = graphlab.text_analytics.count_words(products['review'])
products.head()

name,review,rating,word_count
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3,"{'and': 5, '6': 1, 'stink': 1, 'because' ..."
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5,"{'ingenious': 1, 'and': 3, 'love': 2, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ..."
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ..."
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ..."


In [5]:
# Render GraphLab Canvas output inside the notebook ('ipynb' target)
graphlab.canvas.set_target('ipynb')

# Show the 'name' column in Canvas to look at the products with the most reviews
products['name'].show()

# Take a look at Vulli Sophie, the top baby toy by number of reviews

This is a teether, a toy that babies chew on, in the form of a small giraffe. It's the most gifted baby toy on Amazon.

In [6]:
# Keep only the rows whose name is exactly 'Vulli Sophie the Giraffe Teether' and count them.
# NOTE: this subset is taken before the 3-star reviews are dropped from `products`,
# so it still contains 3-star reviews.
giraffe_reviews = products[products['name']=='Vulli Sophie the Giraffe Teether']
len(giraffe_reviews)

785

In [7]:
# Show the giraffe reviews' ratings using the 'Categorical' view
giraffe_reviews['rating'].show(view='Categorical')

# Build a sentiment classifier
First, look at how the ratings are distributed across all products to see where the most positive and negative reviews come from.

In [8]:
# Show the ratings of all products using the 'Categorical' view
products['rating'].show(view='Categorical')

## Add positive or negative sentiment columns based on rating
###  1 = Positive, 0 = Negative

In [9]:
# 3-star ratings are treated as neutral, so those rows are removed from the dataset
products = products[products['rating'] != 3]

In [10]:
# If the rating is 4 or 5, the review is considered positive (1), else negative (0).
# With the 3-star rows already removed, "else" here means a rating of 1 or 2.
products['sentiment'] = products['rating'] >= 4
products.head()

name,review,rating,word_count,sentiment
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",1
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",1
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5,"{'ingenious': 1, 'and': 3, 'love': 2, ...",1
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",1
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",1
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4,"{'all': 1, 'forget': 1, 'just': 1, 'food': 1, ...",1


# Training the sentiment classifier

In [11]:
# Randomly split the reviews: a .8 fraction for training, the remainder for testing.
# The fixed seed keeps the split the same from run to run.
train_data, test_data = products.random_split(.8, seed=0)
print(len(train_data))
print(len(test_data))

133448
33304


In [12]:
# Fit a logistic classifier that predicts 'sentiment' from the 'word_count' column.
# NOTE(review): test_data is passed as the validation set here and is also the set
# the model is evaluated on later; consider holding out a separate validation split.
sentiment_model = graphlab.logistic_classifier.create(train_data,
                                                      target='sentiment',
                                                      features=['word_count'],
                                                      validation_set=test_data)

# Evaluate the sentiment model

In [13]:
# Evaluate the model on test_data, requesting the ROC curve metric
sentiment_model.evaluate(test_data, metric='roc_curve')

{'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+----------------+----------------+-------+------+
 | threshold |      fpr       |      tpr       |   p   |  n   |
 +-----------+----------------+----------------+-------+------+
 |    0.0    |      1.0       |      1.0       | 27976 | 5328 |
 |   1e-05   | 0.907845345345 | 0.998856162425 | 27976 | 5328 |
 |   2e-05   | 0.893768768769 | 0.998713182728 | 27976 | 5328 |
 |   3e-05   | 0.883821321321 | 0.99839147841  | 27976 | 5328 |
 |   4e-05   | 0.877627627628 | 0.998319988562 | 27976 | 5328 |
 |   5e-05   | 0.874436936937 | 0.998141263941 | 27976 | 5328 |
 |   6e-05   | 0.870307807808 | 0.998034029168 | 27976 | 5328 |
 |   7e-05   | 0.866366366366 | 0.997998284244 | 27976 | 5328 |
 |   8e-05   | 0.862424924925 | 0.997998284244 | 27976 | 5328 |
 |   9e-05   | 0.858858858859 | 0.997998284244 | 27976 | 5328 |
 +-----------+----------------+----------------+-------+------

In [14]:
# Open the model's 'Evaluation' view
sentiment_model.show(view='Evaluation')

In [15]:
# Add the model's probability output for each giraffe review as a new column.
# Presumably this is the probability of the positive class (sentiment == 1) -- TODO confirm.
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(giraffe_reviews, output_type='probability')
giraffe_reviews.head()

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,He likes chewing on all the parts especially the ...,5,"{'and': 1, 'all': 1, 'because': 1, 'it': 1, ...",0.999530720309
Vulli Sophie the Giraffe Teether ...,My son loves this toy and fits great in the diaper ...,5,"{'and': 1, 'right': 1, 'help': 1, 'just': 1, ...",0.999345752065
Vulli Sophie the Giraffe Teether ...,There really should be a large warning on the ...,1,"{'and': 2, 'all': 1, 'would': 1, 'latex.': 1, ...",0.0131574256321
Vulli Sophie the Giraffe Teether ...,All the moms in my moms' group got Sophie for ...,5,"{'and': 2, 'one!': 1, 'all': 1, 'love': 1, ...",0.995591722087
Vulli Sophie the Giraffe Teether ...,I was a little skeptical on whether Sophie was ...,5,"{'and': 3, 'all': 1, 'months': 1, 'old': 1, ...",0.658983410894
Vulli Sophie the Giraffe Teether ...,I have been reading about Sophie and was going ...,5,"{'and': 6, 'seven': 1, 'already': 1, 'love': 1, ...",0.999997197609
Vulli Sophie the Giraffe Teether ...,My neice loves her sophie and has spent hours ...,5,"{'and': 4, 'drooling,': 1, 'love': 1, ...",0.989417123576
Vulli Sophie the Giraffe Teether ...,What a friendly face! And those mesmerizing ...,5,"{'and': 3, 'chew': 1, 'be': 1, 'is': 1, ...",0.999719314337
Vulli Sophie the Giraffe Teether ...,We got this just for my son to chew on instea ...,5,"{'chew': 2, 'seemed': 1, 'because': 1, 'about.': ...",0.971230951886
Vulli Sophie the Giraffe Teether ...,"My baby seems to like this toy, but I could ...",3,"{'and': 2, 'already': 1, 'some': 1, 'it': 3, ...",0.190351700637


# Sort the reviews based on predicted sentiment and explore

In [16]:
# Order the giraffe reviews from highest to lowest predicted sentiment
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)
giraffe_reviews.head()

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,"Sophie, oh Sophie, your time has come. My ...",5,"{'giggles': 1, 'all': 1, ""violet's"": 2, 'bring': ...",1.0
Vulli Sophie the Giraffe Teether ...,I'm not sure why Sophie is such a hit with the ...,4,"{'adoring': 1, 'find': 1, 'month': 1, 'bright': 1, ...",0.999999999718
Vulli Sophie the Giraffe Teether ...,I'll be honest...I bought this toy because all the ...,4,"{'all': 2, 'discovered': 1, 'existence.': 1, ...",0.999999999397
Vulli Sophie the Giraffe Teether ...,We got this little giraffe as a gift from a ...,5,"{'all': 2, ""don't"": 1, '(literally).so': 1, ...",0.99999999926
Vulli Sophie the Giraffe Teether ...,As a mother of 16month old twins; I bought ...,5,"{'cute': 1, 'all': 1, 'reviews.': 2, 'just' ...",0.999999998631
Vulli Sophie the Giraffe Teether ...,Sophie the Giraffe is the perfect teething toy. ...,5,"{'just': 2, 'both': 1, 'month': 1, 'ears,': 1, ...",0.999999997272
Vulli Sophie the Giraffe Teether ...,Sophie la giraffe is absolutely the best toy ...,5,"{'and': 5, 'the': 1, 'all': 1, 'that': 2, ...",0.999999996017
Vulli Sophie the Giraffe Teether ...,My 5-mos old son took to this immediately. The ...,5,"{'just': 1, 'shape': 2, 'mutt': 1, '""dog': 1, ...",0.999999995643
Vulli Sophie the Giraffe Teether ...,My nephews and my four kids all had Sophie in ...,5,"{'and': 4, 'chew': 1, 'all': 1, 'perfect;': 1, ...",0.999999990363
Vulli Sophie the Giraffe Teether ...,Never thought I'd see my son French kissing a ...,5,"{'giggles': 1, 'all': 1, 'out,': 1, 'over': 1, ...",0.999999984298


In [17]:
# Review with the highest predicted sentiment (first row after the descending sort)
giraffe_reviews[0]['review']

"Sophie, oh Sophie, your time has come. My granddaughter, Violet is 5 months old and starting to teeth. What joy little Sophie brings to Violet. Sophie is made of a very pliable rubber that is sturdy but not tough. It is quite easy for Violet to twist Sophie into unheard of positions to get Sophie into her mouth. The little nose and hooves fit perfectly into small mouths, and the drooling has purpose. The paint on Sophie is food quality.Sophie was born in 1961 in France. The maker had wondered why there was nothing available for babies and made Sophie from the finest rubber, phthalate-free on St Sophie's Day, thus the name was born. Since that time millions of Sophie's populate the world. She is soft and for babies little hands easy to grasp. Violet especially loves the bumpy head and horns of Sophie. Sophie has a long neck that easy to grasp and twist. She has lovely, sizable spots that attract Violet's attention. Sophie has happy little squeaks that bring squeals of delight from Viol

In [18]:
# Review with the second-highest predicted sentiment
giraffe_reviews[1]['review']

"I'm not sure why Sophie is such a hit with the little ones, but my 7 month old baby girl is one of her adoring fans.  The rubber is softer and more pleasant to handle, and my daughter has enjoyed chewing on her legs and the nubs on her head even before she started teething.  She also loves the squeak that Sophie makes when you squeeze her.  Not sure what it is but if Sophie is amongst a pile of her other toys, my daughter will more often than not reach for Sophie.  And I have the peace of mind of knowing that only edible and safe paints and materials have been used to make Sophie, as opposed to Bright Starts and other baby toys made in China.  Now that the research is out on phthalates and other toxic substances in baby toys, I think it's more important than ever to find good quality toys that are also safe for our babies to handle and put in their mouths.  Sophie is a must-have for every new mom in my opinion.  Even if your kid is one of the few that can take or leave her, it's worth

In [19]:
# Review with the lowest predicted sentiment (last row after the descending sort)
giraffe_reviews[-1]['review']

"My son (now 2.5) LOVED his Sophie, and I bought one for every baby shower I've gone to. Now, my daughter (6 months) just today nearly choked on it and I will never give it to her again. Had I not been within hearing range it could have been fatal. The strange sound she was making caught my attention and when I went to her and found the front curved leg shoved well down her throat and her face a purply/blue I panicked. I pulled it out and she vomited all over the carpet before screaming her head off. I can't believe how my opinion of this toy has changed from a must-have to a must-not-use. Please don't disregard any of the choking hazard comments, they are not over exaggerated!"

## Reviews with high ratings but low predicted sentiment

In [20]:
# Giraffe reviews rated 4 or higher whose predicted sentiment is below 0.5
giraffe_reviews[(giraffe_reviews['rating']>=4) & (giraffe_reviews['predicted_sentiment'] < 0.5)]

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,I tried to offer this to my son several times ...,4,"{'and': 2, 'old': 2, 'suddenly': 1, 'win': 1, ...",0.310325983159
Vulli Sophie the Giraffe Teether ...,"it's good,but not suitable for everyoneCmy ...",4,"{'a': 1, ""doesn't"": 1, 'it,': 1, 'it.': 1, ...",0.269156926826


In [21]:
# Full text of the first review rated 4 or higher with predicted sentiment below 0.5
giraffe_reviews[(giraffe_reviews['rating']>=4) & (giraffe_reviews['predicted_sentiment'] < 0.5)][0]['review']

"I tried to offer this to my son several times when he was a few months old and he was very disinterested. Eventually when he was about 5 months old he suddenly became interested. He likes it but it's not his favorite by any means. These teething rings from Vulli win hands down over sophie-http://www.amazon.com/Vulli-Vanilla-Flavored-Teethe-Colors/dp/B001N45DM2/ref=sr_1_2?s=baby-products&ie=UTF8&qid=1360729347&sr=1-2&keywords=vulli and they are ADORABLE"

## Reviews with low ratings but high predicted sentiments

In [22]:
# Giraffe reviews rated 2 or lower whose predicted sentiment is above 0.5
giraffe_reviews[(giraffe_reviews['rating']<=2) & (giraffe_reviews['predicted_sentiment'] > 0.5)]

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,I got one of these as a showe gift that my baby ...,1,"{'all': 2, ""don't"": 2, 'better.': 1, 'one.': 1, ...",0.999010678673
Vulli Sophie the Giraffe Teether ...,Do NOT leave your baby unsupervised with this ...,1,"{'just': 1, 'month': 1, ""won't"": 3, 'still': 1, ...",0.969080092843
Vulli Sophie the Giraffe Teether ...,this is a super cute toy that my baby really ...,1,"{'and': 4, 'cute': 1, 'breaking': 1, ...",0.965496751412
Vulli Sophie the Giraffe Teether ...,I seriously don't know what all the fuss is ...,2,"{'chew': 2, 'all': 1, 'because': 1, 'somewh ...",0.96260258945
Vulli Sophie the Giraffe Teether ...,This is a fake Sophie. I will not buy it from ...,1,"{'and': 1, 'raised': 1, 'on': 1, 'being': 1, ...",0.961692521592
Vulli Sophie the Giraffe Teether ...,I don't understand what all the rave about this ...,2,"{'and': 3, 'all': 1, ""don't"": 1, 'of': 1, ...",0.919133526883
Vulli Sophie the Giraffe Teether ...,Our daughter received this cute little toy ...,1,"{'and': 2, 'the': 3, 'cute': 1, 'upset': 1, ...",0.864668412518
Vulli Sophie the Giraffe Teether ...,"Going by the Amazon reviews, there was too ...",2,"{'and': 1, 'reviews,': 1, 'there': 1, 'old': 1, ...",0.830343819665
Vulli Sophie the Giraffe Teether ...,WoW. That is my one word summary. Originally I ...,1,"{'cute': 1, 'just': 1, '-': 2, 'over': 1, ...",0.795318268238
Vulli Sophie the Giraffe Teether ...,I think this toy is so cute and I have tried so ...,2,"{'price,': 1, 'cute': 1, ""she'll"": 1, 'shot': 1, ...",0.673350125572


In [23]:
# Full text of the first review rated 2 or lower with predicted sentiment above 0.5
giraffe_reviews[(giraffe_reviews['rating']<=2) & (giraffe_reviews['predicted_sentiment'] > 0.5)][0]['review']

"I got one of these as a showe gift that my baby loves. I wanted to buy another and saw it on Amazon for $19. What a deal considering these are normally $25. So I ordered and am not sure if it's the same and if the one from Amazon is authentic.I realize these are hand painted so there will be variation. But the rubber on the new one smells different and feels slightly harder than my original one. There are also some rough spots on the Amazon one whereas there were no rough spots on my original one at all as you could plainly tell it was very high quality. The sqeaker is not the same in the two versions. When you press on it to feel it, the shape of the squeaker is different shape, size, and sound. My original one squeaks better. The one from Amazon doesn't squeak well, and is higher pitched. The serial number font size on the two are different. The original one had a larger font size and the Amazon has a smaller font size. The most notable difference is the feet/hooves. The original on