# Predicting sentiment from product reviews
# Fire up GraphLab Create


In [58]:
import graphlab
# Limit number of worker processes. This preserves system memory, which prevents hosted notebooks from crashing.
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

## Read some product review data¶

In [59]:
products = graphlab.SFrame('amazon_baby.gl/')

## Let's explore this data

In [60]:
products.head()

name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0


# Build the word count vector for each review

In [61]:
products['word_count'] = graphlab.text_analytics.count_words(products['review'])

In [62]:
products.head()

name,review,rating,word_count
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ..."
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ..."
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ..."
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ..."


## build a new feature with the counts for each selected word from each product review

In [63]:
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [64]:
def select_count(x):
    count = 0
    if 'awesome' in x:
        count = x['awesome']
    else:
        count = 0
    return(count)

products['awesome'] = products['word_count'].apply(select_count)

products.tail()

name,review,rating,word_count,awesome
Airline Seat Belt Extender - The Best ...,"Up until recently I have hated flying, putting on ...",5.0,"{'and': 1, 'have': 1, 'being': 1, 'protrudes': ...",0
Airline Seat Belt Extender - The Best ...,I bought this as a father's day gift for my ...,5.0,"{'and': 4, 'dad': 1, 'because': 1, 'being' ...",0
Squeasy Snacker 6oz Silicone Reusable Food ...,"I love that these can hold purees OR liquids, ...",5.0,"{'and': 5, 'smaller': 1, 'love': 2, 'clip': 1, ...",0
Squeasy Snacker 6oz Silicone Reusable Food ...,"I love this product, it makes my life easier. ...",5.0,"{'and': 6, 'love': 1, 'school': 1, 'just': 1, ...",0
Squeasy Snacker 6oz Silicone Reusable Food ...,This reusable squeeze bottle is the best I ...,5.0,"{'-': 1, 'through': 1, 'go': 1, 'yet': 1, ...",1
Baby Teething Necklace for Mom Pretty Donut ...,Such a great idea! very handy to have and look ...,5.0,"{'and': 1, 'help': 1, 'too,': 1, 'reduce': 1, ...",0
Baby Teething Necklace for Mom Pretty Donut ...,This product rocks! It is a great blend of ...,5.0,"{'accessible': 1, 'and': 5, 'concept': 1, 'is' ...",0
Abstract 2 PK Baby / Toddler Training Cup ...,This item looks great and cool for my kids....I ...,5.0,"{'and': 2, 'great': 2, 'kids....i': 1, 'for' ...",0
"Baby Food Freezer Tray - Bacteria Resistant, BPA ...",I am extremely happy with this product. I have ...,5.0,"{'just': 1, 'bimbi': 2, 'one.': 1, 'bright': 1, ...",0
Best 2 Pack Baby Car Shade for Kids - Window ...,I love this product very mush . I have bought ...,5.0,"{'and': 1, 'love': 1, 'keeps': 1, 'shades': 1, ...",0


In [65]:
def select_count(x):
    count = 0
    if 'great' in x:
        count = x['great']
    else:
        count = 0
    return(count)

In [66]:
products['great'] = products['word_count'].apply(select_count)

In [67]:
products.tail()

name,review,rating,word_count,awesome,great
Airline Seat Belt Extender - The Best ...,"Up until recently I have hated flying, putting on ...",5.0,"{'and': 1, 'have': 1, 'being': 1, 'protrudes': ...",0,0
Airline Seat Belt Extender - The Best ...,I bought this as a father's day gift for my ...,5.0,"{'and': 4, 'dad': 1, 'because': 1, 'being' ...",0,0
Squeasy Snacker 6oz Silicone Reusable Food ...,"I love that these can hold purees OR liquids, ...",5.0,"{'and': 5, 'smaller': 1, 'love': 2, 'clip': 1, ...",0,1
Squeasy Snacker 6oz Silicone Reusable Food ...,"I love this product, it makes my life easier. ...",5.0,"{'and': 6, 'love': 1, 'school': 1, 'just': 1, ...",0,1
Squeasy Snacker 6oz Silicone Reusable Food ...,This reusable squeeze bottle is the best I ...,5.0,"{'-': 1, 'through': 1, 'go': 1, 'yet': 1, ...",1,0
Baby Teething Necklace for Mom Pretty Donut ...,Such a great idea! very handy to have and look ...,5.0,"{'and': 1, 'help': 1, 'too,': 1, 'reduce': 1, ...",0,2
Baby Teething Necklace for Mom Pretty Donut ...,This product rocks! It is a great blend of ...,5.0,"{'accessible': 1, 'and': 5, 'concept': 1, 'is' ...",0,1
Abstract 2 PK Baby / Toddler Training Cup ...,This item looks great and cool for my kids....I ...,5.0,"{'and': 2, 'great': 2, 'kids....i': 1, 'for' ...",0,2
"Baby Food Freezer Tray - Bacteria Resistant, BPA ...",I am extremely happy with this product. I have ...,5.0,"{'just': 1, 'bimbi': 2, 'one.': 1, 'bright': 1, ...",0,0
Best 2 Pack Baby Car Shade for Kids - Window ...,I love this product very mush . I have bought ...,5.0,"{'and': 1, 'love': 1, 'keeps': 1, 'shades': 1, ...",0,0


In [68]:
def select_count(x):
    count = 0
    if 'fantastic' in x:
        count = x['fantas']
    else:
        count = 0
    return(count)

products['awesome'] = products['word_count'].apply(select_count)

products.tail()

name,review,rating,word_count,awesome,great
Airline Seat Belt Extender - The Best ...,"Up until recently I have hated flying, putting on ...",5.0,"{'and': 1, 'have': 1, 'being': 1, 'protrudes': ...",0,0
Airline Seat Belt Extender - The Best ...,I bought this as a father's day gift for my ...,5.0,"{'and': 4, 'dad': 1, 'because': 1, 'being' ...",0,0
Squeasy Snacker 6oz Silicone Reusable Food ...,"I love that these can hold purees OR liquids, ...",5.0,"{'and': 5, 'smaller': 1, 'love': 2, 'clip': 1, ...",0,1
Squeasy Snacker 6oz Silicone Reusable Food ...,"I love this product, it makes my life easier. ...",5.0,"{'and': 6, 'love': 1, 'school': 1, 'just': 1, ...",0,1
Squeasy Snacker 6oz Silicone Reusable Food ...,This reusable squeeze bottle is the best I ...,5.0,"{'-': 1, 'through': 1, 'go': 1, 'yet': 1, ...",1,0
Baby Teething Necklace for Mom Pretty Donut ...,Such a great idea! very handy to have and look ...,5.0,"{'and': 1, 'help': 1, 'too,': 1, 'reduce': 1, ...",0,2
Baby Teething Necklace for Mom Pretty Donut ...,This product rocks! It is a great blend of ...,5.0,"{'accessible': 1, 'and': 5, 'concept': 1, 'is' ...",0,1
Abstract 2 PK Baby / Toddler Training Cup ...,This item looks great and cool for my kids....I ...,5.0,"{'and': 2, 'great': 2, 'kids....i': 1, 'for' ...",0,2
"Baby Food Freezer Tray - Bacteria Resistant, BPA ...",I am extremely happy with this product. I have ...,5.0,"{'just': 1, 'bimbi': 2, 'one.': 1, 'bright': 1, ...",0,0
Best 2 Pack Baby Car Shade for Kids - Window ...,I love this product very mush . I have bought ...,5.0,"{'and': 1, 'love': 1, 'keeps': 1, 'shades': 1, ...",0,0

fantastic
0
0
0
0
0
0
0
0
0
0


In [70]:
def select_count(x):
    count = 0
    if 'amazing' in x:
        count = x['amazing']
    else:
        count = 0
    return(count)

products['amazing'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing
0
0
0
0
0
0
0
0
0
0


In [71]:
def select_count(x):
    count = 0
    if 'love' in x:
        count = x['love']
    else:
        count = 0
    return(count)

products['love'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love
0,0
0,1
0,0
0,2
0,0
0,0
0,0
0,0
0,0
0,0


In [72]:
def select_count(x):
    count = 0
    if 'horrible' in x:
        count = x['horrible']
    else:
        count = 0
    return(count)

products['horrible'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible
0,0,0
0,1,0
0,0,0
0,2,0
0,0,0
0,0,0
0,0,0
0,0,0
0,0,0
0,0,0


In [73]:
def select_count(x):
    count = 0
    if 'bad' in x:
        count = x['bad']
    else:
        count = 0
    return(count)

products['bad'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible,bad
0,0,0,0
0,1,0,0
0,0,0,0
0,2,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0


In [74]:
def select_count(x):
    count = 0
    if 'terrible' in x:
        count = x['terrible']
    else:
        count = 0
    return(count)

products['terrible'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible,bad,terrible
0,0,0,0,0
0,1,0,0,0
0,0,0,0,0
0,2,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0


In [75]:
def select_count(x):
    count = 0
    if 'awful' in x:
        count = x['awful']
    else:
        count = 0
    return(count)

products['awful'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful
0,0,0,0,0,0
0,1,0,0,0,0
0,0,0,0,0,0
0,2,0,0,0,0
0,0,0,0,0,0
0,0,0,0,0,0
0,0,0,0,0,0
0,0,0,0,0,0
0,0,0,0,0,0
0,0,0,0,0,0


In [76]:
def select_count(x):
    count = 0
    if 'wow' in x:
        count = x['wow']
    else:
        count = 0
    return(count)

products['wow'] = products['word_count'].apply(select_count)

products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow
0,0,0,0,0,0,0
0,1,0,0,0,0,0
0,0,0,0,0,0,0
0,2,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0


In [86]:
def select_count(x):
    count = 0
    if 'hate' in x:
        count = x['hate']
    else:
        count = 0
    return(count)

products['hate'] = products['word_count'].apply(select_count)

products.head()

products.num_rows

<bound method SFrame.num_rows of Columns:
	name	str
	review	str
	rating	float
	word_count	dict
	awesome	int
	great	int
	fantastic	int
	amazing	int
	love	int
	horrible	int
	bad	int
	terrible	int
	awful	int
	wow	int
	hate	int

Rows: 183531

Data:
+-------------------------------+-------------------------------+--------+
|              name             |             review            | rating |
+-------------------------------+-------------------------------+--------+
|    Planetwise Flannel Wipes   | These flannel wipes are OK... |  3.0   |
|     Planetwise Wipe Pouch     | it came early and was not ... |  5.0   |
| Annas Dream Full Quilt wit... | Very soft and comfortable ... |  5.0   |
| Stop Pacifier Sucking with... | This is a product well wor... |  5.0   |
| Stop Pacifier Sucking with... | All of my kids have cried ... |  5.0   |
| Stop Pacifier Sucking with... | When the Binky Fairy came ... |  5.0   |
| A Tale of Baby's Days with... | Lovely book, it's bound ti... |  4.0   |
| Bab

## most used and least used words - assignment

In [80]:
products['awesome'].sum()

2090

In [81]:
products['great'].sum()

45206

In [87]:
products['fantastic'].sum()


932

In [88]:
products['amazing'].sum()


1363

In [89]:
products['love'].sum()


42065

In [90]:
products['horrible'].sum()


734

In [91]:
products['bad'].sum()


3724

In [92]:
products['terrible'].sum()


748

In [93]:
products['awful'].sum()


383

In [94]:
products['wow'].sum()


144

In [95]:
products['hate'].sum()

1220

## Define what's a positive and a negative sentiment
We will ignore all reviews with rating = 3, since they tend to have a neutral sentiment. Reviews with a rating of 4 or higher will be considered positive, while the ones with rating of 2 or lower will have a negative sentiment.

In [100]:
# ignore all 3* reviews
products = products[products['rating'] != 3]

In [101]:
# positive sentiment = 4* or 5* reviews
products['sentiment'] = products['rating'] >=4

products.(head)

name,review,rating,word_count,awesome,great,fantastic
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'all': 1, 'forget': 1, 'just': 1, 'food': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment
0,1,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,2,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,2,0,0,0,0,0,0,1


## train the sentiment classifier

In [102]:
train_data,test_data = products.random_split(.8, seed=0)

In [103]:
selected_words_model = graphlab.logistic_classifier.create(train_data,
                                                     target='sentiment',
                                                     features=selected_words,
                                                     validation_set=test_data)

##  examine the weights the learned classifier assigned to each of the 11 words
column called ‘value’ contains the weight learned for each feature

In [108]:
coeff = selected_words_model['coefficients']

## sorting the values (assignment)

In [130]:
coeff = coeff.sort('value')
coeff.print_rows(num_rows=12) # print more rows

+-------------+-------+-------+------------------+------------------+
|     name    | index | class |      value       |      stderr      |
+-------------+-------+-------+------------------+------------------+
|   terrible  |  None |   1   |  -2.09049998487  | 0.0967241912229  |
|   horrible  |  None |   1   |  -1.99651800559  | 0.0973584169028  |
|    awful    |  None |   1   |  -1.76469955631  |  0.134679803365  |
|     hate    |  None |   1   |  -1.40916406276  | 0.0771983993506  |
|     bad     |  None |   1   | -0.985827369929  | 0.0433603009142  |
|     wow     |  None |   1   | -0.0541450123333 |  0.275616449416  |
|    great    |  None |   1   |  0.883937894898  | 0.0217379527921  |
|  fantastic  |  None |   1   |  0.891303090304  |  0.154532343591  |
|   amazing   |  None |   1   |  0.892802422508  |  0.127989503231  |
|   awesome   |  None |   1   |  1.05800888878   |  0.110865296265  |
| (intercept) |  None |   1   |  1.36728315229   | 0.00861805467824 |
|     love    |  Non

##  Comparing the accuracy of different sentiment analysis model

In [117]:
selected_words_model.evaluate(test_data)

{'accuracy': 0.8431419649291376,
 'auc': 0.6648096413721418,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        0        |  234  |
 |      0       |        1        |  5094 |
 |      1       |        1        | 27846 |
 |      1       |        0        |  130  |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.914242563530107,
 'log_loss': 0.4054747110366022,
 'precision': 0.8453551912568306,
 'recall': 0.9953531598513011,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+-----+-----+-------+------+
 | threshold | fpr | tpr |   p   |  n   |
 +-----------+-----+-----+-------+------+
 |    0.0    | 1.0 | 1.0 | 27976 | 5328 |
 |   1e-05   | 1.0 | 1.0 | 27976 | 5328 |
 |   2e-05   | 

##  Interpreting the difference in performance between the models

In [119]:
diaper_champ_reviews = products[products['name'] == 'Baby Trend Diaper Champ']
diaper_champ_reviews.head()

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,Ok - newsflash. Diapers are just smelly. We've ...,4.0,"{'son': 1, 'just': 2, 'less': 1, '-': 3, ...",0,0,0
Baby Trend Diaper Champ,"My husband and I selected the Diaper ""Champ"" ma ...",1.0,"{'material)': 1, 'bags,': 1, 'less': 1, 'when': 3, ...",0,0,0
Baby Trend Diaper Champ,Excellent diaper disposal unit. I used it in ...,5.0,"{'control': 1, 'am': 1, 'it': 1, 'used': 1, ' ...",0,0,0
Baby Trend Diaper Champ,We love our diaper champ. It is very easy to use ...,5.0,"{'and': 3, 'over.': 1, 'all': 1, 'bags.': 1, ...",0,0,0
Baby Trend Diaper Champ,Two girlfriends and two family members put me ...,5.0,"{'just': 1, '-': 3, 'both': 1, 'results': 1, ...",0,0,0
Baby Trend Diaper Champ,I waited to review this until I saw how it ...,4.0,"{'lysol': 1, 'all': 1, 'mom.': 1, 'busy': 1, ...",0,0,0
Baby Trend Diaper Champ,I have had a diaper genie for almost 4 years since ...,1.0,"{'all': 1, 'bags.': 1, 'just': 1, ""don't"": 2, ...",0,0,0
Baby Trend Diaper Champ,I originally put this item on my baby registry ...,5.0,"{'lysol': 1, 'all': 2, 'bags.': 1, 'feedback': ...",0,0,0
Baby Trend Diaper Champ,I am so glad I got the Diaper Champ instead of ...,5.0,"{'and': 2, 'all': 1, 'just': 1, 'is': 2, ' ...",0,0,0
Baby Trend Diaper Champ,We had 2 diaper Genie's both given to us as a ...,4.0,"{'hand.': 1, 'both': 1, '(required': 1, 'befo ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,1
0,1,0,0,0,0,0,0,1
0,0,1,0,0,0,0,0,1
0,0,0,1,0,0,0,0,1
0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,1
0,2,0,0,0,0,0,0,1


# assignment

In [120]:
diaper_champ_reviews['predicted_sentiment'] = selected_words_model.predict(diaper_champ_reviews, output_type='probability')

In [121]:
diaper_champ_reviews= diaper_champ_reviews.sort('predicted_sentiment', ascending=False)
#OR selected_words_model.predict(diaper_champ_reviews[0:1], output_type='probability')

In [122]:
diaper_champ_reviews.head()

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,I LOVE LOVE LOVE this product! It is SO much ...,4.0,"{'rating': 1, 'contacted': 1, 'over': ...",0,1,0
Baby Trend Diaper Champ,I received my Diaper Champ at my baby shower ...,5.0,"{'bags.': 1, ""don't"": 1, 'son.': 1, 'of,': 1, ...",0,0,0
Baby Trend Diaper Champ,"Love it, love it, love it! This lives up to ...",5.0,"{'instead': 1, 'all': 1, 'already': 1, 'love': 3, ...",0,0,0
Baby Trend Diaper Champ,Works great - no smells. LOVE that it uses reg ...,5.0,"{'and': 2, 'bags.': 1, 'garbage': 1, 'wastef ...",0,2,0
Baby Trend Diaper Champ,I love this diaper pale and wouldn't dream of ...,5.0,"{'and': 3, 'love': 1, 'use.': 1, 'is': 2, ' ...",0,2,0
Baby Trend Diaper Champ,I've worked with kids more than half my life. ...,5.0,"{'and': 4, 'genies': 1, 'all': 1, 'because': 1, ...",0,0,0
Baby Trend Diaper Champ,I love this diaper pail. It keeps the diapers ...,4.0,"{'and': 1, 'old': 1, 'extra': 1, 'is': 1, ...",0,0,0
Baby Trend Diaper Champ,"This is absolutely, by far, the best diaper ...",5.0,"{'just': 3, 'money': 1, 'still': 3, 'fine': 1, ...",0,0,0
Baby Trend Diaper Champ,Love the Diaper Champ. I had planned to get the ...,4.0,"{'reviews,': 1, 'infant': 1, 'bags.': 1, 'just' ...",0,0,0
Baby Trend Diaper Champ,We had 2 diaper Genie's both given to us as a ...,4.0,"{'hand.': 1, 'both': 1, '(required': 1, 'befo ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment
0,3,0,0,0,0,0,0,1,0.998423414594
0,3,0,0,0,0,0,0,1,0.996192539732
0,3,0,0,0,0,0,0,1,0.996192539732
0,1,0,0,0,0,0,0,1,0.989387539605
0,1,0,0,0,0,0,0,1,0.989387539605
0,2,0,0,0,0,0,0,1,0.984739056527
0,2,0,0,0,0,0,0,1,0.984739056527
0,2,0,0,0,0,0,0,1,0.984739056527
0,2,0,0,0,0,0,0,1,0.984739056527
0,2,0,0,0,0,0,0,1,0.984739056527


In [125]:
diaper_champ_reviews[diaper_champ_reviews['review'] == 'Baby Luke can turn a clean diaper to a dirty diaper in 3 seconds flat. The diaper champ turns the smelly diaper into "what diaper smell" in less time than that. I hesitated and wondered what I REALLY needed for the nursery. This is one of the best purchases we made. The champ, the baby bjorn, fluerville diaper bag, and graco pack and play bassinet all vie for the best baby purchase.Great product, easy to use, economical, effective, absolutly fabulous.UpdateI knew that I loved the champ, and useing the diaper genie at a friend\'s house REALLY reinforced that!! There is no comparison, the chanp is easy and smell free, the genie was difficult to use one handed (which is absolutly vital if you have a little one on a changing pad) and there was a deffinite odor eminating from the genieplus we found that the quick tie garbage bags where the ties are integrated into the bag work really well because there isn\'t any added bulk around the sealing edge of the champ.']

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,5.0,"{'all': 1, 'less': 1, ""friend's"": 1, '(which': ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment
0,0,0,0,0,0,0,0,1,0.796940851291


## majority classifier based on proportion of the positives
if half are positives the predicted should have atleast more than 50% accuracy

Hint: the majority class classifier simply predicts that every data point is from the most common class. This baseline is something we definitely want to beat with models we learn from data.