# Use Selected Words to Build Sentiment Model

In [1]:
import graphlab as gl

# load in data
products = gl.SFrame('amazon_baby.gl')
# setup selected words
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

This non-commercial license of GraphLab Create for academic use is assigned to nikki12345001@gmail.com and will expire on July 31, 2018.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1502867170.log


In [2]:
products['word_count'] = gl.text_analytics.count_words(products['review'])

## Each entry of the ‘word_count’ column is of python dictionary

## Create word count table for the selected words

In [16]:
def word_count(word, dictionary_of_row):
    if word in dictionary_of_row:
        return dictionary_of_row[word]
    else:
        return 0

In [17]:
# Calculate words count by each row for the selected words only
for word in selected_words:
    products[word] = products['word_count'].apply(lambda row: word_count(word, row))
    
products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate
0,0,0,0,0,0,0,0
0,1,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,2,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0


In [57]:
for word in selected_words:
    print word, products[word].sum()

awesome 2002
great 42420
fantastic 873
amazing 1305
love 40277
horrible 659
bad 3197
terrible 673
awful 345
wow 131
hate 1057


## Build a Sentiment Classifier

In [20]:
# Ignore all 3-stars reviews
products = products[products['rating']!=3]
# For positive tag: >=4 stars
products['sentiment'] = products['rating'] >= 4

# Split the training and testing data
train_data, test_data = products.random_split(.8, seed = 0)

In [21]:
selected_words_sentiment_model = gl.logistic_classifier.create(train_data,
                                                target = 'sentiment',
                                                features=selected_words,
                                                validation_set=test_data)

In [22]:
selected_words_sentiment_model['coefficients']

name,index,class,value,stderr
(intercept),,1,1.36728315229,0.00861805467824
awesome,,1,1.05800888878,0.110865296265
great,,1,0.883937894898,0.0217379527921
fantastic,,1,0.891303090304,0.154532343591
amazing,,1,0.892802422508,0.127989503231
love,,1,1.39989834302,0.0287147460124
horrible,,1,-1.99651800559,0.0973584169028
bad,,1,-0.985827369929,0.0433603009142
terrible,,1,-2.09049998487,0.0967241912229
awful,,1,-1.76469955631,0.134679803365


In [23]:
selected_words_sentiment_model['coefficients'].sort('value', ascending=False)

name,index,class,value,stderr
love,,1,1.39989834302,0.0287147460124
(intercept),,1,1.36728315229,0.00861805467824
awesome,,1,1.05800888878,0.110865296265
amazing,,1,0.892802422508,0.127989503231
fantastic,,1,0.891303090304,0.154532343591
great,,1,0.883937894898,0.0217379527921
wow,,1,-0.0541450123333,0.275616449416
bad,,1,-0.985827369929,0.0433603009142
hate,,1,-1.40916406276,0.0771983993506
awful,,1,-1.76469955631,0.134679803365


## Evaluate the Model

In [24]:
selected_words_sentiment_model.evaluate(test_data)

{'accuracy': 0.8431419649291376,
 'auc': 0.6648096413721418,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        0        |  234  |
 |      0       |        1        |  5094 |
 |      1       |        1        | 27846 |
 |      1       |        0        |  130  |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.914242563530107,
 'log_loss': 0.40547471103659266,
 'precision': 0.8453551912568306,
 'recall': 0.9953531598513011,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+-----+-----+-------+------+
 | threshold | fpr | tpr |   p   |  n   |
 +-----------+-----+-----+-------+------+
 |    0.0    | 1.0 | 1.0 | 27976 | 5328 |
 |   1e-05   | 1.0 | 1.0 | 27976 | 5328 |
 |   2e-05   |

## Compare the results of prediction with model trained by all words

In [31]:
## Use the data created in W3 - Classification Demo
diaper_champ_reviews = gl.SFrame('diaper_champ_reviews.gl')

gl.canvas.set_target('ipynb')
diaper_champ_reviews['rating'].show(view='Categorical')

In [32]:
diaper_champ_reviews[0:1]

name,review,rating,word_count,sentiment
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,5.0,"{'all': 1, 'less': 1, ""friend's"": 1, '(which': ...",1

predicted_sentiment
0.999999937267


In [62]:
for word in selected_words:
    diaper_champ_reviews[word] = diaper_champ_reviews['word_count'].apply(lambda row: word_count(word, row))
    
diaper_champ_reviews[0:1]

name,review,rating,word_count,sentiment
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,5.0,"{'all': 1, 'less': 1, ""friend's"": 1, '(which': ...",1

predicted_sentiment,awesome,great,fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
0.999999937267,0,0,0,0,0,0,0,0,0,0,0


In [64]:
selected_words_sentiment_model.predict(diaper_champ_reviews[0:1], output_type='probability')

# The result is 0.7969

dtype: float
Rows: 1
[0.7969408512906713]

In [58]:
diaper_champ_reviews[0]['review']

'Baby Luke can turn a clean diaper to a dirty diaper in 3 seconds flat. The diaper champ turns the smelly diaper into "what diaper smell" in less time than that. I hesitated and wondered what I REALLY needed for the nursery. This is one of the best purchases we made. The champ, the baby bjorn, fluerville diaper bag, and graco pack and play bassinet all vie for the best baby purchase.Great product, easy to use, economical, effective, absolutly fabulous.UpdateI knew that I loved the champ, and useing the diaper genie at a friend\'s house REALLY reinforced that!! There is no comparison, the chanp is easy and smell free, the genie was difficult to use one handed (which is absolutly vital if you have a little one on a changing pad) and there was a deffinite odor eminating from the genieplus we found that the quick tie garbage bags where the ties are integrated into the bag work really well because there isn\'t any added bulk around the sealing edge of the champ.'