## Fire up pandas

In [1]:
import pandas as pd

## Read some product review data

In [13]:
# Load the Amazon baby-product reviews CSV (read from the working directory).
products = pd.read_csv(filepath_or_buffer='amazon_baby.csv')

In [14]:
# Number of rows in the products frame.
products.shape[0]

183531

In [4]:
# Preview the first five rows of the raw data.
products.head(n=5)

Unnamed: 0,name,review,rating
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5


## Build the word count vector for each review

In [5]:
from sklearn.feature_extraction.text import CountVectorizer

# Build a sparse document-term matrix: one row per review, one column per
# vocabulary term, each cell holding that term's count in the review.
vectorizer = CountVectorizer()
# Any missing review (NaN after read_csv) would be stringified to the literal
# text "nan" by astype('U') and counted as a real word, so replace missing
# values with an empty string before converting to unicode.
word_count = vectorizer.fit_transform(products['review'].fillna('').values.astype('U'))

In [6]:
# (number of reviews, vocabulary size). The call form of print works on both
# Python 2 and Python 3 and prints the same text for a single tuple argument.
print(word_count.shape)

(183531, 68032)


## Build a sentiment classifier

## Define what's a positive and a negative sentiment

## Positive sentiment = a rating of 4 or 5 stars

In [15]:
# Boolean label: True when the rating is 4 or higher, False otherwise.
products['sentiment'] = products['rating'].ge(4)

In [16]:
# Preview the first five rows, now including the 'sentiment' column.
products.iloc[:5]

Unnamed: 0,name,review,rating,sentiment
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3,False
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5,True
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5,True
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5,True
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5,True


## Let's train the sentiment classifier

In [17]:
from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes on the raw word counts, with the boolean
# 'sentiment' column as the target. fit() returns the estimator itself.
clf = MultinomialNB()
clf = clf.fit(word_count, products['sentiment'])

## Analyze built model

In [18]:
# Shape of the per-word log-probabilities for the positive class.
# feature_log_prob_ has one row per class in clf.classes_ order; for this
# two-class model, row 1 onward is exactly what the legacy `coef_` attribute
# exposed. `coef_` was deprecated and later removed from MultinomialNB, so
# read the underlying attribute directly.
clf.feature_log_prob_[1:].shape

(1, 68032)

In [19]:
# Log-probability of each vocabulary term given the positive class, kept 2-D
# with shape (1, n_features). For a binary MultinomialNB this is the same
# array the legacy `coef_` attribute returned; `coef_` itself is no longer
# available in current scikit-learn releases.
clf.feature_log_prob_[1:]

array([[-10.33142725, -12.17725394, -15.47309081, ..., -15.47309081,
        -15.47309081, -16.16623799]])

In [20]:
# Peek at the start of the learned vocabulary instead of dumping every term
# (word_count has tens of thousands of columns).
# NOTE(review): scikit-learn >= 1.0 renames this method to
# get_feature_names_out(); switch when the environment is upgraded.
vectorizer.get_feature_names()[:90]

[u'00',
 u'000',
 u'0001',
 u'000ft',
 u'000importer',
 u'000sqft',
 u'001',
 u'001cm',
 u'00am',
 u'00amcreepy',
 u'00cons',
 u'00dollars',
 u'00etwhile',
 u'00not',
 u'00pm',
 u'01',
 u'01262',
 u'016sc01',
 u'01992',
 u'01p',
 u'01wrgftjdd1bxmtbsg76',
 u'02',
 u'02000z',
 u'02060',
 u'02072',
 u'02090',
 u'020902nd',
 u'0209a',
 u'02100',
 u'02100a',
 u'0210a',
 u'02180',
 u'02220',
 u'02640a',
 u'02644',
 u'02700',
 u'02720',
 u'03',
 u'030',
 u'0312258',
 u'04',
 u'0409',
 u'0453',
 u'046060us',
 u'05',
 u'05oz',
 u'06',
 u'0635',
 u'07',
 u'0752sjthe',
 u'07pm',
 u'07the',
 u'08',
 u'08280',
 u'09',
 u'093',
 u'093010c',
 u'09she',
 u'09this',
 u'0bviously',
 u'0fast',
 u'0ghz',
 u'0ld',
 u'0m',
 u'0mm',
 u'0ne',
 u'0px',
 u'0r',
 u'0s',
 u'0ver',
 u'0z',
 u'10',
 u'100',
 u'1000',
 u'10000',
 u'100000',
 u'1000ccfor',
 u'1000ma',
 u'1000x',
 u'10010',
 u'10012telephone',
 u'10084',
 u'100ct',
 u'100degree',
 u'100f',
 u'100feet',
 u'100ft',
 u'100lb',
 u'100lbs',
 u'100lbswill',

## Building a pipeline

In [21]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer

# End-to-end text classifier: raw text -> word counts -> TF-IDF weights ->
# multinomial naive Bayes. Step names are used to address each stage.
text_clf = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])

## Train the model using the pipeline

In [23]:
# Fit the whole pipeline on the review text with the boolean sentiment label.
# Missing reviews are replaced with '' first; otherwise astype('U') would turn
# NaN into the literal text "nan" and the model would learn it as a word.
text_clf.fit(products['review'].fillna('').values.astype('U'), products['sentiment'])

Pipeline(memory=None,
     steps=[('vect', CountVectorizer(analyzer=u'word', binary=False, decode_error=u'strict',
        dtype=<type 'numpy.int64'>, encoding=u'utf-8', input=u'content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        st...False,
         use_idf=True)), ('clf', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])

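## Predicting probabilities (the stored `prediction` is the probability of the negative class, so lower values mean a more positive review)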

In [26]:
# predict_proba returns one column per class in classes_ order; the labels are
# booleans, so column 0 is False (negative sentiment) and column 1 is True.
# 'prediction' therefore holds P(negative): LOWER values mean MORE positive
# reviews, which is what the ascending sort later in the notebook relies on.
# An explicit integer index replaces the boolean-list mask [True, False]: it
# selects the same column unambiguously and yields a 1-D array for the column
# assignment instead of an (n, 1) array.
# Missing reviews are filled with '' so they are not scored as the word "nan".
products['prediction'] = text_clf.predict_proba(
    products['review'].fillna('').values.astype('U'))[:, 0]

## Examining the reviews for the most sold product: 'Vulli Sophie the Giraffe Teether'

In [27]:
# Keep only the rows whose product name matches the giraffe teether exactly.
giraffe_reviews = products.loc[products['name'].eq('Vulli Sophie the Giraffe Teether')]

In [28]:
# Number of reviews for this product.
giraffe_reviews.shape[0]

785

In [29]:
# Preview the first five giraffe reviews with their predicted scores.
giraffe_reviews.head(n=5)

Unnamed: 0,name,review,rating,sentiment,prediction
34313,Vulli Sophie the Giraffe Teether,He likes chewing on all the parts especially t...,5,True,0.03397
34314,Vulli Sophie the Giraffe Teether,My son loves this toy and fits great in the di...,5,True,0.025724
34315,Vulli Sophie the Giraffe Teether,There really should be a large warning on the ...,1,False,0.429675
34316,Vulli Sophie the Giraffe Teether,All the moms in my moms' group got Sophie for ...,5,True,0.07901
34317,Vulli Sophie the Giraffe Teether,I was a little skeptical on whether Sophie was...,5,True,0.046075


## Sort the reviews based on the predicted sentiment and explore

In [40]:
# Order the reviews by the 'prediction' column, smallest value first.
giraffe_reviews = giraffe_reviews.sort_values(by='prediction', ascending=True)

In [41]:
# First five rows after sorting, i.e. the five smallest 'prediction' values.
giraffe_reviews.iloc[:5]

Unnamed: 0,name,review,rating,sentiment,prediction
34529,Vulli Sophie the Giraffe Teether,"I hesitated to buy this because it was such a high price for a simple toy, but after hearing so many great things, I decided to order for my little guy. He LOVES it! It is one of his favorite toys, and was worth every penny! I will definitely keep it in mind for friends family members who have babies. It would make a great gift.",5,True,0.005882
34760,Vulli Sophie the Giraffe Teether,"We love Sophie!! Perfect toy/teether for a fussy baby! Our daughter loves holding Sophie, worth every penny!!! Highly recommend for all babies!",5,True,0.006824
35010,Vulli Sophie the Giraffe Teether,Bought this for my 4 month old nephew who was teething. My grandaughter also has one and loves it. Just the right size and feel. Has quickly become a favorite. I think they would highly recommend Sophie the giraffe!,5,True,0.008304
35002,Vulli Sophie the Giraffe Teether,My daughter instantly loved Sophie when I handed it to her. This is perfect for her gums and her tooth that is coming in. I highly recommend this to every mom and dad. My daughter is a lot happier!!,5,True,0.009808
34561,Vulli Sophie the Giraffe Teether,This was recommended to me by a friend. I bought it when my daughter was 3 months old and she loves it! she chews on it wherever we go and it is always with us! she smiles when we squeak it and she can manipulate it very well! we are actually buying a 2nd one to have as a backup in case we lose the first. We highly recommend this to all parents.,5,True,0.010162


In [39]:
# Widen pandas' column display so long review text is not truncated.
pd.set_option('display.max_colwidth', 1000)
# Full text of the first row in the current ordering.
# NOTE(review): the saved output of this cell is stale. Its execution count
# (39) predates the sort cell above, and the text shown is the same review
# that iloc[-1] returns further down. Re-run to refresh, or drop this cell,
# which duplicates the one under "Most positive reviews".
giraffe_reviews['review'].iloc[0]

"This children's toy is nostalgic and very cute. However, there is a distinct rubber smell and a very odd taste, yes I tried it, that my baby did not enjoy. Also, if it is soiled it is extremely difficult to clean as the rubber is a kind of porus material and does not clean well. The final thing is the squeaking device inside which stopped working after the first couple of days. I returned this item feeling I had overpaid for a toy that was defective and did not meet my expectations. Please do not be swayed by the cute packaging and hype surounding it as I was. One more thing, I was given a full refund from Amazon without any problem."

## Most positive reviews

In [42]:
# Widen pandas' column display so long review text is not truncated.
pd.set_option('display.max_colwidth', 1000)
# First row after the ascending sort on 'prediction': the review with the
# smallest stored score.
giraffe_reviews['review'].iloc[0]

'I hesitated to buy this because it was such a high price for a simple toy, but after hearing so many great things, I decided to order for my little guy.  He LOVES it!  It is one of his favorite toys, and was worth every penny!  I will definitely keep it in mind for friends family members who have babies.  It would make a great gift.'

In [43]:
# Second row in the sorted order.
giraffe_reviews['review'].iloc[1]

'We love Sophie!! Perfect toy/teether for a fussy baby! Our daughter loves holding Sophie, worth every penny!!! Highly recommend for all babies!'

## Show most negative reviews

In [44]:
# Last row in the sorted order: the review with the largest stored score.
giraffe_reviews['review'].iloc[-1]

"This children's toy is nostalgic and very cute. However, there is a distinct rubber smell and a very odd taste, yes I tried it, that my baby did not enjoy. Also, if it is soiled it is extremely difficult to clean as the rubber is a kind of porus material and does not clean well. The final thing is the squeaking device inside which stopped working after the first couple of days. I returned this item feeling I had overpaid for a toy that was defective and did not meet my expectations. Please do not be swayed by the cute packaging and hype surounding it as I was. One more thing, I was given a full refund from Amazon without any problem."

In [45]:
# Second-to-last row in the sorted order.
giraffe_reviews['review'].iloc[-2]

'I wanted to love this product and was excited to buy it when I became pregnant but am now hesitant to let my baby use it after reading about the recall in Europe. Apparently, as I understand it, their toxin standards of measurement are lower than ours so they have not been recalled here (apparently we are OK with low levels of nitrates in the toys our children put in their mouths, but Europeans are not...hmmm)...Be that as it may, toxins registering even CLOSE to a dangerous level made me nervous about using. After digging around online I did discover that the company claims to have changed the product after a certain date and lists manufacturing codes so you can check yours (those listed were made after a certain date and are said to be safer). Sadly mine was not made after the &#34;improved&#34; date but I could not return it because there was no formal recall in our country. I considered returning it and hunting for one with an approved manufacturing date but man that was just too 