# Analyze Product Sentiment

In [None]:
import turicreate as tc

# Read product review data

In [None]:
# Load the review data into an SFrame from the local path './amazon_baby.sframe'.
products = tc.SFrame('./amazon_baby.sframe')

# Explore data

## Peek at the content of the data

In [None]:
# Bare expression: the notebook renders the SFrame as the cell output.
products

## Group the products by name and count the number of reviews

In [None]:
# Count the rows for each distinct 'name' and order the result by that count,
# largest first, so the most-reviewed products appear at the top.
products.groupby('name', operations={'count':tc.aggregate.COUNT()}).sort('count', ascending=False)

## Examine the reviews for the most-reviewed product

In [None]:
# Keep only the rows whose 'name' equals the giraffe teether product.
giraffe_reviews = products[products['name'] == 'Vulli Sophie the Giraffe Teether']

In [None]:
# Display the filtered rows.
giraffe_reviews

In [None]:
# Number of rows (reviews) kept for this product.
len(giraffe_reviews)

In [None]:
# Visualize the 'rating' column for this product's reviews.
giraffe_reviews['rating'].show()

# Building a sentiment classifier

## Build word count vectors

In [None]:
# Add a 'word_count' column computed by count_words over the 'review' column.
# Later cells index each value by word, so it is used as a word -> count mapping.
products['word_count'] = tc.text_analytics.count_words(products['review'])

In [None]:
# Display the SFrame, now including the 'word_count' column.
products

# Define what is positive and negative sentiment

In [None]:
# Visualize the 'rating' column across all products.
products['rating'].show()

In [None]:
# Drop every review whose rating is exactly 3, so those rows do not receive
# a sentiment label below.
products = products[products['rating']!= 3]

In [None]:
# Positive sentiment = rating of 4 or higher (4-star or 5-star reviews);
# every remaining review is labelled as not positive.
products['sentiment'] = products['rating'] >= 4

In [None]:
# Display the SFrame, now including the 'sentiment' column.
products

In [None]:
# Visualize the 'sentiment' labels.
products['sentiment'].show()

# Train our sentiment classifier

In [None]:
# Randomly split the rows using fraction .8 for the first part; the fixed
# seed keeps the split reproducible between runs.
train_data, test_data = products.random_split(.8, seed=0)

In [None]:
# Fit a logistic classifier that predicts 'sentiment' from the 'word_count'
# feature only.
# NOTE(review): test_data is also passed as validation_set, so the held-out
# set is seen during training-time reporting -- confirm this is intended.
sentiment_model = tc.logistic_classifier.create(train_data, target='sentiment', features=['word_count'], validation_set=test_data)

# Apply the sentiment classifier to better understand the Giraffe reviews

In [None]:
# Store the model's prediction for every row as a probability in a new
# 'predicted_sentiment' column (presumably the probability of the positive
# class -- confirm against the classifier's predict documentation).
products['predicted_sentiment'] = sentiment_model.predict(products, output_type = 'probability')

In [None]:
# Display the SFrame, now including the 'predicted_sentiment' column.
products

In [None]:
# Re-select the giraffe teether rows so that the subset carries the columns
# added to `products` since the first selection.
giraffe_reviews = products[products['name']== 'Vulli Sophie the Giraffe Teether']

In [None]:
# Display the refreshed subset.
giraffe_reviews

# Sort the Giraffe reviews according to predicted sentiment

In [None]:
# Order the giraffe reviews by 'predicted_sentiment', highest first, so the
# start of the frame holds the most positive predictions and the end the
# least positive.
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)

In [None]:
# Display the sorted reviews (top of the ordering).
giraffe_reviews

In [None]:
# Display the last rows of the ordering, i.e. the lowest predicted sentiment.
giraffe_reviews.tail()

## Show the most positive reviews

In [None]:
# Review text of the first row after the descending sort
# (highest 'predicted_sentiment').
giraffe_reviews[0]['review']

In [None]:
# Review text of the second row after the descending sort.
giraffe_reviews[1]['review']

# Most negative reviews

In [None]:
# Review text of the last row after the descending sort
# (lowest 'predicted_sentiment').
giraffe_reviews[-1]['review']

In [None]:
# Review text of the second-to-last row after the descending sort.
giraffe_reviews[-2]['review']

# Using selected words

In [None]:
# Hand-picked words; each one becomes its own count column and is later used
# as a feature of the selected-words classifier.
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [None]:
# Add one column per selected word holding how many times that word occurs in
# each review, or 0 when the word is absent from the review's 'word_count'.
for word in selected_words:
    # `word=word` freezes this iteration's value inside the lambda, so the
    # function stays correct even if it is evaluated after the loop variable
    # has moved on (lambdas otherwise look the name up at call time).
    products[word] = products.apply(lambda row, word=word: row["word_count"].get(word, 0))

In [None]:
# Display the first rows to check the newly added per-word columns.
products.head()

### Most used selected words

In [None]:
def most_used_selected_words(selected_words, products):
    """Return the selected word whose column has the largest total.

    Prints the list of words, then each word followed by its total, as a
    running trace.

    Args:
        selected_words: Words to compare; each must be a key/column of
            ``products``.
        products: Table-like object where ``products[word].sum()`` gives the
            total count for ``word``.

    Returns:
        The word with the highest total. The first word wins a tie. An empty
        string is returned when no word has a total greater than zero.
    """
    print(selected_words)
    best_word, best_total = "", 0
    for candidate in selected_words:
        print(candidate)
        total = products[candidate].sum()
        print(total)
        # Strict comparison: an equal total never displaces the current best.
        if total > best_total:
            best_word, best_total = candidate, total
    return best_word

In [None]:
# Print the selected word with the largest total count across all reviews
# (the function also prints each word and its total as it goes).
print(most_used_selected_words(selected_words, products))

### Least used selected words

In [None]:
def least_used_selected_words(selected_words, products):
    """Return the selected word whose column has the smallest total.

    Prints the list of words, then each word followed by its total, as a
    running trace.

    Args:
        selected_words: Words to compare; each must be a key/column of
            ``products``.
        products: Table-like object where ``products[word].sum()`` gives the
            total count for ``word``.

    Returns:
        The word with the lowest total. The first word wins a tie. An empty
        string is returned when ``selected_words`` is empty.
    """
    print(selected_words)
    # None marks "nothing seen yet"; starting from 0 would make it impossible
    # for any non-negative total to count as the minimum.
    lowest_frequency = None
    least_used_word = ""
    for word in selected_words:
        print(word)
        word_count = products[word].sum()
        print(word_count)
        if lowest_frequency is None or word_count < lowest_frequency:
            lowest_frequency = word_count
            least_used_word = word
    return least_used_word

In [None]:
# Split again with the same fraction and seed so that train_data and
# test_data include the per-word columns added to `products` earlier.
train_data, test_data = products.random_split(.8, seed=0)

In [None]:
# Fit a second logistic classifier that uses only the selected-word count
# columns as features.
# NOTE(review): test_data is again passed as validation_set -- confirm this
# is intended.
selected_words_model = tc.logistic_classifier.create(train_data, target='sentiment', features=selected_words, validation_set=test_data)

In [None]:
# Print the learned coefficients ordered by their 'value' column, showing up
# to 12 rows.
selected_words_model.coefficients.sort("value").print_rows(num_rows=12)

In [None]:
# Evaluate the selected-words model on the held-out split.
selected_words_model.evaluate(test_data)

In [None]:
# Evaluate the full word-count model on the same split for comparison.
sentiment_model.evaluate(test_data)

In [None]:
# Keep only the rows whose 'name' equals the diaper champ product.
diaper_champ_reviews = products[products['name']== 'Baby Trend Diaper Champ']

In [None]:
# Display the filtered rows.
diaper_champ_reviews

In [None]:
# Review text of the diaper champ row with the highest 'predicted_sentiment'
# (the column produced by sentiment_model).
diaper_champ_reviews.sort("predicted_sentiment", ascending=False)[0]["review"]

In [None]:
# Probabilities from the selected-words model for the same rows, in the same
# descending 'predicted_sentiment' order, so the first value lines up with
# the review shown in the previous cell.
selected_words_model.predict(diaper_champ_reviews.sort("predicted_sentiment", ascending=False), output_type = 'probability')