In [2]:
import pymongo
# Open a MongoDB connection with the driver's default settings and select
# the project database.
client = pymongo.MongoClient()
mydb = client['y2buy_1']

# Distinct productId values found in the reviews collection; later cells
# iterate over this list.
product_ids = mydb.reviews.distinct("productId")
print(product_ids)

# Output collection for the per-product feature sentiments. The unique index
# on productId enforces at most one result document per product.
my_collection = mydb['feature_sentiment']
result = my_collection.create_index(
    [('productId', pymongo.ASCENDING)],
    unique=True,
)

[32611785137, 32662806201, 32721716490, 32763320059, 32766086429, 32769974242, 32790980431, 32792320034, 32796300206]


In [3]:
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk import word_tokenize
import string
# Shared lookup tables used by clean(); built once when this cell runs.
lemma = WordNetLemmatizer()            # lemmatizer instance reused for every token
exclude = {*string.punctuation}        # individual ASCII punctuation characters
stop = {*stopwords.words('english')}   # NLTK English stop-word list, as a set
def clean(doc):
    """Normalize a review text for noun-phrase extraction.

    The text is lower-cased and tokenized with ``word_tokenize``; stop words
    and punctuation tokens are dropped; every remaining token is lemmatized.

    Args:
        doc: Raw review text (a ``str``).

    Returns:
        The surviving lemmatized tokens joined by single spaces. Empty string
        when nothing survives the filtering.
    """
    # NOTE(review): the NLTK English stop-word list contains negations such as
    # "not" and "no"; removing them before sentiment scoring may change the
    # polarity of a phrase. Confirm this is intended.
    kept = []
    for token in word_tokenize(doc.lower()):
        if token in stop:
            continue
        # `exclude` holds single characters, but the tokenizer also emits
        # multi-character punctuation tokens ("...", "--", "``", "''").
        # Testing every character drops those too, not just 1-char tokens.
        if all(ch in exclude for ch in token):
            continue
        kept.append(lemma.lemmatize(token))
    return " ".join(kept)

In [4]:
def traverse(t, noun, phrases=None):
    """Collect the text of every NOUN_PHRASE subtree that contains ``noun``.

    Walks ``t`` depth-first. Any node exposing ``label()`` is treated as a
    subtree; anything else (e.g. a ``(word, pos)`` tuple) is treated as a leaf
    and ignored.

    Args:
        t: Tree node to inspect. Subtrees must provide ``label()``,
            ``leaves()`` (yielding ``(word, pos)`` pairs) and iteration over
            their children.
        noun: Single token to look for. Matching is done against the
            whitespace-split words of the phrase, so a multi-word ``noun``
            never matches.
        phrases: Optional list to append matches to. A new list is created
            when omitted.

    Returns:
        The ``phrases`` list (the same object that was passed in, if any),
        extended with one space-joined string per matching NOUN_PHRASE.
    """
    if phrases is None:
        phrases = []

    try:
        label = t.label()
    except AttributeError:
        # Leaves have no label(): nothing to collect or descend into.
        return phrases

    if label == 'NOUN_PHRASE':
        phrase = " ".join(word for (word, pos) in t.leaves())
        if noun in phrase.split():
            phrases.append(phrase)

    for child in t:
        traverse(child, noun, phrases)
    return phrases



In [14]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import numpy

# Per-product feature sentiment extraction.
#
# For every product: find the noun phrases in each review that mention a known
# feature term, score each phrase with VADER, average the scores per feature,
# and store one document per product in `feature_sentiment`.

sid = SentimentIntensityAnalyzer()
doc_complete = []

# Chunk grammar for noun phrases. Built once: neither the grammar nor the
# parser depends on the product or review being processed.
grammar = r"""
              NOUN_PHRASE:  {<CD|IN|DT|JJ|PP|RB|\$>*<N.*>+<V.*>*<CD|IN|DT|JJ|PP|RB|\$>*<N.*>*}
            """
cp = nltk.RegexpParser(grammar)

# feature name -> all terms listed for it in review_features. Loaded once
# instead of re-querying MongoDB for every review.
# Assumes review_features is not modified while this cell runs.
feature_terms = {
    feature: [
        term
        for feature_record in mydb.review_features.find({"feature": feature})
        for term in feature_record["terms"]
    ]
    for feature in mydb.review_features.distinct("feature")
}

# iterate all products
for product_id in product_ids:
    print()
    print("Product: "+str(product_id))
    # Fresh result document per product.
    sentiments = {"productId": str(product_id), "features": {}}

    # iterate reviews for product
    for review in mydb.reviews.find({"productId": product_id}):
        print(".", end="")
        feedback = review.get("buyerTranslationFeedback")
        # Skip reviews without usable text (field missing or not a string);
        # clean() would fail on a non-string value.
        if not isinstance(feedback, str):
            continue

        # clean -> tokenize -> POS-tag -> chunk into noun phrases
        tagged = nltk.pos_tag(nltk.word_tokenize(clean(feedback)))
        noun_phrase_tagged = cp.parse(tagged)

        for feature, terms in feature_terms.items():
            feature_array = sentiments["features"].setdefault(
                feature, {"average_sentiment": 0, "review_snippets": []}
            )
            for noun in terms:
                for phrase in traverse(noun_phrase_tagged, noun, []):
                    feature_array["review_snippets"].append({
                        "sentiment": sid.polarity_scores(phrase)["compound"],
                        "reviewId": review["_id"],
                        "snippet": phrase,
                    })

    # after iterating over all reviews for product: average sentiment per feature
    for feature_array in sentiments["features"].values():
        scores = [snippet["sentiment"] for snippet in feature_array["review_snippets"]]
        # float() turns the numpy scalar into a plain Python float for
        # printing and storage.
        feature_array["average_sentiment"] = float(numpy.mean(scores)) if scores else 0

    print(sentiments)
    # Upsert rather than insert: feature_sentiment has a unique index on
    # productId, so a plain insert fails when this cell is re-run.
    mydb.feature_sentiment.replace_one(
        {"productId": sentiments["productId"]},
        sentiments,
        upsert=True,
    )
                           



Product: 32611785137

Product: 32662806201


............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................{'productId': '32662806201', 'features': {'adapter': {'average_sentiment': -0.1779, 'review_snippets': [{'sentiment': -0.1779, 'snippet': 'l

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................{'productId': '32769

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

......{'productId': '32792320034', 'features': {'adapter': {'average_sentiment': 0.23616153846153848, 'review_snippets': [{'sentiment': 0.0, 'snippet': 'level complete film phone adapter', 'reviewId': ObjectId('596a7803d60bcd0f3b108a8f')}, {'sentiment': 0.3612, 'snippet': 'additional glass screen adapter euro socket say thank delivery kaluga region', 'reviewId': ObjectId('596a7803d60bcd0f3b108a93')}, {'sentiment': 0.7003, 'snippet': '4 happy socket came adapter well film', 'reviewId': ObjectId('596a7803d60bcd0f3b108a96')}, {'sentiment': 0.3612, 'snippet': 'additional protective film adapter charger recommend seller', 'reviewId': ObjectId('596a7803d60bcd0f3b108a9d')}, {'sentiment': -0.25, 'snippet': 'cord charging/usb adapter film screen wanted put 5 star downside', 'reviewId': ObjectId('596a7803d60bcd0f3b108a9e')}, {'sentiment': 0.0, 'snippet': 'seller put eu adapter', 'reviewId': ObjectId('596a7803d60bcd0f3b108aa1')}, {'sentiment': 0.0, 'snippet': 'parcel addition adapter charging sel


Product: 32796300206
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................