In [1]:
import os

In [2]:
import pandas as pd

# Read the headerless training CSV, keep its first three columns and give
# them names; index=str turns the row labels into strings.
dataset = (
    pd.read_csv(
        r'C:/Users/LENOVO/Documents/mobile_review_train_data.csv',
        header=None,
        engine='python',
    )
    .loc[:, [0, 1, 2]]
    .rename(index=str, columns={0: "sentiment", 1: "aspect_category", 2: "review"})
)
dataset.head(5)

Unnamed: 0,sentiment,aspect_category,review
0,Negative,BATTERY#GENERAL,The unit is working bad and due to the slow ch...
1,Positive,CAMERA#GENERAL,Innovative and good camera product. I am able ...
2,Positive,BATTERY#CHARGING,I tried many battery but this battery is just ...
3,Positive,DISPLAY#GENRAL,good display of the phone
4,Neutral,WORKING#GENRAL,it’s a moderate performance


In [3]:
import spacy
nlp = spacy.load('en')

# Work on lower-cased review text from here on.
dataset['review'] = dataset['review'].str.lower()

# One space-joined string per review, holding the root word of every noun
# chunk whose root is tagged NOUN.
aspect_terms = [
    ' '.join(
        noun_chunk.root.text
        for noun_chunk in doc.noun_chunks
        if noun_chunk.root.pos_ == 'NOUN'
    )
    for doc in nlp.pipe(dataset['review'])
]
dataset['aspect_terms'] = aspect_terms
dataset.head(10)

Unnamed: 0,sentiment,aspect_category,review,aspect_terms
0,Negative,BATTERY#GENERAL,the unit is working bad and due to the slow ch...,unit charge life
1,Positive,CAMERA#GENERAL,innovative and good camera product. i am able ...,product battery mobile polarities
2,Positive,BATTERY#CHARGING,i tried many battery but this battery is just ...,battery battery seller need
3,Positive,DISPLAY#GENRAL,good display of the phone,display phone
4,Neutral,WORKING#GENRAL,it’s a moderate performance,performance
5,Negative,FUNCTIONALITY#BATTERY,very bad . battery heats up very fast,battery
6,Positive,SERVICE#GENERAL,product is genuine and effective. original bat...,product battery
7,Positive,PERFORMANCE#GENERAL,"it's good,awesomethe battery backup is very go...",backup warranty
8,Negative,FUNCTIONALITY#BATTERY,"very poor quality, did not charge properly and...",quality second
9,Positive,SERVICE#GENERAL,product is genuine and effective. original bat...,product battery


In [4]:
from tensorflow.python.keras.models import load_model
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Activation

# Aspect-category classifier: a 6000-wide input vector, one hidden ReLU layer
# and a softmax over 11 output classes.
aspect_categories_model = Sequential([
    Dense(512, input_shape=(6000,), activation='relu'),
    Dense(11, activation='softmax'),
])
aspect_categories_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [5]:
from tensorflow.python.keras.preprocessing.text import Tokenizer

# Upper bound on the vocabulary; the same value (6000) is used as the
# input_shape of the models in this notebook.
vocab_size = 6000
tokenizer = Tokenizer(num_words=vocab_size)

# The vocabulary is fitted on the full review text, then used to encode the
# extracted aspect terms as one matrix row per review.
tokenizer.fit_on_texts(dataset['review'])
aspect_matrix = tokenizer.texts_to_matrix(dataset['aspect_terms'])
aspect_tokenized = pd.DataFrame(aspect_matrix)

In [6]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.python.keras.utils import to_categorical

# Map each aspect-category label to an integer id, then one-hot encode the ids
# as the training target for the aspect model.
label_encoder = LabelEncoder().fit(dataset['aspect_category'])
integer_category = label_encoder.transform(dataset['aspect_category'])
dummy_category = to_categorical(integer_category)


In [7]:
# Train the aspect-category classifier on the encoded aspect terms.
aspect_categories_model.fit(
    x=aspect_tokenized,
    y=dummy_category,
    batch_size=100,
    epochs=5,
    verbose=1,
)

Train on 17 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x195447eb748>

In [8]:
new_review = "very worst "

# Extract aspect terms the same way as for the training reviews: roots of
# noun chunks whose root is tagged NOUN.
chunks = [
    noun_chunk.root.text
    for noun_chunk in nlp(new_review).noun_chunks
    if noun_chunk.root.pos_ == 'NOUN'
]
new_review_aspect_terms = ' '.join(chunks)
new_review_aspect_tokenized = tokenizer.texts_to_matrix([new_review_aspect_terms])

# Predict the class id, then map it back to its aspect-category label.
predicted_class_ids = aspect_categories_model.predict_classes(new_review_aspect_tokenized)
new_review_category = label_encoder.inverse_transform(predicted_class_ids)
print(new_review_category)

['FUNCTIONALITY#BATTERY']


In [9]:
# One space-joined string per review holding the lemmas of its adjectives and
# verbs, skipping stop words and punctuation. A document that spaCy reports
# as not parsed contributes an empty string.
sentiment_terms = [
    ' '.join(
        token.lemma_
        for token in doc
        if token.pos_ in ("ADJ", "VERB") and not (token.is_stop or token.is_punct)
    ) if doc.is_parsed else ''
    for doc in nlp.pipe(dataset['review'])
]
dataset['sentiment_terms'] = sentiment_terms
dataset.head(10)

Unnamed: 0,sentiment,aspect_category,review,aspect_terms,sentiment_terms
0,Negative,BATTERY#GENERAL,the unit is working bad and due to the slow ch...,unit charge life,work bad slow enhance
1,Positive,CAMERA#GENERAL,innovative and good camera product. i am able ...,product battery mobile polarities,innovative good able charge spare sense
2,Positive,BATTERY#CHARGING,i tried many battery but this battery is just ...,battery battery seller need,try awsome buy thank understand
3,Positive,DISPLAY#GENRAL,good display of the phone,display phone,good
4,Neutral,WORKING#GENRAL,it’s a moderate performance,performance,moderate
5,Negative,FUNCTIONALITY#BATTERY,very bad . battery heats up very fast,battery,bad heat
6,Positive,SERVICE#GENERAL,product is genuine and effective. original bat...,product battery,genuine effective original
7,Positive,PERFORMANCE#GENERAL,"it's good,awesomethe battery backup is very go...",backup warranty,good awesomethe good have
8,Negative,FUNCTIONALITY#BATTERY,"very poor quality, did not charge properly and...",quality second,poor charge discharge
9,Positive,SERVICE#GENERAL,product is genuine and effective. original bat...,product battery,genuine effective original


In [10]:
# Sentiment classifier: a 6000-wide input vector, one hidden ReLU layer and a
# softmax over 3 output classes.
sentiment_model = Sequential([
    Dense(512, input_shape=(6000,), activation='relu'),
    Dense(3, activation='softmax'),
])
sentiment_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Encode the sentiment terms with the tokenizer that was fitted on the
# training reviews.
sentiment_matrix = tokenizer.texts_to_matrix(dataset['sentiment_terms'])
sentiment_tokenized = pd.DataFrame(sentiment_matrix)

In [12]:
# Map each sentiment label to an integer id, then one-hot encode the ids as
# the training target for the sentiment model.
label_encoder_2 = LabelEncoder().fit(dataset['sentiment'])
integer_sentiment = label_encoder_2.transform(dataset['sentiment'])
dummy_sentiment = to_categorical(integer_sentiment)

In [13]:
# Train the sentiment classifier on the encoded sentiment terms.
sentiment_model.fit(
    x=sentiment_tokenized,
    y=dummy_sentiment,
    epochs=5,
    verbose=1,
)


Train on 17 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x19545b9bcc8>

In [14]:

new_review = "one of the best phone ,working very fine"

# The sentiment model was trained on lemmatised adjectives/verbs (stop words
# and punctuation removed), not on noun-chunk aspect terms. Build the same
# kind of feature string here so the prediction input matches the training
# input. The text is lower-cased because the training reviews were too.
new_review_sentiment_terms = ' '.join(
    token.lemma_
    for token in nlp(new_review.lower())
    if not token.is_stop and not token.is_punct and token.pos_ in ("ADJ", "VERB")
)
new_review_sentiment_tokenized = tokenizer.texts_to_matrix([new_review_sentiment_terms])

# Predict the class id, then map it back to its sentiment label.
new_review_sentiment = label_encoder_2.inverse_transform(
    sentiment_model.predict_classes(new_review_sentiment_tokenized)
)
print(new_review_sentiment)

['Positive']


In [15]:

import pandas as pd
# Score the first product's test reviews: predict an aspect category and a
# sentiment for every review, print one line per review and tally the
# sentiments into positive / negative / neutral.
df = pd.read_csv(r'C:/Users/LENOVO/Documents/Mobile_review_test_data.csv',engine='python')
# NOTE(review): the integer key 0 only matches an integer column label; if the
# header row is read from the file this renames no column - confirm the CSV
# already has a "Review" header.
df = df.rename(index=str, columns={ 0: "Review"})

# Lower-cased, like the training reviews.
test_reviews = [review.lower() for review in df.Review]
neutral = 0
negative = 0
positive = 0

# Aspect and sentiment preprocessing.
# Each review goes through the spaCy pipeline once and both feature strings
# are taken from the same parsed document.
aspect_texts = []
sentiment_texts = []
for doc in nlp.pipe(test_reviews):
    # Aspect terms: roots of noun chunks whose root is tagged NOUN.
    aspect_texts.append(
        ' '.join(chunk.root.text for chunk in doc.noun_chunks if chunk.root.pos_ == 'NOUN')
    )
    # Sentiment terms: lemmas of adjectives/verbs, minus stop words and punctuation.
    if doc.is_parsed:
        sentiment_texts.append(
            ' '.join(
                token.lemma_
                for token in doc
                if not token.is_stop and not token.is_punct and token.pos_ in ("ADJ", "VERB")
            )
        )
    else:
        sentiment_texts.append('')
test_aspect_terms = pd.DataFrame(tokenizer.texts_to_matrix(aspect_texts))
test_sentiment_terms = pd.DataFrame(tokenizer.texts_to_matrix(sentiment_texts))

# Models output
test_aspect_categories = label_encoder.inverse_transform(aspect_categories_model.predict_classes(test_aspect_terms))
test_sentiment = label_encoder_2.inverse_transform(sentiment_model.predict_classes(test_sentiment_terms))
for i in range(len(test_reviews)):
    predicted_sentiment = test_sentiment[i]
    # Any label other than "Negative"/"Positive" is counted as neutral.
    if predicted_sentiment == "Negative":
        negative = negative + 1
    elif predicted_sentiment == "Positive":
        positive = positive + 1
    else:
        neutral = neutral + 1

    print("Review " + str(i+1) + " is expressing a  " + test_sentiment[i] + " opinion about " + test_aspect_categories[i])
print("Total number Review are ",len(test_reviews))
print("Total number of Positive Review are ",positive)
print("Total number of Negative Review are ",negative)
print("Total number of Neutral Review are ",neutral)



Review 1 is expressing a  Negative opinion about BATTERY#GENERAL
Review 2 is expressing a  Positive opinion about CAMERA#GENERAL
Review 3 is expressing a  Positive opinion about BATTERY#CHARGING
Review 4 is expressing a  Positive opinion about DISPLAY#GENRAL
Review 5 is expressing a  Negative opinion about WORKING#GENRAL
Review 6 is expressing a  Negative opinion about FUNCTIONALITY#BATTERY
Review 7 is expressing a  Positive opinion about SERVICE#GENERAL
Review 8 is expressing a  Positive opinion about BATTERY#GENERAL
Review 9 is expressing a  Negative opinion about FUNCTIONALITY#BATTERY
Review 10 is expressing a  Positive opinion about SERVICE#GENERAL
Review 11 is expressing a  Negative opinion about FUNCTIONALITY#BATTERY
Review 12 is expressing a  Positive opinion about BATTERY#GENERAL
Review 13 is expressing a  Negative opinion about FUNCTIONALITY#CAMERA
Review 14 is expressing a  Negative opinion about WORKING#DOWNLOAD
Review 15 is expressing a  Positive opinion about DISPLAY#GENRA

In [24]:
import pandas as pd
# Score the second product's test reviews: predict an aspect category and a
# sentiment for every review, print one line per review and tally the
# sentiments into positive1 / negative1 / neutral1.
df = pd.read_csv(r'C:/Users/LENOVO/Documents/Mobile_review_test_data2.csv',engine='python')
# NOTE(review): the integer key 0 only matches an integer column label; if the
# header row is read from the file this renames no column - confirm the CSV
# already has a "Review" header.
df = df.rename(index=str, columns={ 0: "Review"})

# Lower-cased, like the training reviews.
test_reviews = [review.lower() for review in df.Review]
neutral1 = 0
negative1 = 0
positive1 = 0

# Aspect and sentiment preprocessing.
# Each review goes through the spaCy pipeline once and both feature strings
# are taken from the same parsed document.
aspect_texts = []
sentiment_texts = []
for doc in nlp.pipe(test_reviews):
    # Aspect terms: roots of noun chunks whose root is tagged NOUN.
    aspect_texts.append(
        ' '.join(chunk.root.text for chunk in doc.noun_chunks if chunk.root.pos_ == 'NOUN')
    )
    # Sentiment terms: lemmas of adjectives/verbs, minus stop words and punctuation.
    if doc.is_parsed:
        sentiment_texts.append(
            ' '.join(
                token.lemma_
                for token in doc
                if not token.is_stop and not token.is_punct and token.pos_ in ("ADJ", "VERB")
            )
        )
    else:
        sentiment_texts.append('')
test_aspect_terms = pd.DataFrame(tokenizer.texts_to_matrix(aspect_texts))
test_sentiment_terms = pd.DataFrame(tokenizer.texts_to_matrix(sentiment_texts))
# Number of reviews scored for the second product.
test_reviews1 = len(test_reviews)

# Models output
test_aspect_categories = label_encoder.inverse_transform(aspect_categories_model.predict_classes(test_aspect_terms))
test_sentiment = label_encoder_2.inverse_transform(sentiment_model.predict_classes(test_sentiment_terms))
for i in range(len(test_reviews)):
    predicted_sentiment = test_sentiment[i]
    # Any label other than "Negative"/"Positive" is counted as neutral.
    if predicted_sentiment == "Negative":
        negative1 = negative1 + 1
    elif predicted_sentiment == "Positive":
        positive1 = positive1 + 1
    else:
        neutral1 = neutral1 + 1

    print("Review " + str(i+1) + " is expressing a  " + test_sentiment[i] + " opinion about " + test_aspect_categories[i])
print("Total number Review are ",test_reviews1)
print("Total number of Positive Review are ",positive1)
print("Total number of Negative Review are ",negative1)
print("Total number of Neutral Review are ",neutral1)

Review 1 is expressing a  Negative opinion about FUNCTIONALITY#BATTERY
Review 2 is expressing a  Positive opinion about FUNCTIONALITY#CAMERA
Review 3 is expressing a  Positive opinion about BATTERY#CHARGING
Review 4 is expressing a  Negative opinion about FUNCTIONALITY#BATTERY
Review 5 is expressing a  Positive opinion about SERVICE#GENERAL
Review 6 is expressing a  Negative opinion about FUNCTIONALITY#BATTERY
Review 7 is expressing a  Positive opinion about CAMERA#GENERAL
Review 8 is expressing a  Positive opinion about BATTERY#GENERAL
Review 9 is expressing a  Negative opinion about FUNCTIONALITY#CAMERA
Review 10 is expressing a  Positive opinion about DISPLAY#GENRAL
Review 11 is expressing a  Positive opinion about BATTERY#GENERAL
Review 12 is expressing a  Positive opinion about FUNCTIONALITY#BATTERY
Review 13 is expressing a  Positive opinion about FUNCTIONALITY#BATTERY
Review 14 is expressing a  Positive opinion about FUNCTIONALITY#BATTERY
Review 15 is expressing a  Positive opin

In [32]:
# Comparison of the sentiment tallies of the two products.
#
# Every scored review increments exactly one of the three counters, so the
# per-product review count is the sum of its counters. Comparing
# len(test_reviews) with test_reviews1 would always compare the second
# product with itself, because both values come from the second product's
# cell.
total_reviews_1 = positive + negative + neutral
total_reviews_2 = positive1 + negative1 + neutral1
if total_reviews_1 != total_reviews_2:
    print("---WARNING---\nThe number of Reviews of two product are not equal")

# Positive reviews: report the lead, or say explicitly that it is a tie
# instead of claiming a lead of 0.
if positive > positive1:
    print("Product 1 has got "+ str(positive-positive1)+" positive review more than Product 2" )
elif positive1 > positive:
    print("Product 2 has got "+ str(positive1-positive)+" positive review more than Product 1" )
else:
    print("Both products have got the same number of positive review" )

# Negative reviews: same reporting rule.
if negative > negative1:
    print("Product 1 has got "+ str(negative-negative1)+" negative review more than Product 2" )
elif negative1 > negative:
    print("Product 2 has got "+ str(negative1-negative)+" negative review more than Product 1" )
else:
    print("Both products have got the same number of negative review" )

Product 2 has got 4 positive review more than Product 1
Product 1 has got 4 negative review more than Product 2
