### Importing Libraries

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import spacy

warnings.filterwarnings('ignore')

### Analyze Dataset

In [2]:
# Directory that holds the train/dev CSV files. It defaults to the location
# used when this notebook was written; set the ABSA_DATA_DIR environment
# variable to run against a copy of the data stored elsewhere without having
# to edit this cell.
data_dir = os.environ.get(
    'ABSA_DATA_DIR',
    '/home/ayush/Graduate_Study/Natural_Language_Processing_Course/Assignment_2/exercise2/data',
)

# Both splits live side by side in the same directory.
train_dataset = os.path.join(data_dir, 'traindata.csv')
dev_dataset = os.path.join(data_dir, 'devdata.csv')

In [3]:
# Column names are supplied explicitly when reading the tab-separated files.
col_names = ['Polarity','Aspect_Category','Specific_Target_Aspect_Term','Character_Offset','Sentence']

# Read each split and discard the Character_Offset column straight away;
# no later step in this notebook uses it.
train_df = pd.read_csv(train_dataset, sep='\t', names=col_names).drop(columns=['Character_Offset'])
dev_df = pd.read_csv(dev_dataset, sep='\t', names=col_names).drop(columns=['Character_Offset'])

# spaCy English pipeline used for noun-chunk and part-of-speech extraction.
spacy_parser = spacy.load('en')

# Model dimensions shared by the cells below.
vocab_size = 8000           # passed to the Tokenizer and used as the Dense input width
num_aspect_categories = 12  # There are 12 Aspect Categories
num_sentiments = 3          # Positive, Negative and Neutral

In [4]:
# Report (rows, columns) for both splits after the column drop.
for label, frame in (('Train Data Shape: ', train_df), ('Dev Data Shape: ', dev_df)):
    print(label, frame.shape)

Train Data Shape:  (1503, 4)
Dev Data Shape:  (376, 4)


In [5]:
# Plain-text preview of the first 30 training rows.
train_preview = train_df.head(30)
print(train_preview)

    Polarity           Aspect_Category Specific_Target_Aspect_Term  \
0   positive          AMBIENCE#GENERAL                     seating   
1   positive          AMBIENCE#GENERAL                   trattoria   
2   positive              FOOD#QUALITY                        food   
3   negative           SERVICE#GENERAL                       STAFF   
4   positive        FOOD#STYLE_OPTIONS                        menu   
5   positive              FOOD#QUALITY                        tuna   
6   negative           SERVICE#GENERAL                       staff   
7   negative           SERVICE#GENERAL                     service   
8   positive        FOOD#STYLE_OPTIONS                    BBQ ribs   
9   positive          AMBIENCE#GENERAL                       place   
10  negative              FOOD#QUALITY         appetizer of olives   
11  negative              FOOD#QUALITY                       foods   
12  positive            DRINKS#QUALITY                      drinks   
13  positive        

In [6]:
# Plain-text preview of the first 30 development rows.
dev_preview = dev_df.head(30)
print(dev_preview)

    Polarity           Aspect_Category      Specific_Target_Aspect_Term  \
0   positive          LOCATION#GENERAL                     neighborhood   
1   negative        RESTAURANT#GENERAL                            place   
2   positive              FOOD#QUALITY                             Fish   
3   negative           SERVICE#GENERAL                          manager   
4    neutral            DRINKS#QUALITY                       margaritas   
5   negative          AMBIENCE#GENERAL                            decor   
6   negative        RESTAURANT#GENERAL                   Haru on Park S   
7   positive           SERVICE#GENERAL                            staff   
8   positive           SERVICE#GENERAL                          hostess   
9   negative  RESTAURANT#MISCELLANEOUS                              BFC   
10  positive           SERVICE#GENERAL                          service   
11  negative        FOOD#STYLE_OPTIONS  salt encrusted shrimp appetizer   
12  positive            D

### 1. Build the Aspect Categories Model

In [7]:
from keras.models import Sequential, load_model
from keras.layers import Activation, Dense

# Aspect classifier: a vocab_size-wide input vector feeds a 512-unit ReLU
# layer, followed by a softmax with one output per aspect category.
aspect_categories_model = Sequential([
    Dense(512, input_shape=(vocab_size,), activation='relu'),
    Dense(num_aspect_categories, activation='softmax'),
])
aspect_categories_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Using TensorFlow backend.


### Building Bag of Words Representation

In [8]:
from keras.preprocessing.text import Tokenizer

# The vocabulary is fitted on the full review sentences, while the matrix fed
# to the aspect model is built from the annotated aspect terms only.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(train_df['Sentence'])

aspect_term_matrix = tokenizer.texts_to_matrix(train_df['Specific_Target_Aspect_Term'])
aspect_tokenized = pd.DataFrame(aspect_term_matrix)

### Encoding the Aspect Category to Binary Variables

In [9]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Two-step target encoding: category string -> integer id -> one-hot row.
# The fitted encoder is kept so predictions can be mapped back to strings.
label_encoder = LabelEncoder()
label_encoder.fit(train_df['Aspect_Category'])
aspect_integer_category = label_encoder.transform(train_df['Aspect_Category'])
aspect_dummy_category = to_categorical(aspect_integer_category)

### Train the Aspect Categories Model

In [10]:
# Train the aspect classifier on the aspect-term matrix for 40 epochs.
history = aspect_categories_model.fit(
    x=aspect_tokenized,
    y=aspect_dummy_category,
    epochs=40,
    verbose=1,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


### Test the Aspect Category Model

In [11]:
new_review = "This dessert is delicious!!!"

# Aspect terms are the root words of the noun chunks whose root is tagged NOUN.
parsed_review = spacy_parser(new_review)
chunks = [
    noun_chunk.root.text
    for noun_chunk in parsed_review.noun_chunks
    if noun_chunk.root.pos_ == 'NOUN'
]
new_review_aspect_terms = ' '.join(chunks)
new_review_aspect_tokenized = tokenizer.texts_to_matrix([new_review_aspect_terms])

# Predict the class id, then map it back to the category string.
predicted_class_ids = aspect_categories_model.predict_classes(new_review_aspect_tokenized)
new_review_category = label_encoder.inverse_transform(predicted_class_ids)
print(new_review_category)

['FOOD#QUALITY']


  if diff:


### Extracting the Sentiment Terms

In [14]:
# For every training sentence keep the lemmas of adjectives and verbs that are
# neither stop words nor punctuation; they serve as the sentiment features.
# A document that spaCy reports as unparsed contributes an empty string.
Specific_Target_Sentiment_Term = []
for doc in spacy_parser.pipe(train_df['Sentence']):
    if not doc.is_parsed:
        Specific_Target_Sentiment_Term.append('')
        continue
    kept_lemmas = [
        tok.lemma_
        for tok in doc
        if tok.pos_ in ("ADJ", "VERB") and not (tok.is_stop or tok.is_punct)
    ]
    Specific_Target_Sentiment_Term.append(' '.join(kept_lemmas))

train_df['Specific_Target_Sentiment_Term'] = Specific_Target_Sentiment_Term
train_df.head(10)

Unnamed: 0,Polarity,Aspect_Category,Specific_Target_Aspect_Term,Sentence,Specific_Target_Sentiment_Term
0,positive,AMBIENCE#GENERAL,seating,short and sweet – seating is great:it's romant...,short sweet great be romantic cozy private
1,positive,AMBIENCE#GENERAL,trattoria,This quaint and romantic trattoria is at the t...,quaint romantic
2,positive,FOOD#QUALITY,food,The have over 100 different beers to offer thi...,different offer thi happy delicious recommend
3,negative,SERVICE#GENERAL,STAFF,THIS STAFF SHOULD BE FIRED.,should be
4,positive,FOOD#STYLE_OPTIONS,menu,"The menu looked great, and the waiter was very...",look great nice come average
5,positive,FOOD#QUALITY,tuna,The tuna and wasabe potatoes are excellent.,excellent
6,negative,SERVICE#GENERAL,staff,The whole set up is truly unprofessional and I...,unprofessional wish good current great
7,negative,SERVICE#GENERAL,service,"sometimes i get bad food and bad service, some...",bad bad good good bad
8,positive,FOOD#STYLE_OPTIONS,BBQ ribs,This place has the best Chinese style BBQ ribs...,good chinese
9,positive,AMBIENCE#GENERAL,place,Great place to relax and enjoy your dinner,great relax enjoy


### Build the Sentiment Model

In [15]:
# Sentiment classifier: a vocab_size-wide input vector feeds a 512-unit ReLU
# layer, followed by a softmax with one output per polarity class.
sentiment_model = Sequential([
    Dense(512, input_shape=(vocab_size,), activation='relu'),
    Dense(num_sentiments, activation='softmax'),
])
sentiment_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Encode the extracted sentiment terms with the tokenizer fitted on the sentences.
sentiment_term_matrix = tokenizer.texts_to_matrix(train_df['Specific_Target_Sentiment_Term'])
tokenized_sentiment = pd.DataFrame(sentiment_term_matrix)

In [17]:
# Polarity string -> integer id -> one-hot row. The fitted encoder is kept so
# predictions can be mapped back to polarity strings.
label_encoder_2 = LabelEncoder()
label_encoder_2.fit(train_df['Polarity'])
sentiment_integer_category = label_encoder_2.transform(train_df['Polarity'])
sentiment_dummy_category = to_categorical(sentiment_integer_category)

In [18]:
# Train the sentiment classifier on the sentiment-term matrix for 40 epochs.
sentiment_model.fit(
    x=tokenized_sentiment,
    y=sentiment_dummy_category,
    epochs=40,
    verbose=1,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7ff50c37b0f0>

### Testing Sentiment Model

In [20]:
new_review = "This italian place is nice and cosy"

# The sentiment model is trained on the lemmas of non-stop, non-punctuation
# adjectives and verbs (see the "Extracting the Sentiment Terms" cell), so the
# review must be reduced to the same kind of terms before prediction. Feeding
# it noun-chunk aspect terms instead would only test how the model reacts to
# nouns such as "place", not to the opinion words.
parsed_review = spacy_parser(new_review)
if parsed_review.is_parsed:
    new_review_sentiment_terms = ' '.join(
        token.lemma_
        for token in parsed_review
        if not token.is_stop
        and not token.is_punct
        and (token.pos_ == "ADJ" or token.pos_ == "VERB")
    )
else:
    # Mirrors the training preprocessing: unparsed documents yield no terms.
    new_review_sentiment_terms = ''
new_review_sentiment_tokenized = tokenizer.texts_to_matrix([new_review_sentiment_terms])

# Predict the polarity class id and map it back to its label.
new_review_category = label_encoder_2.inverse_transform(
    sentiment_model.predict_classes(new_review_sentiment_tokenized)
)
print(new_review_category)

['positive']


  if diff:


### Testing the Full Pipeline

In [23]:
test_reviews = [
    "Good, fast service.",
    "The hostess was very pleasant.",
    "The bread was stale, the salad was overpriced and empty.",
    "The food we ordered was excellent, although I wouldn't say the margaritas were anything to write home about.",
    "This place has totally weird decor, stairs going up with mirrored walls - I am surprised how no one yet broke their head or fall off the stairs"
]

# Lower-case the reviews before parsing.
test_reviews = [review.lower() for review in test_reviews]

# Parse every review once and derive both feature strings from the same
# document, instead of running the spaCy pipeline over the reviews twice.
aspect_term_texts = []
sentiment_term_texts = []
for review in spacy_parser.pipe(test_reviews):
    # Aspect preprocessing: root words of noun chunks whose root is a NOUN.
    aspect_term_texts.append(' '.join(
        chunk.root.text for chunk in review.noun_chunks if chunk.root.pos_ == 'NOUN'
    ))

    # Sentiment preprocessing: lemmas of non-stop, non-punctuation adjectives
    # and verbs; an unparsed document contributes an empty string.
    if review.is_parsed:
        sentiment_term_texts.append(' '.join(
            token.lemma_
            for token in review
            if not token.is_stop
            and not token.is_punct
            and (token.pos_ == "ADJ" or token.pos_ == "VERB")
        ))
    else:
        sentiment_term_texts.append('')

test_aspect_terms = pd.DataFrame(tokenizer.texts_to_matrix(aspect_term_texts))
test_sentiment_terms = pd.DataFrame(tokenizer.texts_to_matrix(sentiment_term_texts))

# Models output
test_aspect_categories = label_encoder.inverse_transform(aspect_categories_model.predict_classes(test_aspect_terms))
test_sentiment = label_encoder_2.inverse_transform(sentiment_model.predict_classes(test_sentiment_terms))

# Iterate over the actual predictions rather than a hard-coded count, so the
# report stays correct when reviews are added to or removed from the list.
for review_number, (sentiment, category) in enumerate(zip(test_sentiment, test_aspect_categories), start=1):
    print("Review " + str(review_number) + " is expressing a  " + sentiment + " opinion about " + category)


Review 1 is expressing a  positive opinion about SERVICE#GENERAL
Review 2 is expressing a  positive opinion about SERVICE#GENERAL
Review 3 is expressing a  negative opinion about FOOD#QUALITY
Review 4 is expressing a  positive opinion about FOOD#QUALITY
Review 5 is expressing a  negative opinion about AMBIENCE#GENERAL


  if diff:
  if diff:
