### Movie Review Sentiment Analysis Using Google Cloud NLP API
We use the Google Cloud Natural Language (NLP) API (https://cloud.google.com/natural-language/docs) to perform sentiment analysis on movie reviews.

#### Import statements

In [1]:
import os

from time import time

#### Data read

In [2]:
# Folder with movie review files. The original local path stays the default;
# set the REVIEW_POLARITY_DIR environment variable to use a copy elsewhere.
data_dir = os.environ.get(
    "REVIEW_POLARITY_DIR",
    "/Users/i337036/Documents/Data/review_polarity/txt_sentoken",
)
# Class names for classification (each one is a sub-folder of data_dir)
classes = ['pos', 'neg']

# Read the data from the path
train_data = []
train_labels = []
test_data = []
test_labels = []
for curr_class in classes:
    dirname = os.path.join(data_dir, curr_class)
    # os.listdir() returns entries in arbitrary order; sort them so that a
    # given index always refers to the same review on every run and machine.
    for fname in sorted(os.listdir(dirname)):
        # Decode explicitly rather than relying on the platform default encoding
        with open(os.path.join(dirname, fname), 'r', encoding='utf-8') as f:
            content = f.read()
        # File names are of the form cvxxx_xxxxx.txt
        # Use files that start with the cv9xx_xxxxx.txt for test
        if fname.startswith('cv9'):
            test_data.append(content)
            test_labels.append(curr_class)
        else:
            train_data.append(content)
            train_labels.append(curr_class)

In [3]:
# Report the size of each split, then preview one review from each of them
separator = "**" * 30
print(separator)
sample_counts = (len(train_data), len(test_data))
print("# of train data samples:\t%d\n# of test  data samples:\t%d" % sample_counts)
print(separator)

# Arbitrary sample to preview; it has to be a valid index into both lists
idx = 150
train_preview = train_data[idx][:100]
print("Train review: [%s] with sentiment: [%s]" % (train_preview, train_labels[idx]))
test_preview = test_data[idx][:100]
print("Test review: [%s] with sentiment: [%s]" % (test_preview, test_labels[idx]))

************************************************************
# of train data samples:	1800
# of test  data samples:	200
************************************************************
Train review: [in recent years , harrison ford has been such a grave screen presence , scowling through the likes o] with sentiment: [pos]
Test review: [the most interesting part of " can't hardly wait " just happens to be not only the most human , but ] with sentiment: [neg]


#### Import Google Cloud NLP

In [4]:
from google.cloud import language

In [5]:
# Create the Natural Language API client used by the cells below.
# NOTE(review): no credentials are passed here, so they are presumably
# picked up from the environment — confirm before running elsewhere.
lang_client = language.Client()

#### Test a single review for sentiment scores

In [6]:
# Exploratory: list the client's attributes to find the document_from_*
# factory methods (see the output below).
dir(lang_client)

['SCOPE',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_connection',
 '_credentials',
 '_http',
 '_http_internal',
 'document_from_gcs_url',
 'document_from_html',
 'document_from_text',
 'document_from_url',
 'from_service_account_json']

In [14]:
# Wrap one training review (the one previewed earlier via idx) in an API
# document, then list the document's attributes to see the available analyses.
doc = lang_client.document_from_text(train_data[idx])
dir(doc)

['HTML',
 'PLAIN_TEXT',
 'TYPE_UNSPECIFIED',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_to_dict',
 'analyze_entities',
 'analyze_sentiment',
 'analyze_syntax',
 'annotate_text',
 'client',
 'content',
 'doc_type',
 'encoding',
 'gcs_url',
 'language']

In [15]:
# Annotate the document asking for sentiment only; syntax and entity
# analysis are switched off.
# NOTE(review): presumably this is the call that contacts the API — confirm.
annotate = doc.annotate_text(include_sentiment=True, include_syntax=False, include_entities=False)

In [12]:
def print_result(annotations):
    """Print per-sentence sentiment scores followed by the overall sentiment.

    Args:
        annotations: object exposing ``sentiment.score``,
            ``sentiment.magnitude`` and an iterable ``sentences`` whose
            items each expose ``sentiment.score``.

    Returns:
        None. One line is printed per sentence (numbered from 0), then a
        final line with the overall score and magnitude.
    """
    # Read the document-level values first, before touching the sentences
    overall = annotations.sentiment
    overall_score = overall.score
    overall_magnitude = overall.magnitude

    position = 0
    for item in annotations.sentences:
        print('Sentence {} has a sentiment score of {}'.format(position, item.sentiment.score))
        position += 1

    print('Overall Sentiment: score of {} with magnitude of {}'.format(overall_score, overall_magnitude))

In [16]:
# Show the per-sentence scores and the overall sentiment for the annotated review
print_result(annotate)

Sentence 0 has a sentiment score of -0.2
Sentence 1 has a sentiment score of 0.1
Sentence 2 has a sentiment score of -0.4
Sentence 3 has a sentiment score of -0.8
Sentence 4 has a sentiment score of 0.6
Sentence 5 has a sentiment score of -0.4
Sentence 6 has a sentiment score of -0.2
Sentence 7 has a sentiment score of -0.3
Sentence 8 has a sentiment score of 0.8
Sentence 9 has a sentiment score of 0.5
Sentence 10 has a sentiment score of 0.8
Sentence 11 has a sentiment score of 0.4
Sentence 12 has a sentiment score of -0.9
Sentence 13 has a sentiment score of 0
Sentence 14 has a sentiment score of 0
Overall Sentiment: score of 0 with magnitude of 7.2


0