### Google Cloud ML

In [None]:
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types
import os
from google.oauth2 import service_account

In [None]:
# Location of the service-account key file. The standard
# GOOGLE_APPLICATION_CREDENTIALS environment variable takes precedence so the
# notebook is not tied to one machine; the local path is only a fallback.
credential = service_account.Credentials.from_service_account_file(
    os.environ.get('GOOGLE_APPLICATION_CREDENTIALS',
                   '/Users/panherbert/google-auth/_google-key.json'))

In [None]:
# Create the Natural Language client, authenticating with the explicitly
# loaded service-account credentials rather than ambient defaults.
client = language.LanguageServiceClient(credentials=credential)

# Sample product review used as input for the sentiment examples
# in this notebook.
text = u'''
Defective card

After trying and trying repeatedly to use this microcard as a replacement in my Samsung S7 - hours of frustration, seeking guidance from internet, friends, etc. I gave up, thinking it was just my ineptitude. Tried again several times, it would never work.
Finally, decided to try a different/new card. Sure enough...worked right away! So, by now the 128 GB card ordered May 2, is well past its "return by" date, when I determine it is defective. Now what do I do? Searched on Amazon, but found no instructions for this situation.
Giving it 1 star in hopes this can be rectified. I use SanDisk cards all the time and have never had a problem like this before.

Just found out from SanDisk that this card is a fake - not their product. Guess I am screwed unless Amazon steps up and makes it
good. Need to somehow punish this seller "Lecu Computer" for giving Amazon a bad name and customers a bad product!
'''

In [None]:
# Wrap the review in a plain-text Document. `content` must be the `text`
# variable itself, not the string literal 'text' -- otherwise the service
# scores the single word "text" while the print below shows the review.
document = types.Document(
    content=text,
    type=enums.Document.Type.PLAIN_TEXT)

# Detects the sentiment of the text
sentiment = client.analyze_sentiment(document=document).document_sentiment

# Echo the analysed text followed by its score and magnitude.
print('Text: {}'.format(text))
print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

### NLTK

In [None]:
import nltk
# Fetch the 'vader_lexicon' resource through NLTK's downloader
# (presumably required by the analyzer created below -- confirm).
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single analyzer instance reused by the following cell.
sid = SentimentIntensityAnalyzer()

In [None]:
# Score a short sample phrase with the VADER analyzer; the bare expression
# displays the returned scores.
sid.polarity_scores('heck yes')

### TextBlob

In [None]:
from textblob import TextBlob
# `.sentiment` is the TextBlob property that holds the sentiment result;
# a misspelled attribute name here raises AttributeError.
TextBlob('heck yes').sentiment

### Flair

In [None]:
import flair
from flair import data
# Load the 'en-sentiment' text classifier once; the cells below reuse it.
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

In [None]:
# Wrap the review text in a flair Sentence; the bare expression displays it.
data.Sentence(text)

In [None]:
# Classify the whole review: predict() is called for its effect on the
# Sentence, whose labels are then read back and displayed.
s = data.Sentence(text)
flair_sentiment.predict(s)
total_sentiment = s.labels
total_sentiment

In [None]:
def senti(text):
    """Return the flair sentiment labels for ``text``.

    Wraps ``text`` in a ``data.Sentence``, runs the module-level
    ``flair_sentiment`` classifier on it, and returns the sentence's
    ``labels`` attribute.
    """
    sentence = data.Sentence(text)
    flair_sentiment.predict(sentence)
    return sentence.labels

In [None]:
# Try the helper on the same sample phrase used in the NLTK and TextBlob cells.
senti('heck yes')

### AWS

In [1]:
import boto3

In [2]:
# Amazon Comprehend client. No region or credentials are passed here, so
# boto3 has to resolve them from its own configuration.
# NOTE(review): this rebinds `client`, the same name the Google Cloud section
# uses for its LanguageServiceClient -- re-running that section after this
# cell would call the wrong service.
client = boto3.client('comprehend')

In [4]:
# Smoke test: send a single short English string through the batch endpoint.
response = client.batch_detect_sentiment(TextList=['Hello World'], LanguageCode='en')

In [45]:
import pprint
def p(x):
    """Pretty-print ``x`` to stdout, eliding anything nested deeper than 6 levels."""
    pprint.pprint(x, depth=6)

In [46]:
# Pretty-print the full service response, including the HTTP metadata.
p(response)

{'ErrorList': [],
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '205',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Thu, 22 Aug 2019 04:15:18 GMT',
                                      'x-amzn-requestid': 'd43e3291-56a7-4f6f-b470-bc7e0397adc2'},
                      'HTTPStatusCode': 200,
                      'RequestId': 'd43e3291-56a7-4f6f-b470-bc7e0397adc2',
                      'RetryAttempts': 0},
 'ResultList': [{'Index': 0,
                 'Sentiment': 'NEUTRAL',
                 'SentimentScore': {'Mixed': 0.0078077297657728195,
                                    'Negative': 0.02092457003891468,
                                    'Neutral': 0.8059531450271606,
                                    'Positive': 0.16531452536582947}}]}


In [47]:
# Pretty-print only the per-document results held under 'ResultList'.
p(response['ResultList'])

[{'Index': 0,
  'Sentiment': 'NEUTRAL',
  'SentimentScore': {'Mixed': 0.0078077297657728195,
                     'Negative': 0.02092457003891468,
                     'Neutral': 0.8059531450271606,
                     'Positive': 0.16531452536582947}}]


### Analysing with AWS

In [48]:
from pymongo import MongoClient
from bson.objectid import ObjectId

In [49]:
# Connect to the MongoDB server on port 27017.
# NOTE(review): despite its name, `db` is a MongoClient; the cells below use
# `db.project.<collection>`, so `project` looks like the database -- confirm.
# NOTE(review): the host is a hard-coded address and the URI carries no
# credentials (empty user-info before '@') -- consider moving it to config.
db = MongoClient('mongodb://@54.183.229.143', 27017)

In [50]:
# Unfiltered query over the `questions` collection (empty filter matches all).
# NOTE(review): `questions` is not used again in the visible part of the notebook.
questions = db.project.questions.find({})

In [51]:
# Unfiltered query over the `reviews` collection (empty filter matches all).
# NOTE(review): `reviews` is not used again in the visible part of the notebook.
reviews = db.project.reviews.find({})

In [129]:
import pandas as pd
import numpy as np
from IPython.display import clear_output

In [323]:
# Query every review, projecting only the id, title and body fields
# (each requested field is flagged with 1 in the projection document).
col = db.project.reviews.find(
    {},
    dict.fromkeys(('_id', 'review_id', 'title', 'content'), 1),
)

In [324]:
# Build a DataFrame from the query result held in `col`.
# NOTE(review): if `col` is still a cursor this consumes it, so a later cell
# that iterates `col` again would see no rows -- confirm.
df = pd.DataFrame(col)

In [332]:
# Keep only the fields needed downstream and add `corpus` = "<title> <content>".
# The join is vectorised (Series.str.cat) instead of a row-wise apply, missing
# titles/contents are treated as empty strings rather than raising TypeError
# inside ' '.join, and .assign builds a new frame so no column is written into
# a slice of the previous DataFrame.
df = df[['_id', 'review_id', 'title', 'content']].assign(
    corpus=lambda frame: frame['title'].fillna('').str.cat(
        frame['content'].fillna(''), sep=' ')
)
combined_df = df[['_id', 'review_id', 'corpus']]
combined_df

Unnamed: 0,_id,review_id,corpus
0,5d53b24de7f56061ea1f9581,RKQOUIP1W33UT,Good Good
1,5d53b24de7f56061ea1f9582,R1358Q1HMSSK1T,Needed this memory card for my Action Camera. ...
2,5d53b24de7f56061ea1f9583,R2BU293VXBCL1H,I just got it today works good Works good
3,5d53b24de7f56061ea1f9584,R24XBDBSZ6AKZ3,Samsung it’s really good quality Bought it for...
4,5d53b24de7f56061ea1f9585,R2CLQIHDGUI6TR,No Lamentablemente pedí que enviaran todos los...
5,5d53b24de7f56061ea1f9586,R3CCE9NXE07U46,👍🏽 👍🏽
6,5d53b24de7f56061ea1f9587,R3SLM91EEAYEOJ,GOOD LIKE IT
7,5d53b24de7f56061ea1f9588,R3E909V7N93QU7,Thank you very good Thank you very good
8,5d53b24de7f56061ea1f9589,R837T97HK8XBD,Very good product It is used to keep data out ...
9,5d53b24de7f56061ea1f958a,RDQSLQALENZDK,bon merci


In [319]:
# Disabled aggregation that would build `corpus` ("<title> <content>") on the
# server side.
# NOTE(review): the cell that follows still reads `corpus`, so on a fresh
# kernel that name is undefined while these lines stay commented out.
# NOTE(review): '$review_id': 1 sits beside '$project' in the stage document
# rather than inside it; a pipeline stage should hold a single operator --
# confirm before re-enabling.
# pipeline =    [
#         { '$project': {'corpus': {'$concat': [ "$title", " ", "$content" ]}},
#          '$review_id': 1
#         }
#     ]

# corpus = list(db.project.reviews.aggregate(pipeline))

In [309]:
# Display the whole `corpus` object.
# NOTE(review): this prints every element and relies on `corpus` having been
# defined by an earlier (now commented-out) cell -- consider `corpus[:10]`.
corpus

[{'_id': ObjectId('5d53b24de7f56061ea1f9581'), 'corpus': 'Good Good'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9582'),
  'corpus': 'Needed this memory card for my Action Camera. Works great in my Full HD Action Camera. I highly recommend this memory card. :))'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9583'),
  'corpus': 'I just got it today works good Works good'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9584'),
  'corpus': 'Samsung it’s really good quality Bought it for a friend and really liked it'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9585'),
  'corpus': 'No Lamentablemente pedí que enviaran todos los artículos en un solo paquete y no fue así.'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9586'), 'corpus': '👍🏽 👍🏽'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9587'), 'corpus': 'GOOD LIKE IT'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9588'),
  'corpus': 'Thank you very good Thank you very good'},
 {'_id': ObjectId('5d53b24de7f56061ea1f9589'),
  'corpus': 'Very good product It is used to keep 

In [54]:
# Rebuild `df` from `col` and preview the first three rows.
# NOTE(review): the saved output shows only `_id` and `corpus` columns, so
# `col` presumably held the aggregation result when this ran -- confirm.
df = pd.DataFrame(col)
df.head(3)

Unnamed: 0,_id,corpus
0,5d53b24de7f56061ea1f9581,Good Good
1,5d53b24de7f56061ea1f9582,Needed this memory card for my Action Camera. ...
2,5d53b24de7f56061ea1f9583,I just got it today works good Works good


In [55]:
# Pull the combined text out of each record, then preview the first ten.
corpus = [record['corpus'] for record in col]
corpus[:10]

['Good Good',
 'Needed this memory card for my Action Camera. Works great in my Full HD Action Camera. I highly recommend this memory card. :))',
 'I just got it today works good Works good',
 'Samsung it’s really good quality Bought it for a friend and really liked it',
 'No Lamentablemente pedí que enviaran todos los artículos en un solo paquete y no fue así.',
 '👍🏽 👍🏽',
 'GOOD LIKE IT',
 'Thank you very good Thank you very good',
 'Very good product It is used to keep data out from my phone',
 'bon merci']

In [56]:
# Sanity check: the frame summary and the corpus length can be compared to
# confirm both hold the same number of entries.
df.info()
len(corpus)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35667 entries, 0 to 35666
Data columns (total 2 columns):
_id       35667 non-null object
corpus    35667 non-null object
dtypes: object(2)
memory usage: 557.4+ KB


35667

In [245]:
from ratelimiter import RateLimiter

In [57]:
import pickle

In [60]:
import copy

In [231]:
# Work on an independent copy of `corpus` so later steps cannot alter it.
# NOTE(review): this rebinds `col`, which earlier cells use for the MongoDB
# query result; from here on it is a copy of the `corpus` list.
col = copy.deepcopy(corpus)

In [62]:
import sys

In [148]:
# Dividing Task
n = 25

final = [col[i * n:(i + 1) * n] for i in range((len(col) + n - 1) // n )]  

In [64]:
# Smoke-test write: store the first batch under a sentinel batch number.
# insert_one replaces Collection.insert, which is deprecated in PyMongo 3 and
# removed in PyMongo 4; .inserted_id keeps the displayed value an ObjectId.
db.project.temp.insert_one(
    {'batch_num': 999, 'batch_obj': final[0]}
).inserted_id

  """Entry point for launching an IPython kernel.


ObjectId('5d5e173dee5dc46c829612de')

In [158]:
# List every document currently in the `temp` collection.
list(db.project.temp.find())

[]

In [159]:
# Remove the sentinel test document (batch_num 999), then list what remains
# in `temp` to confirm the cleanup.
db.project.temp.delete_one({'batch_num': 999})
list(db.project.temp.find())

[]

In [79]:
def response(x):
    """Run batch sentiment detection on ``x`` via the module-level ``client``.

    ``x`` is forwarded as ``TextList`` together with ``LanguageCode='en'``;
    the service result is returned unchanged.
    """
    return client.batch_detect_sentiment(TextList=x, LanguageCode='en')

In [145]:
# Preview the first rows of `df`.
df.head()

Unnamed: 0,_id,corpus
0,5d53b24de7f56061ea1f9581,Good Good
1,5d53b24de7f56061ea1f9582,Needed this memory card for my Action Camera. ...
2,5d53b24de7f56061ea1f9583,I just got it today works good Works good
3,5d53b24de7f56061ea1f9584,Samsung it’s really good quality Bought it for...
4,5d53b24de7f56061ea1f9585,No Lamentablemente pedí que enviaran todos los...


In [160]:


# Limiter configured for at most 20 calls per period of 1
# (period is presumably in seconds -- confirm against the ratelimiter docs).
rate_limiter = RateLimiter(max_calls=20, period=1)

['It\'s a winner! The genuine Samsung Evo Select line of Micro SD cards are great. They are reliable and they live up to the speed and capacity classifications they claim to. I\'ve been using Samsung cards for years and they are the only brand that consistently doesn\'t give me any issues year after year.This 32GB card is no exception. However, by default the card is formatted as FAT32, but I prefer exFAT as FAT32 won\'t allow you to transfer a folder containing more than 4GB to the card. That\'s annoying if you are moving data from an old card to a new one. FAT32 has 4GB file size limitations, but on a transfer it recognizes a directory containing lots of files as 1 thing, and applies the 4GB limit. This will give you an error (in Windows anyway) that there isn\'t enough space available for the transfer. But if you copy the files individually it works fine. Or just format the card to exFAT and it will accept the folder that exceeds 4GB easily. exFAT will work just fine in an Android p