In [7]:
import requests
import matplotlib.pyplot as plt
import random

# Using APIs to Train Models

In [8]:
YELP_TOKEN = ""

In [9]:
r = requests.get("https://api.yelp.com/v3/businesses/search?location=Toronto&limit=50", headers={"Authorization": "Bearer %s" % YELP_TOKEN})

In [4]:
print(r.status_code, r.reason, r.content)

400 Bad Request b'{"error": {"code": "VALIDATION_ERROR", "description": "\'Bearer\' does not match \'^(?i)Bearer [A-Za-z0-9\\\\\\\\-\\\\\\\\_]{128}$\'", "field": "Authorization", "instance": "Bearer"}}'


In [5]:
r.json()

{'error': {'code': 'VALIDATION_ERROR',
  'description': "'Bearer' does not match '^(?i)Bearer [A-Za-z0-9\\\\-\\\\_]{128}$'",
  'field': 'Authorization',
  'instance': 'Bearer'}}

In [6]:
review_labels = []
for business in r.json()['businesses']:
    reviews = requests.get("https://api.yelp.com/v3/businesses/%s/reviews" % business['id'], headers={"Authorization": "Bearer %s" % YELP_TOKEN}).json()
    for review in reviews['reviews']:
        review_labels.append((review['text'].rstrip('.'), review['rating']))
review_labels

KeyError: ignored

In [None]:
review_features = [(x.split(' '), 'positive' if y > 3 else 'negative') for (x, y) in review_labels]
review_features

In [None]:
from nltk.sentiment import SentimentAnalyzer
import nltk.sentiment.util
from nltk.classify import NaiveBayesClassifier

random.shuffle(review_features)
training_docs = review_features[:120]
test_docs = review_features[120:]

print("Training: %d, Testing: %d" % (len(training_docs), len(test_docs)))

sentim_analyzer = SentimentAnalyzer()

In [None]:
all_words_neg = sentim_analyzer.all_words([nltk.sentiment.util.mark_negation(doc) for doc in training_docs])
all_words_neg

In [None]:
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
sentim_analyzer.add_feat_extractor(nltk.sentiment.util.extract_unigram_feats, unigrams=unigram_feats)

In [None]:
training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(test_docs)

In [None]:
trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)
for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
     print('{0}: {1}'.format(key, value))

In [None]:
from collections import defaultdict

c = defaultdict(int)
for x in review_labels:
    c[x[1]] += 1

plt.bar(c.keys(), c.values())

Our results would probably be a lot better if we had more data. In addition to simply grabbing _more_ reviews, it would also help if we had **complete** reviews.

Unfortunately, the Yelp API doesn't help us out here. It only gives the first few lines of a review, enough to show in your app, but they want you to redirect your users to Yelp.com to read the whole thing.

If only there was a way...

In [None]:
review

In [None]:
print(requests.get(review['url']).text)