In [7]:
from datetime import datetime
import json
import requests
import xmltodict

In [8]:
# Load the previously saved app metadata and reviews.
# The encoding is pinned to UTF-8 so the read matches how this notebook writes the
# same file back at the end, instead of depending on the platform's default encoding.
# json.load parses straight from the handle, so the raw file text is not kept around
# as an extra global for the lifetime of the kernel.
with open("data/appstore_metadata_and_reviews_47apps_110418.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [15]:
def fetch_reviews(app_id, country = 'us', sortBy = 'mostRecent', page = 1):
    """Fetch one page of user reviews for an app from the iTunes customer-reviews RSS feed.

    Parameters
    ----------
    app_id : int or str
        App Store id of the app; interpolated into the feed URL.
    country : str
        Storefront country code. Defaults to United States ('us').
    sortBy : str
        'mostRecent' (default) or 'mostHelpful'.
    page : int
        Page number. Default = 1, max = 10.

    Returns
    -------
    list of dict
        One dict per review, in feed order, with the keys 'title', 'author',
        'authorUrl', 'rating', 'date', 'voteSum', 'voteCount' and 'content'
        (newlines in the content are replaced by spaces). Values are passed
        through as parsed from the XML. Returns an empty list when the page
        has no entries.

    Notes
    -----
    Entries that lack any of the expected fields are skipped individually;
    they do not cut off the remaining reviews on the page.
    """
    url = 'https://itunes.apple.com/%s/rss/customerreviews/id=%s/sortBy=%s/page=%s/xml' % (
        country, app_id, sortBy, page)

    # Without a timeout a stalled connection would hang the notebook cell forever.
    r = requests.get(url, timeout=30)
    reviews_dict = xmltodict.parse(r.text)

    # A page without reviews has no 'entry' key (or an empty 'feed' element,
    # which xmltodict parses to None).
    try:
        entries = reviews_dict['feed']['entry']
    except (KeyError, TypeError):
        return []

    # xmltodict yields a plain dict (not a one-element list) when the feed
    # contains exactly one <entry>; normalise so the loop below always
    # iterates over entries rather than over the keys of a single entry.
    if not isinstance(entries, list):
        entries = [entries]

    reviews = []
    for entry in entries:
        try:
            reviews.append({'title': entry['title'],
                            'author': entry['author']['name'],
                            'authorUrl': entry['author']['uri'],
                            'rating': entry['im:rating'],
                            'date': entry['updated'],
                            'voteSum': entry['im:voteSum'],
                            'voteCount': entry['im:voteCount'],
                            # The first <content> element is used; its text is
                            # flattened to a single line.
                            'content': entry['content'][0]['#text'].replace('\n', ' ')
                            })
        except (KeyError, TypeError, IndexError, AttributeError):
            # Not a well-formed review entry: skip it, keep the rest.
            continue
    return reviews

In [21]:
app_metadata = []

REVIEW_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"
MAX_REVIEW_PAGES = 10  # fetch_reviews documents pages 1..10


def _review_time(review):
    """Parse a review's 'date' field, dropping the trailing UTC offset (e.g. '-07:00').

    NOTE(review): comparing these naive datetimes assumes every review
    timestamp carries the same offset -- confirm against the feed.
    """
    return datetime.strptime(review['date'][:-6], REVIEW_DATE_FORMAT)


# Fetch the reviews published since the newest review we already have stored
# for each app (up to MAX_REVIEW_PAGES pages of the 'mostRecent' feed).
for app in data:
    # If the app in our previous data has no reviews, skip it; it is then
    # left out of app_metadata as well.
    try:
        last_review = app['recent_reviews'][0]
    except (KeyError, IndexError, TypeError):
        continue
    # Parsed once per app instead of once per fetched review.
    last_review_time = _review_time(last_review)

    recent_reviews = []
    print(app['name'])

    for i in range(1, MAX_REVIEW_PAGES + 1):
        print(i)
        new_reviews = fetch_reviews(app['id'], page=i)

        # An empty page means there is nothing further to fetch for this app.
        if not new_reviews:
            break

        # Index of the first fetched review that is not newer than the last
        # review we have stored (by date); None if the whole page is new.
        cutoff = next(
            (j for j, review in enumerate(new_reviews)
             if _review_time(review) <= last_review_time),
            None,
        )

        if cutoff is None:
            recent_reviews += new_reviews
        else:
            recent_reviews += new_reviews[:cutoff]
            break

    # Prepend rather than append: element [0] is used above as the newest
    # stored review, so the new (more recent) reviews must go in front.
    # Appending would leave a stale cursor at [0] and make the next run
    # download and store the same reviews again.
    app['recent_reviews'] = recent_reviews + app['recent_reviews']
    app_metadata.append(app)

FollowMyHealth®
1
{'title': 'Awesome', 'author': 'Jake 1242367', 'authorUrl': 'https://itunes.apple.com/us/reviews/id313379422', 'rating': '5', 'date': '2018-11-04T10:49:21-07:00', 'voteSum': '0', 'voteCount': '0', 'content': 'Great app to keep updates on bloodworms and tests'}
healow
1
{'title': 'HEALOW does not work!', 'author': 'Texassidewinder', 'authorUrl': 'https://itunes.apple.com/us/reviews/id36324930', 'rating': '1', 'date': '2018-11-04T09:33:05-07:00', 'voteSum': '0', 'voteCount': '0', 'content': 'Impossible to locate my doctor. App has significant glitches. I have downloaded, deleted, re-downloaded, restarted my iPhone repeatedly and it continuously puts me in mode to search for doctor by name. Skips the screen that allows alternative searches (by practice name, etc) and dumps me into a list of random doctors in my area.  Setting up a new account is impossible. First time users are put into a screen with user name and password with no new user setup - just lost password and 

In [28]:
# Serialize before opening the file. Opening with 'w' truncates the file
# immediately, and this path is the same file the notebook loads its input from,
# so an error while building the JSON (or a missing app_metadata when cells are
# run out of order) would otherwise leave the stored data wiped.
serialized_metadata = json.dumps(app_metadata)
with open('data/appstore_metadata_and_reviews_47apps_110418.json', 'w', encoding='utf-8') as f:
    f.write(serialized_metadata)