# Part 1.1 - Scrapping App Reviews

Scrapping App Store app reviews from:
- Syfe
- Endowus
- StashAway

Reference: 
https://python.plainenglish.io/scraping-app-store-reviews-with-python-90e4117ccdfb

In [17]:
import pandas as pd
import numpy as np

# for scraping app info from App Store
from itunes_app_scraper.scraper import AppStoreScraper

# for scraping app reviews from App Store
from app_store_scraper import AppStore

# for pretty printing data structures
from pprint import pprint

# for keeping track of timing
import datetime as dt
from tzlocal import get_localzone

# for building in wait times
import random
import time

In [14]:
## Read in file containing app names and IDs
app_df = pd.read_excel('app_info.xlsx', sheet_name='apple')
app_df.head()

Unnamed: 0,app_name,iOS_app_name,iOS_app_id
0,Syfe,syfe-invest-better,1497156434
1,Endowus,endowus-invest-cpf-srs-cash,1531067679
2,StashAway,stashaway-invest-and-save,1229966330


In [15]:
## Get list of app names and app IDs
app_names = list(app_df['iOS_app_name'])
app_ids = list(app_df['iOS_app_id'])

In [18]:
## Set up App Store Scraper
scraper = AppStoreScraper()
app_store_list = list(scraper.get_multiple_app_details(app_ids))

## Pretty print the data for the first app
pprint(app_store_list[0])

https://itunes.apple.com/lookup?id=1497156434&country=nl&entity=software
https://itunes.apple.com/lookup?id=1531067679&country=nl&entity=software
https://itunes.apple.com/lookup?id=1229966330&country=nl&entity=software
{'advisories': '',
 'appletvScreenshotUrls': '',
 'artistId': 1497156433,
 'artistName': 'Syfe Pte. Ltd.',
 'artistViewUrl': 'https://apps.apple.com/nl/developer/syfe-pte-ltd/id1497156433?uo=4',
 'artworkUrl100': 'https://is5-ssl.mzstatic.com/image/thumb/Purple125/v4/6a/1a/c5/6a1ac563-637d-c81c-ecc5-8ecd4014f9c3/source/100x100bb.jpg',
 'artworkUrl512': 'https://is5-ssl.mzstatic.com/image/thumb/Purple125/v4/6a/1a/c5/6a1ac563-637d-c81c-ecc5-8ecd4014f9c3/source/512x512bb.jpg',
 'artworkUrl60': 'https://is5-ssl.mzstatic.com/image/thumb/Purple125/v4/6a/1a/c5/6a1ac563-637d-c81c-ecc5-8ecd4014f9c3/source/60x60bb.jpg',
 'averageUserRating': 0,
 'averageUserRatingForCurrentVersion': 0,
 'bundleId': 'com.syfe',
 'contentAdvisoryRating': '4+',
 'currency': 'EUR',
 'currentVersionRel

In [19]:
# Converting list into dataframe
app_info_df = pd.DataFrame(app_store_list)
# app_info_df.to_csv('Data/app_info.csv', index=False)
app_info_df.head()

Unnamed: 0,screenshotUrls,ipadScreenshotUrls,appletvScreenshotUrls,artworkUrl60,artworkUrl512,artworkUrl100,artistViewUrl,supportedDevices,advisories,isGameCenterEnabled,...,currency,version,wrapperType,artistId,artistName,genres,price,description,bundleId,userRatingCount
0,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,,,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,https://apps.apple.com/nl/developer/syfe-pte-l...,"iPhone5s-iPhone5s,iPadAir-iPadAir,iPadAirCellu...",,False,...,EUR,4.72,software,1497156433,Syfe Pte. Ltd.,Financiën,0.0,"Invest better, faster and smarter with Syfe. S...",com.syfe,0
1,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,,,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,https://apps.apple.com/nl/developer/endowus/id...,"iPhone5s-iPhone5s,iPadAir-iPadAir,iPadAirCellu...",,False,...,EUR,1.3.1,software,1531067681,Endowus,"Financiën,Lifestyle",0.0,"Thousands invest and grow their Cash, CPF & SR...",com.endowus.mobileapp,0
2,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,https://apps.apple.com/nl/developer/asia-wealt...,"iPhone5s-iPhone5s,iPadAir-iPadAir,iPadAirCellu...",,False,...,EUR,11.95.2,software,1229625364,Asia Wealth Platform Pte Ltd,Financiën,0.0,StashAway is where intelligent investing meets...,com.awp.stashaway,0


Given that there are no user rating counts, we can ignore itunes ratings in our analysis. 

## Scrapping App Reviews

In [23]:
# Empty list for storing reviews
app_reviews = []

## Set up loop to go through all apps
for app_name, app_id in zip(app_names, app_ids):
    
    # Get start time
    start = dt.datetime.now(tz=get_localzone())
    fmt= "%m/%d/%y - %T %p"
    
    # Print starting output for app
    print('---'*20)
    print('---'*20)    
    print(f'***** {app_name} started at {start.strftime(fmt)}')
    print()
    
    # Instantiate AppStore for app
    app_ = AppStore(country='sg', app_name=app_name, app_id=app_id)
    
    # Scrape reviews posted since February 28, 2020 and limit to 10,000 reviews
    app_.review(how_many=10000,
                after=dt.datetime(2020, 2, 28),
                sleep=random.randint(20,25))
    
    reviews = app_.reviews
    
    # Add keys to store information about which app each review is for
    for rvw in reviews:
        rvw['app_name'] = app_name
        rvw['app_id'] = app_id
    
    # Print update that scraping was completed
    print(f"""Done scraping {app_name}. 
    Scraped a total of {app_.reviews_count} reviews.\n""")

     # Convert list of dicts to Pandas DataFrame and write to csv
    review_df = pd.DataFrame(reviews)
    app_reviews.append(review_df)
    
    # Get end time
    end = dt.datetime.now(tz=get_localzone())
    
    # Print ending output for app
    print(f"""Successfully wrote {app_name} reviews to df
    at {end.strftime(fmt)}.\n""")
    print(f'Time elapsed for {app_name}: {end-start}')
    print('---'*20)
    print('---'*20)
    print('\n')
    
    # Wait 5 to 10 seconds to start scraping next app
    time.sleep(random.randint(5,10))

------------------------------------------------------------
------------------------------------------------------------
***** syfe-invest-better started at 08/29/21 - 23:49:53 PM



2021-08-29 23:49:55,347 [INFO] Base - Initialised: AppStore('sg', 'syfe-invest-better', 1497156434)
2021-08-29 23:49:55,348 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/sg/app/syfe-invest-better/id1497156434
2021-08-29 23:50:17,638 [INFO] Base - [id:1497156434] Fetched 20 reviews (20 fetched in total)
2021-08-29 23:51:02,252 [INFO] Base - [id:1497156434] Fetched 57 reviews (57 fetched in total)
2021-08-29 23:51:02,553 [INFO] Base - [id:1497156434] Fetched 64 reviews (64 fetched in total)


Done scraping syfe-invest-better. 
    Scraped a total of 64 reviews.

Successfully wrote syfe-invest-better reviews to df
    at 08/29/21 - 23:51:02 PM.

Time elapsed for syfe-invest-better: 0:01:08.761335
------------------------------------------------------------
------------------------------------------------------------


------------------------------------------------------------
------------------------------------------------------------
***** endowus-invest-cpf-srs-cash started at 08/29/21 - 23:51:11 PM



2021-08-29 23:51:13,212 [INFO] Base - Initialised: AppStore('sg', 'endowus-invest-cpf-srs-cash', 1531067679)
2021-08-29 23:51:13,213 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/sg/app/endowus-invest-cpf-srs-cash/id1531067679
2021-08-29 23:51:34,519 [INFO] Base - [id:1531067679] Fetched 20 reviews (20 fetched in total)
2021-08-29 23:52:17,097 [INFO] Base - [id:1531067679] Fetched 60 reviews (60 fetched in total)
2021-08-29 23:52:17,352 [INFO] Base - [id:1531067679] Fetched 64 reviews (64 fetched in total)


Done scraping endowus-invest-cpf-srs-cash. 
    Scraped a total of 64 reviews.

Successfully wrote endowus-invest-cpf-srs-cash reviews to df
    at 08/29/21 - 23:52:17 PM.

Time elapsed for endowus-invest-cpf-srs-cash: 0:01:05.797735
------------------------------------------------------------
------------------------------------------------------------


------------------------------------------------------------
------------------------------------------------------------
***** stashaway-invest-and-save started at 08/29/21 - 23:52:25 PM



2021-08-29 23:52:27,558 [INFO] Base - Initialised: AppStore('sg', 'stashaway-invest-and-save', 1229966330)
2021-08-29 23:52:27,559 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/sg/app/stashaway-invest-and-save/id1229966330
2021-08-29 23:52:51,839 [INFO] Base - [id:1229966330] Fetched 17 reviews (17 fetched in total)
2021-08-29 23:53:40,263 [INFO] Base - [id:1229966330] Fetched 51 reviews (51 fetched in total)
2021-08-29 23:54:29,010 [INFO] Base - [id:1229966330] Fetched 83 reviews (83 fetched in total)
2021-08-29 23:55:17,567 [INFO] Base - [id:1229966330] Fetched 119 reviews (119 fetched in total)
2021-08-29 23:56:06,208 [INFO] Base - [id:1229966330] Fetched 148 reviews (148 fetched in total)
2021-08-29 23:56:54,852 [INFO] Base - [id:1229966330] Fetched 180 reviews (180 fetched in total)
2021-08-29 23:57:43,588 [INFO] Base - [id:1229966330] Fetched 213 reviews (213 fetched in total)
2021-08-29 23:58:32,227 [INFO] Base - [id:1229966330] Fetched 243 reviews (243 fetch

Done scraping stashaway-invest-and-save. 
    Scraped a total of 385 reviews.

Successfully wrote stashaway-invest-and-save reviews to df
    at 08/30/21 - 00:02:11 AM.

Time elapsed for stashaway-invest-and-save: 0:09:46.534071
------------------------------------------------------------
------------------------------------------------------------




In [32]:
 # Convert list of dfs to Pandas DataFrame and write to csv
review_df = pd.concat(app_reviews)
review_df.to_csv('applestore_reviews.csv', index=False, header=True)