### 1. Import the packages needed for the scraping process

In [1]:
import pandas as pd
import numpy as np
from langdetect import detect
from google_play_scraper import Sort, reviews

### 2. List the apps that should be scraped

Example: the ID of 'Samsung Health'

https://play.google.com/store/apps/details?id=com.sec.android.app.shealth&gl=DE (full store URL)
-> 'com.sec.android.app.shealth' (value of the `id` parameter; this is the ID used for the scraping process)

In [2]:
# Apps to scrape: maps a display name to the app's Google Play package ID
# (the value of the `id` parameter in the store URL).
# The display name (key) is stored in the 'app_name' column of the result;
# the package ID (value) is what is passed to the scraper.
apps = {
    'Samsung Health':'com.sec.android.app.shealth',
    'MyFitnessPal: Kalorienzähler':'com.myfitnesspal.android',
    'WW (Weight Watchers)':'com.weightwatchers.mobile',
    'Kalorienzähler von FatSecret':'com.fatsecret.android',
    'YAZIO: Kalorienzähler & Fasten':'com.yazio.android',
    'HealthifyMe - Calorie Counter':'com.healthifyme.basic',
    'Lifesum: Gesunde Ernährung':'com.sillens.shapeupclub',
    'Noom: Gewicht & Health':'com.wsl.noom',
    'BetterMe: Gesundheits-Coaching':'com.gen.workoutme',
    'Lose It! - Kalorienzähler':'com.fitnow.loseit',
    'Kalorienzähler - EasyFit':'com.marioherzberg.swipeviews_tutorial1',
    'Kalorien, Fett & Eiweißzähler':'digifit.virtuagym.foodtracker',
    'Fitatu Kalorienzähler & Diät':'com.fitatu.tracker',
    'Gesundheit, Ernährung, Fitness- Kalorienzähler':'com.droidinfinity.healthplus',
    'Ernährungstagebuch':'com.dailybits.foodjournal',
    'Kalorienzähler HiKi':'ru.hikisoft.calories',
    'Calorie Counter - MyNetDiary':'com.fourtechnologies.mynetdiary.ad',
    'MyPlate Calorie Tracker':'com.livestrong.tracker',
    'Fddb - Kalorienzähler & Diät':'com.fddb',
    'Kalorienzähler':'com.nutratech.app.android',
    'Cronometer - CalorieTracker':'com.cronometer.android.gold',
    'FITTR: Fitness&Weigth Loss':'com.squats.fittr',
    'Macros - Kalorienzähler':'com.josmantek.macros',
    'Kalorienzähler Kalorien!':'org.digitalcure.ccnf.app',
    'Abnehmen ohne Diät':'ru.harmonicsoft.caloriecounter'
}

### 3. Define the criteria for the scraping process
-> Language = English ; Country = USA ; Sort & count = defined in step 5

In [3]:
def scrape_reviews(app_id, sort, count, lang='en', country='us'):
    """Fetch one batch of Google Play reviews for a single app.

    Parameters
    ----------
    app_id : str
        Package ID of the app, e.g. 'com.sec.android.app.shealth'.
    sort : Sort
        Review ordering, passed through unchanged to ``reviews``
        (e.g. ``Sort.MOST_RELEVANT`` or ``Sort.NEWEST``).
    count : int
        Number of reviews to request.
    lang : str, optional
        Language code of the reviews to request. Defaults to 'en'.
    country : str, optional
        Country code of the store to query. Defaults to 'us'.

    Returns
    -------
    The first element of the pair returned by ``reviews`` (the review
    records). The second element, a continuation token, is discarded, so
    only a single batch is ever fetched per call.
    """
    # The continuation token is deliberately ignored: no follow-up request is made.
    result, _ = reviews(
        app_id,
        lang=lang,
        country=country,
        sort=sort,
        count=count,
    )
    return result

### 4. Detect the review language with the langdetect package
Texts whose language cannot be detected (e.g. reviews consisting only of emojis or flags) raise an exception in langdetect; these are labelled 'unknown'.

In [4]:
def detect_language(text):
    """Return the language code detected for ``text``, or 'unknown'.

    Any exception raised by ``detect`` (for example on text made up only of
    emojis or flags) is treated as "language could not be determined" and
    mapped to the string 'unknown' instead of being propagated.
    """
    try:
        return detect(text)
    except Exception:
        # Deliberate best-effort fallback: one undetectable review must not abort the run.
        return 'unknown'

### 5. Use the packages pandas and numpy to build a data frame; define the criteria for 'sort' and 'count'

In [6]:
# Number of reviews requested per app and per sort order.
REVIEWS_PER_SORT = 500

# Each app is scraped once per sort order; the label is stored in the 'source' column.
SORT_ORDERS = [
    (Sort.MOST_RELEVANT, 'most_relevant'),
    (Sort.NEWEST, 'newest'),
]

# Collect one frame per (app, sort order) and concatenate once at the end;
# concatenating inside the loop would re-copy all previous rows on every iteration.
review_frames = []
for app_name, app_id in apps.items():
    for sort, source in SORT_ORDERS:
        result = scrape_reviews(app_id, sort=sort, count=REVIEWS_PER_SORT)
        if not result:
            # Nothing returned for this app / sort order: skip it.
            continue
        # One row per review record, one column per record field.
        app_review_df = pd.DataFrame(result)
        # Add the detected language of the review text ('unknown' if undetectable).
        app_review_df['lang'] = app_review_df['content'].apply(detect_language)
        app_review_df['app_name'] = app_name
        app_review_df['source'] = source
        review_frames.append(app_review_df)

# The index is intentionally not reset: it restarts at 0 for every batch,
# i.e. it is the position of the review within its own scrape.
# An empty frame is produced when no app returned any review.
reviews_df = pd.concat(review_frames) if review_frames else pd.DataFrame()

### 6. Export as CSV file

In [7]:
# Write all collected reviews to a CSV file in the current working directory.
# With pandas' default settings the DataFrame index is written as the first, unnamed column.
csv_path = 'reviews.csv'
reviews_df.to_csv(csv_path)