In [1]:
import os
import pickle
from datetime import date, datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from google_play_scraper import app, Sort, reviews_all, permissions


def getReviewDf(appId, lang='en', country='us'):
    """Fetch all reviews for one app and return them as a DataFrame.

    Parameters
    ----------
    appId : str
        Package id forwarded to ``reviews_all`` (e.g. 'com.nintendo.zaba').
    lang : str, optional
        Language code forwarded to ``reviews_all``. Defaults to 'en'.
    country : str, optional
        Country code forwarded to ``reviews_all``. Defaults to 'us'.

    Returns
    -------
    pandas.DataFrame
        One row per review record and one column per key found in the
        records. An empty frame is returned when no records come back.

    Notes
    -----
    Any exception raised by ``reviews_all`` propagates to the caller.
    """
    user_reviews = reviews_all(
        appId,
        sleep_milliseconds=0,
        lang=lang,
        country=country,
        sort=Sort.NEWEST,
    )
    # The records are already a list of dicts, so build the frame directly
    # instead of going through an object ndarray and a pop/join round trip.
    return pd.DataFrame(user_reviews)


Matplotlib created a temporary config/cache directory at /tmp/matplotlib-oq2rl9ls because the default path (/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:

# Google Play package ids of the apps whose reviews are scraped below.
# Note: the list currently holds 36 ids despite the variable's name.
top35 = [
    'com.miHoYo.GenshinImpact',
    'com.bandainamcogames.dbzdokkanww',
    'com.aniplex.fategrandorder.en',
    'com.plarium.raidlegends',
    'jp.konami.masterduel',
    'jp.gungho.padEN',
    'com.com2us.smon.normal.freefull.google.kr.android.common',
    'com.crunchyroll.princessconnectredive',
    'com.stove.epic7.google',
    'com.sega.ColorfulStage.en',
    'com.lilithgame.hgame.gp',
    'com.nexon.maplem.global',
    'com.YoStarEN.AzurLane',
    'com.nintendo.zaba',
    'com.nintendo.zaka',
    'com.kakaogames.gdts',
    'com.netmarble.nanagb',
    'com.bandainamcoent.dblegends_ww',
    'com.droidhang.ad',
    'com.dena.a12026418',
    'jp.konami.duellinks',
    'com.YoStarEN.Arknights',
    'com.square_enix.android_googleplay.WOTVffbeww',
    'com.miHoYo.bh3global',
    'com.bandainamcoent.opbrww',
    'com.bandainamcoent.tensuramrkww',
    'jp.co.ponos.battlecatsen',
    'games.wfs.anothereden',
    'com.namcobandaigames.spmoja010E',
    'com.zigzagame.evertale',
    'com.square_enix.android_googleplay.FFBEWW',
    'com.aniplex.twst.en',
    'com.square_enix.android_googleplay.DFFOperaOmnian',
    'com.netease.idv.googleplay',
    'com.netmarble.mherosgb',
    'com.bandainamcoent.hiroacawwus',
]


In [4]:

# Scrape every app in `top35`, attach app-level metadata to each review row,
# and pickle one DataFrame per app into `directoryToSave`.

directoryToSave = 'topReviews/'

# App-level fields copied from the `app()` result onto every review row.
# 'genreId' was previously misspelled 'ganreId', which never matched a key and
# therefore always produced an empty column.
detailsForApps = [
    'genre', 'genreId', 'title', 'description', 'summary', 'minInstalls', 'price',
    'free', 'currency', 'offersIAP', 'size', 'androidVersion', 'developer',
    'contentRating', 'contentRatingDescription', 'adSupported', 'containsAds',
    'updated', 'released', 'appId', 'similarApps', 'moreByDeveloper',
]

# Fields whose value is a list: a bare assignment would try to align the list
# with the rows, so the same list is repeated once per row instead.
listValuedDetails = ('similarApps', 'moreByDeveloper')


def _parse_updated(value):
    """Convert the 'updated' epoch timestamp to a datetime; NaT when missing."""
    if pd.isna(value) or value == '':
        return pd.NaT
    return datetime.fromtimestamp(value)


def _parse_released(value):
    """Parse a 'released' string such as 'Mar 21, 2019'; NaT when missing.

    A non-empty string in another format still raises ValueError.
    """
    if pd.isna(value) or value == '':
        return pd.NaT
    return datetime.strptime(value, '%b %d, %Y')


# to_pickle does not create missing directories, so make sure the target exists.
os.makedirs(directoryToSave, exist_ok=True)

# Placeholder for a combined frame across all apps; not populated by this cell.
final_df = None

for currentApp in top35:
    temp_df = None

    print(f'Current App {currentApp}')
    try:
        temp_df = getReviewDf(currentApp)

        if temp_df is None:
            continue
        print(f'Fetched {len(temp_df)} reviews')

        # get app details
        appData = app(
            currentApp,
            lang='en',
            country='us',
        )
    except Exception as ex:
        # Best effort: any failure while fetching (not only a missing app)
        # skips this app so the remaining ones are still processed.
        print('failed to fetch reviews or app details; skipping')
        print(ex)
        print(currentApp)
        continue

    # go through the list of fields we want and add each one to the data frame
    for detail in detailsForApps:
        if detail not in appData:
            # field not provided for this app
            temp_df[detail] = ''
        elif detail in listValuedDetails:
            temp_df[detail] = pd.Series(
                [appData[detail]] * len(temp_df), index=temp_df.index
            )
        else:
            temp_df[detail] = appData[detail]

    # Missing or None dates become NaT instead of aborting the whole run.
    temp_df['updated'] = temp_df['updated'].apply(_parse_updated)
    temp_df['released'] = temp_df['released'].apply(_parse_released)

    # TODO: derive review date parts (year / month / day / day-of-week) from the
    # review timestamp column 'at', plus app_created_year / app_lastupdated_year
    # from 'released' / 'updated', and a review year normalised to the app's
    # release year.

    temp_df.to_pickle(os.path.join(directoryToSave, f'{currentApp}.pkl'))

# TODO: optionally combine the per-app frames into `final_df` with a single
# pd.concat over a list of frames and pickle it as 'all_reviews.pkl'.

Current App com.miHoYo.GenshinImpact
Fetched 358772 reviews
Current App com.bandainamcogames.dbzdokkanww
Fetched 173571 reviews
Current App com.aniplex.fategrandorder.en
Fetched 19168 reviews
Current App com.plarium.raidlegends
Fetched 179251 reviews
Current App jp.konami.masterduel
Fetched 8583 reviews
Current App jp.gungho.padEN
Fetched 31487 reviews
Current App com.com2us.smon.normal.freefull.google.kr.android.common
Fetched 232053 reviews
Current App com.crunchyroll.princessconnectredive
Fetched 8214 reviews
Current App com.stove.epic7.google
Fetched 52221 reviews
Current App com.sega.ColorfulStage.en
Fetched 2229 reviews
Current App com.lilithgame.hgame.gp
Fetched 9950 reviews
Current App com.nexon.maplem.global
Fetched 3184 reviews
Current App com.YoStarEN.AzurLane
Fetched 6169 reviews
Current App com.nintendo.zaba
Fetched 1592 reviews
Current App com.nintendo.zaka
Fetched 1393 reviews
Current App com.kakaogames.gdts
Fetched 1194 reviews
Current App com.netmarble.nanagb
Fetched 5

In [None]:
# Scratch: show the app-level value for whatever `detail` and `appData` currently
# hold in the kernel (both are left over from the scraping loop cell).
appData[detail]

In [None]:
## Fix date columns

In [None]:
# Scratch: show the current value of the loop variable `detail` left over from
# the scraping loop cell.
detail

In [None]:
# Scratch: print the type of the app-level value, then repeat that value once
# per review row and store it in a column.
# NOTE(review): reads appData[detail] but always writes to 'similarApps', and
# relies on `detail`, `appData` and `temp_df` left over from the scraping loop
# cell — confirm this is intended before keeping the cell.
print(type(appData[detail]))
# temp_df['similarApps'] = temp_df['title']
temp_df['similarApps'] = pd.Series([appData[detail]] * len(temp_df))  

In [None]:
# Scratch: preview the first rows of the most recently built per-app frame
# (`temp_df` is left over from the scraping loop cell).
temp_df.head()