# App Profile Recommendation
I will pretend as I am a part of a company that builds both Android and iOS mobile apps. My goal is to analyse data to help developers understand what type of apps are likely to attract more users.

In [1]:
from csv import reader
app_store = open('AppleStore.csv', encoding='utf8')
app_store = list(reader(app_store)) # 16, 7198

play_store = open('googleplaystore.csv', encoding='utf8')
play_store = list(reader(play_store)) # 13, 10842

In [2]:
def explore_data(dataset, start, end, rows_and_columns=False):
    dataset_slice = dataset[start:end]    
    for row in dataset_slice:
        print(row)
        print('\n') # adds a new (empty) line after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        print('Number of columns:', len(dataset[0]))

In [3]:
explore_data(play_store, 0, 3, True)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


Number of rows: 10842
Number of columns: 13


In [4]:
explore_data(app_store, 0, 10, True)

['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1']


['284035177', 'Pandora - Music & Radio', '130242560', 'USD', '0.0', '1126879', '3594', '4.0', '4.5', '8.4.1', '12+', 'Music', '37', '4', '1', '1']


['429047995', 'Pinterest', '74778624', 'USD', '0.0', '1061

## Displaying the columns

In [5]:
app_store_header = app_store[0]
app_store_header

['id',
 'track_name',
 'size_bytes',
 'currency',
 'price',
 'rating_count_tot',
 'rating_count_ver',
 'user_rating',
 'user_rating_ver',
 'ver',
 'cont_rating',
 'prime_genre',
 'sup_devices.num',
 'ipadSc_urls.num',
 'lang.num',
 'vpp_lic']

In [6]:
play_store_header = play_store[0]
play_store_header

['App',
 'Category',
 'Rating',
 'Reviews',
 'Size',
 'Installs',
 'Type',
 'Price',
 'Content Rating',
 'Genres',
 'Last Updated',
 'Current Ver',
 'Android Ver']

### Documentation for datasets

https://www.kaggle.com/lava18/google-play-store-apps

https://www.kaggle.com/ramamet4/app-store-apple-data-set-10k-apps

## Testing if dataset contains any empty values

In [7]:
for i in range(1,len(play_store)):
    for x in range (0 , 12):
        test_value = str(play_store[i][x])
    if not test_value:
        print ("Index" , i , " is not correct")
        print (play_store[i][x])
        print (play_store[i])
    
print ("finished")

Index 1554  is not correct

['Market Update Helper', 'LIBRARIES_AND_DEMO', '4.1', '20145', '11k', '1,000,000+', 'Free', '0', 'Everyone', 'Libraries & Demo', 'February 12, 2013', '', '1.5 and up']
finished


In [8]:
del play_store[1553]
#Index 1553 was incorrect

## Duplicates
When looking through the data we see that there are many cases where there are duplicates of apps. Example given below 

In [9]:
for app in play_store:
    name = app[0]
    if name == "Instagram":
        print (app)

['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']


It can be seen that there are 4 duplicates for Instagram. They all seem the same except for the Rating column, so we can base our pick from the bunch on that (highest number means the latest add so we take that one)

### Getting all duplicate apps

In [10]:
duplicate_apps = []
unique_apps = []


for app in play_store:
    name = app[0]
    
    if name in unique_apps:
        duplicate_apps.append(name)
    else :
        unique_apps.append(name)
        
print ("Number of duplicate apps: ", len(duplicate_apps))
print ("\n")
print ("Example of duplicate apps: ", duplicate_apps[:15])

Number of duplicate apps:  1181


Example of duplicate apps:  ['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings', 'Box', 'Zenefits', 'Google Ads', 'Google My Business', 'Slack', 'FreshBooks Classic', 'Insightly CRM', 'QuickBooks Accounting: Invoicing & Expenses', 'HipChat - Chat Built for Teams', 'Xero Accounting Software']


### Selecting the best pick


In [11]:
del play_store[0]
del app_store[0]
del play_store[10471]


In [12]:
print (play_store[10471])

['osmino Wi-Fi: free WiFi', 'TOOLS', '4.2', '134203', '4.1M', '10,000,000+', 'Free', '0', 'Everyone', 'Tools', 'August 7, 2018', '6.06.14', '4.4 and up']


In [13]:
reviews = {}
x = 0

for app in play_store:
    name = app[0]
    x+=1
    n_reviews = float(app[3])

    if name in reviews and reviews[name] < n_reviews:
        reviews[name] = n_reviews

    elif name not in reviews:
        reviews[name] = n_reviews


In [14]:
android_clean = []
already_added = []

for app in play_store:
    name = app[0]
    
    n_reviews = float(app[3])
    

    if (reviews[name] == n_reviews) and (name not in already_added):
        android_clean.append(app)
        already_added.append(name)
                

In [15]:
explore_data(android_clean, 0, 3, True)


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


Number of rows: 9658
Number of columns: 13


## removes all the non english apps from play store

In [100]:
iterator = 0
rowCounter = 0
def isEnglish (string):
    nonEnglishCounter = 0
    for char in string:
        if (ord(char) > 127):
            nonEnglishCounter += 1
    if nonEnglishCounter > 3:
        return False
    else:
        return True

for x in android_clean:
    
    if isEnglish(x[0]) is False:
        rowCounter += 1
        del android_clean[iterator]
    iterator += 1    
print ("\n\n")        
print ("There are " , rowCounter, " apps that are non-english")




There are  1  apps that are non-english


## removes all the non english apps from app store

In [101]:
iterator = 0
rowCounter = 0
def isEnglish (string):
    nonEnglishCounter = 0
    for char in string:
        if (ord(char) > 127):
            nonEnglishCounter += 1
    if nonEnglishCounter > 3:
        return False
    else:
        return True

for x in app_store:
    if isEnglish(x[1]) is False:
        rowCounter += 1
        del app_store[iterator]
    iterator += 1    
print ("\n\n")        
print ("There are " , rowCounter, " apps that are non-english")




There are  0  apps that are non-english


## Getting only the free apps from play store

In [102]:
free_apps_android = []
paid_android = []

for x in range(len(android_clean)) :
    if android_clean[x][6] == "Free" :
        free_apps_android.append(android_clean[x])
    else:
        paid_android.append(android_clean[x])
        
        
print ("There are " , len(free_apps_android) , " free apps on play store")
print ("There are " , len(paid_android) , " paid apps on play store")

There are  8862  free apps on play store
There are  751  paid apps on play store


## Getting only the free apps from play store

In [103]:
free_apps_ios = []
paid_ios = []

for x in range(len(app_store)) :
    if app_store[x][4] == "0.0" :
        free_apps_ios.append(app_store[x])
    else:
        paid_ios.append(app_store[x])
        
print ("There are " , len(free_apps_ios) , " free apps on app store")
print ("There are " , len(paid_ios) , " paid apps on app store")

There are  3222  free apps on app store
There are  2961  paid apps on app store


## We want to find an app profile that would suit both App Store and Google Play store so we could play on both markets and make use of the wider audience that comes from 2 biggest online app stores. 

In [104]:
print(app_store_header)

['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


In [105]:
print(play_store_header)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


## We want to choose columns to put into our frequency table to get the most common genres in each market. 

### App store : prime_genre

### Play store : category,genres 

# Creating frequency tables 

In [106]:
def freq_table(dataset, index):
    f_table = {}
    total = 0
    
    for row in dataset:
        total+=1
        value = row[index]
        if value in f_table:
            f_table[value] += 1
        else:
            f_table[value] = 1
    
    percentages = {}
    
    
    for key in f_table:
        percentage = (f_table[key] / total) * 100
        percentages[key] = percentage 
    
    return percentages

In [107]:
def display_table(dataset, index):
    table = freq_table(dataset, index)
    table_display = []
    for key in table:
        key_val_as_tuple = (table[key], key)
        table_display.append(key_val_as_tuple)

    table_sorted = sorted(table_display, reverse = True)
    for entry in table_sorted:
        print(entry[1], ':', entry[0])

In [108]:
display_table(free_apps_ios, -5)

Games : 58.16263190564867
Entertainment : 7.883302296710118
Photo & Video : 4.9658597144630665
Education : 3.662321539416512
Social Networking : 3.2898820608317814
Shopping : 2.60707635009311
Utilities : 2.5139664804469275
Sports : 2.1415270018621975
Music : 2.0484171322160147
Health & Fitness : 2.0173805090006205
Productivity : 1.7380509000620732
Lifestyle : 1.5828677839851024
News : 1.3345747982619491
Travel : 1.2414649286157666
Finance : 1.1173184357541899
Weather : 0.8690254500310366
Food & Drink : 0.8069522036002483
Reference : 0.5586592178770949
Business : 0.5276225946617008
Book : 0.4345127250155183
Navigation : 0.186219739292365
Medical : 0.186219739292365
Catalogs : 0.12414649286157665


### As it can be seen Games is the most common genre with a strong 55.65% majority, with the runner up being Entertainment with only 8.23% It can be seen though free time and leisure activities are the most frequenctly downloaded. The overall impression is that entertainment is the most popular genre in app store with majority of apps falling unde the Games column, but that does not mean that they have the biggest user base, that is just that they have a large offer.

## Getting the frequency table for Google Play store

### Based on Category

In [109]:
display_table(free_apps_android, 1)

FAMILY : 18.900925299029563
GAME : 9.726923944933423
TOOLS : 8.463100880162491
BUSINESS : 4.5926427443015125
LIFESTYLE : 3.9043105393816293
PRODUCTIVITY : 3.8930264048747465
FINANCE : 3.7011961182577298
MEDICAL : 3.5319341006544795
SPORTS : 3.39652448657188
PERSONALIZATION : 3.3175355450236967
COMMUNICATION : 3.238546603475513
HEALTH_AND_FITNESS : 3.080568720379147
PHOTOGRAPHY : 2.945159106296547
NEWS_AND_MAGAZINES : 2.798465357707064
SOCIAL : 2.663055743624464
TRAVEL_AND_LOCAL : 2.335815842924848
SHOPPING : 2.2455427668697814
BOOKS_AND_REFERENCE : 2.143985556307831
DATING : 1.8618821936357481
VIDEO_PLAYERS : 1.7941773865944481
MAPS_AND_NAVIGATION : 1.399232678853532
FOOD_AND_DRINK : 1.2412547957571656
EDUCATION : 1.162265854208982
ENTERTAINMENT : 0.9591514330850823
LIBRARIES_AND_DEMO : 0.9252990295644324
AUTO_AND_VEHICLES : 0.9252990295644324
HOUSE_AND_HOME : 0.8237418190024826
WEATHER : 0.8011735499887158
EVENTS : 0.7109004739336493
PARENTING : 0.6544798013992327
ART_AND_DESIGN : 0.6

## It can be seen that the most popular Category is Family with 18.9% and Game coming in second with 9.7% and Tools and Business coming after it with 8.5% and 4.6% respectively. Here it seems to be of a wider audience of categories, with a mix of free time, leisure and productivity purposes. 

### Based on Genres


In [110]:
display_table(free_apps_android, -4)

Tools : 8.451816745655607
Entertainment : 6.070864364703228
Education : 5.348679756262695
Business : 4.5926427443015125
Productivity : 3.8930264048747465
Lifestyle : 3.8930264048747465
Finance : 3.7011961182577298
Medical : 3.5319341006544795
Sports : 3.4642292936131795
Personalization : 3.3175355450236967
Communication : 3.238546603475513
Action : 3.1031369893929135
Health & Fitness : 3.080568720379147
Photography : 2.945159106296547
News & Magazines : 2.798465357707064
Social : 2.663055743624464
Travel & Local : 2.324531708417964
Shopping : 2.2455427668697814
Books & Reference : 2.143985556307831
Simulation : 2.0424283457458814
Dating : 1.8618821936357481
Arcade : 1.8505980591288649
Video Players & Editors : 1.7716091175806816
Casual : 1.7603249830737984
Maps & Navigation : 1.399232678853532
Food & Drink : 1.2412547957571656
Puzzle : 1.128413450688332
Racing : 0.9930038366057323
Role Playing : 0.9365831640713158
Libraries & Demo : 0.9252990295644324
Auto & Vehicles : 0.92529902956443

## It can be seen here that the most popular Genre is Tools with 8.5% and Entairtainment with 6.0% coming in close second and Education and Business following them with 5.3% and 4.6% respectively. Compared to App Store it seems to also have a wider varierty with practical purposes reigning over entertainment for the top spot, but not with too much of a percentage difference. Up to this point, we found that the App Store is dominated by apps designed for fun, while Google Play shows a more balanced landscape of both practical and for-fun apps. Now we'd like to get an idea about the kind of apps that have most users.

# Calculating the average number of user ratings per app genre, App Store

In [111]:
def bubbleSort(arr, name_arr):
    n = len(arr)
  
    # Traverse through all array elements
    for i in range(n-1):
    # range(n) also work but outer loop will repeat one time more than needed.
  
        # Last i elements are already in place
        for j in range(0, n-i-1):
  
            # traverse the array from 0 to n-i-1
            # Swap if the element found is greater
            # than the next element
            if arr[j] > arr[j + 1] :
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                name_arr[j], name_arr[j + 1] = name_arr[j + 1], name_arr[j]

In [112]:
app_genres = freq_table(free_apps_ios, -5)
values = []
names = []

for genre in app_genres.keys():
    total = 0
    len_genre = 0
   
    for genre_app in free_apps_ios:
        
        if genre_app[-5] == genre:
            num_user_ratings = float(genre_app[-11])
            total += num_user_ratings
            len_genre += 1
    average_total_lengenre = total / len_genre
    
    values.append(average_total_lengenre)
    names.append(genre)
    
bubbleSort(values, names)

for i in range(len(values)):
    print('{} : {}'.format(names[i], values[i]))
    

Medical : 612.0
Catalogs : 4004.0
Education : 7003.983050847458
Business : 7491.117647058823
Entertainment : 14029.830708661417
Lifestyle : 16485.764705882353
Utilities : 18684.456790123455
Productivity : 21028.410714285714
News : 21248.023255813954
Games : 22788.6696905016
Sports : 23008.898550724636
Health & Fitness : 23298.015384615384
Shopping : 26919.690476190477
Travel : 28243.8
Photo & Video : 28441.54375
Finance : 31467.944444444445
Food & Drink : 33333.92307692308
Book : 39758.5
Weather : 52279.892857142855
Music : 57326.530303030304
Social Networking : 71548.34905660378
Reference : 74942.11111111111
Navigation : 86090.33333333333


### Reference takes the top spot for most user ratings ( Reference apps include language translators, collaborative dictionaries, visual and computational search engines, and algebraic and scientific calculators. ), below we print some apps to get an idea what they contain

In [164]:
def getAppsOnCategory(applist, index, columnName, count):
    
    result = []
    
    counter = 0
    for app in applist:
        if app[index] == columnName and count > counter:
            result.append(app)
    return result
    

In [167]:
def getAppsOnCategoryNoReturn(applist, index, columnName, count):
    
    result = []
    
    counter = 0
    for app in applist:
        if app[index] == columnName and count > counter:
            result.append(app)
    for x in result:
        print('{} : {}'.format(x[1], x[5]))

In [152]:
def getTopAppsIOS(applist, index, columnName, count):
    
    result = []
    
    suitableApps = getAppsOnCategory(applist, index, columnName, 100000000)
    
    
    for app in suitableApps:
        indexCounter = 0
        if len(result) < count:
            result.append(app)
        else :
            for x in result:
                if app[5] > x[5]:
                    x.pop(indexCounter)
                    x.append(app)
                    break
                indexCounter += 1
                
    for x in result:
        print('{} : {}'.format(x[1], x[5]))

In [153]:
#getAppsOnCategory(free_apps_ios, -5, "Reference", 10)

### They take the top spot but we have to take into discussion that this also includes Google translate that has a very large user base

In [156]:
getTopAppsIOS(free_apps_ios, -5, "Reference", 1)

Bible : 985920


### We can see that the top apps is bible that skews the results, but it still shows potential that we could create another popular book, as we saw before that for fun apps are more popular in app store.

# Calculating the average number of user ratings per app genre, Play Store

In [162]:
app_genres = freq_table(free_apps_android, 1)
values = []
names = []

for genre in app_genres.keys():
    total = 0
    len_genre = 0
   
    for genre_app in free_apps_android:
        
        if genre_app[1] == genre:
            n_installs = genre_app[5]
            n_installs = n_installs.replace('+', '')
            n_installs = n_installs.replace(',', '')
            n_installs = float(n_installs)
            total += n_installs
            len_genre += 1
    average_total_lengenre = total / len_genre
    
    values.append(average_total_lengenre)
    names.append(genre)
    
bubbleSort(values, names)

for i in range(len(values)):
    print('{} : {}'.format(names[i], values[i]))
    

MEDICAL : 120550.61980830671
EVENTS : 253542.22222222222
BEAUTY : 513151.88679245283
PARENTING : 542603.6206896552
LIBRARIES_AND_DEMO : 646168.4146341464
AUTO_AND_VEHICLES : 647317.8170731707
COMICS : 817657.2727272727
DATING : 854028.8303030303
HOUSE_AND_HOME : 1331540.5616438356
FINANCE : 1387692.475609756
LIFESTYLE : 1437816.2687861272
BUSINESS : 1712290.1474201474
EDUCATION : 1833495.145631068
FOOD_AND_DRINK : 1924897.7363636363
ART_AND_DESIGN : 1986335.0877192982
SPORTS : 3638640.1428571427
FAMILY : 3697848.1731343283
MAPS_AND_NAVIGATION : 4056941.7741935486
HEALTH_AND_FITNESS : 4188821.9853479853
WEATHER : 5074486.197183099
PERSONALIZATION : 5201482.6122448975
SHOPPING : 7036877.311557789
BOOKS_AND_REFERENCE : 8767811.894736841
NEWS_AND_MAGAZINES : 9549178.467741935
TOOLS : 10801391.298666667
ENTERTAINMENT : 11640705.88235294
TRAVEL_AND_LOCAL : 13984077.710144928
GAME : 15588015.603248259
PRODUCTIVITY : 16787331.344927534
PHOTOGRAPHY : 17840110.40229885
SOCIAL : 23253652.12711864

### We can see that communication takes the top spot so lets investigate that closer

In [170]:
for app in free_apps_android:
    if app[1] == 'COMMUNICATION' and (app[5] == '1,000,000,000+'
                                      or app[5] == '500,000,000+'
                                      or app[5] == '100,000,000+'):
        print(app[0], ':', app[5])

WhatsApp Messenger : 1,000,000,000+
imo beta free calls and text : 100,000,000+
Android Messages : 100,000,000+
Google Duo - High Quality Video Calls : 500,000,000+
Messenger – Text and Video Chat for Free : 1,000,000,000+
imo free video calls and chat : 500,000,000+
Skype - free IM & video calls : 1,000,000,000+
Who : 100,000,000+
GO SMS Pro - Messenger, Free Themes, Emoji : 100,000,000+
LINE: Free Calls & Messages : 500,000,000+
Google Chrome: Fast & Secure : 1,000,000,000+
Firefox Browser fast & private : 100,000,000+
UC Browser - Fast Download Private & Secure : 500,000,000+
Gmail : 1,000,000,000+
Hangouts : 1,000,000,000+
Messenger Lite: Free Calls & Messages : 100,000,000+
Kik : 100,000,000+
KakaoTalk: Free Calls & Text : 100,000,000+
Opera Mini - fast web browser : 100,000,000+
Opera Browser: Fast and Secure : 100,000,000+
Telegram : 100,000,000+
Truecaller: Caller ID, SMS spam blocking & Dialer : 100,000,000+
UC Browser Mini -Tiny Fast Private & Secure : 100,000,000+
Viber Mess

### As we can see the results are heavily skewed by very highly popular apps such as whatsapp, messenger, skype, gmail, hangouts that have over a billion downloads

### However, if we decided to remove all communication apps over 100M downloads we get a better average

In [171]:
under_100_m = []

for app in free_apps_android:
    n_installs = app[5]
    n_installs = n_installs.replace(',', '')
    n_installs = n_installs.replace('+', '')
    if (app[1] == 'COMMUNICATION') and (float(n_installs) < 100000000):
        under_100_m.append(float(n_installs))
        
sum(under_100_m) / len(under_100_m)

3603485.3884615386

### We can see that the average was reduced by roughly ten times, and a similar pattern would follow for video players category. There the biggest apps are such of the likes of YouTube, Google PLay Movies & TV and MX player. The same pattern follows for social apps (Facebook, Instagram), photography apps (Google photos) or producutvity apps (Microsoft word, dropbox)

### This makes it seem like the app genres are more popular than they really are, but rather there are just a few bigger ones that skew the results

### The books and reference seems popular as well that is not too saturated and also has a potential market on app store

In [173]:

for app in free_apps_android:
    if app[1] == 'BOOKS_AND_REFERENCE':
        print(app[0], ':', app[5])

E-Book Read - Read Book for free : 50,000+
Download free book with green book : 100,000+
Wikipedia : 10,000,000+
Cool Reader : 10,000,000+
Free Panda Radio Music : 100,000+
Book store : 1,000,000+
FBReader: Favorite Book Reader : 10,000,000+
English Grammar Complete Handbook : 500,000+
Free Books - Spirit Fanfiction and Stories : 1,000,000+
Google Play Books : 1,000,000,000+
AlReader -any text book reader : 5,000,000+
Offline English Dictionary : 100,000+
Offline: English to Tagalog Dictionary : 500,000+
FamilySearch Tree : 1,000,000+
Cloud of Books : 1,000,000+
Recipes of Prophetic Medicine for free : 500,000+
ReadEra – free ebook reader : 1,000,000+
Anonymous caller detection : 10,000+
Ebook Reader : 5,000,000+
Litnet - E-books : 100,000+
Read books online : 5,000,000+
English to Urdu Dictionary : 500,000+
eBoox: book reader fb2 epub zip : 1,000,000+
English Persian Dictionary : 500,000+
Flybook : 500,000+
All Maths Formulas : 1,000,000+
Ancestry : 5,000,000+
HTC Help : 10,000,000+
E

### There are a few apps that are a lot more popular and skew out the results, but there are only a few and it has a great variety of different books such as software books, religous books, learning books etc. 

In [175]:

for app in free_apps_android:
    if app[1] == 'BOOKS_AND_REFERENCE' and (app[5] == '1,000,000,000+'
                                            or app[5] == '500,000,000+'
                                            or app[5] == '100,000,000+'):
        print(app[0], ':', app[5])

Google Play Books : 1,000,000,000+
Bible : 100,000,000+
Amazon Kindle : 100,000,000+
Wattpad 📖 Free Books : 100,000,000+
Audiobooks from Audible : 100,000,000+


In [176]:
for app in free_apps_android:
    if app[1] == 'BOOKS_AND_REFERENCE' and (app[5] == '1,000,000+'
                                            or app[5] == '5,000,000+'
                                            or app[5] == '10,000,000+'
                                            or app[5] == '50,000,000+'):
        print(app[0], ':', app[5])

Wikipedia : 10,000,000+
Cool Reader : 10,000,000+
Book store : 1,000,000+
FBReader: Favorite Book Reader : 10,000,000+
Free Books - Spirit Fanfiction and Stories : 1,000,000+
AlReader -any text book reader : 5,000,000+
FamilySearch Tree : 1,000,000+
Cloud of Books : 1,000,000+
ReadEra – free ebook reader : 1,000,000+
Ebook Reader : 5,000,000+
Read books online : 5,000,000+
eBoox: book reader fb2 epub zip : 1,000,000+
All Maths Formulas : 1,000,000+
Ancestry : 5,000,000+
HTC Help : 10,000,000+
Moon+ Reader : 10,000,000+
English-Myanmar Dictionary : 1,000,000+
Golden Dictionary (EN-AR) : 1,000,000+
All Language Translator Free : 1,000,000+
Aldiko Book Reader : 10,000,000+
Dictionary - WordWeb : 5,000,000+
50000 Free eBooks & Free AudioBooks : 5,000,000+
Al-Quran (Free) : 10,000,000+
Al Quran Indonesia : 10,000,000+
Al'Quran Bahasa Indonesia : 10,000,000+
Al Quran Al karim : 1,000,000+
Al Quran : EAlim - Translations & MP3 Offline : 5,000,000+
Koran Read &MP3 30 Juz Offline : 1,000,000+
H

### There are quite a few books about the Quran, that suggest that uploading an app about a popular book