# App Store Analysis Project

## Import Libraries

In [7]:
import pandas as pd
import numpy as np

## Open and Explore Datasets

Read datasets for Apple Store and Google Play Store

In [8]:
apple = pd.read_csv('AppleStore.csv')
google = pd.read_csv('googleplaystore.csv')

Dataset Exploration
- Shape
- Headings
- Datatypes

In [10]:
def explore(dataset, title):
    print(title)
    print(dataset.shape)
    print(dataset.columns)
    print(dataset.dtypes)

Explore Apple Store Dataset

In [11]:
explore(apple, 'Apple')

Apple
(7197, 17)
Index(['Unnamed: 0', 'id', 'track_name', 'size_bytes', 'currency', 'price',
       'rating_count_tot', 'rating_count_ver', 'user_rating',
       'user_rating_ver', 'ver', 'cont_rating', 'prime_genre',
       'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic'],
      dtype='object')
Unnamed: 0            int64
id                    int64
track_name           object
size_bytes            int64
currency             object
price               float64
rating_count_tot      int64
rating_count_ver      int64
user_rating         float64
user_rating_ver     float64
ver                  object
cont_rating          object
prime_genre          object
sup_devices.num       int64
ipadSc_urls.num       int64
lang.num              int64
vpp_lic               int64
dtype: object


In [21]:
apple.head()

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
0,281656475,PAC-MAN Premium,100788224,USD,3.99,21292,26,4.0,4.5,6.3.5,4+,Games,38,5,10,1
1,281796108,Evernote - stay organized,158578688,USD,0.0,161065,26,4.0,3.5,8.2.2,4+,Productivity,37,5,23,1
2,281940292,"WeatherBug - Local Weather, Radar, Maps, Alerts",100524032,USD,0.0,188583,2822,3.5,4.5,5.0.0,4+,Weather,37,5,3,1
3,282614216,"eBay: Best App to Buy, Sell, Save! Online Shop...",128512000,USD,0.0,262241,649,4.0,4.5,5.10.0,12+,Shopping,37,5,9,1
4,282935706,Bible,92774400,USD,0.0,985920,5320,4.5,5.0,7.5.1,4+,Reference,37,5,45,1


Remove row # column

In [20]:
apple.drop("Unnamed: 0", axis=1, inplace=True)

Unique Values in Categorical Columns

In [23]:
apple_cat = ['currency','cont_rating','prime_genre']
for cat in apple_cat:
    print(cat)
    print(apple[cat].unique())

currency
['USD']
cont_rating
['4+' '12+' '17+' '9+']
prime_genre
['Games' 'Productivity' 'Weather' 'Shopping' 'Reference' 'Finance' 'Music'
 'Utilities' 'Travel' 'Social Networking' 'Sports' 'Business'
 'Health & Fitness' 'Entertainment' 'Photo & Video' 'Navigation'
 'Education' 'Lifestyle' 'Food & Drink' 'News' 'Book' 'Medical' 'Catalogs']


Explore Google Play Store Dataset

In [12]:
explore(google, 'Google Play')

Google Play
(10841, 13)
Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')
App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object


In [14]:
google.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


Convert datatypes for Reviews, Price

In [55]:
google['Reviews'] = google['Reviews'].astype(str).astype(int)

In [66]:
google['Price'] = google['Price'].str.replace('$','').astype(float)

Updated Column Datatypes

In [68]:
google.dtypes

App                object
Category           object
Rating            float64
Reviews             int32
Size               object
Installs           object
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

Drop row 10472 due to unmatching columns

In [27]:
google.loc[google['Price'] == 'Everyone']

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,19.0,3.0M,"1,000+",Free,0,Everyone,,"February 11, 2018",1.0.19,4.0 and up,


In [35]:
i = google[(google.App == 'Life Made WI-Fi Touchscreen Photo Frame')].index

In [38]:
google.drop(i, axis=0, inplace=True)

Updated Google Play Dataset Dimensions

In [39]:
google.shape

(10840, 13)

Unique Variables in Categorical Columns

In [69]:
google_cat = ['Category','Type','Content Rating','Genres']
for cat in google_cat:
    print(cat)
    print(google[cat].unique())

Category
['ART_AND_DESIGN' 'AUTO_AND_VEHICLES' 'BEAUTY' 'BOOKS_AND_REFERENCE'
 'BUSINESS' 'COMICS' 'COMMUNICATION' 'DATING' 'EDUCATION' 'ENTERTAINMENT'
 'EVENTS' 'FINANCE' 'FOOD_AND_DRINK' 'HEALTH_AND_FITNESS' 'HOUSE_AND_HOME'
 'LIBRARIES_AND_DEMO' 'LIFESTYLE' 'GAME' 'FAMILY' 'MEDICAL' 'SOCIAL'
 'SHOPPING' 'PHOTOGRAPHY' 'SPORTS' 'TRAVEL_AND_LOCAL' 'TOOLS'
 'PERSONALIZATION' 'PRODUCTIVITY' 'PARENTING' 'WEATHER' 'VIDEO_PLAYERS'
 'NEWS_AND_MAGAZINES' 'MAPS_AND_NAVIGATION']
Type
['Free' 'Paid' nan]
Content Rating
['Everyone' 'Teen' 'Everyone 10+' 'Mature 17+' 'Adults only 18+' 'Unrated']
Genres
['Art & Design' 'Art & Design;Pretend Play' 'Art & Design;Creativity'
 'Art & Design;Action & Adventure' 'Auto & Vehicles' 'Beauty'
 'Books & Reference' 'Business' 'Comics' 'Comics;Creativity'
 'Communication' 'Dating' 'Education;Education' 'Education'
 'Education;Creativity' 'Education;Music & Video'
 'Education;Action & Adventure' 'Education;Pretend Play'
 'Education;Brain Games' 'Entertainment' '

Find row with null Type and fill cell

In [70]:
google[google['Type'].isnull()]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
9148,Command & Conquer: Rivals,FAMILY,,0,Varies with device,0,,0.0,Everyone 10+,Strategy,"June 28, 2018",Varies with device,Varies with device


In [72]:
google.at[9148, 'Type'] = 'Free'

In [73]:
google.loc[9148]

App               Command & Conquer: Rivals
Category                             FAMILY
Rating                                  NaN
Reviews                                   0
Size                     Varies with device
Installs                                  0
Type                                   Free
Price                                   0.0
Content Rating                 Everyone 10+
Genres                             Strategy
Last Updated                  June 28, 2018
Current Ver              Varies with device
Android Ver              Varies with device
Name: 9148, dtype: object

## Find and Remove Duplicates

Apple Store Dataset

In [78]:
apple.duplicated(subset = ['id']).any()

False

In [77]:
apple.duplicated(subset = ['track_name']).any()

True

In [80]:
apple.duplicated(subset=['track_name']).sum()

2

In [81]:
apple.loc[apple.duplicated(subset=['track_name']) == True]

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
5603,1089824278,VR Roller Coaster,240964608,USD,0.0,67,44,3.5,4.0,0.81,4+,Games,38,0,1,1
7128,1178454060,Mannequin Challenge,59572224,USD,0.0,105,58,4.0,4.5,1.0.1,4+,Games,38,5,1,1


In [82]:
apple.loc[apple['track_name'] == 'VR Roller Coaster']

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
3319,952877179,VR Roller Coaster,169523200,USD,0.0,107,102,3.5,3.5,2.0.0,4+,Games,37,5,1,1
5603,1089824278,VR Roller Coaster,240964608,USD,0.0,67,44,3.5,4.0,0.81,4+,Games,38,0,1,1


In [83]:
apple.loc[apple['track_name'] == 'Mannequin Challenge']

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
7092,1173990889,Mannequin Challenge,109705216,USD,0.0,668,87,3.0,3.0,1.4,9+,Games,37,4,1,1
7128,1178454060,Mannequin Challenge,59572224,USD,0.0,105,58,4.0,4.5,1.0.1,4+,Games,38,5,1,1


Google Play Dataset

In [85]:
google.duplicated().sum()

483

In [87]:
google.loc[google.duplicated() == True]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
229,Quick PDF Scanner + OCR FREE,BUSINESS,4.2,80805,Varies with device,"5,000,000+",Free,0.0,Everyone,Business,"February 26, 2018",Varies with device,4.0.3 and up
236,Box,BUSINESS,4.2,159872,Varies with device,"10,000,000+",Free,0.0,Everyone,Business,"July 31, 2018",Varies with device,Varies with device
239,Google My Business,BUSINESS,4.4,70991,Varies with device,"5,000,000+",Free,0.0,Everyone,Business,"July 24, 2018",2.19.0.204537701,4.4 and up
256,ZOOM Cloud Meetings,BUSINESS,4.4,31614,37M,"10,000,000+",Free,0.0,Everyone,Business,"July 20, 2018",4.1.28165.0716,4.0 and up
261,join.me - Simple Meetings,BUSINESS,4.0,6989,Varies with device,"1,000,000+",Free,0.0,Everyone,Business,"July 16, 2018",4.3.0.508,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8643,Wunderlist: To-Do List & Tasks,PRODUCTIVITY,4.6,404610,Varies with device,"10,000,000+",Free,0.0,Everyone,Productivity,"April 6, 2018",Varies with device,Varies with device
8654,"TickTick: To Do List with Reminder, Day Planner",PRODUCTIVITY,4.6,25370,Varies with device,"1,000,000+",Free,0.0,Everyone,Productivity,"August 6, 2018",Varies with device,Varies with device
8658,ColorNote Notepad Notes,PRODUCTIVITY,4.6,2401017,Varies with device,"100,000,000+",Free,0.0,Everyone,Productivity,"June 27, 2018",Varies with device,Varies with device
10049,Airway Ex - Intubate. Anesthetize. Train.,MEDICAL,4.3,123,86M,"10,000+",Free,0.0,Everyone,Medical,"June 1, 2018",0.6.88,5.0 and up


In [90]:
google.drop_duplicates(inplace=True)

In [91]:
google.duplicated().sum()

0

In [93]:
google.duplicated(subset=['App']).sum()

698

In [94]:
google.loc[google.duplicated(subset=['App']) == True]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
285,Quick PDF Scanner + OCR FREE,BUSINESS,4.2,80804,Varies with device,"5,000,000+",Free,0.0,Everyone,Business,"February 26, 2018",Varies with device,4.0.3 and up
293,OfficeSuite : Free Office + PDF Editor,BUSINESS,4.3,1002859,35M,"100,000,000+",Free,0.0,Everyone,Business,"August 2, 2018",9.7.14188,4.1 and up
294,Slack,BUSINESS,4.4,51510,Varies with device,"5,000,000+",Free,0.0,Everyone,Business,"August 2, 2018",Varies with device,Varies with device
382,Messenger – Text and Video Chat for Free,COMMUNICATION,4.0,56646578,Varies with device,"1,000,000,000+",Free,0.0,Everyone,Communication,"August 1, 2018",Varies with device,Varies with device
383,imo free video calls and chat,COMMUNICATION,4.3,4785988,11M,"500,000,000+",Free,0.0,Everyone,Communication,"June 8, 2018",9.8.000000010501,4.0 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10647,Motorola FM Radio,VIDEO_PLAYERS,3.9,54815,Varies with device,"100,000,000+",Free,0.0,Everyone,Video Players & Editors,"May 2, 2018",Varies with device,Varies with device
10715,FarmersOnly Dating,DATING,3.0,1145,1.4M,"100,000+",Free,0.0,Mature 17+,Dating,"February 25, 2016",2.2,4.0 and up
10720,Firefox Focus: The privacy browser,COMMUNICATION,4.4,36981,4.0M,"1,000,000+",Free,0.0,Everyone,Communication,"July 6, 2018",5.2,5.0 and up
10730,FP Notebook,MEDICAL,4.5,410,60M,"50,000+",Free,0.0,Everyone,Medical,"March 24, 2018",2.1.0.372,4.4 and up


In [101]:
google.loc[google['App'] == 'Slack']

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
240,Slack,BUSINESS,4.4,51507,Varies with device,"5,000,000+",Free,0.0,Everyone,Business,"August 2, 2018",Varies with device,Varies with device
294,Slack,BUSINESS,4.4,51510,Varies with device,"5,000,000+",Free,0.0,Everyone,Business,"August 2, 2018",Varies with device,Varies with device


In [102]:
google.drop_duplicates(subset=['App'], inplace=True)

In [103]:
google.duplicated(subset=['App']).sum()

0

## Separate Free Apps

In [106]:
apple_free = apple.loc[apple['price'] == 0]

In [107]:
apple_free.head()

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
1,281796108,Evernote - stay organized,158578688,USD,0.0,161065,26,4.0,3.5,8.2.2,4+,Productivity,37,5,23,1
2,281940292,"WeatherBug - Local Weather, Radar, Maps, Alerts",100524032,USD,0.0,188583,2822,3.5,4.5,5.0.0,4+,Weather,37,5,3,1
3,282614216,"eBay: Best App to Buy, Sell, Save! Online Shop...",128512000,USD,0.0,262241,649,4.0,4.5,5.10.0,12+,Shopping,37,5,9,1
4,282935706,Bible,92774400,USD,0.0,985920,5320,4.5,5.0,7.5.1,4+,Reference,37,5,45,1
6,283646709,PayPal - Send and request money safely,227795968,USD,0.0,119487,879,4.0,4.5,6.12.0,4+,Finance,37,0,19,1


In [111]:
apple_free.shape

(4056, 16)

In [112]:
google_free = google.loc[google['Price'] == 0]

In [113]:
google_free.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [114]:
google_free.shape

(8903, 13)

# Frequencies by Genre

Apple Free Dataset

In [119]:
apple_free['prime_genre'].value_counts()/len(apple_free['prime_genre'])

prime_genre
Games                0.556460
Entertainment        0.082347
Photo & Video        0.041174
Social Networking    0.035256
Education            0.032544
Shopping             0.029832
Utilities            0.026874
Lifestyle            0.023176
Finance              0.020710
Sports               0.019477
Health & Fitness     0.018738
Music                0.016519
Book                 0.016272
Productivity         0.015286
News                 0.014300
Travel               0.013807
Food & Drink         0.010602
Weather              0.007643
Business             0.004931
Reference            0.004931
Navigation           0.004931
Catalogs             0.002219
Medical              0.001972
Name: count, dtype: float64

Google Play Free Dataset

In [121]:
google_free['Category'].value_counts()/len(google_free['Category'])

Category
FAMILY                 0.185218
GAME                   0.098506
TOOLS                  0.084129
BUSINESS               0.045827
LIFESTYLE              0.039313
PRODUCTIVITY           0.038863
FINANCE                0.036842
MEDICAL                0.035044
SPORTS                 0.033809
PERSONALIZATION        0.033135
COMMUNICATION          0.032349
HEALTH_AND_FITNESS     0.030664
PHOTOGRAPHY            0.029428
NEWS_AND_MAGAZINES     0.028305
SOCIAL                 0.026508
TRAVEL_AND_LOCAL       0.023251
SHOPPING               0.022464
BOOKS_AND_REFERENCE    0.021790
DATING                 0.018533
VIDEO_PLAYERS          0.017859
MAPS_AND_NAVIGATION    0.014153
EDUCATION              0.012917
FOOD_AND_DRINK         0.012355
ENTERTAINMENT          0.011232
LIBRARIES_AND_DEMO     0.009323
AUTO_AND_VEHICLES      0.009210
HOUSE_AND_HOME         0.008312
WEATHER                0.007975
EVENTS                 0.007076
ART_AND_DESIGN         0.006852
PARENTING              0.006515

In [123]:
google_free['Genres'].value_counts()/len(google_free['Genres'])

Genres
Tools                          0.084017
Entertainment                  0.060878
Education                      0.053914
Business                       0.045827
Lifestyle                      0.039200
                                 ...   
Strategy;Action & Adventure    0.000112
Art & Design;Pretend Play      0.000112
Arcade;Pretend Play            0.000112
Entertainment;Education        0.000112
Strategy;Creativity            0.000112
Name: count, Length: 114, dtype: float64

Apple Dataset

In [None]:
apple['prime_genre'].value_counts()/len(apple['prime_genre'])

prime_genre
Games                0.536612
Entertainment        0.074337
Education            0.062943
Photo & Video        0.048492
Utilities            0.034459
Health & Fitness     0.025010
Productivity         0.024733
Social Networking    0.023204
Lifestyle            0.020008
Music                0.019175
Shopping             0.016952
Sports               0.015840
Book                 0.015562
Finance              0.014450
Travel               0.011255
News                 0.010421
Weather              0.010004
Reference            0.008893
Food & Drink         0.008754
Business             0.007920
Navigation           0.006392
Medical              0.003196
Catalogs             0.001389
Name: count, dtype: float64

Google Play Dataset

In [122]:
google['Category'].value_counts()/len(google['Category'])

Category
FAMILY                 0.189668
GAME                   0.099286
TOOLS                  0.085620
BUSINESS               0.043483
MEDICAL                0.040895
PERSONALIZATION        0.038927
PRODUCTIVITY           0.038720
LIFESTYLE              0.038203
FINANCE                0.035718
SPORTS                 0.033647
COMMUNICATION          0.032612
HEALTH_AND_FITNESS     0.029817
PHOTOGRAPHY            0.029092
NEWS_AND_MAGAZINES     0.026297
SOCIAL                 0.024744
BOOKS_AND_REFERENCE    0.022984
TRAVEL_AND_LOCAL       0.022673
SHOPPING               0.020913
DATING                 0.017704
VIDEO_PLAYERS          0.016875
MAPS_AND_NAVIGATION    0.013562
EDUCATION              0.012320
FOOD_AND_DRINK         0.011595
ENTERTAINMENT          0.010560
AUTO_AND_VEHICLES      0.008800
LIBRARIES_AND_DEMO     0.008697
WEATHER                0.008179
HOUSE_AND_HOME         0.007661
EVENTS                 0.006626
ART_AND_DESIGN         0.006626
PARENTING              0.006212

In [124]:
google['Genres'].value_counts()/len(google['Genres'])

Genres
Tools                              0.085516
Entertainment                      0.058081
Education                          0.052800
Business                           0.043483
Medical                            0.040895
                                     ...   
Art & Design;Pretend Play          0.000104
Lifestyle;Pretend Play             0.000104
Comics;Creativity                  0.000104
Art & Design;Action & Adventure    0.000104
Strategy;Creativity                0.000104
Name: count, Length: 118, dtype: float64

## Top Apps By Genre

In [129]:
apple['prime_genre'].unique()

array(['Games', 'Productivity', 'Weather', 'Shopping', 'Reference',
       'Finance', 'Music', 'Utilities', 'Travel', 'Social Networking',
       'Sports', 'Business', 'Health & Fitness', 'Entertainment',
       'Photo & Video', 'Navigation', 'Education', 'Lifestyle',
       'Food & Drink', 'News', 'Book', 'Medical', 'Catalogs'],
      dtype=object)

In [138]:
google['Category'].unique()

array(['ART_AND_DESIGN', 'AUTO_AND_VEHICLES', 'BEAUTY',
       'BOOKS_AND_REFERENCE', 'BUSINESS', 'COMICS', 'COMMUNICATION',
       'DATING', 'EDUCATION', 'ENTERTAINMENT', 'EVENTS', 'FINANCE',
       'FOOD_AND_DRINK', 'HEALTH_AND_FITNESS', 'HOUSE_AND_HOME',
       'LIBRARIES_AND_DEMO', 'LIFESTYLE', 'GAME', 'FAMILY', 'MEDICAL',
       'SOCIAL', 'SHOPPING', 'PHOTOGRAPHY', 'SPORTS', 'TRAVEL_AND_LOCAL',
       'TOOLS', 'PERSONALIZATION', 'PRODUCTIVITY', 'PARENTING', 'WEATHER',
       'VIDEO_PLAYERS', 'NEWS_AND_MAGAZINES', 'MAPS_AND_NAVIGATION'],
      dtype=object)

Order Apple Store  By Number of Reviews

In [141]:
apple_sorted = apple.sort_values('rating_count_tot', ascending=False)

In [142]:
apple_sorted.head()

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
16,284882215,Facebook,389879808,USD,0.0,2974676,212,3.5,3.5,95.0,4+,Social Networking,37,1,29,1
519,389801252,Instagram,113954816,USD,0.0,2161558,1289,4.5,4.0,10.23,12+,Photo & Video,37,0,29,1
1346,529479190,Clash of Clans,116476928,USD,0.0,2130805,579,4.5,4.5,9.24.12,9+,Games,38,5,18,1
707,420009108,Temple Run,65921024,USD,0.0,1724546,3842,4.5,4.0,1.6.2,9+,Games,40,5,1,1
7,284035177,Pandora - Music & Radio,130242560,USD,0.0,1126879,3594,4.0,4.5,8.4.1,12+,Music,37,4,1,1


Extract Top App For Each Genre and Save in New DataFrame

In [157]:
apple_top = pd.DataFrame()

In [158]:
apple_top['Genre'] = apple['prime_genre'].unique()

In [165]:
apple_top['App'] = ''
apple_top['Reviews'] = ''

In [171]:
for genre in apple_top['Genre']:
    apple_top.loc[apple_top['Genre'] == genre, 'App'] = apple_sorted.loc[apple_sorted['prime_genre'] == genre, 'track_name'].iloc[0]
    apple_top.loc[apple_top['Genre'] == genre, 'Reviews'] = apple_sorted.loc[apple_sorted['prime_genre'] == genre, 'rating_count_tot'].iloc[0]

In [172]:
apple_top

Unnamed: 0,Genre,App,Reviews
0,Games,Clash of Clans,2130805
1,Productivity,Evernote - stay organized,161065
2,Weather,"The Weather Channel: Forecast, Radar & Alerts",495626
3,Shopping,"Groupon - Deals, Coupons & Discount Shopping App",417779
4,Reference,Bible,985920
5,Finance,Chase Mobile℠,233270
6,Music,Pandora - Music & Radio,1126879
7,Utilities,Google – Search made just for mobile,479440
8,Travel,Google Earth,446185
9,Social Networking,Facebook,2974676


Order Google Play Dataset by Number of Installs

In [178]:
google_sorted = google

In [188]:
google_sorted['Installs'] = google_sorted['Installs'].str.replace('+','')
google_sorted['Installs'] = google_sorted['Installs'].str.replace(',','').astype(int)

In [195]:
google_sorted.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
2544,Facebook,SOCIAL,4.1,78158306,Varies with device,1000000000,Free,0.0,Teen,Social,"August 3, 2018",Varies with device,Varies with device
336,WhatsApp Messenger,COMMUNICATION,4.4,69119316,Varies with device,1000000000,Free,0.0,Everyone,Communication,"August 3, 2018",Varies with device,Varies with device
2545,Instagram,SOCIAL,4.5,66577313,Varies with device,1000000000,Free,0.0,Teen,Social,"July 31, 2018",Varies with device,Varies with device
335,Messenger – Text and Video Chat for Free,COMMUNICATION,4.0,56642847,Varies with device,1000000000,Free,0.0,Everyone,Communication,"August 1, 2018",Varies with device,Varies with device
1654,Subway Surfers,GAME,4.5,27722264,76M,1000000000,Free,0.0,Everyone 10+,Arcade,"July 12, 2018",1.90.0,4.1 and up


In [194]:
google_sorted = google_sorted.sort_values(['Installs','Reviews'], ascending=False)

Extract Top App by Category and Save Into New DataFrame

In [211]:
google_top = pd.DataFrame()

In [212]:
google_top['cat'] = google['Category'].unique()

In [226]:
google_top['app_name'] = ''
google_top['num_installs'] = ''
google_top['num_reviews'] = ''

In [227]:
for category in google_top['cat']:
    google_top.loc[google_top['cat'] == category, 'app_name'] = google_sorted.loc[google_sorted['Category'] == category, 'App'].iloc[0]
    google_top.loc[google_top['cat'] == category, 'num_installs'] = google_sorted.loc[google_sorted['Category'] == category, 'Installs'].iloc[0]
    google_top.loc[google_top['cat'] == category, 'num_reviews'] = google_sorted.loc[google_sorted['Category'] == category, 'Reviews'].iloc[0]

In [228]:
google_top

Unnamed: 0,cat,app_name,num_installs,num_reviews
0,ART_AND_DESIGN,Sketch - Draw & Paint,50000000,215644
1,AUTO_AND_VEHICLES,"Android Auto - Maps, Media, Messaging & Voice",10000000,271920
2,BEAUTY,Beauty Camera - Selfie Camera,10000000,113715
3,BOOKS_AND_REFERENCE,Google Play Books,1000000000,1433233
4,BUSINESS,OfficeSuite : Free Office + PDF Editor,100000000,1002861
5,COMICS,LINE WEBTOON - Free Comics,10000000,1013635
6,COMMUNICATION,WhatsApp Messenger,1000000000,69119316
7,DATING,Zoosk Dating App: Meet Singles,10000000,516801
8,EDUCATION,Duolingo: Learn Languages Free,100000000,6289924
9,ENTERTAINMENT,Google Play Games,1000000000,7165362
