In [1]:
# Import Dependencies
import pandas as pd
import os

In [2]:
# Read in Apple store data
apple = pd.read_csv('Raw_Data/Apple/AppleStore.csv')
apple.head()

Unnamed: 0.1,Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
0,1,281656475,PAC-MAN Premium,100788224,USD,3.99,21292,26,4.0,4.5,6.3.5,4+,Games,38,5,10,1
1,2,281796108,Evernote - stay organized,158578688,USD,0.0,161065,26,4.0,3.5,8.2.2,4+,Productivity,37,5,23,1
2,3,281940292,"WeatherBug - Local Weather, Radar, Maps, Alerts",100524032,USD,0.0,188583,2822,3.5,4.5,5.0.0,4+,Weather,37,5,3,1
3,4,282614216,"eBay: Best App to Buy, Sell, Save! Online Shop...",128512000,USD,0.0,262241,649,4.0,4.5,5.10.0,12+,Shopping,37,5,9,1
4,5,282935706,Bible,92774400,USD,0.0,985920,5320,4.5,5.0,7.5.1,4+,Reference,37,5,45,1


In [3]:
# Read in Play Store data
google = pd.read_csv('Raw_Data/Google/googleplaystore.csv')
google.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [4]:
# Rename Apple columns
a_cols = ['track_name', 'size_bytes', 'price', 'user_rating', 'cont_rating', 'prime_genre']
apple = apple[a_cols]
apple = apple.rename(columns = {'track_name':'a_name', 
                                'size_bytes':'a_size_bytes', 
                                'price':'a_price', 
                                'user_rating':'a_user_rating', 
                                'cont_rating':'a_content_rating', 
                                'prime_genre':'a_category'})
apple.head()

Unnamed: 0,a_name,a_size_bytes,a_price,a_user_rating,a_content_rating,a_category
0,PAC-MAN Premium,100788224,3.99,4.0,4+,Games
1,Evernote - stay organized,158578688,0.0,4.0,4+,Productivity
2,"WeatherBug - Local Weather, Radar, Maps, Alerts",100524032,0.0,3.5,4+,Weather
3,"eBay: Best App to Buy, Sell, Save! Online Shop...",128512000,0.0,4.0,12+,Shopping
4,Bible,92774400,0.0,4.5,4+,Reference


In [5]:
google.loc[google.g_name.str.contains('eBay')]

AttributeError: 'DataFrame' object has no attribute 'g_name'

In [6]:
# Rename Google columns
g_cols = ['App', 'Size', 'Price', 'Rating', 'Content Rating', 'Category']
google = google[g_cols]
google = google.rename(columns = {'App':'g_name', 
                        'Size':'g_size_mb', 
                        'Price':'g_price', 
                        'Rating':'g_user_rating', 
                        'Content Rating':'g_content_rating', 
                        'Category':'g_category'})
google.head()

Unnamed: 0,g_name,g_size_mb,g_price,g_user_rating,g_content_rating,g_category
0,Photo Editor & Candy Camera & Grid & ScrapBook,19M,0,4.1,Everyone,ART_AND_DESIGN
1,Coloring book moana,14M,0,3.9,Everyone,ART_AND_DESIGN
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",8.7M,0,4.7,Everyone,ART_AND_DESIGN
3,Sketch - Draw & Paint,25M,0,4.5,Teen,ART_AND_DESIGN
4,Pixel Draw - Number Art Coloring Book,2.8M,0,4.3,Everyone,ART_AND_DESIGN


In [7]:
# Drop duplicates
google = google.drop_duplicates(subset = 'g_name')
apple = apple.drop_duplicates(subset = 'a_name')

In [8]:
# Remove apps that do not appear in both datasets
apple_name = apple.a_name
apple.rename(columns = {'a_name':'name'})

google_name = google.g_name
google.rename(columns = {'g_name':'name'})


apps_all = pd.merge(apple, google, on='name', how='left', indicator=True)
apps_all

KeyError: 'name'

In [9]:
# Convert Apple app size from bytes to megabytes
a_size_mb = []

for byte in list(apple.a_size_bytes):
    megabyte = byte / 1024 / 1024
    a_size_mb.append(megabyte)

apple.a_size_bytes = a_size_mb
apple = apple.rename(columns = {'a_size_bytes':'a_size_mb'})

In [12]:
# Remove 'M' from Google app sizes
google.g_size_mb = google.g_size_mb.str[:-1]

Unnamed: 0,g_name,g_size_mb,g_price,g_user_rating,g_content_rating,g_category
0,Photo Editor & Candy Camera & Grid & ScrapBook,19,0,4.1,Everyone,ART_AND_DESIGN
1,Coloring book moana,14,0,3.9,Everyone,ART_AND_DESIGN
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",8.7,0,4.7,Everyone,ART_AND_DESIGN
3,Sketch - Draw & Paint,25,0,4.5,Teen,ART_AND_DESIGN
4,Pixel Draw - Number Art Coloring Book,2.8,0,4.3,Everyone,ART_AND_DESIGN
5,Paper flowers instructions,5.6,0,4.4,Everyone,ART_AND_DESIGN
6,Smoke Effect Photo Maker - Smoke Editor,19,0,3.8,Everyone,ART_AND_DESIGN
7,Infinite Painter,29,0,4.1,Everyone,ART_AND_DESIGN
8,Garden Coloring Book,33,0,4.4,Everyone,ART_AND_DESIGN
9,Kids Paint Free - Drawing Fun,3.1,0,4.7,Everyone,ART_AND_DESIGN
