# Profitable mobile apps analysis

### Opening datasets from a different folder and defining the open_dataset() function

In [2]:
import os
from csv import reader

# '__file__' is a plain string here, not the module attribute, so it is
# resolved against the current working directory; the head of that path is
# therefore the directory the notebook is being run from.
fileDir = os.path.split(os.path.realpath('__file__'))[0]
# The datasets are looked up in '00-datasets', one level above that directory.
data_dir = os.path.abspath(
    os.path.realpath(os.path.join(fileDir, '../00-datasets'))
)

def open_dataset(file_name):
    """Read a CSV file and return its contents as a list of rows.

    Parameters
    ----------
    file_name : str
        Path to the CSV file; it is decoded as UTF-8.

    Returns
    -------
    list of list of str
        Every row of the file, in order, each as a list of field strings.
        No row is treated specially, so a header line (if any) is element 0.
    """
    # `with` guarantees the handle is closed even if parsing raises.
    # newline='' hands line-ending handling to the csv module, which is what
    # its documentation requires for quoted fields containing line breaks.
    with open(file_name, encoding="utf8", newline='') as opened_file:
        return list(reader(opened_file))

### Open both datasets and save in 'google_store' and 'apple_store'

In [3]:
# Load each store's CSV into a list of rows; open_dataset() returns every
# line of the file, so element 0 of each list is the header row.
google_store = open_dataset('{}/googleplaystore.csv'.format(data_dir))
apple_store = open_dataset('{}/AppleStore.csv'.format(data_dir))

### explore_data() function for displaying datasets in a readable format

In [7]:
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a dataset, optionally followed by its dimensions.

    Parameters
    ----------
    dataset : list of list
        Rows to inspect. The function does not skip a header; pass a slice
        without it if the header should not be counted or printed.
    start, end : int
        Slice bounds applied as ``dataset[start:end]``.
    rows_and_columns : bool, optional
        When True, also print the total number of rows in ``dataset`` and the
        number of columns, taken from the length of its first row.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    for row in dataset[start:end]:
        # Each row is followed by two blank lines, exactly as the pair
        # print(row); print('\n') would produce.
        print(row, end='\n\n\n')

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of failing with IndexError on dataset[0].
        print('Number of columns:', len(dataset[0]) if len(dataset) > 0 else 0)

In [13]:
# The first row of each dataset is its header; keep it under its own name.
google_store_header, apple_store_header = google_store[0], apple_store[0]

# Print the same preview for both stores: a title, the header, then the first
# three data rows (header excluded) followed by the row and column counts.
for title, header, rows in (
    ('Google Store header and first 3 rows:\n', google_store_header, google_store[1:]),
    ('\nApple Store header and first 3 rows:\n', apple_store_header, apple_store[1:]),
):
    print(title)
    print('Header:', header, '\n')
    explore_data(rows, 0, 3, True)

Google Store header and first 3 rows:

Header: ['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] 

['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


Number of rows: 10841
Number of columns: 13

Apple Store header and first 3 rows:

Header: ['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rati

## Descriptions based on info from sources:
- https://www.kaggle.com/lava18/google-play-store-apps
- https://www.kaggle.com/ramamet4/app-store-apple-data-set-10k-apps


### For google_store cols:

| Column name | Description |
|:-:|:-:|
| 'App'      | Application name    |
| 'Category' | Category the app belongs to |
| 'Rating' | Overall user rating of the app (as when scraped)|
| 'Reviews'|Number of user reviews for the app (as when scraped)|
| 'Size'| Size of the app (as when scraped)|
| 'Installs'| Number of user downloads/installs for the app (as when scraped)|
| 'Type'| Paid or Free|
| 'Price'| Price of the app (as when scraped)|
| 'Content Rating'| Age group the app is targeted at - Children / Mature 21+ / Adult |
| 'Genres'| An app can belong to multiple genres (apart from its main category). For example, a musical family game will belong to the Music, Game and Family genres. |
| 'Last Updated'| Date when the app was last updated on Play Store (as when scraped)|
| 'Current Ver' |Current version of the app available on Play Store (as when scraped)|
| 'Android Ver'| Min required Android version (as when scraped)|

### For apple_store cols:


| Column name | Description |
|:-:|:-:|
| 'id'      | App ID 	| 
| 'track_name'   | App Name        |
| 'size_bytes' | Size (bytes)|
|"currency" | Currency Type|
|"price" | Price amount|
| "rating_count_tot" | User Rating counts (for all versions)|
| "rating_count_ver" | User Rating counts (for current version)|
| "user_rating" | Average User Rating value (for all versions) |
| "user_rating_ver" | Average User Rating value (for current version) |
| "ver" | Latest version code|
| "cont_rating" | Content Rating |
| "prime_genre" | Primary Genre |
| "sup_devices.num" | Number of supporting devices |
| "ipadSc_urls.num" | Number of screenshots shown for display |
| "lang.num" | Number of supported languages |
| "vpp_lic" | Vpp Device Based Licensing Enabled |


### Deleting the row at index 10473 from google_store because its 'Category' value is missing

In [16]:
# Show the header next to the row at index 10473 so the two can be compared
# field by field before the row is removed.
print(
    google_store_header,
    '\nmissing "Category"\n',
    google_store[10473],
    sep=' ',
)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] 
missing "Category"
 ['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']


In [19]:
# Remove the malformed row only while it is still there. It has fewer fields
# than the header, so the length check makes this cell safe to re-run: once
# the row is gone, the valid row that moved into index 10473 is left alone.
if len(google_store[10473]) != len(google_store_header):
    del google_store[10473]