# Google Play App Store Analysis

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw Google Play Store export into a DataFrame.
csv_path = "googleplaystore.csv"
df = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [4]:
# Show the column labels of the loaded frame.
df.columns

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

# Apps with the largest number of reviews
    * Which application has the largest number of reviews?
    * What are the top 10 apps by number of reviews?

In [5]:
# Remove the record labelled 10472 (presumably a malformed row -- TODO confirm
# against the raw CSV before relying on this).
#
# The frame comes straight from read_csv with its default integer index, so
# label 10472 is the same row the original positional `df.index[10472]`
# selected on a first run. Dropping by label with errors="ignore" makes the
# cell idempotent: the positional, in-place version deleted a *different* row
# every time the cell was re-executed.
df = df.drop(index=10472, errors="ignore")
len(df)

10840

In [6]:
# Cast the review counts to 64-bit integers so they sort numerically.
df = df.astype({"Reviews": "int64"})

In [7]:
# Print the per-column dtypes and non-null counts to check the Reviews cast
# and see which columns still contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10840 entries, 0 to 10840
Data columns (total 13 columns):
App               10840 non-null object
Category          10840 non-null object
Rating            9366 non-null float64
Reviews           10840 non-null int64
Size              10840 non-null object
Installs          10840 non-null object
Type              10839 non-null object
Price             10840 non-null object
Content Rating    10840 non-null object
Genres            10840 non-null object
Last Updated      10840 non-null object
Current Ver       10832 non-null object
Android Ver       10838 non-null object
dtypes: float64(1), int64(1), object(11)
memory usage: 1.2+ MB


In [8]:
# Rank apps by review count, keeping only the highest-ranked row per app name
# (the same app can appear on several rows), then show the top ten.
columns = ["App", "Reviews"]
most_review_list = (
    df.loc[:, columns]
    .sort_values(by="Reviews", ascending=False)
    .drop_duplicates(subset="App", keep="first")
)
most_review_list.head(10)

Unnamed: 0,App,Reviews
2544,Facebook,78158306
381,WhatsApp Messenger,69119316
2604,Instagram,66577446
382,Messenger – Text and Video Chat for Free,56646578
1879,Clash of Clans,44893888
4005,Clean Master- Space Cleaner & Antivirus,42916526
1917,Subway Surfers,27725352
3665,YouTube,25655305
7536,"Security Master - Antivirus, VPN, AppLock, Boo...",24900999
1878,Clash Royale,23136735


# Which category has the largest number of installs?

In [9]:
# Strip the trailing "+" and the thousands separators so that values such as
# "10,000+" become the digit string "10000".
# regex=False is explicit on purpose: "+" is a regex metacharacter and the
# default of `regex` differs between pandas versions, so the literal match
# must not depend on the installed version.
df["Installs"] = (
    df["Installs"]
    .str.replace("+", "", regex=False)
    .str.replace(",", "", regex=False)
)

In [10]:
# Convert the cleaned install counts from digit strings to 64-bit integers.
df = df.astype({"Installs": "int64"})

In [11]:
# Total installs per category.
installs_by_category = df.groupby("Category")["Installs"]
most_pop_cat = installs_by_category.sum()

In [12]:
# Display the categories ranked by total installs, largest first.
# The sorted Series is only shown here; most_pop_cat itself stays unsorted.
most_pop_cat.sort_values(ascending=False)

Category
GAME                   35086024415
COMMUNICATION          32647276251
PRODUCTIVITY           14176091369
SOCIAL                 14069867902
TOOLS                  11452771915
FAMILY                 10258263505
PHOTOGRAPHY            10088247655
NEWS_AND_MAGAZINES      7496317760
TRAVEL_AND_LOCAL        6868887146
VIDEO_PLAYERS           6222002720
SHOPPING                3247848785
ENTERTAINMENT           2869160000
PERSONALIZATION         2325494782
BOOKS_AND_REFERENCE     1921469576
SPORTS                  1751174498
HEALTH_AND_FITNESS      1583072512
BUSINESS                1001914865
FINANCE                  876648734
EDUCATION                871452000
MAPS_AND_NAVIGATION      724281890
LIFESTYLE                537643539
WEATHER                  426100520
FOOD_AND_DRINK           273898751
DATING                   264310807
HOUSE_AND_HOME           168712461
ART_AND_DESIGN           124338100
LIBRARIES_AND_DEMO        62995910
COMICS                    56086150
MEDICAL    

The most popular category is "GAME", with a total of 35,086,024,415 installs.

In [13]:
# Count the missing values in each column.
df.isna().sum()

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       0
Genres               0
Last Updated         0
Current Ver          8
Android Ver          2
dtype: int64

# Separating Non-English Apps

In [14]:
def is_english(string: str, max_non_ascii: int = 3) -> bool:
    """Heuristically decide whether a name is written in English.

    The name is treated as English when it contains at most
    ``max_non_ascii`` characters outside the ASCII range (code point > 127).
    Allowing a few such characters keeps names that contain the odd symbol
    or emoji (e.g. a trademark sign) from being rejected.

    Parameters
    ----------
    string : str
        The text to classify.
    max_non_ascii : int, default 3
        Largest number of non-ASCII characters still accepted as English.

    Returns
    -------
    bool
        ``True`` if the number of non-ASCII characters does not exceed
        ``max_non_ascii``, otherwise ``False``. An empty string is ``True``.
    """
    non_ascii = sum(1 for character in string if ord(character) > 127)
    return non_ascii <= max_non_ascii
    
# Split the app names into two lists according to is_english(), preserving
# the order in which they appear in the frame.
english_apps, non_english_apps = [], []

for app_name in df["App"]:
    bucket = english_apps if is_english(app_name) else non_english_apps
    bucket.append(app_name)

english_apps

['Photo Editor & Candy Camera & Grid & ScrapBook',
 'Coloring book moana',
 'U Launcher Lite – FREE Live Cool Themes, Hide Apps',
 'Sketch - Draw & Paint',
 'Pixel Draw - Number Art Coloring Book',
 'Paper flowers instructions',
 'Smoke Effect Photo Maker - Smoke Editor',
 'Infinite Painter',
 'Garden Coloring Book',
 'Kids Paint Free - Drawing Fun',
 'Text on Photo - Fonteee',
 'Name Art Photo Editor - Focus n Filters',
 'Tattoo Name On My Photo Editor',
 'Mandala Coloring Book',
 '3D Color Pixel by Number - Sandbox Art Coloring',
 'Learn To Draw Kawaii Characters',
 'Photo Designer - Write your name with shapes',
 '350 Diy Room Decor Ideas',
 'FlipaClip - Cartoon animation',
 'ibis Paint X',
 'Logo Maker - Small Business',
 "Boys Photo Editor - Six Pack & Men's Suit",
 'Superheroes Wallpapers | 4K Backgrounds',
 'Mcqueen Coloring pages',
 'HD Mickey Minnie Wallpapers',
 'Harley Quinn wallpapers HD',
 'Colorfit - Drawing & Coloring',
 'Animated Photo Editor',
 'Pencil Sketch Drawing',

In [15]:
# Display the app names that is_english() rejected in the previous cell.
non_english_apps

['Flame - درب عقلك يوميا',
 'သိင်္ Astrology - Min Thein Kha BayDin',
 'РИА Новости',
 'صور حرف H',
 'L.POINT - 엘포인트 [ 포인트, 멤버십, 적립, 사용, 모바일 카드, 쿠폰, 롯데]',
 'RMEduS - 음성인식을 활용한 R 프로그래밍 실습 시스템',
 'AJ렌터카 법인 카셰어링',
 'Al Quran Free - القرآن (Islam)',
 '中国語 AQリスニング',
 '日本AV历史',
 'Ay Yıldız Duvar Kağıtları',
 'বাংলা টিভি প্রো BD Bangla TV',
 'Cъновник BG',
 'CSCS BG (в български)',
 '뽕티비 - 개인방송, 인터넷방송, BJ방송',
 'BL 女性向け恋愛ゲーム◆俺プリクロス',
 'SecondSecret ‐「恋を読む」BLノベルゲーム‐',
 'BL 女性向け恋愛ゲーム◆ごくメン',
 'あなカレ【BL】無料ゲーム',
 '감성학원 BL 첫사랑',
 'BQ-መጽሐፍ ቅዱሳዊ ጥያቄዎች',
 'BS Calendar / Patro / पात्रो',
 'Vip视频免费看-BT磁力搜索',
 'Билеты ПДД CD 2019 PRO',
 'Offline Jízdní řády CG Transit',
 'Bonjour 2017 Abidjan CI ❤❤❤❤❤',
 'CK 初一 十五',
 'الفاتحون Conquerors',
 'DG ग्राम / Digital Gram Panchayat',
 'DM הפקות',
 'DW فارسی By dw-arab.com',
 'لعبة تقدر تربح DZ',
 'বাংলাflix',
 'RPG ブレイジング ソウルズ アクセレイト',
 '英漢字典 EC Dictionary',
 'ECナビ×シュフー',
 'أحداث وحقائق | خبر عاجل في اخبار العالم',
 'EG SIM CARD (EGSIMCARD, 이지심카드)',
 'パーリーゲイツ公式通販

In [18]:
# Remove the currency symbol and convert prices to floats.
# regex=False is required for correctness: as a regular expression "$" is the
# end-of-string anchor, so a regex replace would leave the symbol in place and
# the float conversion would fail on values such as "$4.99".
df["Price"] = df["Price"].str.replace("$", "", regex=False).astype(float)

In [20]:
# Confirm that the Price column is now numeric.
df["Price"].dtype

dtype('float64')