# Analysis with multiple columns

In [2]:
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer

In [3]:
df = pd.read_csv('Datasets/googleplaystore.csv')

impute = SimpleImputer(missing_values = np.nan , strategy = 'mean')
impute.fit(df.iloc[ : , 2:3 ].values)
df.iloc[ : , 2:3 ] = impute.transform(df.iloc[ : , 2:3 ].values)

df = df.dropna()

In [4]:
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [5]:
df.isnull().sum()

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              0
Price             0
Content Rating    0
Genres            0
Last Updated      0
Current Ver       0
Android Ver       0
dtype: int64

## Q1. How many free apps are there in ART_AND_DESIGN?

In [6]:
df1 = df.values # converts into numpyarray

In [7]:
# main code to solve question. 
c = 0

for i in df1:
    if(i[1] == 'ART_AND_DESIGN' and i[6] == 'Free'): #choose column
        c += 1
        
print("There are",c,'free apps from ART_AND_DESIGN')

There are 61 free apps from ART_AND_DESIGN


## Q2. How many apps are there in ART_AND_DESIGN with rating more then 4.5?

In [8]:
c = 0
for i in df1:
    if(i[1] == 'ART_AND_DESIGN' and i[2] > 4.5): #choose column
        c += 1
print(c)

22


## Q3. How many apps are there in FAMILY with rating more then 4.5 and Free?

In [9]:
c = 0
for i in df1:
    if(i[1] == 'ART_AND_DESIGN' and i[2] > 4.5 and i[6]== 'Free'): #choose column
        c += 1
print(c)

19


## Q4. List all the free apps with rating more then 4.5 and category is FAMILY?

In [10]:
print("*"*100)
print("        These are the free apps where Category is FAMILY with rating more than 4.5.")
print("*"*100)

for i in df1:
    if(i[1] == 'FAMILY' and i[2] > 4.5 and i[6] == 'Free'):
        print(i[0])
print("-"*100)

****************************************************************************************************
        These are the free apps where Category is FAMILY with rating more than 4.5.
****************************************************************************************************
Super ABC! Learning games for kids! Preschool apps
Candy Pop Story
Dog Run - Pet Dog Simulator
Puzzle Kids - Animals Shapes and Jigsaw Puzzles
No. Color - Color by Number, Number Coloring
Garden Fruit Legend
Piano Kids - Music & Songs
Duolingo: Learn Languages Free
Super ABC! Learning games for kids! Preschool apps
Drawing for Kids Learning Games for Toddlers age 3
Baby ABC in box! Kids alphabet games for toddlers!
Henry Danger Crime Warp
Fruit Cube Blast
Toddler Kids Puzzles PUZZINGO
Fuzzy Seasons: Animal Forest
Dog Run - Pet Dog Simulator
My Oasis - Calming and Relaxing Idle Clicker Game
Ninja Dash - Ronin Jump RPG
Monster High™ Minis Mania
Monica Toy TV
Sworkit Kids - Fitness Meets Fun
Crazy Colors: Bu

# Analysis Using Conditions

## Q1. How many free apps are there in ART_AND_DESIGN?

In [11]:
df = pd.DataFrame(df1, columns=['App','Category','Rating','Reviews','Size','Installs','Type','Price','Content Rating','Genres','Last Updated','Current Ver','Android Ver'])


In [12]:
df_pr = df[df['Category'] == 'ART_AND_DESIGN']
print(len(df_pr[df_pr['Type'] == 'Free'])) # display only the rows which have art and design

61


# Q2. How many apps are there in ART_AND_DESIGN with rating more then 4.5?

In [13]:
df_pr = df[df['Category'] == 'ART_AND_DESIGN']
print(len(df_pr[df_pr['Rating'] > 4.5]))

22


# Q3. How many apps are there in FAMILY with rating more then 4.5 and Free?

In [14]:
df_pr = df[df['Category'] == 'FAMILY']
df_pr = df_pr[df_pr['Rating'] > 4.5]

len(df_pr[df_pr['Type'] == 'Free'])

314

# Q4. List all the free apps with rating more then 4.5 and category is FAMILY?

In [15]:
df_pr = df[df['Category'] == 'FAMILY']
df_pr = df_pr[df_pr['Rating'] > 4.5]

result = df_pr[df_pr['Type'] == 'Free'].values

for i in result:
    print(i[0])

Super ABC! Learning games for kids! Preschool apps
Candy Pop Story
Dog Run - Pet Dog Simulator
Puzzle Kids - Animals Shapes and Jigsaw Puzzles
No. Color - Color by Number, Number Coloring
Garden Fruit Legend
Piano Kids - Music & Songs
Duolingo: Learn Languages Free
Super ABC! Learning games for kids! Preschool apps
Drawing for Kids Learning Games for Toddlers age 3
Baby ABC in box! Kids alphabet games for toddlers!
Henry Danger Crime Warp
Fruit Cube Blast
Toddler Kids Puzzles PUZZINGO
Fuzzy Seasons: Animal Forest
Dog Run - Pet Dog Simulator
My Oasis - Calming and Relaxing Idle Clicker Game
Ninja Dash - Ronin Jump RPG
Monster High™ Minis Mania
Monica Toy TV
Sworkit Kids - Fitness Meets Fun
Crazy Colors: Bubbles Matching
Pino chess
Duolingo: Learn Languages Free
Animal Jam - Play Wild!
Dog Sim Online: Raise a Family
Color by Number - Draw Sandbox Pixel Art
Build a Bridge!
Find a Way: Addictive Puzzle
CompTIA Exam Training
Gymnastics Superstar - Spin your way to gold!
Hungry Hearts Diner:

# GroupBy in Pandas

### Q1. Name the apps that are in ART_AND_DESIGN with rating more then 4.5 in descending order WRT Ratings?

In [17]:
df_pr = df[df['Category'] == 'ART_AND_DESIGN']
df_pr = df_pr[df_pr['Rating'] > 4.5]

df_pr.sort_values( by = 'Rating', ascending = True )Q2. Which category is having maximum average rating Descending order?

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
13,Mandala Coloring Book,ART_AND_DESIGN,4.6,4326,21M,"100,000+",Free,0,Everyone,Art & Design,"June 26, 2018",1.0.4,4.4 and up
18,ibis Paint X,ART_AND_DESIGN,4.6,224399,31M,"10,000,000+",Free,0,Everyone,Art & Design,"July 30, 2018",5.5.4,4.1 and up
45,Install images with music to make video withou...,ART_AND_DESIGN,4.6,1070,26M,"100,000+",Free,0,Everyone,Art & Design,"November 14, 2017",1.6,4.1 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
4760,X Launcher Prime: With OS Style Theme & No Ads,ART_AND_DESIGN,4.7,149,3.5M,"1,000+",Paid,$1.99,Everyone,Art & Design,"July 30, 2018",1.2.2,4.1 and up
4751,"X Launcher Pro: PhoneX Theme, OS11 Control Center",ART_AND_DESIGN,4.7,801,3.5M,"5,000+",Paid,$1.99,Everyone,Art & Design,"July 30, 2018",2.1.2,4.1 and up
4745,X Launcher: With OS11 Style Theme & Control Ce...,ART_AND_DESIGN,4.7,5754,4.4M,"100,000+",Free,0,Everyone,Art & Design,"July 30, 2018",2.1.2,4.1 and up
44,"Canva: Poster, banner, card maker & graphic de...",ART_AND_DESIGN,4.7,174531,24M,"10,000,000+",Free,0,Everyone,Art & Design,"July 31, 2018",1.6.1,4.1 and up
37,PIP Camera - PIP Collage Maker,ART_AND_DESIGN,4.7,158,11M,"10,000+",Free,0,Everyone,Art & Design,"November 29, 2017",1.3,4.0.3 and up
35,UNICORN - Color By Number & Pixel Art Coloring,ART_AND_DESIGN,4.7,8145,24M,"500,000+",Free,0,Everyone,Art & Design;Creativity,"August 2, 2018",1.0.9,4.4 and up


### Q2. Which category is having maximum average rating Descending order?

In [37]:
# df['Rating'] = pd.to_numeric(df['Rating'] , errors='coerce') # converted Reting column from string to numeric
# df_clean = df.dropna(subset = ['Rating']) # dropped the NaN values 
# result = df_clean.groupby('Category')['Rating'].mean().sort_values(ascending = False)

# No of apps in each category
result = df_clean.groupby('Category')['Type'].count().sort_values(ascending = False)

In [38]:
result

Category
FAMILY                 1968
GAME                   1144
TOOLS                   841
MEDICAL                 463
BUSINESS                460
PRODUCTIVITY            424
PERSONALIZATION         390
COMMUNICATION           387
SPORTS                  384
LIFESTYLE               382
FINANCE                 366
HEALTH_AND_FITNESS      341
PHOTOGRAPHY             335
SOCIAL                  295
NEWS_AND_MAGAZINES      283
SHOPPING                260
TRAVEL_AND_LOCAL        258
DATING                  234
BOOKS_AND_REFERENCE     230
VIDEO_PLAYERS           175
EDUCATION               156
ENTERTAINMENT           149
MAPS_AND_NAVIGATION     137
FOOD_AND_DRINK          127
HOUSE_AND_HOME           88
AUTO_AND_VEHICLES        85
LIBRARIES_AND_DEMO       84
WEATHER                  82
EVENTS                   64
ART_AND_DESIGN           64
PARENTING                60
COMICS                   60
BEAUTY                   53
Name: Type, dtype: int64

## Q3. How many paid apps are there in each category in Descending Order?

In [39]:
df_pr = df[df['Type'] == 'Paid']

df_pr.groupby('Category').count()['Type'].sort_values(ascending = False)

Category
FAMILY                 190
MEDICAL                109
GAME                    83
PERSONALIZATION         82
TOOLS                   77
BOOKS_AND_REFERENCE     28
PRODUCTIVITY            28
COMMUNICATION           27
SPORTS                  24
PHOTOGRAPHY             22
LIFESTYLE               19
FINANCE                 17
HEALTH_AND_FITNESS      16
BUSINESS                14
TRAVEL_AND_LOCAL        12
WEATHER                  8
DATING                   7
MAPS_AND_NAVIGATION      5
EDUCATION                4
VIDEO_PLAYERS            4
AUTO_AND_VEHICLES        3
SOCIAL                   3
ART_AND_DESIGN           3
NEWS_AND_MAGAZINES       2
PARENTING                2
FOOD_AND_DRINK           2
SHOPPING                 2
ENTERTAINMENT            2
LIBRARIES_AND_DEMO       1
EVENTS                   1
Name: Type, dtype: int64