In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [8]:
df=pd.read_csv(r"D:\DataCleaning\googleplaystore.csv")

# Cleaning

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


## Which of the following column(s) has/have null values?

In [12]:
df.isnull().sum()

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       1
Genres               0
Last Updated         0
Current Ver          8
Android Ver          3
dtype: int64

## Clean the Rating column and the other columns containing null values

In [14]:
df.loc[df['Rating']>5]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,19.0,3.0M,"1,000+",Free,0,Everyone,,"February 11, 2018",1.0.19,4.0 and up,


In [16]:
df['Rating'].loc[df['Rating']>5]=np.nan

In [18]:
df['Rating'].isna().sum()

1475

In [20]:
rating_mean=df['Rating'].mean()
rating_mean

4.191757420456972

In [22]:
df['Rating'].fillna(rating_mean,inplace=True)

In [24]:
df['Rating'].isna().sum()

0

In [26]:
df.isna().sum()

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              1
Price             0
Content Rating    1
Genres            0
Last Updated      0
Current Ver       8
Android Ver       3
dtype: int64

## Clean the column Reviews and make it numeric

In [28]:
df['Reviews_check']=pd.to_numeric(df['Reviews'],errors='coerce')

In [30]:
df.loc[df['Reviews_check'].isna()]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Reviews_check
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,4.191757,3.0M,"1,000+",Free,0,Everyone,,"February 11, 2018",1.0.19,4.0 and up,,


In [32]:
df['Reviews'].loc[df['Reviews'].str.contains('M')]=(pd.to_numeric(df['Reviews'].str.replace('M',''))*1000000).astype(str)

In [34]:
df.loc[10472,'Reviews']

'3000000.0'

In [36]:
df['Reviews']=pd.to_numeric(df['Reviews'])

In [38]:
df=df.drop(columns='Reviews_check')

## How many duplicated apps are there?

In [40]:
df['App'].duplicated(keep=False).sum()

1979

## Drop duplicated apps keeping only the ones with the greatest number of reviews

In [42]:
df.loc[df['App'].duplicated(keep=False)].sort_values(by=['App','Reviews'],inplace=True)

In [44]:
df.loc[df['App'].duplicated(keep=False)].sort_values(by=['App','Reviews'])

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
1393,10 Best Foods for You,HEALTH_AND_FITNESS,4.0,2490.0,3.8M,"500,000+",Free,0,Everyone 10+,Health & Fitness,"February 17, 2017",1.9,2.3.3 and up
1407,10 Best Foods for You,HEALTH_AND_FITNESS,4.0,2490.0,3.8M,"500,000+",Free,0,Everyone 10+,Health & Fitness,"February 17, 2017",1.9,2.3.3 and up
2322,1800 Contacts - Lens Store,MEDICAL,4.7,23160.0,26M,"1,000,000+",Free,0,Everyone,Medical,"July 27, 2018",7.4.1,5.0 and up
2543,1800 Contacts - Lens Store,MEDICAL,4.7,23160.0,26M,"1,000,000+",Free,0,Everyone,Medical,"July 27, 2018",7.4.1,5.0 and up
2256,2017 EMRA Antibiotic Guide,MEDICAL,4.4,12.0,3.8M,"1,000+",Paid,$16.99,Everyone,Medical,"January 27, 2017",1.0.5,4.0.3 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3103,trivago: Hotels & Travel,TRAVEL_AND_LOCAL,4.2,219848.0,Varies with device,"50,000,000+",Free,0,Everyone,Travel & Local,"August 2, 2018",Varies with device,Varies with device
3118,trivago: Hotels & Travel,TRAVEL_AND_LOCAL,4.2,219848.0,Varies with device,"50,000,000+",Free,0,Everyone,Travel & Local,"August 2, 2018",Varies with device,Varies with device
3202,trivago: Hotels & Travel,TRAVEL_AND_LOCAL,4.2,219848.0,Varies with device,"50,000,000+",Free,0,Everyone,Travel & Local,"August 2, 2018",Varies with device,Varies with device
8291,wetter.com - Weather and Radar,WEATHER,4.2,189310.0,38M,"10,000,000+",Free,0,Everyone,Weather,"August 6, 2018",Varies with device,Varies with device


In [46]:
df.drop_duplicates(subset='App',keep='last',inplace=True)

In [48]:
df.loc[df['App'].duplicated(keep=False)].sort_values(by=['App','Reviews'])

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver


## Format the Category column

In [50]:
df['Category'].value_counts()

Category
FAMILY                 1906
GAME                    926
TOOLS                   829
BUSINESS                419
MEDICAL                 396
PERSONALIZATION         376
PRODUCTIVITY            374
LIFESTYLE               369
FINANCE                 345
SPORTS                  327
COMMUNICATION           316
HEALTH_AND_FITNESS      287
PHOTOGRAPHY             281
NEWS_AND_MAGAZINES      254
SOCIAL                  239
BOOKS_AND_REFERENCE     222
TRAVEL_AND_LOCAL        219
SHOPPING                202
DATING                  170
VIDEO_PLAYERS           163
MAPS_AND_NAVIGATION     131
FOOD_AND_DRINK          112
EDUCATION               102
AUTO_AND_VEHICLES        85
LIBRARIES_AND_DEMO       84
WEATHER                  79
ENTERTAINMENT            79
HOUSE_AND_HOME           73
EVENTS                   64
ART_AND_DESIGN           61
PARENTING                60
COMICS                   56
BEAUTY                   53
1.9                       1
Name: count, dtype: int64

In [52]:
df['Category']=df['Category'].str.replace('_',' ')

In [54]:
df['Category']=df['Category'].str.capitalize()

## Clean and convert the Installs column to numeric type

In [56]:
df['Installs']

0            10,000+
2         5,000,000+
3        50,000,000+
4           100,000+
5            50,000+
            ...     
10836         5,000+
10837           100+
10838         1,000+
10839         1,000+
10840    10,000,000+
Name: Installs, Length: 9660, dtype: object

In [348]:
#df_copy=df.copy()

In [350]:
df_copy

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,Art and design,4.100000,159.0,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",Art and design,4.700000,87510.0,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,Art and design,4.500000,215644.0,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,Art and design,4.300000,967.0,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
5,Paper flowers instructions,Art and design,4.400000,167.0,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,Family,4.500000,38.0,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,Family,5.000000,4.0,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,Medical,4.191757,3.0,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,Books and reference,4.500000,114.0,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [58]:
df['Installs']=df['Installs'].str.replace('+','').str.replace(',','')

In [60]:
df['Installs']=df['Installs'].str.replace('Free','')

In [62]:
df['Installs']=pd.to_numeric(df['Installs'])

## Clean and convert the Size column to numeric (representing bytes)

In [64]:
df['Size']=df['Size'].str.replace('Varies with device','')

In [66]:
df['Size'].loc[df['Size'].str.contains('M')]=(pd.to_numeric(df['Size'].loc[df['Size'].str.contains('M')].str.replace('M',''))*(1024*1024)).astype(str)

In [68]:
df['Size'].loc[df['Size'].str.contains('k')]=(pd.to_numeric(df['Size'].loc[df['Size'].str.contains('k')].str.replace('k',''))*1024).astype(str)

In [70]:
df['Size']=df['Size'].str.replace('+','').str.replace(',','')

In [72]:
df['Size']=pd.to_numeric(df['Size'])

In [74]:
df['Size'].fillna(0,inplace=True)

## Clean and convert the Price column to numeric

In [76]:
df['Price']=pd.to_numeric(df['Price'].str.replace('$','').str.replace('Everyone','0'))

## Make a column (paid/free) based on app's price

In [78]:
df['Pay Status']=np.where(df['Price']>0,'Paid','Free')

In [509]:
df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
0,Photo Editor & Candy Camera & Grid & ScrapBook,Art and design,4.100000,159.0,19922944.0,10000.0,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,Free
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",Art and design,4.700000,87510.0,9122611.2,5000000.0,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,Free
3,Sketch - Draw & Paint,Art and design,4.500000,215644.0,26214400.0,50000000.0,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up,Free
4,Pixel Draw - Number Art Coloring Book,Art and design,4.300000,967.0,2936012.8,100000.0,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,Free
5,Paper flowers instructions,Art and design,4.400000,167.0,5872025.6,50000.0,Free,0.0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,Free
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,Family,4.500000,38.0,55574528.0,5000.0,Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up,Free
10837,Fr. Mike Schmitz Audio Teachings,Family,5.000000,4.0,3774873.6,100.0,Free,0.0,Everyone,Education,"July 6, 2018",1.0,4.1 and up,Free
10838,Parkinson Exercices FR,Medical,4.191757,3.0,9961472.0,1000.0,Free,0.0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up,Free
10839,The SCP Foundation DB fr nn5n,Books and reference,4.500000,114.0,0.0,1000.0,Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device,Free


In [80]:
df[df['Price']>0]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
290,TurboScan: scan documents and receipts in PDF,Business,4.700000,11442.0,7130316.8,100000.0,Paid,4.99,Everyone,Business,"March 25, 2018",1.5.2,4.0 and up,Paid
291,Tiny Scanner Pro: PDF Doc Scan,Business,4.800000,10295.0,40894464.0,100000.0,Paid,4.99,Everyone,Business,"April 11, 2017",3.4.6,3.0 and up,Paid
427,Puffin Browser Pro,Communication,4.000000,18247.0,0.0,100000.0,Paid,3.99,Everyone,Communication,"July 5, 2018",7.5.3.20547,4.1 and up,Paid
478,Truth or Dare Pro,Dating,4.191757,0.0,20971520.0,50.0,Paid,1.49,Teen,Dating,"September 1, 2017",1.0,4.0 and up,Paid
479,"Private Dating, Hide App- Blue for PrivacyHider",Dating,4.191757,0.0,18432.0,100.0,Paid,2.99,Everyone,Dating,"July 25, 2017",1.0.1,4.0 and up,Paid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10735,FP VoiceBot,Family,4.191757,17.0,160768.0,100.0,Paid,0.99,Mature 17+,Entertainment,"November 25, 2015",1.2,2.1 and up,Paid
10760,Fast Tract Diet,Health and fitness,4.400000,35.0,2516582.4,1000.0,Paid,7.99,Everyone,Health & Fitness,"August 8, 2018",1.9.3,4.2 and up,Paid
10782,Trine 2: Complete Story,Game,3.800000,252.0,11534336.0,10000.0,Paid,16.99,Teen,Action,"February 27, 2015",2.22,5.0 and up,Paid
10785,"sugar, sugar",Family,4.200000,1405.0,9961472.0,10000.0,Paid,1.20,Everyone,Puzzle,"June 5, 2018",2.7,2.3 and up,Paid


# Analysis

## Which app has the most reviews?

In [82]:
df.sort_values(by='Reviews',ascending=False)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
3943,Facebook,Social,4.100000,78128208.0,0.0,1.000000e+09,Free,0.0,Teen,Social,"August 3, 2018",Varies with device,Varies with device,Free
3904,WhatsApp Messenger,Communication,4.400000,69109672.0,0.0,1.000000e+09,Free,0.0,Everyone,Communication,"August 3, 2018",Varies with device,Varies with device,Free
3909,Instagram,Social,4.500000,66509917.0,0.0,1.000000e+09,Free,0.0,Teen,Social,"July 31, 2018",Varies with device,Varies with device,Free
4104,Messenger – Text and Video Chat for Free,Communication,4.000000,56642847.0,0.0,1.000000e+09,Free,0.0,Everyone,Communication,"August 1, 2018",Varies with device,Varies with device,Free
3986,Clash of Clans,Family,4.600000,44881447.0,102760448.0,1.000000e+08,Free,0.0,Everyone 10+,Strategy,"July 15, 2018",10.322.16,4.1 and up,Free
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8985,DW,News and magazines,4.191757,0.0,5033164.8,1.000000e+04,Free,0.0,Everyone 10+,News & Magazines,"July 13, 2018",1.0.5,5.0 and up,Free
5688,Au Rubis les bijoutiers,Lifestyle,4.191757,0.0,10485760.0,1.000000e+02,Free,0.0,Everyone,Lifestyle,"July 24, 2018",1.00.33,4.1 and up,Free
8973,BlueDV AMBE,Communication,4.191757,0.0,0.0,1.000000e+03,Free,0.0,Everyone,Communication,"May 23, 2018",Varies with device,Varies with device,Free
8965,DV Portfolio,Tools,4.191757,0.0,16777216.0,5.000000e+01,Free,0.0,Everyone,Tools,"October 23, 2017",1.0,4.1 and up,Free


## What category has the highest number of apps uploaded to the store?

In [84]:
df['Category'].value_counts()

Category
Family                 1906
Game                    926
Tools                   829
Business                419
Medical                 396
Personalization         376
Productivity            374
Lifestyle               369
Finance                 345
Sports                  327
Communication           316
Health and fitness      287
Photography             281
News and magazines      254
Social                  239
Books and reference     222
Travel and local        219
Shopping                202
Dating                  170
Video players           163
Maps and navigation     131
Food and drink          112
Education               102
Auto and vehicles        85
Libraries and demo       84
Weather                  79
Entertainment            79
House and home           73
Events                   64
Art and design           61
Parenting                60
Comics                   56
Beauty                   53
1.9                       1
Name: count, dtype: int64

## To which category belongs the most expensive app?

In [86]:
df.sort_values(by='Price',ascending=False)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
4367,I'm Rich - Trump Edition,Lifestyle,3.600000,275.0,7654604.8,10000.0,Paid,400.00,Everyone,Lifestyle,"May 3, 2018",1.0.1,4.1 and up,Paid
5369,I am Rich,Finance,4.300000,180.0,3984588.8,5000.0,Paid,399.99,Everyone,Finance,"March 22, 2018",1.0,4.2 and up,Paid
5358,I am Rich!,Finance,3.800000,93.0,23068672.0,1000.0,Paid,399.99,Everyone,Finance,"December 11, 2017",1.0,4.1 and up,Paid
9934,I'm Rich/Eu sou Rico/أنا غني/我很有錢,Lifestyle,4.191757,0.0,41943040.0,0.0,Paid,399.99,Everyone,Lifestyle,"December 1, 2017",MONEY,4.1 and up,Paid
5364,I am rich (Most expensive app),Finance,4.100000,129.0,2831155.2,1000.0,Paid,399.99,Teen,Finance,"December 6, 2017",2,4.0.3 and up,Paid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4480,G-Pix [Android P] EMUI 8/5 THEME,Personalization,4.100000,720.0,10485760.0,100000.0,Free,0.00,Everyone,Personalization,"March 13, 2018",12,4.4 and up,Free
4481,Android-P Mono Grey EMUI 8/5 Theme,Personalization,4.500000,61.0,7864320.0,10000.0,Free,0.00,Everyone,Personalization,"April 4, 2018",4,4.4 and up,Free
4482,P XPERIA Theme™ | PURPLE - Design For SONY 🎨,Personalization,4.191757,3.0,13631488.0,500.0,Free,0.00,Everyone,Personalization,"June 5, 2018",1.0.0,4.4 and up,Free
4483,Materialistic P Wallpapers,Personalization,4.800000,24.0,3774873.6,1000.0,Free,0.00,Everyone,Personalization,"August 3, 2018",2.244,5.0 and up,Free


##  What's the name of the most expensive game?

In [88]:
df.loc[df['Category']=='Game'].sort_values(by='Price',ascending=False)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
4203,The World Ends With You,Game,4.6,4108.0,13631488.0,10000.0,Paid,17.99,Everyone 10+,Arcade,"December 14, 2015",1.0.4,4.0 and up,Paid
10782,Trine 2: Complete Story,Game,3.8,252.0,11534336.0,10000.0,Paid,16.99,Teen,Action,"February 27, 2015",2.22,5.0 and up,Paid
6341,Blackjack Verite Drills,Game,4.6,17.0,4928307.2,100.0,Paid,14.00,Teen,Casino,"July 9, 2017",1.1.10,3.0 and up,Paid
1838,Star Wars ™: DIRTY,Game,4.5,38207.0,15728640.0,100000.0,Paid,9.99,Teen,Role Playing,"October 19, 2015",1.0.6,4.1 and up,Paid
6198,Backgammon NJ for Android,Game,4.4,1644.0,15728640.0,10000.0,Paid,7.99,Everyone,Board,"April 5, 2017",4.1,2.3.3 and up,Paid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5027,AE Master Moto,Game,4.0,17876.0,38797312.0,1000000.0,Free,0.00,Everyone,Racing,"December 10, 2015",1.8.6,2.3 and up,Free
5032,AE Fishing Hunter,Game,4.4,14.0,6920601.6,1000.0,Free,0.00,Everyone,Adventure,"March 15, 2016",0.0.3,3.0 and up,Free
5074,Gun Strike Shoot,Game,4.1,94761.0,17825792.0,10000000.0,Free,0.00,Teen,Action,"February 21, 2017",1.1.4,2.3 and up,Free
5085,AG Drive 3D,Game,3.8,164.0,23068672.0,10000.0,Free,0.00,Everyone,Adventure,"March 20, 2015",1.0.1,2.3.3 and up,Free


## Which is the most popular Finance App?

In [90]:
df.loc[df['Category']=='Finance'].sort_values(by='Installs',ascending=False)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
5601,Google Pay,Finance,4.200000,348132.0,0.0,100000000.0,Free,0.00,Everyone,Finance,"July 26, 2018",2.70.206190089,Varies with device,Free
1156,PayPal,Finance,4.300000,659760.0,49283072.0,50000000.0,Free,0.00,Everyone,Finance,"July 18, 2018",6.28.0,4.4 and up,Free
1173,Chase Mobile,Finance,4.600000,1374549.0,33554432.0,10000000.0,Free,0.00,Everyone,Finance,"July 23, 2018",3.52,5.0 and up,Free
1059,Itau bank,Finance,4.200000,957973.0,41943040.0,10000000.0,Free,0.00,Everyone,Finance,"July 30, 2018",6.5.7,4.2 and up,Free
1174,HDFC Bank MobileBanking,Finance,4.200000,208463.0,0.0,10000000.0,Free,0.00,Everyone,Finance,"July 26, 2018",Varies with device,Varies with device,Free
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9101,amm dz,Finance,4.191757,0.0,14680064.0,1.0,Paid,5.99,Everyone,Finance,"July 8, 2018",1.0,4.2 and up,Paid
9104,Dz kayas,Finance,4.191757,0.0,14680064.0,1.0,Paid,28.99,Everyone,Finance,"July 12, 2018",1.0,4.2 and up,Paid
10745,FP Boss,Finance,4.191757,1.0,6081740.8,1.0,Free,0.00,Everyone,Finance,"July 27, 2018",1.0.2,5.0 and up,Free
9905,Eu sou Rico,Finance,4.191757,0.0,2726297.6,0.0,Paid,30.99,Everyone,Finance,"January 9, 2018",1.0,4.0 and up,Paid


## What Teen Game has the most reviews?

In [92]:
df['Content Rating'].values

array(['Everyone', 'Everyone', 'Teen', ..., 'Everyone', 'Mature 17+',
       'Everyone'], dtype=object)

In [94]:
df.loc[(df['Content Rating']=='Teen') & (df['Category']=='Game')].sort_values(by='Reviews',ascending=False)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
3912,Asphalt 8: Airborne,Game,4.500000,8389714.0,96468992.0,100000000.0,Free,0.00,Teen,Racing,"July 4, 2018",3.7.1a,4.0.3 and up,Free
5417,Mobile Legends: Bang Bang,Game,4.400000,8219586.0,103809024.0,100000000.0,Free,0.00,Teen,Action,"July 24, 2018",1.2.97.3042,4.0.3 and up,Free
3973,Hungry Shark Evolution,Game,4.500000,6071542.0,104857600.0,100000000.0,Free,0.00,Teen,Arcade,"July 25, 2018",6.0.0,4.1 and up,Free
10327,Garena Free Fire,Game,4.500000,5534114.0,55574528.0,100000000.0,Free,0.00,Teen,Action,"August 3, 2018",1.21.0,4.0.3 and up,Free
3967,Pixel Gun 3D: Survival shooter & Battle Royale,Game,4.500000,4487182.0,57671680.0,50000000.0,Free,0.00,Teen,Action,"July 4, 2018",15.1.2,4.0.3 and up,Free
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5301,AK Blackjack,Game,4.191757,4.0,7969177.6,1000.0,Free,0.00,Teen,Card,"July 30, 2018",1.3,4.1 and up,Free
6335,BJ card game blackjack,Game,4.191757,3.0,22020096.0,500.0,Free,0.00,Teen,Card,"December 2, 2016",1.0,2.3 and up,Free
6555,Sic Bo,Game,4.191757,1.0,11534336.0,100.0,Paid,1.99,Teen,Card,"August 27, 2013",1.0.0,2.2 and up,Paid
7073,Animal Hunting: Sniper Shooting,Game,4.191757,0.0,50331648.0,50.0,Free,0.00,Teen,Action,"July 6, 2018",1.0,4.0 and up,Free


## Which is the free game with the most reviews?

In [96]:
df.loc[(df['Pay Status']=='Free')& (df['Category']=='Game')].sort_values(by='Reviews',ascending=False)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Pay Status
3896,Subway Surfers,Game,4.500000,27711703.0,79691776.0,1.000000e+09,Free,0.0,Everyone 10+,Arcade,"July 12, 2018",1.90.0,4.1 and up,Free
7937,Shadow Fight 2,Game,4.600000,10981850.0,92274688.0,1.000000e+08,Free,0.0,Everyone 10+,Action,"July 2, 2018",1.9.38,3.0 and up,Free
3908,Pokémon GO,Game,4.100000,10421284.0,89128960.0,1.000000e+08,Free,0.0,Everyone,Adventure,"July 23, 2018",0.111.3,4.4 and up,Free
1893,Yes day,Game,4.500000,10055521.0,98566144.0,1.000000e+08,Free,0.0,Everyone,Casual,"June 28, 2018",1_39_93,4.0.3 and up,Free
4017,Hill Climb Racing,Game,4.400000,8921451.0,66060288.0,1.000000e+08,Free,0.0,Everyone,Racing,"July 2, 2018",1.37.2,4.2 and up,Free
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7073,Animal Hunting: Sniper Shooting,Game,4.191757,0.0,50331648.0,5.000000e+01,Free,0.0,Teen,Action,"July 6, 2018",1.0,4.0 and up,Free
8822,Simple x3DS Emulator - BETA,Game,4.191757,0.0,0.0,5.000000e+04,Free,0.0,Everyone,Arcade,"June 13, 2018",Varies with device,Varies with device,Free
6329,Basic Strategy Training BJ 21,Game,4.191757,0.0,24117248.0,5.000000e+02,Free,0.0,Teen,Casino,"March 7, 2016",1.1,2.3 and up,Free
6842,Nedir Bu ?,Game,4.191757,0.0,34603008.0,1.000000e+01,Free,0.0,Everyone,Trivia,"March 18, 2018",3.1.6z,4.0.3 and up,Free


## How many Tb (tebibytes) were transferred (overall) for the most popular Lifestyle app?

In [98]:
app=df[df['Category']=='Lifestyle'].sort_values(by='Installs',ascending=False).iloc[0]
app

App                       Tinder
Category               Lifestyle
Rating                       4.0
Reviews                2789775.0
Size                  71303168.0
Installs             100000000.0
Type                        Free
Price                        0.0
Content Rating        Mature 17+
Genres                 Lifestyle
Last Updated      August 2, 2018
Current Ver                9.5.0
Android Ver           4.4 and up
Pay Status                  Free
Name: 4587, dtype: object

In [100]:
np.round((app['Installs'] * app['Size'])/(1024*1024*1024*1024),2)

6484.99